summaryrefslogtreecommitdiffstats
path: root/drivers/dma
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/dma')
-rw-r--r--drivers/dma/Kconfig167
-rw-r--r--drivers/dma/Makefile14
-rw-r--r--drivers/dma/at_hdmac.c1220
-rw-r--r--drivers/dma/at_hdmac_regs.h354
-rw-r--r--drivers/dma/coh901318.c1323
-rw-r--r--drivers/dma/coh901318_lli.c318
-rw-r--r--drivers/dma/coh901318_lli.h124
-rw-r--r--drivers/dma/dmaengine.c1053
-rw-r--r--drivers/dma/dmatest.c627
-rw-r--r--drivers/dma/dw_dmac.c1456
-rw-r--r--drivers/dma/dw_dmac_regs.h229
-rw-r--r--drivers/dma/fsldma.c1312
-rw-r--r--drivers/dma/fsldma.h201
-rw-r--r--drivers/dma/ioat/Makefile2
-rw-r--r--drivers/dma/ioat/dca.c684
-rw-r--r--drivers/dma/ioat/dma.c1238
-rw-r--r--drivers/dma/ioat/dma.h353
-rw-r--r--drivers/dma/ioat/dma_v2.c908
-rw-r--r--drivers/dma/ioat/dma_v2.h192
-rw-r--r--drivers/dma/ioat/dma_v3.c1281
-rw-r--r--drivers/dma/ioat/hw.h217
-rw-r--r--drivers/dma/ioat/pci.c210
-rw-r--r--drivers/dma/ioat/registers.h249
-rw-r--r--drivers/dma/iop-adma.c1772
-rw-r--r--drivers/dma/iovlock.c279
-rw-r--r--drivers/dma/ipu/Makefile1
-rw-r--r--drivers/dma/ipu/ipu_idmac.c1867
-rw-r--r--drivers/dma/ipu/ipu_intern.h176
-rw-r--r--drivers/dma/ipu/ipu_irq.c413
-rw-r--r--drivers/dma/mv_xor.c1381
-rw-r--r--drivers/dma/mv_xor.h183
-rw-r--r--drivers/dma/ppc4xx/Makefile1
-rw-r--r--drivers/dma/ppc4xx/adma.c5027
-rw-r--r--drivers/dma/ppc4xx/adma.h195
-rw-r--r--drivers/dma/ppc4xx/dma.h223
-rw-r--r--drivers/dma/ppc4xx/xor.h110
-rw-r--r--drivers/dma/shdma.c876
-rw-r--r--drivers/dma/shdma.h67
-rw-r--r--drivers/dma/txx9dmac.c1361
-rw-r--r--drivers/dma/txx9dmac.h308
40 files changed, 27972 insertions, 0 deletions
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
new file mode 100644
index 00000000000..e02d74b1e89
--- /dev/null
+++ b/drivers/dma/Kconfig
@@ -0,0 +1,167 @@
+#
+# DMA engine configuration
+#
+
+menuconfig DMADEVICES
+ bool "DMA Engine support"
+ depends on HAS_DMA
+ help
+ DMA engines can do asynchronous data transfers without
+ involving the host CPU. Currently, this framework can be
+ used to offload memory copies in the network stack and
+ RAID operations in the MD driver. This menu only presents
+ DMA Device drivers supported by the configured arch, it may
+ be empty in some cases.
+
+if DMADEVICES
+
+comment "DMA Devices"
+
+config ASYNC_TX_DISABLE_CHANNEL_SWITCH
+ bool
+
+config INTEL_IOATDMA
+ tristate "Intel I/OAT DMA support"
+ depends on PCI && X86
+ select DMA_ENGINE
+ select DCA
+ select ASYNC_TX_DISABLE_CHANNEL_SWITCH
+ select ASYNC_TX_DISABLE_PQ_VAL_DMA
+ select ASYNC_TX_DISABLE_XOR_VAL_DMA
+ help
+ Enable support for the Intel(R) I/OAT DMA engine present
+ in recent Intel Xeon chipsets.
+
+ Say Y here if you have such a chipset.
+
+ If unsure, say N.
+
+config INTEL_IOP_ADMA
+ tristate "Intel IOP ADMA support"
+ depends on ARCH_IOP32X || ARCH_IOP33X || ARCH_IOP13XX
+ select DMA_ENGINE
+ help
+ Enable support for the Intel(R) IOP Series RAID engines.
+
+config DW_DMAC
+ tristate "Synopsys DesignWare AHB DMA support"
+ depends on AVR32
+ select DMA_ENGINE
+ default y if CPU_AT32AP7000
+ help
+ Support the Synopsys DesignWare AHB DMA controller. This
+ can be integrated in chips such as the Atmel AT32ap7000.
+
+config AT_HDMAC
+ tristate "Atmel AHB DMA support"
+ depends on ARCH_AT91SAM9RL || ARCH_AT91SAM9G45
+ select DMA_ENGINE
+ help
+ Support the Atmel AHB DMA controller. This can be integrated in
+ chips such as the Atmel AT91SAM9RL.
+
+config FSL_DMA
+ tristate "Freescale Elo and Elo Plus DMA support"
+ depends on FSL_SOC
+ select DMA_ENGINE
+ ---help---
+ Enable support for the Freescale Elo and Elo Plus DMA controllers.
+ The Elo is the DMA controller on some 82xx and 83xx parts, and the
+ Elo Plus is the DMA controller on 85xx and 86xx parts.
+
+config MV_XOR
+ bool "Marvell XOR engine support"
+ depends on PLAT_ORION
+ select DMA_ENGINE
+ ---help---
+ Enable support for the Marvell XOR engine.
+
+config MX3_IPU
+ bool "MX3x Image Processing Unit support"
+ depends on ARCH_MX3
+ select DMA_ENGINE
+ default y
+ help
+ If you plan to use the Image Processing unit in the i.MX3x, say
+ Y here. If unsure, select Y.
+
+config MX3_IPU_IRQS
+ int "Number of dynamically mapped interrupts for IPU"
+ depends on MX3_IPU
+ range 2 137
+ default 4
+ help
+ Out of 137 interrupt sources on i.MX31 IPU only very few are used.
+ To avoid bloating the irq_desc[] array we allocate a sufficient
+ number of IRQ slots and map them dynamically to specific sources.
+
+config TXX9_DMAC
+ tristate "Toshiba TXx9 SoC DMA support"
+ depends on MACH_TX49XX || MACH_TX39XX
+ select DMA_ENGINE
+ help
+ Support the TXx9 SoC internal DMA controller. This can be
+ integrated in chips such as the Toshiba TX4927/38/39.
+
+config SH_DMAE
+ tristate "Renesas SuperH DMAC support"
+ depends on SUPERH && SH_DMA
+ depends on !SH_DMA_API
+ select DMA_ENGINE
+ help
+ Enable support for the Renesas SuperH DMA controllers.
+
+config COH901318
+ bool "ST-Ericsson COH901318 DMA support"
+ select DMA_ENGINE
+ depends on ARCH_U300
+ help
+ Enable support for ST-Ericsson COH 901 318 DMA.
+
+config AMCC_PPC440SPE_ADMA
+ tristate "AMCC PPC440SPe ADMA support"
+ depends on 440SPe || 440SP
+ select DMA_ENGINE
+ select ARCH_HAS_ASYNC_TX_FIND_CHANNEL
+ help
+ Enable support for the AMCC PPC440SPe RAID engines.
+
+config ARCH_HAS_ASYNC_TX_FIND_CHANNEL
+ bool
+
+config DMA_ENGINE
+ bool
+
+comment "DMA Clients"
+ depends on DMA_ENGINE
+
+config NET_DMA
+ bool "Network: TCP receive copy offload"
+ depends on DMA_ENGINE && NET
+ default (INTEL_IOATDMA || FSL_DMA)
+ help
+ This enables the use of DMA engines in the network stack to
+ offload receive copy-to-user operations, freeing CPU cycles.
+
+ Say Y here if you enabled INTEL_IOATDMA or FSL_DMA, otherwise
+ say N.
+
+config ASYNC_TX_DMA
+ bool "Async_tx: Offload support for the async_tx api"
+ depends on DMA_ENGINE
+ help
+ This allows the async_tx api to take advantage of offload engines for
+ memcpy, memset, xor, and raid6 p+q operations. If your platform has
+ a dma engine that can perform raid operations and you have enabled
+ MD_RAID456 say Y.
+
+ If unsure, say N.
+
+config DMATEST
+ tristate "DMA Test client"
+ depends on DMA_ENGINE
+ help
+ Simple DMA test client. Say N unless you're debugging a
+ DMA Device driver.
+
+endif
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
new file mode 100644
index 00000000000..807053d4823
--- /dev/null
+++ b/drivers/dma/Makefile
@@ -0,0 +1,14 @@
+obj-$(CONFIG_DMA_ENGINE) += dmaengine.o
+obj-$(CONFIG_NET_DMA) += iovlock.o
+obj-$(CONFIG_DMATEST) += dmatest.o
+obj-$(CONFIG_INTEL_IOATDMA) += ioat/
+obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o
+obj-$(CONFIG_FSL_DMA) += fsldma.o
+obj-$(CONFIG_MV_XOR) += mv_xor.o
+obj-$(CONFIG_DW_DMAC) += dw_dmac.o
+obj-$(CONFIG_AT_HDMAC) += at_hdmac.o
+obj-$(CONFIG_MX3_IPU) += ipu/
+obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o
+obj-$(CONFIG_SH_DMAE) += shdma.o
+obj-$(CONFIG_COH901318) += coh901318.o coh901318_lli.o
+obj-$(CONFIG_AMCC_PPC440SPE_ADMA) += ppc4xx/
diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
new file mode 100644
index 00000000000..efc1a61ca23
--- /dev/null
+++ b/drivers/dma/at_hdmac.c
@@ -0,0 +1,1220 @@
+/*
+ * Driver for the Atmel AHB DMA Controller (aka HDMA or DMAC on AT91 systems)
+ *
+ * Copyright (C) 2008 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ *
+ * This supports the Atmel AHB DMA Controller,
+ *
+ * The driver has currently been tested with the Atmel AT91SAM9RL
+ * and AT91SAM9G45 series.
+ */
+
+#include <linux/clk.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+
+#include "at_hdmac_regs.h"
+
+/*
+ * Glossary
+ * --------
+ *
+ * at_hdmac : Name of the ATmel AHB DMA Controller
+ * at_dma_ / atdma : ATmel DMA controller entity related
+ * atc_ / atchan : ATmel DMA Channel entity related
+ */
+
+#define ATC_DEFAULT_CFG (ATC_FIFOCFG_HALFFIFO)
+#define ATC_DEFAULT_CTRLA (0)
+#define ATC_DEFAULT_CTRLB (ATC_SIF(0) \
+ |ATC_DIF(1))
+
+/*
+ * Initial number of descriptors to allocate for each channel. This could
+ * be increased during dma usage.
+ */
+static unsigned int init_nr_desc_per_channel = 64;
+module_param(init_nr_desc_per_channel, uint, 0644);
+MODULE_PARM_DESC(init_nr_desc_per_channel,
+ "initial descriptors per channel (default: 64)");
+
+
+/* prototypes */
+static dma_cookie_t atc_tx_submit(struct dma_async_tx_descriptor *tx);
+
+
+/*----------------------------------------------------------------------*/
+
+static struct at_desc *atc_first_active(struct at_dma_chan *atchan)
+{
+ return list_first_entry(&atchan->active_list,
+ struct at_desc, desc_node);
+}
+
+static struct at_desc *atc_first_queued(struct at_dma_chan *atchan)
+{
+ return list_first_entry(&atchan->queue,
+ struct at_desc, desc_node);
+}
+
+/**
+ * atc_alloc_descriptor - allocate and return an initilized descriptor
+ * @chan: the channel to allocate descriptors for
+ * @gfp_flags: GFP allocation flags
+ *
+ * Note: The ack-bit is positioned in the descriptor flag at creation time
+ * to make initial allocation more convenient. This bit will be cleared
+ * and control will be given to client at usage time (during
+ * preparation functions).
+ */
+static struct at_desc *atc_alloc_descriptor(struct dma_chan *chan,
+ gfp_t gfp_flags)
+{
+ struct at_desc *desc = NULL;
+ struct at_dma *atdma = to_at_dma(chan->device);
+ dma_addr_t phys;
+
+ desc = dma_pool_alloc(atdma->dma_desc_pool, gfp_flags, &phys);
+ if (desc) {
+ memset(desc, 0, sizeof(struct at_desc));
+ INIT_LIST_HEAD(&desc->tx_list);
+ dma_async_tx_descriptor_init(&desc->txd, chan);
+ /* txd.flags will be overwritten in prep functions */
+ desc->txd.flags = DMA_CTRL_ACK;
+ desc->txd.tx_submit = atc_tx_submit;
+ desc->txd.phys = phys;
+ }
+
+ return desc;
+}
+
+/**
+ * atc_desc_get - get an unused descriptor from free_list
+ * @atchan: channel we want a new descriptor for
+ */
+static struct at_desc *atc_desc_get(struct at_dma_chan *atchan)
+{
+ struct at_desc *desc, *_desc;
+ struct at_desc *ret = NULL;
+ unsigned int i = 0;
+ LIST_HEAD(tmp_list);
+
+ spin_lock_bh(&atchan->lock);
+ list_for_each_entry_safe(desc, _desc, &atchan->free_list, desc_node) {
+ i++;
+ if (async_tx_test_ack(&desc->txd)) {
+ list_del(&desc->desc_node);
+ ret = desc;
+ break;
+ }
+ dev_dbg(chan2dev(&atchan->chan_common),
+ "desc %p not ACKed\n", desc);
+ }
+ spin_unlock_bh(&atchan->lock);
+ dev_vdbg(chan2dev(&atchan->chan_common),
+ "scanned %u descriptors on freelist\n", i);
+
+ /* no more descriptor available in initial pool: create one more */
+ if (!ret) {
+ ret = atc_alloc_descriptor(&atchan->chan_common, GFP_ATOMIC);
+ if (ret) {
+ spin_lock_bh(&atchan->lock);
+ atchan->descs_allocated++;
+ spin_unlock_bh(&atchan->lock);
+ } else {
+ dev_err(chan2dev(&atchan->chan_common),
+ "not enough descriptors available\n");
+ }
+ }
+
+ return ret;
+}
+
+/**
+ * atc_desc_put - move a descriptor, including any children, to the free list
+ * @atchan: channel we work on
+ * @desc: descriptor, at the head of a chain, to move to free list
+ */
+static void atc_desc_put(struct at_dma_chan *atchan, struct at_desc *desc)
+{
+ if (desc) {
+ struct at_desc *child;
+
+ spin_lock_bh(&atchan->lock);
+ list_for_each_entry(child, &desc->tx_list, desc_node)
+ dev_vdbg(chan2dev(&atchan->chan_common),
+ "moving child desc %p to freelist\n",
+ child);
+ list_splice_init(&desc->tx_list, &atchan->free_list);
+ dev_vdbg(chan2dev(&atchan->chan_common),
+ "moving desc %p to freelist\n", desc);
+ list_add(&desc->desc_node, &atchan->free_list);
+ spin_unlock_bh(&atchan->lock);
+ }
+}
+
+/**
+ * atc_assign_cookie - compute and assign new cookie
+ * @atchan: channel we work on
+ * @desc: descriptor to asign cookie for
+ *
+ * Called with atchan->lock held and bh disabled
+ */
+static dma_cookie_t
+atc_assign_cookie(struct at_dma_chan *atchan, struct at_desc *desc)
+{
+ dma_cookie_t cookie = atchan->chan_common.cookie;
+
+ if (++cookie < 0)
+ cookie = 1;
+
+ atchan->chan_common.cookie = cookie;
+ desc->txd.cookie = cookie;
+
+ return cookie;
+}
+
+/**
+ * atc_dostart - starts the DMA engine for real
+ * @atchan: the channel we want to start
+ * @first: first descriptor in the list we want to begin with
+ *
+ * Called with atchan->lock held and bh disabled
+ */
+static void atc_dostart(struct at_dma_chan *atchan, struct at_desc *first)
+{
+ struct at_dma *atdma = to_at_dma(atchan->chan_common.device);
+
+ /* ASSERT: channel is idle */
+ if (atc_chan_is_enabled(atchan)) {
+ dev_err(chan2dev(&atchan->chan_common),
+ "BUG: Attempted to start non-idle channel\n");
+ dev_err(chan2dev(&atchan->chan_common),
+ " channel: s0x%x d0x%x ctrl0x%x:0x%x l0x%x\n",
+ channel_readl(atchan, SADDR),
+ channel_readl(atchan, DADDR),
+ channel_readl(atchan, CTRLA),
+ channel_readl(atchan, CTRLB),
+ channel_readl(atchan, DSCR));
+
+ /* The tasklet will hopefully advance the queue... */
+ return;
+ }
+
+ vdbg_dump_regs(atchan);
+
+ /* clear any pending interrupt */
+ while (dma_readl(atdma, EBCISR))
+ cpu_relax();
+
+ channel_writel(atchan, SADDR, 0);
+ channel_writel(atchan, DADDR, 0);
+ channel_writel(atchan, CTRLA, 0);
+ channel_writel(atchan, CTRLB, 0);
+ channel_writel(atchan, DSCR, first->txd.phys);
+ dma_writel(atdma, CHER, atchan->mask);
+
+ vdbg_dump_regs(atchan);
+}
+
+/**
+ * atc_chain_complete - finish work for one transaction chain
+ * @atchan: channel we work on
+ * @desc: descriptor at the head of the chain we want do complete
+ *
+ * Called with atchan->lock held and bh disabled */
+static void
+atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc)
+{
+ dma_async_tx_callback callback;
+ void *param;
+ struct dma_async_tx_descriptor *txd = &desc->txd;
+
+ dev_vdbg(chan2dev(&atchan->chan_common),
+ "descriptor %u complete\n", txd->cookie);
+
+ atchan->completed_cookie = txd->cookie;
+ callback = txd->callback;
+ param = txd->callback_param;
+
+ /* move children to free_list */
+ list_splice_init(&desc->tx_list, &atchan->free_list);
+ /* move myself to free_list */
+ list_move(&desc->desc_node, &atchan->free_list);
+
+ /* unmap dma addresses */
+ if (!atchan->chan_common.private) {
+ struct device *parent = chan2parent(&atchan->chan_common);
+ if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+ if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
+ dma_unmap_single(parent,
+ desc->lli.daddr,
+ desc->len, DMA_FROM_DEVICE);
+ else
+ dma_unmap_page(parent,
+ desc->lli.daddr,
+ desc->len, DMA_FROM_DEVICE);
+ }
+ if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+ if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
+ dma_unmap_single(parent,
+ desc->lli.saddr,
+ desc->len, DMA_TO_DEVICE);
+ else
+ dma_unmap_page(parent,
+ desc->lli.saddr,
+ desc->len, DMA_TO_DEVICE);
+ }
+ }
+
+ /*
+ * The API requires that no submissions are done from a
+ * callback, so we don't need to drop the lock here
+ */
+ if (callback)
+ callback(param);
+
+ dma_run_dependencies(txd);
+}
+
+/**
+ * atc_complete_all - finish work for all transactions
+ * @atchan: channel to complete transactions for
+ *
+ * Eventually submit queued descriptors if any
+ *
+ * Assume channel is idle while calling this function
+ * Called with atchan->lock held and bh disabled
+ */
+static void atc_complete_all(struct at_dma_chan *atchan)
+{
+ struct at_desc *desc, *_desc;
+ LIST_HEAD(list);
+
+ dev_vdbg(chan2dev(&atchan->chan_common), "complete all\n");
+
+ BUG_ON(atc_chan_is_enabled(atchan));
+
+ /*
+ * Submit queued descriptors ASAP, i.e. before we go through
+ * the completed ones.
+ */
+ if (!list_empty(&atchan->queue))
+ atc_dostart(atchan, atc_first_queued(atchan));
+ /* empty active_list now it is completed */
+ list_splice_init(&atchan->active_list, &list);
+ /* empty queue list by moving descriptors (if any) to active_list */
+ list_splice_init(&atchan->queue, &atchan->active_list);
+
+ list_for_each_entry_safe(desc, _desc, &list, desc_node)
+ atc_chain_complete(atchan, desc);
+}
+
+/**
+ * atc_cleanup_descriptors - cleanup up finished descriptors in active_list
+ * @atchan: channel to be cleaned up
+ *
+ * Called with atchan->lock held and bh disabled
+ */
+static void atc_cleanup_descriptors(struct at_dma_chan *atchan)
+{
+ struct at_desc *desc, *_desc;
+ struct at_desc *child;
+
+ dev_vdbg(chan2dev(&atchan->chan_common), "cleanup descriptors\n");
+
+ list_for_each_entry_safe(desc, _desc, &atchan->active_list, desc_node) {
+ if (!(desc->lli.ctrla & ATC_DONE))
+ /* This one is currently in progress */
+ return;
+
+ list_for_each_entry(child, &desc->tx_list, desc_node)
+ if (!(child->lli.ctrla & ATC_DONE))
+ /* Currently in progress */
+ return;
+
+ /*
+ * No descriptors so far seem to be in progress, i.e.
+ * this chain must be done.
+ */
+ atc_chain_complete(atchan, desc);
+ }
+}
+
+/**
+ * atc_advance_work - at the end of a transaction, move forward
+ * @atchan: channel where the transaction ended
+ *
+ * Called with atchan->lock held and bh disabled
+ */
+static void atc_advance_work(struct at_dma_chan *atchan)
+{
+ dev_vdbg(chan2dev(&atchan->chan_common), "advance_work\n");
+
+ if (list_empty(&atchan->active_list) ||
+ list_is_singular(&atchan->active_list)) {
+ atc_complete_all(atchan);
+ } else {
+ atc_chain_complete(atchan, atc_first_active(atchan));
+ /* advance work */
+ atc_dostart(atchan, atc_first_active(atchan));
+ }
+}
+
+
+/**
+ * atc_handle_error - handle errors reported by DMA controller
+ * @atchan: channel where error occurs
+ *
+ * Called with atchan->lock held and bh disabled
+ */
+static void atc_handle_error(struct at_dma_chan *atchan)
+{
+ struct at_desc *bad_desc;
+ struct at_desc *child;
+
+ /*
+ * The descriptor currently at the head of the active list is
+ * broked. Since we don't have any way to report errors, we'll
+ * just have to scream loudly and try to carry on.
+ */
+ bad_desc = atc_first_active(atchan);
+ list_del_init(&bad_desc->desc_node);
+
+ /* As we are stopped, take advantage to push queued descriptors
+ * in active_list */
+ list_splice_init(&atchan->queue, atchan->active_list.prev);
+
+ /* Try to restart the controller */
+ if (!list_empty(&atchan->active_list))
+ atc_dostart(atchan, atc_first_active(atchan));
+
+ /*
+ * KERN_CRITICAL may seem harsh, but since this only happens
+ * when someone submits a bad physical address in a
+ * descriptor, we should consider ourselves lucky that the
+ * controller flagged an error instead of scribbling over
+ * random memory locations.
+ */
+ dev_crit(chan2dev(&atchan->chan_common),
+ "Bad descriptor submitted for DMA!\n");
+ dev_crit(chan2dev(&atchan->chan_common),
+ " cookie: %d\n", bad_desc->txd.cookie);
+ atc_dump_lli(atchan, &bad_desc->lli);
+ list_for_each_entry(child, &bad_desc->tx_list, desc_node)
+ atc_dump_lli(atchan, &child->lli);
+
+ /* Pretend the descriptor completed successfully */
+ atc_chain_complete(atchan, bad_desc);
+}
+
+
+/*-- IRQ & Tasklet ---------------------------------------------------*/
+
+static void atc_tasklet(unsigned long data)
+{
+ struct at_dma_chan *atchan = (struct at_dma_chan *)data;
+
+ /* Channel cannot be enabled here */
+ if (atc_chan_is_enabled(atchan)) {
+ dev_err(chan2dev(&atchan->chan_common),
+ "BUG: channel enabled in tasklet\n");
+ return;
+ }
+
+ spin_lock(&atchan->lock);
+ if (test_and_clear_bit(0, &atchan->error_status))
+ atc_handle_error(atchan);
+ else
+ atc_advance_work(atchan);
+
+ spin_unlock(&atchan->lock);
+}
+
+static irqreturn_t at_dma_interrupt(int irq, void *dev_id)
+{
+ struct at_dma *atdma = (struct at_dma *)dev_id;
+ struct at_dma_chan *atchan;
+ int i;
+ u32 status, pending, imr;
+ int ret = IRQ_NONE;
+
+ do {
+ imr = dma_readl(atdma, EBCIMR);
+ status = dma_readl(atdma, EBCISR);
+ pending = status & imr;
+
+ if (!pending)
+ break;
+
+ dev_vdbg(atdma->dma_common.dev,
+ "interrupt: status = 0x%08x, 0x%08x, 0x%08x\n",
+ status, imr, pending);
+
+ for (i = 0; i < atdma->dma_common.chancnt; i++) {
+ atchan = &atdma->chan[i];
+ if (pending & (AT_DMA_CBTC(i) | AT_DMA_ERR(i))) {
+ if (pending & AT_DMA_ERR(i)) {
+ /* Disable channel on AHB error */
+ dma_writel(atdma, CHDR, atchan->mask);
+ /* Give information to tasklet */
+ set_bit(0, &atchan->error_status);
+ }
+ tasklet_schedule(&atchan->tasklet);
+ ret = IRQ_HANDLED;
+ }
+ }
+
+ } while (pending);
+
+ return ret;
+}
+
+
+/*-- DMA Engine API --------------------------------------------------*/
+
+/**
+ * atc_tx_submit - set the prepared descriptor(s) to be executed by the engine
+ * @desc: descriptor at the head of the transaction chain
+ *
+ * Queue chain if DMA engine is working already
+ *
+ * Cookie increment and adding to active_list or queue must be atomic
+ */
+static dma_cookie_t atc_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct at_desc *desc = txd_to_at_desc(tx);
+ struct at_dma_chan *atchan = to_at_dma_chan(tx->chan);
+ dma_cookie_t cookie;
+
+ spin_lock_bh(&atchan->lock);
+ cookie = atc_assign_cookie(atchan, desc);
+
+ if (list_empty(&atchan->active_list)) {
+ dev_vdbg(chan2dev(tx->chan), "tx_submit: started %u\n",
+ desc->txd.cookie);
+ atc_dostart(atchan, desc);
+ list_add_tail(&desc->desc_node, &atchan->active_list);
+ } else {
+ dev_vdbg(chan2dev(tx->chan), "tx_submit: queued %u\n",
+ desc->txd.cookie);
+ list_add_tail(&desc->desc_node, &atchan->queue);
+ }
+
+ spin_unlock_bh(&atchan->lock);
+
+ return cookie;
+}
+
+/**
+ * atc_prep_dma_memcpy - prepare a memcpy operation
+ * @chan: the channel to prepare operation on
+ * @dest: operation virtual destination address
+ * @src: operation virtual source address
+ * @len: operation length
+ * @flags: tx descriptor status flags
+ */
+static struct dma_async_tx_descriptor *
+atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+ size_t len, unsigned long flags)
+{
+ struct at_dma_chan *atchan = to_at_dma_chan(chan);
+ struct at_desc *desc = NULL;
+ struct at_desc *first = NULL;
+ struct at_desc *prev = NULL;
+ size_t xfer_count;
+ size_t offset;
+ unsigned int src_width;
+ unsigned int dst_width;
+ u32 ctrla;
+ u32 ctrlb;
+
+ dev_vdbg(chan2dev(chan), "prep_dma_memcpy: d0x%x s0x%x l0x%zx f0x%lx\n",
+ dest, src, len, flags);
+
+ if (unlikely(!len)) {
+ dev_dbg(chan2dev(chan), "prep_dma_memcpy: length is zero!\n");
+ return NULL;
+ }
+
+ ctrla = ATC_DEFAULT_CTRLA;
+ ctrlb = ATC_DEFAULT_CTRLB
+ | ATC_SRC_ADDR_MODE_INCR
+ | ATC_DST_ADDR_MODE_INCR
+ | ATC_FC_MEM2MEM;
+
+ /*
+ * We can be a lot more clever here, but this should take care
+ * of the most common optimization.
+ */
+ if (!((src | dest | len) & 3)) {
+ ctrla |= ATC_SRC_WIDTH_WORD | ATC_DST_WIDTH_WORD;
+ src_width = dst_width = 2;
+ } else if (!((src | dest | len) & 1)) {
+ ctrla |= ATC_SRC_WIDTH_HALFWORD | ATC_DST_WIDTH_HALFWORD;
+ src_width = dst_width = 1;
+ } else {
+ ctrla |= ATC_SRC_WIDTH_BYTE | ATC_DST_WIDTH_BYTE;
+ src_width = dst_width = 0;
+ }
+
+ for (offset = 0; offset < len; offset += xfer_count << src_width) {
+ xfer_count = min_t(size_t, (len - offset) >> src_width,
+ ATC_BTSIZE_MAX);
+
+ desc = atc_desc_get(atchan);
+ if (!desc)
+ goto err_desc_get;
+
+ desc->lli.saddr = src + offset;
+ desc->lli.daddr = dest + offset;
+ desc->lli.ctrla = ctrla | xfer_count;
+ desc->lli.ctrlb = ctrlb;
+
+ desc->txd.cookie = 0;
+ async_tx_ack(&desc->txd);
+
+ if (!first) {
+ first = desc;
+ } else {
+ /* inform the HW lli about chaining */
+ prev->lli.dscr = desc->txd.phys;
+ /* insert the link descriptor to the LD ring */
+ list_add_tail(&desc->desc_node,
+ &first->tx_list);
+ }
+ prev = desc;
+ }
+
+ /* First descriptor of the chain embedds additional information */
+ first->txd.cookie = -EBUSY;
+ first->len = len;
+
+ /* set end-of-link to the last link descriptor of list*/
+ set_desc_eol(desc);
+
+ desc->txd.flags = flags; /* client is in control of this ack */
+
+ return &first->txd;
+
+err_desc_get:
+ atc_desc_put(atchan, first);
+ return NULL;
+}
+
+
+/**
+ * atc_prep_slave_sg - prepare descriptors for a DMA_SLAVE transaction
+ * @chan: DMA channel
+ * @sgl: scatterlist to transfer to/from
+ * @sg_len: number of entries in @scatterlist
+ * @direction: DMA direction
+ * @flags: tx descriptor status flags
+ */
+static struct dma_async_tx_descriptor *
+atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+ unsigned int sg_len, enum dma_data_direction direction,
+ unsigned long flags)
+{
+ struct at_dma_chan *atchan = to_at_dma_chan(chan);
+ struct at_dma_slave *atslave = chan->private;
+ struct at_desc *first = NULL;
+ struct at_desc *prev = NULL;
+ u32 ctrla;
+ u32 ctrlb;
+ dma_addr_t reg;
+ unsigned int reg_width;
+ unsigned int mem_width;
+ unsigned int i;
+ struct scatterlist *sg;
+ size_t total_len = 0;
+
+ dev_vdbg(chan2dev(chan), "prep_slave_sg: %s f0x%lx\n",
+ direction == DMA_TO_DEVICE ? "TO DEVICE" : "FROM DEVICE",
+ flags);
+
+ if (unlikely(!atslave || !sg_len)) {
+ dev_dbg(chan2dev(chan), "prep_dma_memcpy: length is zero!\n");
+ return NULL;
+ }
+
+ reg_width = atslave->reg_width;
+
+ ctrla = ATC_DEFAULT_CTRLA | atslave->ctrla;
+ ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN;
+
+ switch (direction) {
+ case DMA_TO_DEVICE:
+ ctrla |= ATC_DST_WIDTH(reg_width);
+ ctrlb |= ATC_DST_ADDR_MODE_FIXED
+ | ATC_SRC_ADDR_MODE_INCR
+ | ATC_FC_MEM2PER;
+ reg = atslave->tx_reg;
+ for_each_sg(sgl, sg, sg_len, i) {
+ struct at_desc *desc;
+ u32 len;
+ u32 mem;
+
+ desc = atc_desc_get(atchan);
+ if (!desc)
+ goto err_desc_get;
+
+ mem = sg_phys(sg);
+ len = sg_dma_len(sg);
+ mem_width = 2;
+ if (unlikely(mem & 3 || len & 3))
+ mem_width = 0;
+
+ desc->lli.saddr = mem;
+ desc->lli.daddr = reg;
+ desc->lli.ctrla = ctrla
+ | ATC_SRC_WIDTH(mem_width)
+ | len >> mem_width;
+ desc->lli.ctrlb = ctrlb;
+
+ if (!first) {
+ first = desc;
+ } else {
+ /* inform the HW lli about chaining */
+ prev->lli.dscr = desc->txd.phys;
+ /* insert the link descriptor to the LD ring */
+ list_add_tail(&desc->desc_node,
+ &first->tx_list);
+ }
+ prev = desc;
+ total_len += len;
+ }
+ break;
+ case DMA_FROM_DEVICE:
+ ctrla |= ATC_SRC_WIDTH(reg_width);
+ ctrlb |= ATC_DST_ADDR_MODE_INCR
+ | ATC_SRC_ADDR_MODE_FIXED
+ | ATC_FC_PER2MEM;
+
+ reg = atslave->rx_reg;
+ for_each_sg(sgl, sg, sg_len, i) {
+ struct at_desc *desc;
+ u32 len;
+ u32 mem;
+
+ desc = atc_desc_get(atchan);
+ if (!desc)
+ goto err_desc_get;
+
+ mem = sg_phys(sg);
+ len = sg_dma_len(sg);
+ mem_width = 2;
+ if (unlikely(mem & 3 || len & 3))
+ mem_width = 0;
+
+ desc->lli.saddr = reg;
+ desc->lli.daddr = mem;
+ desc->lli.ctrla = ctrla
+ | ATC_DST_WIDTH(mem_width)
+ | len >> mem_width;
+ desc->lli.ctrlb = ctrlb;
+
+ if (!first) {
+ first = desc;
+ } else {
+ /* inform the HW lli about chaining */
+ prev->lli.dscr = desc->txd.phys;
+ /* insert the link descriptor to the LD ring */
+ list_add_tail(&desc->desc_node,
+ &first->tx_list);
+ }
+ prev = desc;
+ total_len += len;
+ }
+ break;
+ default:
+ return NULL;
+ }
+
+ /* set end-of-link to the last link descriptor of list*/
+ set_desc_eol(prev);
+
+ /* First descriptor of the chain embedds additional information */
+ first->txd.cookie = -EBUSY;
+ first->len = total_len;
+
+ /* last link descriptor of list is responsible of flags */
+ prev->txd.flags = flags; /* client is in control of this ack */
+
+ return &first->txd;
+
+err_desc_get:
+ dev_err(chan2dev(chan), "not enough descriptors available\n");
+ atc_desc_put(atchan, first);
+ return NULL;
+}
+
+static void atc_terminate_all(struct dma_chan *chan)
+{
+ struct at_dma_chan *atchan = to_at_dma_chan(chan);
+ struct at_dma *atdma = to_at_dma(chan->device);
+ struct at_desc *desc, *_desc;
+ LIST_HEAD(list);
+
+ /*
+ * This is only called when something went wrong elsewhere, so
+ * we don't really care about the data. Just disable the
+ * channel. We still have to poll the channel enable bit due
+ * to AHB/HSB limitations.
+ */
+ spin_lock_bh(&atchan->lock);
+
+ dma_writel(atdma, CHDR, atchan->mask);
+
+ /* confirm that this channel is disabled */
+ while (dma_readl(atdma, CHSR) & atchan->mask)
+ cpu_relax();
+
+ /* active_list entries will end up before queued entries */
+ list_splice_init(&atchan->queue, &list);
+ list_splice_init(&atchan->active_list, &list);
+
+ spin_unlock_bh(&atchan->lock);
+
+ /* Flush all pending and queued descriptors */
+ list_for_each_entry_safe(desc, _desc, &list, desc_node)
+ atc_chain_complete(atchan, desc);
+}
+
+/**
+ * atc_is_tx_complete - poll for transaction completion
+ * @chan: DMA channel
+ * @cookie: transaction identifier to check status of
+ * @done: if not %NULL, updated with last completed transaction
+ * @used: if not %NULL, updated with last used transaction
+ *
+ * If @done and @used are passed in, upon return they reflect the driver
+ * internal state and can be used with dma_async_is_complete() to check
+ * the status of multiple cookies without re-checking hardware state.
+ */
+static enum dma_status
+atc_is_tx_complete(struct dma_chan *chan,
+ dma_cookie_t cookie,
+ dma_cookie_t *done, dma_cookie_t *used)
+{
+ struct at_dma_chan *atchan = to_at_dma_chan(chan);
+ dma_cookie_t last_used;
+ dma_cookie_t last_complete;
+ enum dma_status ret;
+
+ dev_vdbg(chan2dev(chan), "is_tx_complete: %d (d%d, u%d)\n",
+ cookie, done ? *done : 0, used ? *used : 0);
+
+ spin_lock_bh(&atchan->lock);
+
+ last_complete = atchan->completed_cookie;
+ last_used = chan->cookie;
+
+ ret = dma_async_is_complete(cookie, last_complete, last_used);
+ if (ret != DMA_SUCCESS) {
+ atc_cleanup_descriptors(atchan);
+
+ last_complete = atchan->completed_cookie;
+ last_used = chan->cookie;
+
+ ret = dma_async_is_complete(cookie, last_complete, last_used);
+ }
+
+ spin_unlock_bh(&atchan->lock);
+
+ if (done)
+ *done = last_complete;
+ if (used)
+ *used = last_used;
+
+ return ret;
+}
+
+/**
+ * atc_issue_pending - try to finish work
+ * @chan: target DMA channel
+ */
+static void atc_issue_pending(struct dma_chan *chan)
+{
+ struct at_dma_chan *atchan = to_at_dma_chan(chan);
+
+ dev_vdbg(chan2dev(chan), "issue_pending\n");
+
+ if (!atc_chan_is_enabled(atchan)) {
+ spin_lock_bh(&atchan->lock);
+ atc_advance_work(atchan);
+ spin_unlock_bh(&atchan->lock);
+ }
+}
+
+/**
+ * atc_alloc_chan_resources - allocate resources for DMA channel
+ * @chan: allocate descriptor resources for this channel
+ * @client: current client requesting the channel be ready for requests
+ *
+ * return - the number of allocated descriptors
+ */
+static int atc_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct at_dma_chan *atchan = to_at_dma_chan(chan);
+ struct at_dma *atdma = to_at_dma(chan->device);
+ struct at_desc *desc;
+ struct at_dma_slave *atslave;
+ int i;
+ u32 cfg;
+ LIST_HEAD(tmp_list);
+
+ dev_vdbg(chan2dev(chan), "alloc_chan_resources\n");
+
+ /* ASSERT: channel is idle */
+ if (atc_chan_is_enabled(atchan)) {
+ dev_dbg(chan2dev(chan), "DMA channel not idle ?\n");
+ return -EIO;
+ }
+
+ cfg = ATC_DEFAULT_CFG;
+
+ atslave = chan->private;
+ if (atslave) {
+ /*
+ * We need controller-specific data to set up slave
+ * transfers.
+ */
+ BUG_ON(!atslave->dma_dev || atslave->dma_dev != atdma->dma_common.dev);
+
+ /* if cfg configuration specified take it instad of default */
+ if (atslave->cfg)
+ cfg = atslave->cfg;
+ }
+
+ /* have we already been set up?
+ * reconfigure channel but no need to reallocate descriptors */
+ if (!list_empty(&atchan->free_list))
+ return atchan->descs_allocated;
+
+ /* Allocate initial pool of descriptors */
+ for (i = 0; i < init_nr_desc_per_channel; i++) {
+ desc = atc_alloc_descriptor(chan, GFP_KERNEL);
+ if (!desc) {
+ dev_err(atdma->dma_common.dev,
+ "Only %d initial descriptors\n", i);
+ break;
+ }
+ list_add_tail(&desc->desc_node, &tmp_list);
+ }
+
+ spin_lock_bh(&atchan->lock);
+ atchan->descs_allocated = i;
+ list_splice(&tmp_list, &atchan->free_list);
+ atchan->completed_cookie = chan->cookie = 1;
+ spin_unlock_bh(&atchan->lock);
+
+ /* channel parameters */
+ channel_writel(atchan, CFG, cfg);
+
+ dev_dbg(chan2dev(chan),
+ "alloc_chan_resources: allocated %d descriptors\n",
+ atchan->descs_allocated);
+
+ return atchan->descs_allocated;
+}
+
+/**
+ * atc_free_chan_resources - free all channel resources
+ * @chan: DMA channel
+ */
+static void atc_free_chan_resources(struct dma_chan *chan)
+{
+ struct at_dma_chan *atchan = to_at_dma_chan(chan);
+ struct at_dma *atdma = to_at_dma(chan->device);
+ struct at_desc *desc, *_desc;
+ LIST_HEAD(list);
+
+ dev_dbg(chan2dev(chan), "free_chan_resources: (descs allocated=%u)\n",
+ atchan->descs_allocated);
+
+ /* ASSERT: channel is idle */
+ BUG_ON(!list_empty(&atchan->active_list));
+ BUG_ON(!list_empty(&atchan->queue));
+ BUG_ON(atc_chan_is_enabled(atchan));
+
+ list_for_each_entry_safe(desc, _desc, &atchan->free_list, desc_node) {
+ dev_vdbg(chan2dev(chan), " freeing descriptor %p\n", desc);
+ list_del(&desc->desc_node);
+ /* free link descriptor */
+ dma_pool_free(atdma->dma_desc_pool, desc, desc->txd.phys);
+ }
+ list_splice_init(&atchan->free_list, &list);
+ atchan->descs_allocated = 0;
+
+ dev_vdbg(chan2dev(chan), "free_chan_resources: done\n");
+}
+
+
+/*-- Module Management -----------------------------------------------*/
+
+/**
+ * at_dma_off - disable DMA controller
+ * @atdma: the Atmel HDAMC device
+ */
+static void at_dma_off(struct at_dma *atdma)
+{
+ dma_writel(atdma, EN, 0);
+
+ /* disable all interrupts */
+ dma_writel(atdma, EBCIDR, -1L);
+
+ /* confirm that all channels are disabled */
+ while (dma_readl(atdma, CHSR) & atdma->all_chan_mask)
+ cpu_relax();
+}
+
+static int __init at_dma_probe(struct platform_device *pdev)
+{
+ struct at_dma_platform_data *pdata;
+ struct resource *io;
+ struct at_dma *atdma;
+ size_t size;
+ int irq;
+ int err;
+ int i;
+
+ /* get DMA Controller parameters from platform */
+ pdata = pdev->dev.platform_data;
+ if (!pdata || pdata->nr_channels > AT_DMA_MAX_NR_CHANNELS)
+ return -EINVAL;
+
+ io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!io)
+ return -EINVAL;
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0)
+ return irq;
+
+ size = sizeof(struct at_dma);
+ size += pdata->nr_channels * sizeof(struct at_dma_chan);
+ atdma = kzalloc(size, GFP_KERNEL);
+ if (!atdma)
+ return -ENOMEM;
+
+ /* discover transaction capabilites from the platform data */
+ atdma->dma_common.cap_mask = pdata->cap_mask;
+ atdma->all_chan_mask = (1 << pdata->nr_channels) - 1;
+
+ size = io->end - io->start + 1;
+ if (!request_mem_region(io->start, size, pdev->dev.driver->name)) {
+ err = -EBUSY;
+ goto err_kfree;
+ }
+
+ atdma->regs = ioremap(io->start, size);
+ if (!atdma->regs) {
+ err = -ENOMEM;
+ goto err_release_r;
+ }
+
+ atdma->clk = clk_get(&pdev->dev, "dma_clk");
+ if (IS_ERR(atdma->clk)) {
+ err = PTR_ERR(atdma->clk);
+ goto err_clk;
+ }
+ clk_enable(atdma->clk);
+
+ /* force dma off, just in case */
+ at_dma_off(atdma);
+
+ err = request_irq(irq, at_dma_interrupt, 0, "at_hdmac", atdma);
+ if (err)
+ goto err_irq;
+
+ platform_set_drvdata(pdev, atdma);
+
+ /* create a pool of consistent memory blocks for hardware descriptors */
+ atdma->dma_desc_pool = dma_pool_create("at_hdmac_desc_pool",
+ &pdev->dev, sizeof(struct at_desc),
+ 4 /* word alignment */, 0);
+ if (!atdma->dma_desc_pool) {
+ dev_err(&pdev->dev, "No memory for descriptors dma pool\n");
+ err = -ENOMEM;
+ goto err_pool_create;
+ }
+
+ /* clear any pending interrupt */
+ while (dma_readl(atdma, EBCISR))
+ cpu_relax();
+
+ /* initialize channels related values */
+ INIT_LIST_HEAD(&atdma->dma_common.channels);
+ for (i = 0; i < pdata->nr_channels; i++, atdma->dma_common.chancnt++) {
+ struct at_dma_chan *atchan = &atdma->chan[i];
+
+ atchan->chan_common.device = &atdma->dma_common;
+ atchan->chan_common.cookie = atchan->completed_cookie = 1;
+ atchan->chan_common.chan_id = i;
+ list_add_tail(&atchan->chan_common.device_node,
+ &atdma->dma_common.channels);
+
+ atchan->ch_regs = atdma->regs + ch_regs(i);
+ spin_lock_init(&atchan->lock);
+ atchan->mask = 1 << i;
+
+ INIT_LIST_HEAD(&atchan->active_list);
+ INIT_LIST_HEAD(&atchan->queue);
+ INIT_LIST_HEAD(&atchan->free_list);
+
+ tasklet_init(&atchan->tasklet, atc_tasklet,
+ (unsigned long)atchan);
+ atc_enable_irq(atchan);
+ }
+
+ /* set base routines */
+ atdma->dma_common.device_alloc_chan_resources = atc_alloc_chan_resources;
+ atdma->dma_common.device_free_chan_resources = atc_free_chan_resources;
+ atdma->dma_common.device_is_tx_complete = atc_is_tx_complete;
+ atdma->dma_common.device_issue_pending = atc_issue_pending;
+ atdma->dma_common.dev = &pdev->dev;
+
+ /* set prep routines based on capability */
+ if (dma_has_cap(DMA_MEMCPY, atdma->dma_common.cap_mask))
+ atdma->dma_common.device_prep_dma_memcpy = atc_prep_dma_memcpy;
+
+ if (dma_has_cap(DMA_SLAVE, atdma->dma_common.cap_mask)) {
+ atdma->dma_common.device_prep_slave_sg = atc_prep_slave_sg;
+ atdma->dma_common.device_terminate_all = atc_terminate_all;
+ }
+
+ dma_writel(atdma, EN, AT_DMA_ENABLE);
+
+ dev_info(&pdev->dev, "Atmel AHB DMA Controller ( %s%s), %d channels\n",
+ dma_has_cap(DMA_MEMCPY, atdma->dma_common.cap_mask) ? "cpy " : "",
+ dma_has_cap(DMA_SLAVE, atdma->dma_common.cap_mask) ? "slave " : "",
+ atdma->dma_common.chancnt);
+
+ dma_async_device_register(&atdma->dma_common);
+
+ return 0;
+
+err_pool_create:
+ platform_set_drvdata(pdev, NULL);
+ free_irq(platform_get_irq(pdev, 0), atdma);
+err_irq:
+ clk_disable(atdma->clk);
+ clk_put(atdma->clk);
+err_clk:
+ iounmap(atdma->regs);
+ atdma->regs = NULL;
+err_release_r:
+ release_mem_region(io->start, size);
+err_kfree:
+ kfree(atdma);
+ return err;
+}
+
+static int __exit at_dma_remove(struct platform_device *pdev)
+{
+ struct at_dma *atdma = platform_get_drvdata(pdev);
+ struct dma_chan *chan, *_chan;
+ struct resource *io;
+
+ at_dma_off(atdma);
+ dma_async_device_unregister(&atdma->dma_common);
+
+ dma_pool_destroy(atdma->dma_desc_pool);
+ platform_set_drvdata(pdev, NULL);
+ free_irq(platform_get_irq(pdev, 0), atdma);
+
+ list_for_each_entry_safe(chan, _chan, &atdma->dma_common.channels,
+ device_node) {
+ struct at_dma_chan *atchan = to_at_dma_chan(chan);
+
+ /* Disable interrupts */
+ atc_disable_irq(atchan);
+ tasklet_disable(&atchan->tasklet);
+
+ tasklet_kill(&atchan->tasklet);
+ list_del(&chan->device_node);
+ }
+
+ clk_disable(atdma->clk);
+ clk_put(atdma->clk);
+
+ iounmap(atdma->regs);
+ atdma->regs = NULL;
+
+ io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ release_mem_region(io->start, io->end - io->start + 1);
+
+ kfree(atdma);
+
+ return 0;
+}
+
+static void at_dma_shutdown(struct platform_device *pdev)
+{
+ struct at_dma *atdma = platform_get_drvdata(pdev);
+
+ at_dma_off(platform_get_drvdata(pdev));
+ clk_disable(atdma->clk);
+}
+
+static int at_dma_suspend_noirq(struct device *dev)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct at_dma *atdma = platform_get_drvdata(pdev);
+
+ at_dma_off(platform_get_drvdata(pdev));
+ clk_disable(atdma->clk);
+ return 0;
+}
+
+static int at_dma_resume_noirq(struct device *dev)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct at_dma *atdma = platform_get_drvdata(pdev);
+
+ clk_enable(atdma->clk);
+ dma_writel(atdma, EN, AT_DMA_ENABLE);
+ return 0;
+}
+
+static const struct dev_pm_ops at_dma_dev_pm_ops = {
+ .suspend_noirq = at_dma_suspend_noirq,
+ .resume_noirq = at_dma_resume_noirq,
+};
+
+static struct platform_driver at_dma_driver = {
+ .remove = __exit_p(at_dma_remove),
+ .shutdown = at_dma_shutdown,
+ .driver = {
+ .name = "at_hdmac",
+ .pm = &at_dma_dev_pm_ops,
+ },
+};
+
+static int __init at_dma_init(void)
+{
+ return platform_driver_probe(&at_dma_driver, at_dma_probe);
+}
+module_init(at_dma_init);
+
+static void __exit at_dma_exit(void)
+{
+ platform_driver_unregister(&at_dma_driver);
+}
+module_exit(at_dma_exit);
+
+MODULE_DESCRIPTION("Atmel AHB DMA Controller driver");
+MODULE_AUTHOR("Nicolas Ferre <nicolas.ferre@atmel.com>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:at_hdmac");
diff --git a/drivers/dma/at_hdmac_regs.h b/drivers/dma/at_hdmac_regs.h
new file mode 100644
index 00000000000..495457e3dc4
--- /dev/null
+++ b/drivers/dma/at_hdmac_regs.h
@@ -0,0 +1,354 @@
+/*
+ * Header file for the Atmel AHB DMA Controller driver
+ *
+ * Copyright (C) 2008 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+#ifndef AT_HDMAC_REGS_H
+#define AT_HDMAC_REGS_H
+
+#include <mach/at_hdmac.h>
+
+#define AT_DMA_MAX_NR_CHANNELS 8
+
+
+#define AT_DMA_GCFG 0x00 /* Global Configuration Register */
+#define AT_DMA_IF_BIGEND(i) (0x1 << (i)) /* AHB-Lite Interface i in Big-endian mode */
+#define AT_DMA_ARB_CFG (0x1 << 4) /* Arbiter mode. */
+#define AT_DMA_ARB_CFG_FIXED (0x0 << 4)
+#define AT_DMA_ARB_CFG_ROUND_ROBIN (0x1 << 4)
+
+#define AT_DMA_EN 0x04 /* Controller Enable Register */
+#define AT_DMA_ENABLE (0x1 << 0)
+
+#define AT_DMA_SREQ 0x08 /* Software Single Request Register */
+#define AT_DMA_SSREQ(x) (0x1 << ((x) << 1)) /* Request a source single transfer on channel x */
+#define AT_DMA_DSREQ(x) (0x1 << (1 + ((x) << 1))) /* Request a destination single transfer on channel x */
+
+#define AT_DMA_CREQ 0x0C /* Software Chunk Transfer Request Register */
+#define AT_DMA_SCREQ(x) (0x1 << ((x) << 1)) /* Request a source chunk transfer on channel x */
+#define AT_DMA_DCREQ(x) (0x1 << (1 + ((x) << 1))) /* Request a destination chunk transfer on channel x */
+
+#define AT_DMA_LAST 0x10 /* Software Last Transfer Flag Register */
+#define AT_DMA_SLAST(x) (0x1 << ((x) << 1)) /* This src rq is last tx of buffer on channel x */
+#define AT_DMA_DLAST(x) (0x1 << (1 + ((x) << 1))) /* This dst rq is last tx of buffer on channel x */
+
+#define AT_DMA_SYNC 0x14 /* Request Synchronization Register */
+#define AT_DMA_SYR(h) (0x1 << (h)) /* Synchronize handshake line h */
+
+/* Error, Chained Buffer transfer completed and Buffer transfer completed Interrupt registers */
+#define AT_DMA_EBCIER 0x18 /* Enable register */
+#define AT_DMA_EBCIDR 0x1C /* Disable register */
+#define AT_DMA_EBCIMR 0x20 /* Mask Register */
+#define AT_DMA_EBCISR 0x24 /* Status Register */
+#define AT_DMA_CBTC_OFFSET 8
+#define AT_DMA_ERR_OFFSET 16
+#define AT_DMA_BTC(x) (0x1 << (x))
+#define AT_DMA_CBTC(x) (0x1 << (AT_DMA_CBTC_OFFSET + (x)))
+#define AT_DMA_ERR(x) (0x1 << (AT_DMA_ERR_OFFSET + (x)))
+
+#define AT_DMA_CHER 0x28 /* Channel Handler Enable Register */
+#define AT_DMA_ENA(x) (0x1 << (x))
+#define AT_DMA_SUSP(x) (0x1 << ( 8 + (x)))
+#define AT_DMA_KEEP(x) (0x1 << (24 + (x)))
+
+#define AT_DMA_CHDR 0x2C /* Channel Handler Disable Register */
+#define AT_DMA_DIS(x) (0x1 << (x))
+#define AT_DMA_RES(x) (0x1 << ( 8 + (x)))
+
+#define AT_DMA_CHSR 0x30 /* Channel Handler Status Register */
+#define AT_DMA_EMPT(x) (0x1 << (16 + (x)))
+#define AT_DMA_STAL(x) (0x1 << (24 + (x)))
+
+
+#define AT_DMA_CH_REGS_BASE 0x3C /* Channel registers base address */
+#define ch_regs(x) (AT_DMA_CH_REGS_BASE + (x) * 0x28) /* Channel x base addr */
+
+/* Hardware register offset for each channel */
+#define ATC_SADDR_OFFSET 0x00 /* Source Address Register */
+#define ATC_DADDR_OFFSET 0x04 /* Destination Address Register */
+#define ATC_DSCR_OFFSET 0x08 /* Descriptor Address Register */
+#define ATC_CTRLA_OFFSET 0x0C /* Control A Register */
+#define ATC_CTRLB_OFFSET 0x10 /* Control B Register */
+#define ATC_CFG_OFFSET 0x14 /* Configuration Register */
+#define ATC_SPIP_OFFSET 0x18 /* Src PIP Configuration Register */
+#define ATC_DPIP_OFFSET 0x1C /* Dst PIP Configuration Register */
+
+
+/* Bitfield definitions */
+
+/* Bitfields in DSCR */
+#define ATC_DSCR_IF(i) (0x3 & (i)) /* Dsc feched via AHB-Lite Interface i */
+
+/* Bitfields in CTRLA */
+#define ATC_BTSIZE_MAX 0xFFFFUL /* Maximum Buffer Transfer Size */
+#define ATC_BTSIZE(x) (ATC_BTSIZE_MAX & (x)) /* Buffer Transfer Size */
+/* Chunck Tranfer size definitions are in at_hdmac.h */
+#define ATC_SRC_WIDTH_MASK (0x3 << 24) /* Source Single Transfer Size */
+#define ATC_SRC_WIDTH(x) ((x) << 24)
+#define ATC_SRC_WIDTH_BYTE (0x0 << 24)
+#define ATC_SRC_WIDTH_HALFWORD (0x1 << 24)
+#define ATC_SRC_WIDTH_WORD (0x2 << 24)
+#define ATC_DST_WIDTH_MASK (0x3 << 28) /* Destination Single Transfer Size */
+#define ATC_DST_WIDTH(x) ((x) << 28)
+#define ATC_DST_WIDTH_BYTE (0x0 << 28)
+#define ATC_DST_WIDTH_HALFWORD (0x1 << 28)
+#define ATC_DST_WIDTH_WORD (0x2 << 28)
+#define ATC_DONE (0x1 << 31) /* Tx Done (only written back in descriptor) */
+
+/* Bitfields in CTRLB */
+#define ATC_SIF(i) (0x3 & (i)) /* Src tx done via AHB-Lite Interface i */
+#define ATC_DIF(i) ((0x3 & (i)) << 4) /* Dst tx done via AHB-Lite Interface i */
+#define ATC_SRC_PIP (0x1 << 8) /* Source Picture-in-Picture enabled */
+#define ATC_DST_PIP (0x1 << 12) /* Destination Picture-in-Picture enabled */
+#define ATC_SRC_DSCR_DIS (0x1 << 16) /* Src Descriptor fetch disable */
+#define ATC_DST_DSCR_DIS (0x1 << 20) /* Dst Descriptor fetch disable */
+#define ATC_FC_MASK (0x7 << 21) /* Choose Flow Controller */
+#define ATC_FC_MEM2MEM (0x0 << 21) /* Mem-to-Mem (DMA) */
+#define ATC_FC_MEM2PER (0x1 << 21) /* Mem-to-Periph (DMA) */
+#define ATC_FC_PER2MEM (0x2 << 21) /* Periph-to-Mem (DMA) */
+#define ATC_FC_PER2PER (0x3 << 21) /* Periph-to-Periph (DMA) */
+#define ATC_FC_PER2MEM_PER (0x4 << 21) /* Periph-to-Mem (Peripheral) */
+#define ATC_FC_MEM2PER_PER (0x5 << 21) /* Mem-to-Periph (Peripheral) */
+#define ATC_FC_PER2PER_SRCPER (0x6 << 21) /* Periph-to-Periph (Src Peripheral) */
+#define ATC_FC_PER2PER_DSTPER (0x7 << 21) /* Periph-to-Periph (Dst Peripheral) */
+#define ATC_SRC_ADDR_MODE_MASK (0x3 << 24)
+#define ATC_SRC_ADDR_MODE_INCR (0x0 << 24) /* Incrementing Mode */
+#define ATC_SRC_ADDR_MODE_DECR (0x1 << 24) /* Decrementing Mode */
+#define ATC_SRC_ADDR_MODE_FIXED (0x2 << 24) /* Fixed Mode */
+#define ATC_DST_ADDR_MODE_MASK (0x3 << 28)
+#define ATC_DST_ADDR_MODE_INCR (0x0 << 28) /* Incrementing Mode */
+#define ATC_DST_ADDR_MODE_DECR (0x1 << 28) /* Decrementing Mode */
+#define ATC_DST_ADDR_MODE_FIXED (0x2 << 28) /* Fixed Mode */
+#define ATC_IEN (0x1 << 30) /* BTC interrupt enable (active low) */
+#define ATC_AUTO (0x1 << 31) /* Auto multiple buffer tx enable */
+
+/* Bitfields in CFG */
+/* are in at_hdmac.h */
+
+/* Bitfields in SPIP */
+#define ATC_SPIP_HOLE(x) (0xFFFFU & (x))
+#define ATC_SPIP_BOUNDARY(x) ((0x3FF & (x)) << 16)
+
+/* Bitfields in DPIP */
+#define ATC_DPIP_HOLE(x) (0xFFFFU & (x))
+#define ATC_DPIP_BOUNDARY(x) ((0x3FF & (x)) << 16)
+
+
+/*-- descriptors -----------------------------------------------------*/
+
+/* LLI == Linked List Item; aka DMA buffer descriptor */
+struct at_lli {
+ /* values that are not changed by hardware */
+ dma_addr_t saddr;
+ dma_addr_t daddr;
+ /* value that may get written back: */
+ u32 ctrla;
+ /* more values that are not changed by hardware */
+ u32 ctrlb;
+ dma_addr_t dscr; /* chain to next lli */
+};
+
+/**
+ * struct at_desc - software descriptor
+ * @at_lli: hardware lli structure
+ * @txd: support for the async_tx api
+ * @desc_node: node on the channed descriptors list
+ * @len: total transaction bytecount
+ */
+struct at_desc {
+ /* FIRST values the hardware uses */
+ struct at_lli lli;
+
+ /* THEN values for driver housekeeping */
+ struct list_head tx_list;
+ struct dma_async_tx_descriptor txd;
+ struct list_head desc_node;
+ size_t len;
+};
+
+static inline struct at_desc *
+txd_to_at_desc(struct dma_async_tx_descriptor *txd)
+{
+ return container_of(txd, struct at_desc, txd);
+}
+
+
+/*-- Channels --------------------------------------------------------*/
+
+/**
+ * struct at_dma_chan - internal representation of an Atmel HDMAC channel
+ * @chan_common: common dmaengine channel object members
+ * @device: parent device
+ * @ch_regs: memory mapped register base
+ * @mask: channel index in a mask
+ * @error_status: transmit error status information from irq handler
+ * to tasklet (use atomic operations)
+ * @tasklet: bottom half to finish transaction work
+ * @lock: serializes enqueue/dequeue operations to descriptors lists
+ * @completed_cookie: identifier for the most recently completed operation
+ * @active_list: list of descriptors dmaengine is being running on
+ * @queue: list of descriptors ready to be submitted to engine
+ * @free_list: list of descriptors usable by the channel
+ * @descs_allocated: records the actual size of the descriptor pool
+ */
+struct at_dma_chan {
+ struct dma_chan chan_common;
+ struct at_dma *device;
+ void __iomem *ch_regs;
+ u8 mask;
+ unsigned long error_status;
+ struct tasklet_struct tasklet;
+
+ spinlock_t lock;
+
+ /* these other elements are all protected by lock */
+ dma_cookie_t completed_cookie;
+ struct list_head active_list;
+ struct list_head queue;
+ struct list_head free_list;
+ unsigned int descs_allocated;
+};
+
+#define channel_readl(atchan, name) \
+ __raw_readl((atchan)->ch_regs + ATC_##name##_OFFSET)
+
+#define channel_writel(atchan, name, val) \
+ __raw_writel((val), (atchan)->ch_regs + ATC_##name##_OFFSET)
+
+static inline struct at_dma_chan *to_at_dma_chan(struct dma_chan *dchan)
+{
+ return container_of(dchan, struct at_dma_chan, chan_common);
+}
+
+
+/*-- Controller ------------------------------------------------------*/
+
+/**
+ * struct at_dma - internal representation of an Atmel HDMA Controller
+ * @chan_common: common dmaengine dma_device object members
+ * @ch_regs: memory mapped register base
+ * @clk: dma controller clock
+ * @all_chan_mask: all channels availlable in a mask
+ * @dma_desc_pool: base of DMA descriptor region (DMA address)
+ * @chan: channels table to store at_dma_chan structures
+ */
+struct at_dma {
+ struct dma_device dma_common;
+ void __iomem *regs;
+ struct clk *clk;
+
+ u8 all_chan_mask;
+
+ struct dma_pool *dma_desc_pool;
+ /* AT THE END channels table */
+ struct at_dma_chan chan[0];
+};
+
+#define dma_readl(atdma, name) \
+ __raw_readl((atdma)->regs + AT_DMA_##name)
+#define dma_writel(atdma, name, val) \
+ __raw_writel((val), (atdma)->regs + AT_DMA_##name)
+
+static inline struct at_dma *to_at_dma(struct dma_device *ddev)
+{
+ return container_of(ddev, struct at_dma, dma_common);
+}
+
+
+/*-- Helper functions ------------------------------------------------*/
+
+static struct device *chan2dev(struct dma_chan *chan)
+{
+ return &chan->dev->device;
+}
+static struct device *chan2parent(struct dma_chan *chan)
+{
+ return chan->dev->device.parent;
+}
+
+#if defined(VERBOSE_DEBUG)
+static void vdbg_dump_regs(struct at_dma_chan *atchan)
+{
+ struct at_dma *atdma = to_at_dma(atchan->chan_common.device);
+
+ dev_err(chan2dev(&atchan->chan_common),
+ " channel %d : imr = 0x%x, chsr = 0x%x\n",
+ atchan->chan_common.chan_id,
+ dma_readl(atdma, EBCIMR),
+ dma_readl(atdma, CHSR));
+
+ dev_err(chan2dev(&atchan->chan_common),
+ " channel: s0x%x d0x%x ctrl0x%x:0x%x cfg0x%x l0x%x\n",
+ channel_readl(atchan, SADDR),
+ channel_readl(atchan, DADDR),
+ channel_readl(atchan, CTRLA),
+ channel_readl(atchan, CTRLB),
+ channel_readl(atchan, CFG),
+ channel_readl(atchan, DSCR));
+}
+#else
+static void vdbg_dump_regs(struct at_dma_chan *atchan) {}
+#endif
+
+static void atc_dump_lli(struct at_dma_chan *atchan, struct at_lli *lli)
+{
+ dev_printk(KERN_CRIT, chan2dev(&atchan->chan_common),
+ " desc: s0x%x d0x%x ctrl0x%x:0x%x l0x%x\n",
+ lli->saddr, lli->daddr,
+ lli->ctrla, lli->ctrlb, lli->dscr);
+}
+
+
+static void atc_setup_irq(struct at_dma_chan *atchan, int on)
+{
+ struct at_dma *atdma = to_at_dma(atchan->chan_common.device);
+ u32 ebci;
+
+ /* enable interrupts on buffer chain completion & error */
+ ebci = AT_DMA_CBTC(atchan->chan_common.chan_id)
+ | AT_DMA_ERR(atchan->chan_common.chan_id);
+ if (on)
+ dma_writel(atdma, EBCIER, ebci);
+ else
+ dma_writel(atdma, EBCIDR, ebci);
+}
+
+static inline void atc_enable_irq(struct at_dma_chan *atchan)
+{
+ atc_setup_irq(atchan, 1);
+}
+
+static inline void atc_disable_irq(struct at_dma_chan *atchan)
+{
+ atc_setup_irq(atchan, 0);
+}
+
+
+/**
+ * atc_chan_is_enabled - test if given channel is enabled
+ * @atchan: channel we want to test status
+ */
+static inline int atc_chan_is_enabled(struct at_dma_chan *atchan)
+{
+ struct at_dma *atdma = to_at_dma(atchan->chan_common.device);
+
+ return !!(dma_readl(atdma, CHSR) & atchan->mask);
+}
+
+
+/**
+ * set_desc_eol - set end-of-link to descriptor so it will end transfer
+ * @desc: descriptor, signle or at the end of a chain, to end chain on
+ */
+static void set_desc_eol(struct at_desc *desc)
+{
+ desc->lli.ctrlb |= ATC_SRC_DSCR_DIS | ATC_DST_DSCR_DIS;
+ desc->lli.dscr = 0;
+}
+
+#endif /* AT_HDMAC_REGS_H */
diff --git a/drivers/dma/coh901318.c b/drivers/dma/coh901318.c
new file mode 100644
index 00000000000..64a937262a4
--- /dev/null
+++ b/drivers/dma/coh901318.c
@@ -0,0 +1,1323 @@
+/*
+ * driver/dma/coh901318.c
+ *
+ * Copyright (C) 2007-2009 ST-Ericsson
+ * License terms: GNU General Public License (GPL) version 2
+ * DMA driver for COH 901 318
+ * Author: Per Friden <per.friden@stericsson.com>
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h> /* printk() */
+#include <linux/fs.h> /* everything... */
+#include <linux/slab.h> /* kmalloc() */
+#include <linux/dmaengine.h>
+#include <linux/platform_device.h>
+#include <linux/device.h>
+#include <linux/irqreturn.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/uaccess.h>
+#include <linux/debugfs.h>
+#include <mach/coh901318.h>
+
+#include "coh901318_lli.h"
+
+#define COHC_2_DEV(cohc) (&cohc->chan.dev->device)
+
+#ifdef VERBOSE_DEBUG
+#define COH_DBG(x) ({ if (1) x; 0; })
+#else
+#define COH_DBG(x) ({ if (0) x; 0; })
+#endif
+
+struct coh901318_desc {
+ struct dma_async_tx_descriptor desc;
+ struct list_head node;
+ struct scatterlist *sg;
+ unsigned int sg_len;
+ struct coh901318_lli *data;
+ enum dma_data_direction dir;
+ int pending_irqs;
+ unsigned long flags;
+};
+
+struct coh901318_base {
+ struct device *dev;
+ void __iomem *virtbase;
+ struct coh901318_pool pool;
+ struct powersave pm;
+ struct dma_device dma_slave;
+ struct dma_device dma_memcpy;
+ struct coh901318_chan *chans;
+ struct coh901318_platform *platform;
+};
+
+struct coh901318_chan {
+ spinlock_t lock;
+ int allocated;
+ int completed;
+ int id;
+ int stopped;
+
+ struct work_struct free_work;
+ struct dma_chan chan;
+
+ struct tasklet_struct tasklet;
+
+ struct list_head active;
+ struct list_head queue;
+ struct list_head free;
+
+ unsigned long nbr_active_done;
+ unsigned long busy;
+ int pending_irqs;
+
+ struct coh901318_base *base;
+};
+
+static void coh901318_list_print(struct coh901318_chan *cohc,
+ struct coh901318_lli *lli)
+{
+ struct coh901318_lli *l;
+ dma_addr_t addr = virt_to_phys(lli);
+ int i = 0;
+
+ while (addr) {
+ l = phys_to_virt(addr);
+ dev_vdbg(COHC_2_DEV(cohc), "i %d, lli %p, ctrl 0x%x, src 0x%x"
+ ", dst 0x%x, link 0x%x link_virt 0x%p\n",
+ i, l, l->control, l->src_addr, l->dst_addr,
+ l->link_addr, phys_to_virt(l->link_addr));
+ i++;
+ addr = l->link_addr;
+ }
+}
+
+#ifdef CONFIG_DEBUG_FS
+
+#define COH901318_DEBUGFS_ASSIGN(x, y) (x = y)
+
+static struct coh901318_base *debugfs_dma_base;
+static struct dentry *dma_dentry;
+
+static int coh901318_debugfs_open(struct inode *inode, struct file *file)
+{
+
+ file->private_data = inode->i_private;
+ return 0;
+}
+
+static int coh901318_debugfs_read(struct file *file, char __user *buf,
+ size_t count, loff_t *f_pos)
+{
+ u64 started_channels = debugfs_dma_base->pm.started_channels;
+ int pool_count = debugfs_dma_base->pool.debugfs_pool_counter;
+ int i;
+ int ret = 0;
+ char *dev_buf;
+ char *tmp;
+ int dev_size;
+
+ dev_buf = kmalloc(4*1024, GFP_KERNEL);
+ if (dev_buf == NULL)
+ goto err_kmalloc;
+ tmp = dev_buf;
+
+ tmp += sprintf(tmp, "DMA -- enable dma channels\n");
+
+ for (i = 0; i < debugfs_dma_base->platform->max_channels; i++)
+ if (started_channels & (1 << i))
+ tmp += sprintf(tmp, "channel %d\n", i);
+
+ tmp += sprintf(tmp, "Pool alloc nbr %d\n", pool_count);
+ dev_size = tmp - dev_buf;
+
+ /* No more to read if offset != 0 */
+ if (*f_pos > dev_size)
+ goto out;
+
+ if (count > dev_size - *f_pos)
+ count = dev_size - *f_pos;
+
+ if (copy_to_user(buf, dev_buf + *f_pos, count))
+ ret = -EINVAL;
+ ret = count;
+ *f_pos += count;
+
+ out:
+ kfree(dev_buf);
+ return ret;
+
+ err_kmalloc:
+ return 0;
+}
+
+static const struct file_operations coh901318_debugfs_status_operations = {
+ .owner = THIS_MODULE,
+ .open = coh901318_debugfs_open,
+ .read = coh901318_debugfs_read,
+};
+
+
+static int __init init_coh901318_debugfs(void)
+{
+
+ dma_dentry = debugfs_create_dir("dma", NULL);
+
+ (void) debugfs_create_file("status",
+ S_IFREG | S_IRUGO,
+ dma_dentry, NULL,
+ &coh901318_debugfs_status_operations);
+ return 0;
+}
+
+static void __exit exit_coh901318_debugfs(void)
+{
+ debugfs_remove_recursive(dma_dentry);
+}
+
+module_init(init_coh901318_debugfs);
+module_exit(exit_coh901318_debugfs);
+#else
+
+#define COH901318_DEBUGFS_ASSIGN(x, y)
+
+#endif /* CONFIG_DEBUG_FS */
+
+static inline struct coh901318_chan *to_coh901318_chan(struct dma_chan *chan)
+{
+ return container_of(chan, struct coh901318_chan, chan);
+}
+
+static inline dma_addr_t
+cohc_dev_addr(struct coh901318_chan *cohc)
+{
+ return cohc->base->platform->chan_conf[cohc->id].dev_addr;
+}
+
+static inline const struct coh901318_params *
+cohc_chan_param(struct coh901318_chan *cohc)
+{
+ return &cohc->base->platform->chan_conf[cohc->id].param;
+}
+
+static inline const struct coh_dma_channel *
+cohc_chan_conf(struct coh901318_chan *cohc)
+{
+ return &cohc->base->platform->chan_conf[cohc->id];
+}
+
+static void enable_powersave(struct coh901318_chan *cohc)
+{
+ unsigned long flags;
+ struct powersave *pm = &cohc->base->pm;
+
+ spin_lock_irqsave(&pm->lock, flags);
+
+ pm->started_channels &= ~(1ULL << cohc->id);
+
+ if (!pm->started_channels) {
+ /* DMA no longer intends to access memory */
+ cohc->base->platform->access_memory_state(cohc->base->dev,
+ false);
+ }
+
+ spin_unlock_irqrestore(&pm->lock, flags);
+}
+static void disable_powersave(struct coh901318_chan *cohc)
+{
+ unsigned long flags;
+ struct powersave *pm = &cohc->base->pm;
+
+ spin_lock_irqsave(&pm->lock, flags);
+
+ if (!pm->started_channels) {
+ /* DMA intends to access memory */
+ cohc->base->platform->access_memory_state(cohc->base->dev,
+ true);
+ }
+
+ pm->started_channels |= (1ULL << cohc->id);
+
+ spin_unlock_irqrestore(&pm->lock, flags);
+}
+
+static inline int coh901318_set_ctrl(struct coh901318_chan *cohc, u32 control)
+{
+ int channel = cohc->id;
+ void __iomem *virtbase = cohc->base->virtbase;
+
+ writel(control,
+ virtbase + COH901318_CX_CTRL +
+ COH901318_CX_CTRL_SPACING * channel);
+ return 0;
+}
+
+static inline int coh901318_set_conf(struct coh901318_chan *cohc, u32 conf)
+{
+ int channel = cohc->id;
+ void __iomem *virtbase = cohc->base->virtbase;
+
+ writel(conf,
+ virtbase + COH901318_CX_CFG +
+ COH901318_CX_CFG_SPACING*channel);
+ return 0;
+}
+
+
+static int coh901318_start(struct coh901318_chan *cohc)
+{
+ u32 val;
+ int channel = cohc->id;
+ void __iomem *virtbase = cohc->base->virtbase;
+
+ disable_powersave(cohc);
+
+ val = readl(virtbase + COH901318_CX_CFG +
+ COH901318_CX_CFG_SPACING * channel);
+
+ /* Enable channel */
+ val |= COH901318_CX_CFG_CH_ENABLE;
+ writel(val, virtbase + COH901318_CX_CFG +
+ COH901318_CX_CFG_SPACING * channel);
+
+ return 0;
+}
+
+static int coh901318_prep_linked_list(struct coh901318_chan *cohc,
+ struct coh901318_lli *data)
+{
+ int channel = cohc->id;
+ void __iomem *virtbase = cohc->base->virtbase;
+
+ BUG_ON(readl(virtbase + COH901318_CX_STAT +
+ COH901318_CX_STAT_SPACING*channel) &
+ COH901318_CX_STAT_ACTIVE);
+
+ writel(data->src_addr,
+ virtbase + COH901318_CX_SRC_ADDR +
+ COH901318_CX_SRC_ADDR_SPACING * channel);
+
+ writel(data->dst_addr, virtbase +
+ COH901318_CX_DST_ADDR +
+ COH901318_CX_DST_ADDR_SPACING * channel);
+
+ writel(data->link_addr, virtbase + COH901318_CX_LNK_ADDR +
+ COH901318_CX_LNK_ADDR_SPACING * channel);
+
+ writel(data->control, virtbase + COH901318_CX_CTRL +
+ COH901318_CX_CTRL_SPACING * channel);
+
+ return 0;
+}
+static dma_cookie_t
+coh901318_assign_cookie(struct coh901318_chan *cohc,
+ struct coh901318_desc *cohd)
+{
+ dma_cookie_t cookie = cohc->chan.cookie;
+
+ if (++cookie < 0)
+ cookie = 1;
+
+ cohc->chan.cookie = cookie;
+ cohd->desc.cookie = cookie;
+
+ return cookie;
+}
+
+static struct coh901318_desc *
+coh901318_desc_get(struct coh901318_chan *cohc)
+{
+ struct coh901318_desc *desc;
+
+ if (list_empty(&cohc->free)) {
+ /* alloc new desc because we're out of used ones
+ * TODO: alloc a pile of descs instead of just one,
+ * avoid many small allocations.
+ */
+ desc = kmalloc(sizeof(struct coh901318_desc), GFP_NOWAIT);
+ if (desc == NULL)
+ goto out;
+ INIT_LIST_HEAD(&desc->node);
+ } else {
+ /* Reuse an old desc. */
+ desc = list_first_entry(&cohc->free,
+ struct coh901318_desc,
+ node);
+ list_del(&desc->node);
+ }
+
+ out:
+ return desc;
+}
+
+static void
+coh901318_desc_free(struct coh901318_chan *cohc, struct coh901318_desc *cohd)
+{
+ list_add_tail(&cohd->node, &cohc->free);
+}
+
+/* call with irq lock held */
+static void
+coh901318_desc_submit(struct coh901318_chan *cohc, struct coh901318_desc *desc)
+{
+ list_add_tail(&desc->node, &cohc->active);
+
+ BUG_ON(cohc->pending_irqs != 0);
+
+ cohc->pending_irqs = desc->pending_irqs;
+}
+
+static struct coh901318_desc *
+coh901318_first_active_get(struct coh901318_chan *cohc)
+{
+ struct coh901318_desc *d;
+
+ if (list_empty(&cohc->active))
+ return NULL;
+
+ d = list_first_entry(&cohc->active,
+ struct coh901318_desc,
+ node);
+ return d;
+}
+
+static void
+coh901318_desc_remove(struct coh901318_desc *cohd)
+{
+ list_del(&cohd->node);
+}
+
+static void
+coh901318_desc_queue(struct coh901318_chan *cohc, struct coh901318_desc *desc)
+{
+ list_add_tail(&desc->node, &cohc->queue);
+}
+
+static struct coh901318_desc *
+coh901318_first_queued(struct coh901318_chan *cohc)
+{
+ struct coh901318_desc *d;
+
+ if (list_empty(&cohc->queue))
+ return NULL;
+
+ d = list_first_entry(&cohc->queue,
+ struct coh901318_desc,
+ node);
+ return d;
+}
+
+/*
+ * DMA start/stop controls
+ */
+u32 coh901318_get_bytes_left(struct dma_chan *chan)
+{
+ unsigned long flags;
+ u32 ret;
+ struct coh901318_chan *cohc = to_coh901318_chan(chan);
+
+ spin_lock_irqsave(&cohc->lock, flags);
+
+ /* Read transfer count value */
+ ret = readl(cohc->base->virtbase +
+ COH901318_CX_CTRL+COH901318_CX_CTRL_SPACING *
+ cohc->id) & COH901318_CX_CTRL_TC_VALUE_MASK;
+
+ spin_unlock_irqrestore(&cohc->lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(coh901318_get_bytes_left);
+
+
+/* Stops a transfer without losing data. Enables power save.
+ Use this function in conjunction with coh901318_continue(..)
+*/
+void coh901318_stop(struct dma_chan *chan)
+{
+ u32 val;
+ unsigned long flags;
+ struct coh901318_chan *cohc = to_coh901318_chan(chan);
+ int channel = cohc->id;
+ void __iomem *virtbase = cohc->base->virtbase;
+
+ spin_lock_irqsave(&cohc->lock, flags);
+
+ /* Disable channel in HW */
+ val = readl(virtbase + COH901318_CX_CFG +
+ COH901318_CX_CFG_SPACING * channel);
+
+ /* Stopping infinit transfer */
+ if ((val & COH901318_CX_CTRL_TC_ENABLE) == 0 &&
+ (val & COH901318_CX_CFG_CH_ENABLE))
+ cohc->stopped = 1;
+
+
+ val &= ~COH901318_CX_CFG_CH_ENABLE;
+ /* Enable twice, HW bug work around */
+ writel(val, virtbase + COH901318_CX_CFG +
+ COH901318_CX_CFG_SPACING * channel);
+ writel(val, virtbase + COH901318_CX_CFG +
+ COH901318_CX_CFG_SPACING * channel);
+
+ /* Spin-wait for it to actually go inactive */
+ while (readl(virtbase + COH901318_CX_STAT+COH901318_CX_STAT_SPACING *
+ channel) & COH901318_CX_STAT_ACTIVE)
+ cpu_relax();
+
+ /* Check if we stopped an active job */
+ if ((readl(virtbase + COH901318_CX_CTRL+COH901318_CX_CTRL_SPACING *
+ channel) & COH901318_CX_CTRL_TC_VALUE_MASK) > 0)
+ cohc->stopped = 1;
+
+ enable_powersave(cohc);
+
+ spin_unlock_irqrestore(&cohc->lock, flags);
+}
+EXPORT_SYMBOL(coh901318_stop);
+
+/* Continues a transfer that has been stopped via 300_dma_stop(..).
+ Power save is handled.
+*/
+void coh901318_continue(struct dma_chan *chan)
+{
+ u32 val;
+ unsigned long flags;
+ struct coh901318_chan *cohc = to_coh901318_chan(chan);
+ int channel = cohc->id;
+
+ spin_lock_irqsave(&cohc->lock, flags);
+
+ disable_powersave(cohc);
+
+ if (cohc->stopped) {
+ /* Enable channel in HW */
+ val = readl(cohc->base->virtbase + COH901318_CX_CFG +
+ COH901318_CX_CFG_SPACING * channel);
+
+ val |= COH901318_CX_CFG_CH_ENABLE;
+
+ writel(val, cohc->base->virtbase + COH901318_CX_CFG +
+ COH901318_CX_CFG_SPACING*channel);
+
+ cohc->stopped = 0;
+ }
+
+ spin_unlock_irqrestore(&cohc->lock, flags);
+}
+EXPORT_SYMBOL(coh901318_continue);
+
+bool coh901318_filter_id(struct dma_chan *chan, void *chan_id)
+{
+ unsigned int ch_nr = (unsigned int) chan_id;
+
+ if (ch_nr == to_coh901318_chan(chan)->id)
+ return true;
+
+ return false;
+}
+EXPORT_SYMBOL(coh901318_filter_id);
+
+/*
+ * DMA channel allocation
+ */
+static int coh901318_config(struct coh901318_chan *cohc,
+ struct coh901318_params *param)
+{
+ unsigned long flags;
+ const struct coh901318_params *p;
+ int channel = cohc->id;
+ void __iomem *virtbase = cohc->base->virtbase;
+
+ spin_lock_irqsave(&cohc->lock, flags);
+
+ if (param)
+ p = param;
+ else
+ p = &cohc->base->platform->chan_conf[channel].param;
+
+ /* Clear any pending BE or TC interrupt */
+ if (channel < 32) {
+ writel(1 << channel, virtbase + COH901318_BE_INT_CLEAR1);
+ writel(1 << channel, virtbase + COH901318_TC_INT_CLEAR1);
+ } else {
+ writel(1 << (channel - 32), virtbase +
+ COH901318_BE_INT_CLEAR2);
+ writel(1 << (channel - 32), virtbase +
+ COH901318_TC_INT_CLEAR2);
+ }
+
+ coh901318_set_conf(cohc, p->config);
+ coh901318_set_ctrl(cohc, p->ctrl_lli_last);
+
+ spin_unlock_irqrestore(&cohc->lock, flags);
+
+ return 0;
+}
+
+/* must lock when calling this function
+ * start queued jobs, if any
+ * TODO: start all queued jobs in one go
+ *
+ * Returns descriptor if queued job is started otherwise NULL.
+ * If the queue is empty NULL is returned.
+ */
+static struct coh901318_desc *coh901318_queue_start(struct coh901318_chan *cohc)
+{
+ struct coh901318_desc *cohd_que;
+
+ /* start queued jobs, if any
+ * TODO: transmit all queued jobs in one go
+ */
+ cohd_que = coh901318_first_queued(cohc);
+
+ if (cohd_que != NULL) {
+ /* Remove from queue */
+ coh901318_desc_remove(cohd_que);
+ /* initiate DMA job */
+ cohc->busy = 1;
+
+ coh901318_desc_submit(cohc, cohd_que);
+
+ coh901318_prep_linked_list(cohc, cohd_que->data);
+
+ /* start dma job */
+ coh901318_start(cohc);
+
+ }
+
+ return cohd_que;
+}
+
+static void dma_tasklet(unsigned long data)
+{
+ struct coh901318_chan *cohc = (struct coh901318_chan *) data;
+ struct coh901318_desc *cohd_fin;
+ unsigned long flags;
+ dma_async_tx_callback callback;
+ void *callback_param;
+
+ spin_lock_irqsave(&cohc->lock, flags);
+
+ /* get first active entry from list */
+ cohd_fin = coh901318_first_active_get(cohc);
+
+ BUG_ON(cohd_fin->pending_irqs == 0);
+
+ if (cohd_fin == NULL)
+ goto err;
+
+ cohd_fin->pending_irqs--;
+ cohc->completed = cohd_fin->desc.cookie;
+
+ if (cohc->nbr_active_done == 0)
+ return;
+
+ if (!cohd_fin->pending_irqs) {
+ /* release the lli allocation*/
+ coh901318_lli_free(&cohc->base->pool, &cohd_fin->data);
+ }
+
+ dev_vdbg(COHC_2_DEV(cohc), "[%s] chan_id %d pending_irqs %d"
+ " nbr_active_done %ld\n", __func__,
+ cohc->id, cohc->pending_irqs, cohc->nbr_active_done);
+
+ /* callback to client */
+ callback = cohd_fin->desc.callback;
+ callback_param = cohd_fin->desc.callback_param;
+
+ if (!cohd_fin->pending_irqs) {
+ coh901318_desc_remove(cohd_fin);
+
+ /* return desc to free-list */
+ coh901318_desc_free(cohc, cohd_fin);
+ }
+
+ if (cohc->nbr_active_done)
+ cohc->nbr_active_done--;
+
+ if (cohc->nbr_active_done) {
+ if (cohc_chan_conf(cohc)->priority_high)
+ tasklet_hi_schedule(&cohc->tasklet);
+ else
+ tasklet_schedule(&cohc->tasklet);
+ }
+ spin_unlock_irqrestore(&cohc->lock, flags);
+
+ if (callback)
+ callback(callback_param);
+
+ return;
+
+ err:
+ spin_unlock_irqrestore(&cohc->lock, flags);
+ dev_err(COHC_2_DEV(cohc), "[%s] No active dma desc\n", __func__);
+}
+
+
+/* called from interrupt context */
+static void dma_tc_handle(struct coh901318_chan *cohc)
+{
+ BUG_ON(!cohc->allocated && (list_empty(&cohc->active) ||
+ list_empty(&cohc->queue)));
+
+ if (!cohc->allocated)
+ return;
+
+ BUG_ON(cohc->pending_irqs == 0);
+
+ cohc->pending_irqs--;
+ cohc->nbr_active_done++;
+
+ if (cohc->pending_irqs == 0 && coh901318_queue_start(cohc) == NULL)
+ cohc->busy = 0;
+
+ BUG_ON(list_empty(&cohc->active));
+
+ if (cohc_chan_conf(cohc)->priority_high)
+ tasklet_hi_schedule(&cohc->tasklet);
+ else
+ tasklet_schedule(&cohc->tasklet);
+}
+
+
+static irqreturn_t dma_irq_handler(int irq, void *dev_id)
+{
+ u32 status1;
+ u32 status2;
+ int i;
+ int ch;
+ struct coh901318_base *base = dev_id;
+ struct coh901318_chan *cohc;
+ void __iomem *virtbase = base->virtbase;
+
+ status1 = readl(virtbase + COH901318_INT_STATUS1);
+ status2 = readl(virtbase + COH901318_INT_STATUS2);
+
+ if (unlikely(status1 == 0 && status2 == 0)) {
+ dev_warn(base->dev, "spurious DMA IRQ from no channel!\n");
+ return IRQ_HANDLED;
+ }
+
+ /* TODO: consider handle IRQ in tasklet here to
+ * minimize interrupt latency */
+
+ /* Check the first 32 DMA channels for IRQ */
+ while (status1) {
+ /* Find first bit set, return as a number. */
+ i = ffs(status1) - 1;
+ ch = i;
+
+ cohc = &base->chans[ch];
+ spin_lock(&cohc->lock);
+
+ /* Mask off this bit */
+ status1 &= ~(1 << i);
+ /* Check the individual channel bits */
+ if (test_bit(i, virtbase + COH901318_BE_INT_STATUS1)) {
+ dev_crit(COHC_2_DEV(cohc),
+ "DMA bus error on channel %d!\n", ch);
+ BUG_ON(1);
+ /* Clear BE interrupt */
+ __set_bit(i, virtbase + COH901318_BE_INT_CLEAR1);
+ } else {
+ /* Caused by TC, really? */
+ if (unlikely(!test_bit(i, virtbase +
+ COH901318_TC_INT_STATUS1))) {
+ dev_warn(COHC_2_DEV(cohc),
+ "ignoring interrupt not caused by terminal count on channel %d\n", ch);
+ /* Clear TC interrupt */
+ BUG_ON(1);
+ __set_bit(i, virtbase + COH901318_TC_INT_CLEAR1);
+ } else {
+ /* Enable powersave if transfer has finished */
+ if (!(readl(virtbase + COH901318_CX_STAT +
+ COH901318_CX_STAT_SPACING*ch) &
+ COH901318_CX_STAT_ENABLED)) {
+ enable_powersave(cohc);
+ }
+
+ /* Must clear TC interrupt before calling
+ * dma_tc_handle
+ * in case tc_handle initate a new dma job
+ */
+ __set_bit(i, virtbase + COH901318_TC_INT_CLEAR1);
+
+ dma_tc_handle(cohc);
+ }
+ }
+ spin_unlock(&cohc->lock);
+ }
+
+ /* Check the remaining 32 DMA channels for IRQ */
+ while (status2) {
+ /* Find first bit set, return as a number. */
+ i = ffs(status2) - 1;
+ ch = i + 32;
+ cohc = &base->chans[ch];
+ spin_lock(&cohc->lock);
+
+ /* Mask off this bit */
+ status2 &= ~(1 << i);
+ /* Check the individual channel bits */
+ if (test_bit(i, virtbase + COH901318_BE_INT_STATUS2)) {
+ dev_crit(COHC_2_DEV(cohc),
+ "DMA bus error on channel %d!\n", ch);
+ /* Clear BE interrupt */
+ BUG_ON(1);
+ __set_bit(i, virtbase + COH901318_BE_INT_CLEAR2);
+ } else {
+ /* Caused by TC, really? */
+ if (unlikely(!test_bit(i, virtbase +
+ COH901318_TC_INT_STATUS2))) {
+ dev_warn(COHC_2_DEV(cohc),
+ "ignoring interrupt not caused by terminal count on channel %d\n", ch);
+ /* Clear TC interrupt */
+ __set_bit(i, virtbase + COH901318_TC_INT_CLEAR2);
+ BUG_ON(1);
+ } else {
+ /* Enable powersave if transfer has finished */
+ if (!(readl(virtbase + COH901318_CX_STAT +
+ COH901318_CX_STAT_SPACING*ch) &
+ COH901318_CX_STAT_ENABLED)) {
+ enable_powersave(cohc);
+ }
+ /* Must clear TC interrupt before calling
+ * dma_tc_handle
+ * in case tc_handle initate a new dma job
+ */
+ __set_bit(i, virtbase + COH901318_TC_INT_CLEAR2);
+
+ dma_tc_handle(cohc);
+ }
+ }
+ spin_unlock(&cohc->lock);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static int coh901318_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct coh901318_chan *cohc = to_coh901318_chan(chan);
+
+ dev_vdbg(COHC_2_DEV(cohc), "[%s] DMA channel %d\n",
+ __func__, cohc->id);
+
+ if (chan->client_count > 1)
+ return -EBUSY;
+
+ coh901318_config(cohc, NULL);
+
+ cohc->allocated = 1;
+ cohc->completed = chan->cookie = 1;
+
+ return 1;
+}
+
+static void
+coh901318_free_chan_resources(struct dma_chan *chan)
+{
+ struct coh901318_chan *cohc = to_coh901318_chan(chan);
+ int channel = cohc->id;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cohc->lock, flags);
+
+ /* Disable HW */
+ writel(0x00000000U, cohc->base->virtbase + COH901318_CX_CFG +
+ COH901318_CX_CFG_SPACING*channel);
+ writel(0x00000000U, cohc->base->virtbase + COH901318_CX_CTRL +
+ COH901318_CX_CTRL_SPACING*channel);
+
+ cohc->allocated = 0;
+
+ spin_unlock_irqrestore(&cohc->lock, flags);
+
+ chan->device->device_terminate_all(chan);
+}
+
+
+static dma_cookie_t
+coh901318_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct coh901318_desc *cohd = container_of(tx, struct coh901318_desc,
+ desc);
+ struct coh901318_chan *cohc = to_coh901318_chan(tx->chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&cohc->lock, flags);
+
+ tx->cookie = coh901318_assign_cookie(cohc, cohd);
+
+ coh901318_desc_queue(cohc, cohd);
+
+ spin_unlock_irqrestore(&cohc->lock, flags);
+
+ return tx->cookie;
+}
+
+static struct dma_async_tx_descriptor *
+coh901318_prep_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+ size_t size, unsigned long flags)
+{
+ struct coh901318_lli *data;
+ struct coh901318_desc *cohd;
+ unsigned long flg;
+ struct coh901318_chan *cohc = to_coh901318_chan(chan);
+ int lli_len;
+ u32 ctrl_last = cohc_chan_param(cohc)->ctrl_lli_last;
+
+ spin_lock_irqsave(&cohc->lock, flg);
+
+ dev_vdbg(COHC_2_DEV(cohc),
+ "[%s] channel %d src 0x%x dest 0x%x size %d\n",
+ __func__, cohc->id, src, dest, size);
+
+ if (flags & DMA_PREP_INTERRUPT)
+ /* Trigger interrupt after last lli */
+ ctrl_last |= COH901318_CX_CTRL_TC_IRQ_ENABLE;
+
+ lli_len = size >> MAX_DMA_PACKET_SIZE_SHIFT;
+ if ((lli_len << MAX_DMA_PACKET_SIZE_SHIFT) < size)
+ lli_len++;
+
+ data = coh901318_lli_alloc(&cohc->base->pool, lli_len);
+
+ if (data == NULL)
+ goto err;
+
+ cohd = coh901318_desc_get(cohc);
+ cohd->sg = NULL;
+ cohd->sg_len = 0;
+ cohd->data = data;
+
+ cohd->pending_irqs =
+ coh901318_lli_fill_memcpy(
+ &cohc->base->pool, data, src, size, dest,
+ cohc_chan_param(cohc)->ctrl_lli_chained,
+ ctrl_last);
+ cohd->flags = flags;
+
+ COH_DBG(coh901318_list_print(cohc, data));
+
+ dma_async_tx_descriptor_init(&cohd->desc, chan);
+
+ cohd->desc.tx_submit = coh901318_tx_submit;
+
+ spin_unlock_irqrestore(&cohc->lock, flg);
+
+ return &cohd->desc;
+ err:
+ spin_unlock_irqrestore(&cohc->lock, flg);
+ return NULL;
+}
+
+static struct dma_async_tx_descriptor *
+coh901318_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+ unsigned int sg_len, enum dma_data_direction direction,
+ unsigned long flags)
+{
+ struct coh901318_chan *cohc = to_coh901318_chan(chan);
+ struct coh901318_lli *data;
+ struct coh901318_desc *cohd;
+ struct scatterlist *sg;
+ int len = 0;
+ int size;
+ int i;
+ u32 ctrl_chained = cohc_chan_param(cohc)->ctrl_lli_chained;
+ u32 ctrl = cohc_chan_param(cohc)->ctrl_lli;
+ u32 ctrl_last = cohc_chan_param(cohc)->ctrl_lli_last;
+ unsigned long flg;
+
+ if (!sgl)
+ goto out;
+ if (sgl->length == 0)
+ goto out;
+
+ spin_lock_irqsave(&cohc->lock, flg);
+
+ dev_vdbg(COHC_2_DEV(cohc), "[%s] sg_len %d dir %d\n",
+ __func__, sg_len, direction);
+
+ if (flags & DMA_PREP_INTERRUPT)
+ /* Trigger interrupt after last lli */
+ ctrl_last |= COH901318_CX_CTRL_TC_IRQ_ENABLE;
+
+ cohd = coh901318_desc_get(cohc);
+ cohd->sg = NULL;
+ cohd->sg_len = 0;
+ cohd->dir = direction;
+
+ if (direction == DMA_TO_DEVICE) {
+ u32 tx_flags = COH901318_CX_CTRL_PRDD_SOURCE |
+ COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE;
+
+ ctrl_chained |= tx_flags;
+ ctrl_last |= tx_flags;
+ ctrl |= tx_flags;
+ } else if (direction == DMA_FROM_DEVICE) {
+ u32 rx_flags = COH901318_CX_CTRL_PRDD_DEST |
+ COH901318_CX_CTRL_DST_ADDR_INC_ENABLE;
+
+ ctrl_chained |= rx_flags;
+ ctrl_last |= rx_flags;
+ ctrl |= rx_flags;
+ } else
+ goto err_direction;
+
+ dma_async_tx_descriptor_init(&cohd->desc, chan);
+
+ cohd->desc.tx_submit = coh901318_tx_submit;
+
+
+ /* The dma only supports transmitting packages up to
+ * MAX_DMA_PACKET_SIZE. Calculate to total number of
+ * dma elemts required to send the entire sg list
+ */
+ for_each_sg(sgl, sg, sg_len, i) {
+ unsigned int factor;
+ size = sg_dma_len(sg);
+
+ if (size <= MAX_DMA_PACKET_SIZE) {
+ len++;
+ continue;
+ }
+
+ factor = size >> MAX_DMA_PACKET_SIZE_SHIFT;
+ if ((factor << MAX_DMA_PACKET_SIZE_SHIFT) < size)
+ factor++;
+
+ len += factor;
+ }
+
+ data = coh901318_lli_alloc(&cohc->base->pool, len);
+
+ if (data == NULL)
+ goto err_dma_alloc;
+
+ /* initiate allocated data list */
+ cohd->pending_irqs =
+ coh901318_lli_fill_sg(&cohc->base->pool, data, sgl, sg_len,
+ cohc_dev_addr(cohc),
+ ctrl_chained,
+ ctrl,
+ ctrl_last,
+ direction, COH901318_CX_CTRL_TC_IRQ_ENABLE);
+ cohd->data = data;
+
+ cohd->flags = flags;
+
+ COH_DBG(coh901318_list_print(cohc, data));
+
+ spin_unlock_irqrestore(&cohc->lock, flg);
+
+ return &cohd->desc;
+ err_dma_alloc:
+ err_direction:
+ coh901318_desc_remove(cohd);
+ coh901318_desc_free(cohc, cohd);
+ spin_unlock_irqrestore(&cohc->lock, flg);
+ out:
+ return NULL;
+}
+
+static enum dma_status
+coh901318_is_tx_complete(struct dma_chan *chan,
+ dma_cookie_t cookie, dma_cookie_t *done,
+ dma_cookie_t *used)
+{
+ struct coh901318_chan *cohc = to_coh901318_chan(chan);
+ dma_cookie_t last_used;
+ dma_cookie_t last_complete;
+ int ret;
+
+ last_complete = cohc->completed;
+ last_used = chan->cookie;
+
+ ret = dma_async_is_complete(cookie, last_complete, last_used);
+
+ if (done)
+ *done = last_complete;
+ if (used)
+ *used = last_used;
+
+ return ret;
+}
+
+static void
+coh901318_issue_pending(struct dma_chan *chan)
+{
+ struct coh901318_chan *cohc = to_coh901318_chan(chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&cohc->lock, flags);
+
+ /* Busy means that pending jobs are already being processed */
+ if (!cohc->busy)
+ coh901318_queue_start(cohc);
+
+ spin_unlock_irqrestore(&cohc->lock, flags);
+}
+
+static void
+coh901318_terminate_all(struct dma_chan *chan)
+{
+ unsigned long flags;
+ struct coh901318_chan *cohc = to_coh901318_chan(chan);
+ struct coh901318_desc *cohd;
+ void __iomem *virtbase = cohc->base->virtbase;
+
+ coh901318_stop(chan);
+
+ spin_lock_irqsave(&cohc->lock, flags);
+
+ /* Clear any pending BE or TC interrupt */
+ if (cohc->id < 32) {
+ writel(1 << cohc->id, virtbase + COH901318_BE_INT_CLEAR1);
+ writel(1 << cohc->id, virtbase + COH901318_TC_INT_CLEAR1);
+ } else {
+ writel(1 << (cohc->id - 32), virtbase +
+ COH901318_BE_INT_CLEAR2);
+ writel(1 << (cohc->id - 32), virtbase +
+ COH901318_TC_INT_CLEAR2);
+ }
+
+ enable_powersave(cohc);
+
+ while ((cohd = coh901318_first_active_get(cohc))) {
+ /* release the lli allocation*/
+ coh901318_lli_free(&cohc->base->pool, &cohd->data);
+
+ coh901318_desc_remove(cohd);
+
+ /* return desc to free-list */
+ coh901318_desc_free(cohc, cohd);
+ }
+
+ while ((cohd = coh901318_first_queued(cohc))) {
+ /* release the lli allocation*/
+ coh901318_lli_free(&cohc->base->pool, &cohd->data);
+
+ coh901318_desc_remove(cohd);
+
+ /* return desc to free-list */
+ coh901318_desc_free(cohc, cohd);
+ }
+
+
+ cohc->nbr_active_done = 0;
+ cohc->busy = 0;
+ cohc->pending_irqs = 0;
+
+ spin_unlock_irqrestore(&cohc->lock, flags);
+}
+void coh901318_base_init(struct dma_device *dma, const int *pick_chans,
+ struct coh901318_base *base)
+{
+ int chans_i;
+ int i = 0;
+ struct coh901318_chan *cohc;
+
+ INIT_LIST_HEAD(&dma->channels);
+
+ for (chans_i = 0; pick_chans[chans_i] != -1; chans_i += 2) {
+ for (i = pick_chans[chans_i]; i <= pick_chans[chans_i+1]; i++) {
+ cohc = &base->chans[i];
+
+ cohc->base = base;
+ cohc->chan.device = dma;
+ cohc->id = i;
+
+ /* TODO: do we really need this lock if only one
+ * client is connected to each channel?
+ */
+
+ spin_lock_init(&cohc->lock);
+
+ cohc->pending_irqs = 0;
+ cohc->nbr_active_done = 0;
+ cohc->busy = 0;
+ INIT_LIST_HEAD(&cohc->free);
+ INIT_LIST_HEAD(&cohc->active);
+ INIT_LIST_HEAD(&cohc->queue);
+
+ tasklet_init(&cohc->tasklet, dma_tasklet,
+ (unsigned long) cohc);
+
+ list_add_tail(&cohc->chan.device_node,
+ &dma->channels);
+ }
+ }
+}
+
+static int __init coh901318_probe(struct platform_device *pdev)
+{
+ int err = 0;
+ struct coh901318_platform *pdata;
+ struct coh901318_base *base;
+ int irq;
+ struct resource *io;
+
+ io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!io)
+ goto err_get_resource;
+
+ /* Map DMA controller registers to virtual memory */
+ if (request_mem_region(io->start,
+ resource_size(io),
+ pdev->dev.driver->name) == NULL) {
+ err = -EBUSY;
+ goto err_request_mem;
+ }
+
+ pdata = pdev->dev.platform_data;
+ if (!pdata)
+ goto err_no_platformdata;
+
+ base = kmalloc(ALIGN(sizeof(struct coh901318_base), 4) +
+ pdata->max_channels *
+ sizeof(struct coh901318_chan),
+ GFP_KERNEL);
+ if (!base)
+ goto err_alloc_coh_dma_channels;
+
+ base->chans = ((void *)base) + ALIGN(sizeof(struct coh901318_base), 4);
+
+ base->virtbase = ioremap(io->start, resource_size(io));
+ if (!base->virtbase) {
+ err = -ENOMEM;
+ goto err_no_ioremap;
+ }
+
+ base->dev = &pdev->dev;
+ base->platform = pdata;
+ spin_lock_init(&base->pm.lock);
+ base->pm.started_channels = 0;
+
+ COH901318_DEBUGFS_ASSIGN(debugfs_dma_base, base);
+
+ platform_set_drvdata(pdev, base);
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0)
+ goto err_no_irq;
+
+ err = request_irq(irq, dma_irq_handler, IRQF_DISABLED,
+ "coh901318", base);
+ if (err) {
+ dev_crit(&pdev->dev,
+ "Cannot allocate IRQ for DMA controller!\n");
+ goto err_request_irq;
+ }
+
+ err = coh901318_pool_create(&base->pool, &pdev->dev,
+ sizeof(struct coh901318_lli),
+ 32);
+ if (err)
+ goto err_pool_create;
+
+ /* init channels for device transfers */
+ coh901318_base_init(&base->dma_slave, base->platform->chans_slave,
+ base);
+
+ dma_cap_zero(base->dma_slave.cap_mask);
+ dma_cap_set(DMA_SLAVE, base->dma_slave.cap_mask);
+
+ base->dma_slave.device_alloc_chan_resources = coh901318_alloc_chan_resources;
+ base->dma_slave.device_free_chan_resources = coh901318_free_chan_resources;
+ base->dma_slave.device_prep_slave_sg = coh901318_prep_slave_sg;
+ base->dma_slave.device_is_tx_complete = coh901318_is_tx_complete;
+ base->dma_slave.device_issue_pending = coh901318_issue_pending;
+ base->dma_slave.device_terminate_all = coh901318_terminate_all;
+ base->dma_slave.dev = &pdev->dev;
+
+ err = dma_async_device_register(&base->dma_slave);
+
+ if (err)
+ goto err_register_slave;
+
+ /* init channels for memcpy */
+ coh901318_base_init(&base->dma_memcpy, base->platform->chans_memcpy,
+ base);
+
+ dma_cap_zero(base->dma_memcpy.cap_mask);
+ dma_cap_set(DMA_MEMCPY, base->dma_memcpy.cap_mask);
+
+ base->dma_memcpy.device_alloc_chan_resources = coh901318_alloc_chan_resources;
+ base->dma_memcpy.device_free_chan_resources = coh901318_free_chan_resources;
+ base->dma_memcpy.device_prep_dma_memcpy = coh901318_prep_memcpy;
+ base->dma_memcpy.device_is_tx_complete = coh901318_is_tx_complete;
+ base->dma_memcpy.device_issue_pending = coh901318_issue_pending;
+ base->dma_memcpy.device_terminate_all = coh901318_terminate_all;
+ base->dma_memcpy.dev = &pdev->dev;
+ err = dma_async_device_register(&base->dma_memcpy);
+
+ if (err)
+ goto err_register_memcpy;
+
+ dev_dbg(&pdev->dev, "Initialized COH901318 DMA on virtual base 0x%08x\n",
+ (u32) base->virtbase);
+
+ return err;
+
+ err_register_memcpy:
+ dma_async_device_unregister(&base->dma_slave);
+ err_register_slave:
+ coh901318_pool_destroy(&base->pool);
+ err_pool_create:
+ free_irq(platform_get_irq(pdev, 0), base);
+ err_request_irq:
+ err_no_irq:
+ iounmap(base->virtbase);
+ err_no_ioremap:
+ kfree(base);
+ err_alloc_coh_dma_channels:
+ err_no_platformdata:
+ release_mem_region(pdev->resource->start,
+ resource_size(pdev->resource));
+ err_request_mem:
+ err_get_resource:
+ return err;
+}
+
+static int __exit coh901318_remove(struct platform_device *pdev)
+{
+ struct coh901318_base *base = platform_get_drvdata(pdev);
+
+ dma_async_device_unregister(&base->dma_memcpy);
+ dma_async_device_unregister(&base->dma_slave);
+ coh901318_pool_destroy(&base->pool);
+ free_irq(platform_get_irq(pdev, 0), base);
+ iounmap(base->virtbase);
+ kfree(base);
+ release_mem_region(pdev->resource->start,
+ resource_size(pdev->resource));
+ return 0;
+}
+
+
+static struct platform_driver coh901318_driver = {
+ .remove = __exit_p(coh901318_remove),
+ .driver = {
+ .name = "coh901318",
+ },
+};
+
+int __init coh901318_init(void)
+{
+ return platform_driver_probe(&coh901318_driver, coh901318_probe);
+}
+subsys_initcall(coh901318_init);
+
+void __exit coh901318_exit(void)
+{
+ platform_driver_unregister(&coh901318_driver);
+}
+module_exit(coh901318_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Per Friden");
diff --git a/drivers/dma/coh901318_lli.c b/drivers/dma/coh901318_lli.c
new file mode 100644
index 00000000000..f5120f238a4
--- /dev/null
+++ b/drivers/dma/coh901318_lli.c
@@ -0,0 +1,318 @@
+/*
+ * driver/dma/coh901318_lli.c
+ *
+ * Copyright (C) 2007-2009 ST-Ericsson
+ * License terms: GNU General Public License (GPL) version 2
+ * Support functions for handling lli for dma
+ * Author: Per Friden <per.friden@stericsson.com>
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/spinlock.h>
+#include <linux/dmapool.h>
+#include <linux/memory.h>
+#include <mach/coh901318.h>
+
+#include "coh901318_lli.h"
+
+#if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_U300_DEBUG))
+#define DEBUGFS_POOL_COUNTER_RESET(pool) (pool->debugfs_pool_counter = 0)
+#define DEBUGFS_POOL_COUNTER_ADD(pool, add) (pool->debugfs_pool_counter += add)
+#else
+#define DEBUGFS_POOL_COUNTER_RESET(pool)
+#define DEBUGFS_POOL_COUNTER_ADD(pool, add)
+#endif
+
+static struct coh901318_lli *
+coh901318_lli_next(struct coh901318_lli *data)
+{
+ if (data == NULL || data->link_addr == 0)
+ return NULL;
+
+ return (struct coh901318_lli *) data->virt_link_addr;
+}
+
+int coh901318_pool_create(struct coh901318_pool *pool,
+ struct device *dev,
+ size_t size, size_t align)
+{
+ spin_lock_init(&pool->lock);
+ pool->dev = dev;
+ pool->dmapool = dma_pool_create("lli_pool", dev, size, align, 0);
+
+ DEBUGFS_POOL_COUNTER_RESET(pool);
+ return 0;
+}
+
+int coh901318_pool_destroy(struct coh901318_pool *pool)
+{
+
+ dma_pool_destroy(pool->dmapool);
+ return 0;
+}
+
+struct coh901318_lli *
+coh901318_lli_alloc(struct coh901318_pool *pool, unsigned int len)
+{
+ int i;
+ struct coh901318_lli *head;
+ struct coh901318_lli *lli;
+ struct coh901318_lli *lli_prev;
+ dma_addr_t phy;
+
+ if (len == 0)
+ goto err;
+
+ spin_lock(&pool->lock);
+
+ head = dma_pool_alloc(pool->dmapool, GFP_NOWAIT, &phy);
+
+ if (head == NULL)
+ goto err;
+
+ DEBUGFS_POOL_COUNTER_ADD(pool, 1);
+
+ lli = head;
+ lli->phy_this = phy;
+
+ for (i = 1; i < len; i++) {
+ lli_prev = lli;
+
+ lli = dma_pool_alloc(pool->dmapool, GFP_NOWAIT, &phy);
+
+ if (lli == NULL)
+ goto err_clean_up;
+
+ DEBUGFS_POOL_COUNTER_ADD(pool, 1);
+ lli->phy_this = phy;
+
+ lli_prev->link_addr = phy;
+ lli_prev->virt_link_addr = lli;
+ }
+
+ lli->link_addr = 0x00000000U;
+
+ spin_unlock(&pool->lock);
+
+ return head;
+
+ err:
+ spin_unlock(&pool->lock);
+ return NULL;
+
+ err_clean_up:
+ lli_prev->link_addr = 0x00000000U;
+ spin_unlock(&pool->lock);
+ coh901318_lli_free(pool, &head);
+ return NULL;
+}
+
+void coh901318_lli_free(struct coh901318_pool *pool,
+ struct coh901318_lli **lli)
+{
+ struct coh901318_lli *l;
+ struct coh901318_lli *next;
+
+ if (lli == NULL)
+ return;
+
+ l = *lli;
+
+ if (l == NULL)
+ return;
+
+ spin_lock(&pool->lock);
+
+ while (l->link_addr) {
+ next = l->virt_link_addr;
+ dma_pool_free(pool->dmapool, l, l->phy_this);
+ DEBUGFS_POOL_COUNTER_ADD(pool, -1);
+ l = next;
+ }
+ dma_pool_free(pool->dmapool, l, l->phy_this);
+ DEBUGFS_POOL_COUNTER_ADD(pool, -1);
+
+ spin_unlock(&pool->lock);
+ *lli = NULL;
+}
+
+int
+coh901318_lli_fill_memcpy(struct coh901318_pool *pool,
+ struct coh901318_lli *lli,
+ dma_addr_t source, unsigned int size,
+ dma_addr_t destination, u32 ctrl_chained,
+ u32 ctrl_eom)
+{
+ int s = size;
+ dma_addr_t src = source;
+ dma_addr_t dst = destination;
+
+ lli->src_addr = src;
+ lli->dst_addr = dst;
+
+ while (lli->link_addr) {
+ lli->control = ctrl_chained | MAX_DMA_PACKET_SIZE;
+ lli->src_addr = src;
+ lli->dst_addr = dst;
+
+ s -= MAX_DMA_PACKET_SIZE;
+ lli = coh901318_lli_next(lli);
+
+ src += MAX_DMA_PACKET_SIZE;
+ dst += MAX_DMA_PACKET_SIZE;
+ }
+
+ lli->control = ctrl_eom | s;
+ lli->src_addr = src;
+ lli->dst_addr = dst;
+
+ /* One irq per single transfer */
+ return 1;
+}
+
+int
+coh901318_lli_fill_single(struct coh901318_pool *pool,
+ struct coh901318_lli *lli,
+ dma_addr_t buf, unsigned int size,
+ dma_addr_t dev_addr, u32 ctrl_chained, u32 ctrl_eom,
+ enum dma_data_direction dir)
+{
+ int s = size;
+ dma_addr_t src;
+ dma_addr_t dst;
+
+
+ if (dir == DMA_TO_DEVICE) {
+ src = buf;
+ dst = dev_addr;
+
+ } else if (dir == DMA_FROM_DEVICE) {
+
+ src = dev_addr;
+ dst = buf;
+ } else {
+ return -EINVAL;
+ }
+
+ while (lli->link_addr) {
+ size_t block_size = MAX_DMA_PACKET_SIZE;
+ lli->control = ctrl_chained | MAX_DMA_PACKET_SIZE;
+
+ /* If we are on the next-to-final block and there will
+ * be less than half a DMA packet left for the last
+ * block, then we want to make this block a little
+ * smaller to balance the sizes. This is meant to
+ * avoid too small transfers if the buffer size is
+ * (MAX_DMA_PACKET_SIZE*N + 1) */
+ if (s < (MAX_DMA_PACKET_SIZE + MAX_DMA_PACKET_SIZE/2))
+ block_size = MAX_DMA_PACKET_SIZE/2;
+
+ s -= block_size;
+ lli->src_addr = src;
+ lli->dst_addr = dst;
+
+ lli = coh901318_lli_next(lli);
+
+ if (dir == DMA_TO_DEVICE)
+ src += block_size;
+ else if (dir == DMA_FROM_DEVICE)
+ dst += block_size;
+ }
+
+ lli->control = ctrl_eom | s;
+ lli->src_addr = src;
+ lli->dst_addr = dst;
+
+ /* One irq per single transfer */
+ return 1;
+}
+
+int
+coh901318_lli_fill_sg(struct coh901318_pool *pool,
+ struct coh901318_lli *lli,
+ struct scatterlist *sgl, unsigned int nents,
+ dma_addr_t dev_addr, u32 ctrl_chained, u32 ctrl,
+ u32 ctrl_last,
+ enum dma_data_direction dir, u32 ctrl_irq_mask)
+{
+ int i;
+ struct scatterlist *sg;
+ u32 ctrl_sg;
+ dma_addr_t src = 0;
+ dma_addr_t dst = 0;
+ int nbr_of_irq = 0;
+ u32 bytes_to_transfer;
+ u32 elem_size;
+
+ if (lli == NULL)
+ goto err;
+
+ spin_lock(&pool->lock);
+
+ if (dir == DMA_TO_DEVICE)
+ dst = dev_addr;
+ else if (dir == DMA_FROM_DEVICE)
+ src = dev_addr;
+ else
+ goto err;
+
+ for_each_sg(sgl, sg, nents, i) {
+ if (sg_is_chain(sg)) {
+ /* sg continues to the next sg-element don't
+ * send ctrl_finish until the last
+ * sg-element in the chain
+ */
+ ctrl_sg = ctrl_chained;
+ } else if (i == nents - 1)
+ ctrl_sg = ctrl_last;
+ else
+ ctrl_sg = ctrl ? ctrl : ctrl_last;
+
+
+ if ((ctrl_sg & ctrl_irq_mask))
+ nbr_of_irq++;
+
+ if (dir == DMA_TO_DEVICE)
+ /* increment source address */
+ src = sg_dma_address(sg);
+ else
+ /* increment destination address */
+ dst = sg_dma_address(sg);
+
+ bytes_to_transfer = sg_dma_len(sg);
+
+ while (bytes_to_transfer) {
+ u32 val;
+
+ if (bytes_to_transfer > MAX_DMA_PACKET_SIZE) {
+ elem_size = MAX_DMA_PACKET_SIZE;
+ val = ctrl_chained;
+ } else {
+ elem_size = bytes_to_transfer;
+ val = ctrl_sg;
+ }
+
+ lli->control = val | elem_size;
+ lli->src_addr = src;
+ lli->dst_addr = dst;
+
+ if (dir == DMA_FROM_DEVICE)
+ dst += elem_size;
+ else
+ src += elem_size;
+
+ BUG_ON(lli->link_addr & 3);
+
+ bytes_to_transfer -= elem_size;
+ lli = coh901318_lli_next(lli);
+ }
+
+ }
+ spin_unlock(&pool->lock);
+
+ /* There can be many IRQs per sg transfer */
+ return nbr_of_irq;
+ err:
+ spin_unlock(&pool->lock);
+ return -EINVAL;
+}
diff --git a/drivers/dma/coh901318_lli.h b/drivers/dma/coh901318_lli.h
new file mode 100644
index 00000000000..7bf713b79c6
--- /dev/null
+++ b/drivers/dma/coh901318_lli.h
@@ -0,0 +1,124 @@
+/*
+ * driver/dma/coh901318_lli.h
+ *
+ * Copyright (C) 2007-2009 ST-Ericsson
+ * License terms: GNU General Public License (GPL) version 2
+ * Support functions for handling lli for coh901318
+ * Author: Per Friden <per.friden@stericsson.com>
+ */
+
+#ifndef COH901318_LLI_H
+#define COH901318_LLI_H
+
+#include <mach/coh901318.h>
+
+struct device;
+
+struct coh901318_pool {
+ spinlock_t lock;
+ struct dma_pool *dmapool;
+ struct device *dev;
+
+#ifdef CONFIG_DEBUG_FS
+ int debugfs_pool_counter;
+#endif
+};
+
+struct device;
+/**
+ * coh901318_pool_create() - Creates an dma pool for lli:s
+ * @pool: pool handle
+ * @dev: dma device
+ * @lli_nbr: number of lli:s in the pool
+ * @algin: adress alignemtn of lli:s
+ * returns 0 on success otherwise none zero
+ */
+int coh901318_pool_create(struct coh901318_pool *pool,
+ struct device *dev,
+ size_t lli_nbr, size_t align);
+
+/**
+ * coh901318_pool_destroy() - Destroys the dma pool
+ * @pool: pool handle
+ * returns 0 on success otherwise none zero
+ */
+int coh901318_pool_destroy(struct coh901318_pool *pool);
+
+/**
+ * coh901318_lli_alloc() - Allocates a linked list
+ *
+ * @pool: pool handle
+ * @len: length to list
+ * return: none NULL if success otherwise NULL
+ */
+struct coh901318_lli *
+coh901318_lli_alloc(struct coh901318_pool *pool,
+ unsigned int len);
+
+/**
+ * coh901318_lli_free() - Returns the linked list items to the pool
+ * @pool: pool handle
+ * @lli: reference to lli pointer to be freed
+ */
+void coh901318_lli_free(struct coh901318_pool *pool,
+ struct coh901318_lli **lli);
+
+/**
+ * coh901318_lli_fill_memcpy() - Prepares the lli:s for dma memcpy
+ * @pool: pool handle
+ * @lli: allocated lli
+ * @src: src address
+ * @size: transfer size
+ * @dst: destination address
+ * @ctrl_chained: ctrl for chained lli
+ * @ctrl_last: ctrl for the last lli
+ * returns number of CPU interrupts for the lli, negative on error.
+ */
+int
+coh901318_lli_fill_memcpy(struct coh901318_pool *pool,
+ struct coh901318_lli *lli,
+ dma_addr_t src, unsigned int size,
+ dma_addr_t dst, u32 ctrl_chained, u32 ctrl_last);
+
+/**
+ * coh901318_lli_fill_single() - Prepares the lli:s for dma single transfer
+ * @pool: pool handle
+ * @lli: allocated lli
+ * @buf: transfer buffer
+ * @size: transfer size
+ * @dev_addr: address of periphal
+ * @ctrl_chained: ctrl for chained lli
+ * @ctrl_last: ctrl for the last lli
+ * @dir: direction of transfer (to or from device)
+ * returns number of CPU interrupts for the lli, negative on error.
+ */
+int
+coh901318_lli_fill_single(struct coh901318_pool *pool,
+ struct coh901318_lli *lli,
+ dma_addr_t buf, unsigned int size,
+ dma_addr_t dev_addr, u32 ctrl_chained, u32 ctrl_last,
+ enum dma_data_direction dir);
+
+/**
+ * coh901318_lli_fill_single() - Prepares the lli:s for dma scatter list transfer
+ * @pool: pool handle
+ * @lli: allocated lli
+ * @sg: scatter gather list
+ * @nents: number of entries in sg
+ * @dev_addr: address of periphal
+ * @ctrl_chained: ctrl for chained lli
+ * @ctrl: ctrl of middle lli
+ * @ctrl_last: ctrl for the last lli
+ * @dir: direction of transfer (to or from device)
+ * @ctrl_irq_mask: ctrl mask for CPU interrupt
+ * returns number of CPU interrupts for the lli, negative on error.
+ */
+int
+coh901318_lli_fill_sg(struct coh901318_pool *pool,
+ struct coh901318_lli *lli,
+ struct scatterlist *sg, unsigned int nents,
+ dma_addr_t dev_addr, u32 ctrl_chained,
+ u32 ctrl, u32 ctrl_last,
+ enum dma_data_direction dir, u32 ctrl_irq_mask);
+
+#endif /* COH901318_LLI_H */
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
new file mode 100644
index 00000000000..e7a3230fb7d
--- /dev/null
+++ b/drivers/dma/dmaengine.c
@@ -0,0 +1,1053 @@
+/*
+ * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+
+/*
+ * This code implements the DMA subsystem. It provides a HW-neutral interface
+ * for other kernel code to use asynchronous memory copy capabilities,
+ * if present, and allows different HW DMA drivers to register as providing
+ * this capability.
+ *
+ * Due to the fact we are accelerating what is already a relatively fast
+ * operation, the code goes to great lengths to avoid additional overhead,
+ * such as locking.
+ *
+ * LOCKING:
+ *
+ * The subsystem keeps a global list of dma_device structs it is protected by a
+ * mutex, dma_list_mutex.
+ *
+ * A subsystem can get access to a channel by calling dmaengine_get() followed
+ * by dma_find_channel(), or if it has need for an exclusive channel it can call
+ * dma_request_channel(). Once a channel is allocated a reference is taken
+ * against its corresponding driver to disable removal.
+ *
+ * Each device has a channels list, which runs unlocked but is never modified
+ * once the device is registered, it's just setup by the driver.
+ *
+ * See Documentation/dmaengine.txt for more details
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/device.h>
+#include <linux/dmaengine.h>
+#include <linux/hardirq.h>
+#include <linux/spinlock.h>
+#include <linux/percpu.h>
+#include <linux/rcupdate.h>
+#include <linux/mutex.h>
+#include <linux/jiffies.h>
+#include <linux/rculist.h>
+#include <linux/idr.h>
+
+static DEFINE_MUTEX(dma_list_mutex);
+static LIST_HEAD(dma_device_list);
+static long dmaengine_ref_count;
+static struct idr dma_idr;
+
+/* --- sysfs implementation --- */
+
+/**
+ * dev_to_dma_chan - convert a device pointer to the its sysfs container object
+ * @dev - device node
+ *
+ * Must be called under dma_list_mutex
+ */
+static struct dma_chan *dev_to_dma_chan(struct device *dev)
+{
+ struct dma_chan_dev *chan_dev;
+
+ chan_dev = container_of(dev, typeof(*chan_dev), device);
+ return chan_dev->chan;
+}
+
+static ssize_t show_memcpy_count(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct dma_chan *chan;
+ unsigned long count = 0;
+ int i;
+ int err;
+
+ mutex_lock(&dma_list_mutex);
+ chan = dev_to_dma_chan(dev);
+ if (chan) {
+ for_each_possible_cpu(i)
+ count += per_cpu_ptr(chan->local, i)->memcpy_count;
+ err = sprintf(buf, "%lu\n", count);
+ } else
+ err = -ENODEV;
+ mutex_unlock(&dma_list_mutex);
+
+ return err;
+}
+
+static ssize_t show_bytes_transferred(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct dma_chan *chan;
+ unsigned long count = 0;
+ int i;
+ int err;
+
+ mutex_lock(&dma_list_mutex);
+ chan = dev_to_dma_chan(dev);
+ if (chan) {
+ for_each_possible_cpu(i)
+ count += per_cpu_ptr(chan->local, i)->bytes_transferred;
+ err = sprintf(buf, "%lu\n", count);
+ } else
+ err = -ENODEV;
+ mutex_unlock(&dma_list_mutex);
+
+ return err;
+}
+
+static ssize_t show_in_use(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct dma_chan *chan;
+ int err;
+
+ mutex_lock(&dma_list_mutex);
+ chan = dev_to_dma_chan(dev);
+ if (chan)
+ err = sprintf(buf, "%d\n", chan->client_count);
+ else
+ err = -ENODEV;
+ mutex_unlock(&dma_list_mutex);
+
+ return err;
+}
+
+static struct device_attribute dma_attrs[] = {
+ __ATTR(memcpy_count, S_IRUGO, show_memcpy_count, NULL),
+ __ATTR(bytes_transferred, S_IRUGO, show_bytes_transferred, NULL),
+ __ATTR(in_use, S_IRUGO, show_in_use, NULL),
+ __ATTR_NULL
+};
+
+static void chan_dev_release(struct device *dev)
+{
+ struct dma_chan_dev *chan_dev;
+
+ chan_dev = container_of(dev, typeof(*chan_dev), device);
+ if (atomic_dec_and_test(chan_dev->idr_ref)) {
+ mutex_lock(&dma_list_mutex);
+ idr_remove(&dma_idr, chan_dev->dev_id);
+ mutex_unlock(&dma_list_mutex);
+ kfree(chan_dev->idr_ref);
+ }
+ kfree(chan_dev);
+}
+
+static struct class dma_devclass = {
+ .name = "dma",
+ .dev_attrs = dma_attrs,
+ .dev_release = chan_dev_release,
+};
+
+/* --- client and device registration --- */
+
+#define dma_device_satisfies_mask(device, mask) \
+ __dma_device_satisfies_mask((device), &(mask))
+static int
+__dma_device_satisfies_mask(struct dma_device *device, dma_cap_mask_t *want)
+{
+ dma_cap_mask_t has;
+
+ bitmap_and(has.bits, want->bits, device->cap_mask.bits,
+ DMA_TX_TYPE_END);
+ return bitmap_equal(want->bits, has.bits, DMA_TX_TYPE_END);
+}
+
+static struct module *dma_chan_to_owner(struct dma_chan *chan)
+{
+ return chan->device->dev->driver->owner;
+}
+
+/**
+ * balance_ref_count - catch up the channel reference count
+ * @chan - channel to balance ->client_count versus dmaengine_ref_count
+ *
+ * balance_ref_count must be called under dma_list_mutex
+ */
+static void balance_ref_count(struct dma_chan *chan)
+{
+ struct module *owner = dma_chan_to_owner(chan);
+
+ while (chan->client_count < dmaengine_ref_count) {
+ __module_get(owner);
+ chan->client_count++;
+ }
+}
+
+/**
+ * dma_chan_get - try to grab a dma channel's parent driver module
+ * @chan - channel to grab
+ *
+ * Must be called under dma_list_mutex
+ */
+static int dma_chan_get(struct dma_chan *chan)
+{
+ int err = -ENODEV;
+ struct module *owner = dma_chan_to_owner(chan);
+
+ if (chan->client_count) {
+ __module_get(owner);
+ err = 0;
+ } else if (try_module_get(owner))
+ err = 0;
+
+ if (err == 0)
+ chan->client_count++;
+
+ /* allocate upon first client reference */
+ if (chan->client_count == 1 && err == 0) {
+ int desc_cnt = chan->device->device_alloc_chan_resources(chan);
+
+ if (desc_cnt < 0) {
+ err = desc_cnt;
+ chan->client_count = 0;
+ module_put(owner);
+ } else if (!dma_has_cap(DMA_PRIVATE, chan->device->cap_mask))
+ balance_ref_count(chan);
+ }
+
+ return err;
+}
+
+/**
+ * dma_chan_put - drop a reference to a dma channel's parent driver module
+ * @chan - channel to release
+ *
+ * Must be called under dma_list_mutex
+ */
+static void dma_chan_put(struct dma_chan *chan)
+{
+ if (!chan->client_count)
+ return; /* this channel failed alloc_chan_resources */
+ chan->client_count--;
+ module_put(dma_chan_to_owner(chan));
+ if (chan->client_count == 0)
+ chan->device->device_free_chan_resources(chan);
+}
+
+enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie)
+{
+ enum dma_status status;
+ unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000);
+
+ dma_async_issue_pending(chan);
+ do {
+ status = dma_async_is_tx_complete(chan, cookie, NULL, NULL);
+ if (time_after_eq(jiffies, dma_sync_wait_timeout)) {
+ printk(KERN_ERR "dma_sync_wait_timeout!\n");
+ return DMA_ERROR;
+ }
+ } while (status == DMA_IN_PROGRESS);
+
+ return status;
+}
+EXPORT_SYMBOL(dma_sync_wait);
+
+/**
+ * dma_cap_mask_all - enable iteration over all operation types
+ */
+static dma_cap_mask_t dma_cap_mask_all;
+
+/**
+ * dma_chan_tbl_ent - tracks channel allocations per core/operation
+ * @chan - associated channel for this entry
+ */
+struct dma_chan_tbl_ent {
+ struct dma_chan *chan;
+};
+
+/**
+ * channel_table - percpu lookup table for memory-to-memory offload providers
+ */
+static struct dma_chan_tbl_ent *channel_table[DMA_TX_TYPE_END];
+
+static int __init dma_channel_table_init(void)
+{
+ enum dma_transaction_type cap;
+ int err = 0;
+
+ bitmap_fill(dma_cap_mask_all.bits, DMA_TX_TYPE_END);
+
+ /* 'interrupt', 'private', and 'slave' are channel capabilities,
+ * but are not associated with an operation so they do not need
+ * an entry in the channel_table
+ */
+ clear_bit(DMA_INTERRUPT, dma_cap_mask_all.bits);
+ clear_bit(DMA_PRIVATE, dma_cap_mask_all.bits);
+ clear_bit(DMA_SLAVE, dma_cap_mask_all.bits);
+
+ for_each_dma_cap_mask(cap, dma_cap_mask_all) {
+ channel_table[cap] = alloc_percpu(struct dma_chan_tbl_ent);
+ if (!channel_table[cap]) {
+ err = -ENOMEM;
+ break;
+ }
+ }
+
+ if (err) {
+ pr_err("dmaengine: initialization failure\n");
+ for_each_dma_cap_mask(cap, dma_cap_mask_all)
+ if (channel_table[cap])
+ free_percpu(channel_table[cap]);
+ }
+
+ return err;
+}
+arch_initcall(dma_channel_table_init);
+
+/**
+ * dma_find_channel - find a channel to carry out the operation
+ * @tx_type: transaction type
+ */
+struct dma_chan *dma_find_channel(enum dma_transaction_type tx_type)
+{
+ return this_cpu_read(channel_table[tx_type]->chan);
+}
+EXPORT_SYMBOL(dma_find_channel);
+
+/**
+ * dma_issue_pending_all - flush all pending operations across all channels
+ */
+void dma_issue_pending_all(void)
+{
+ struct dma_device *device;
+ struct dma_chan *chan;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(device, &dma_device_list, global_node) {
+ if (dma_has_cap(DMA_PRIVATE, device->cap_mask))
+ continue;
+ list_for_each_entry(chan, &device->channels, device_node)
+ if (chan->client_count)
+ device->device_issue_pending(chan);
+ }
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL(dma_issue_pending_all);
+
+/**
+ * nth_chan - returns the nth channel of the given capability
+ * @cap: capability to match
+ * @n: nth channel desired
+ *
+ * Defaults to returning the channel with the desired capability and the
+ * lowest reference count when 'n' cannot be satisfied. Must be called
+ * under dma_list_mutex.
+ */
+static struct dma_chan *nth_chan(enum dma_transaction_type cap, int n)
+{
+ struct dma_device *device;
+ struct dma_chan *chan;
+ struct dma_chan *ret = NULL;
+ struct dma_chan *min = NULL;
+
+ list_for_each_entry(device, &dma_device_list, global_node) {
+ if (!dma_has_cap(cap, device->cap_mask) ||
+ dma_has_cap(DMA_PRIVATE, device->cap_mask))
+ continue;
+ list_for_each_entry(chan, &device->channels, device_node) {
+ if (!chan->client_count)
+ continue;
+ if (!min)
+ min = chan;
+ else if (chan->table_count < min->table_count)
+ min = chan;
+
+ if (n-- == 0) {
+ ret = chan;
+ break; /* done */
+ }
+ }
+ if (ret)
+ break; /* done */
+ }
+
+ if (!ret)
+ ret = min;
+
+ if (ret)
+ ret->table_count++;
+
+ return ret;
+}
+
+/**
+ * dma_channel_rebalance - redistribute the available channels
+ *
+ * Optimize for cpu isolation (each cpu gets a dedicated channel for an
+ * operation type) in the SMP case, and operation isolation (avoid
+ * multi-tasking channels) in the non-SMP case. Must be called under
+ * dma_list_mutex.
+ */
+static void dma_channel_rebalance(void)
+{
+ struct dma_chan *chan;
+ struct dma_device *device;
+ int cpu;
+ int cap;
+ int n;
+
+ /* undo the last distribution */
+ for_each_dma_cap_mask(cap, dma_cap_mask_all)
+ for_each_possible_cpu(cpu)
+ per_cpu_ptr(channel_table[cap], cpu)->chan = NULL;
+
+ list_for_each_entry(device, &dma_device_list, global_node) {
+ if (dma_has_cap(DMA_PRIVATE, device->cap_mask))
+ continue;
+ list_for_each_entry(chan, &device->channels, device_node)
+ chan->table_count = 0;
+ }
+
+ /* don't populate the channel_table if no clients are available */
+ if (!dmaengine_ref_count)
+ return;
+
+ /* redistribute available channels */
+ n = 0;
+ for_each_dma_cap_mask(cap, dma_cap_mask_all)
+ for_each_online_cpu(cpu) {
+ if (num_possible_cpus() > 1)
+ chan = nth_chan(cap, n++);
+ else
+ chan = nth_chan(cap, -1);
+
+ per_cpu_ptr(channel_table[cap], cpu)->chan = chan;
+ }
+}
+
+static struct dma_chan *private_candidate(dma_cap_mask_t *mask, struct dma_device *dev,
+ dma_filter_fn fn, void *fn_param)
+{
+ struct dma_chan *chan;
+
+ if (!__dma_device_satisfies_mask(dev, mask)) {
+ pr_debug("%s: wrong capabilities\n", __func__);
+ return NULL;
+ }
+ /* devices with multiple channels need special handling as we need to
+ * ensure that all channels are either private or public.
+ */
+ if (dev->chancnt > 1 && !dma_has_cap(DMA_PRIVATE, dev->cap_mask))
+ list_for_each_entry(chan, &dev->channels, device_node) {
+ /* some channels are already publicly allocated */
+ if (chan->client_count)
+ return NULL;
+ }
+
+ list_for_each_entry(chan, &dev->channels, device_node) {
+ if (chan->client_count) {
+ pr_debug("%s: %s busy\n",
+ __func__, dma_chan_name(chan));
+ continue;
+ }
+ if (fn && !fn(chan, fn_param)) {
+ pr_debug("%s: %s filter said false\n",
+ __func__, dma_chan_name(chan));
+ continue;
+ }
+ return chan;
+ }
+
+ return NULL;
+}
+
+/**
+ * dma_request_channel - try to allocate an exclusive channel
+ * @mask: capabilities that the channel must satisfy
+ * @fn: optional callback to disposition available channels
+ * @fn_param: opaque parameter to pass to dma_filter_fn
+ */
+struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask, dma_filter_fn fn, void *fn_param)
+{
+ struct dma_device *device, *_d;
+ struct dma_chan *chan = NULL;
+ int err;
+
+ /* Find a channel */
+ mutex_lock(&dma_list_mutex);
+ list_for_each_entry_safe(device, _d, &dma_device_list, global_node) {
+ chan = private_candidate(mask, device, fn, fn_param);
+ if (chan) {
+ /* Found a suitable channel, try to grab, prep, and
+ * return it. We first set DMA_PRIVATE to disable
+ * balance_ref_count as this channel will not be
+ * published in the general-purpose allocator
+ */
+ dma_cap_set(DMA_PRIVATE, device->cap_mask);
+ device->privatecnt++;
+ err = dma_chan_get(chan);
+
+ if (err == -ENODEV) {
+ pr_debug("%s: %s module removed\n", __func__,
+ dma_chan_name(chan));
+ list_del_rcu(&device->global_node);
+ } else if (err)
+ pr_err("dmaengine: failed to get %s: (%d)\n",
+ dma_chan_name(chan), err);
+ else
+ break;
+ if (--device->privatecnt == 0)
+ dma_cap_clear(DMA_PRIVATE, device->cap_mask);
+ chan->private = NULL;
+ chan = NULL;
+ }
+ }
+ mutex_unlock(&dma_list_mutex);
+
+ pr_debug("%s: %s (%s)\n", __func__, chan ? "success" : "fail",
+ chan ? dma_chan_name(chan) : NULL);
+
+ return chan;
+}
+EXPORT_SYMBOL_GPL(__dma_request_channel);
+
+void dma_release_channel(struct dma_chan *chan)
+{
+ mutex_lock(&dma_list_mutex);
+ WARN_ONCE(chan->client_count != 1,
+ "chan reference count %d != 1\n", chan->client_count);
+ dma_chan_put(chan);
+ /* drop PRIVATE cap enabled by __dma_request_channel() */
+ if (--chan->device->privatecnt == 0)
+ dma_cap_clear(DMA_PRIVATE, chan->device->cap_mask);
+ chan->private = NULL;
+ mutex_unlock(&dma_list_mutex);
+}
+EXPORT_SYMBOL_GPL(dma_release_channel);
+
+/**
+ * dmaengine_get - register interest in dma_channels
+ */
+void dmaengine_get(void)
+{
+ struct dma_device *device, *_d;
+ struct dma_chan *chan;
+ int err;
+
+ mutex_lock(&dma_list_mutex);
+ dmaengine_ref_count++;
+
+ /* try to grab channels */
+ list_for_each_entry_safe(device, _d, &dma_device_list, global_node) {
+ if (dma_has_cap(DMA_PRIVATE, device->cap_mask))
+ continue;
+ list_for_each_entry(chan, &device->channels, device_node) {
+ err = dma_chan_get(chan);
+ if (err == -ENODEV) {
+ /* module removed before we could use it */
+ list_del_rcu(&device->global_node);
+ break;
+ } else if (err)
+ pr_err("dmaengine: failed to get %s: (%d)\n",
+ dma_chan_name(chan), err);
+ }
+ }
+
+ /* if this is the first reference and there were channels
+ * waiting we need to rebalance to get those channels
+ * incorporated into the channel table
+ */
+ if (dmaengine_ref_count == 1)
+ dma_channel_rebalance();
+ mutex_unlock(&dma_list_mutex);
+}
+EXPORT_SYMBOL(dmaengine_get);
+
+/**
+ * dmaengine_put - let dma drivers be removed when ref_count == 0
+ */
+void dmaengine_put(void)
+{
+ struct dma_device *device;
+ struct dma_chan *chan;
+
+ mutex_lock(&dma_list_mutex);
+ dmaengine_ref_count--;
+ BUG_ON(dmaengine_ref_count < 0);
+ /* drop channel references */
+ list_for_each_entry(device, &dma_device_list, global_node) {
+ if (dma_has_cap(DMA_PRIVATE, device->cap_mask))
+ continue;
+ list_for_each_entry(chan, &device->channels, device_node)
+ dma_chan_put(chan);
+ }
+ mutex_unlock(&dma_list_mutex);
+}
+EXPORT_SYMBOL(dmaengine_put);
+
+static bool device_has_all_tx_types(struct dma_device *device)
+{
+ /* A device that satisfies this test has channels that will never cause
+ * an async_tx channel switch event as all possible operation types can
+ * be handled.
+ */
+ #ifdef CONFIG_ASYNC_TX_DMA
+ if (!dma_has_cap(DMA_INTERRUPT, device->cap_mask))
+ return false;
+ #endif
+
+ #if defined(CONFIG_ASYNC_MEMCPY) || defined(CONFIG_ASYNC_MEMCPY_MODULE)
+ if (!dma_has_cap(DMA_MEMCPY, device->cap_mask))
+ return false;
+ #endif
+
+ #if defined(CONFIG_ASYNC_MEMSET) || defined(CONFIG_ASYNC_MEMSET_MODULE)
+ if (!dma_has_cap(DMA_MEMSET, device->cap_mask))
+ return false;
+ #endif
+
+ #if defined(CONFIG_ASYNC_XOR) || defined(CONFIG_ASYNC_XOR_MODULE)
+ if (!dma_has_cap(DMA_XOR, device->cap_mask))
+ return false;
+
+ #ifndef CONFIG_ASYNC_TX_DISABLE_XOR_VAL_DMA
+ if (!dma_has_cap(DMA_XOR_VAL, device->cap_mask))
+ return false;
+ #endif
+ #endif
+
+ #if defined(CONFIG_ASYNC_PQ) || defined(CONFIG_ASYNC_PQ_MODULE)
+ if (!dma_has_cap(DMA_PQ, device->cap_mask))
+ return false;
+
+ #ifndef CONFIG_ASYNC_TX_DISABLE_PQ_VAL_DMA
+ if (!dma_has_cap(DMA_PQ_VAL, device->cap_mask))
+ return false;
+ #endif
+ #endif
+
+ return true;
+}
+
+static int get_dma_id(struct dma_device *device)
+{
+ int rc;
+
+ idr_retry:
+ if (!idr_pre_get(&dma_idr, GFP_KERNEL))
+ return -ENOMEM;
+ mutex_lock(&dma_list_mutex);
+ rc = idr_get_new(&dma_idr, NULL, &device->dev_id);
+ mutex_unlock(&dma_list_mutex);
+ if (rc == -EAGAIN)
+ goto idr_retry;
+ else if (rc != 0)
+ return rc;
+
+ return 0;
+}
+
+/**
+ * dma_async_device_register - registers DMA devices found
+ * @device: &dma_device
+ */
+int dma_async_device_register(struct dma_device *device)
+{
+ int chancnt = 0, rc;
+ struct dma_chan* chan;
+ atomic_t *idr_ref;
+
+ if (!device)
+ return -ENODEV;
+
+ /* validate device routines */
+ BUG_ON(dma_has_cap(DMA_MEMCPY, device->cap_mask) &&
+ !device->device_prep_dma_memcpy);
+ BUG_ON(dma_has_cap(DMA_XOR, device->cap_mask) &&
+ !device->device_prep_dma_xor);
+ BUG_ON(dma_has_cap(DMA_XOR_VAL, device->cap_mask) &&
+ !device->device_prep_dma_xor_val);
+ BUG_ON(dma_has_cap(DMA_PQ, device->cap_mask) &&
+ !device->device_prep_dma_pq);
+ BUG_ON(dma_has_cap(DMA_PQ_VAL, device->cap_mask) &&
+ !device->device_prep_dma_pq_val);
+ BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) &&
+ !device->device_prep_dma_memset);
+ BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) &&
+ !device->device_prep_dma_interrupt);
+ BUG_ON(dma_has_cap(DMA_SLAVE, device->cap_mask) &&
+ !device->device_prep_slave_sg);
+ BUG_ON(dma_has_cap(DMA_SLAVE, device->cap_mask) &&
+ !device->device_terminate_all);
+
+ BUG_ON(!device->device_alloc_chan_resources);
+ BUG_ON(!device->device_free_chan_resources);
+ BUG_ON(!device->device_is_tx_complete);
+ BUG_ON(!device->device_issue_pending);
+ BUG_ON(!device->dev);
+
+ /* note: this only matters in the
+ * CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH=y case
+ */
+ if (device_has_all_tx_types(device))
+ dma_cap_set(DMA_ASYNC_TX, device->cap_mask);
+
+ idr_ref = kmalloc(sizeof(*idr_ref), GFP_KERNEL);
+ if (!idr_ref)
+ return -ENOMEM;
+ rc = get_dma_id(device);
+ if (rc != 0) {
+ kfree(idr_ref);
+ return rc;
+ }
+
+ atomic_set(idr_ref, 0);
+
+ /* represent channels in sysfs. Probably want devs too */
+ list_for_each_entry(chan, &device->channels, device_node) {
+ rc = -ENOMEM;
+ chan->local = alloc_percpu(typeof(*chan->local));
+ if (chan->local == NULL)
+ goto err_out;
+ chan->dev = kzalloc(sizeof(*chan->dev), GFP_KERNEL);
+ if (chan->dev == NULL) {
+ free_percpu(chan->local);
+ chan->local = NULL;
+ goto err_out;
+ }
+
+ chan->chan_id = chancnt++;
+ chan->dev->device.class = &dma_devclass;
+ chan->dev->device.parent = device->dev;
+ chan->dev->chan = chan;
+ chan->dev->idr_ref = idr_ref;
+ chan->dev->dev_id = device->dev_id;
+ atomic_inc(idr_ref);
+ dev_set_name(&chan->dev->device, "dma%dchan%d",
+ device->dev_id, chan->chan_id);
+
+ rc = device_register(&chan->dev->device);
+ if (rc) {
+ free_percpu(chan->local);
+ chan->local = NULL;
+ kfree(chan->dev);
+ atomic_dec(idr_ref);
+ goto err_out;
+ }
+ chan->client_count = 0;
+ }
+ device->chancnt = chancnt;
+
+ mutex_lock(&dma_list_mutex);
+ /* take references on public channels */
+ if (dmaengine_ref_count && !dma_has_cap(DMA_PRIVATE, device->cap_mask))
+ list_for_each_entry(chan, &device->channels, device_node) {
+ /* if clients are already waiting for channels we need
+ * to take references on their behalf
+ */
+ if (dma_chan_get(chan) == -ENODEV) {
+ /* note we can only get here for the first
+ * channel as the remaining channels are
+ * guaranteed to get a reference
+ */
+ rc = -ENODEV;
+ mutex_unlock(&dma_list_mutex);
+ goto err_out;
+ }
+ }
+ list_add_tail_rcu(&device->global_node, &dma_device_list);
+ if (dma_has_cap(DMA_PRIVATE, device->cap_mask))
+ device->privatecnt++; /* Always private */
+ dma_channel_rebalance();
+ mutex_unlock(&dma_list_mutex);
+
+ return 0;
+
+err_out:
+ /* if we never registered a channel just release the idr */
+ if (atomic_read(idr_ref) == 0) {
+ mutex_lock(&dma_list_mutex);
+ idr_remove(&dma_idr, device->dev_id);
+ mutex_unlock(&dma_list_mutex);
+ kfree(idr_ref);
+ return rc;
+ }
+
+ list_for_each_entry(chan, &device->channels, device_node) {
+ if (chan->local == NULL)
+ continue;
+ mutex_lock(&dma_list_mutex);
+ chan->dev->chan = NULL;
+ mutex_unlock(&dma_list_mutex);
+ device_unregister(&chan->dev->device);
+ free_percpu(chan->local);
+ }
+ return rc;
+}
+EXPORT_SYMBOL(dma_async_device_register);
+
+/**
+ * dma_async_device_unregister - unregister a DMA device
+ * @device: &dma_device
+ *
+ * This routine is called by dma driver exit routines, dmaengine holds module
+ * references to prevent it being called while channels are in use.
+ */
+void dma_async_device_unregister(struct dma_device *device)
+{
+ struct dma_chan *chan;
+
+ mutex_lock(&dma_list_mutex);
+ list_del_rcu(&device->global_node);
+ dma_channel_rebalance();
+ mutex_unlock(&dma_list_mutex);
+
+ list_for_each_entry(chan, &device->channels, device_node) {
+ WARN_ONCE(chan->client_count,
+ "%s called while %d clients hold a reference\n",
+ __func__, chan->client_count);
+ mutex_lock(&dma_list_mutex);
+ chan->dev->chan = NULL;
+ mutex_unlock(&dma_list_mutex);
+ device_unregister(&chan->dev->device);
+ free_percpu(chan->local);
+ }
+}
+EXPORT_SYMBOL(dma_async_device_unregister);
+
+/**
+ * dma_async_memcpy_buf_to_buf - offloaded copy between virtual addresses
+ * @chan: DMA channel to offload copy to
+ * @dest: destination address (virtual)
+ * @src: source address (virtual)
+ * @len: length
+ *
+ * Both @dest and @src must be mappable to a bus address according to the
+ * DMA mapping API rules for streaming mappings.
+ * Both @dest and @src must stay memory resident (kernel memory or locked
+ * user space pages).
+ */
+dma_cookie_t
+dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest,
+ void *src, size_t len)
+{
+ struct dma_device *dev = chan->device;
+ struct dma_async_tx_descriptor *tx;
+ dma_addr_t dma_dest, dma_src;
+ dma_cookie_t cookie;
+ unsigned long flags;
+
+ dma_src = dma_map_single(dev->dev, src, len, DMA_TO_DEVICE);
+ dma_dest = dma_map_single(dev->dev, dest, len, DMA_FROM_DEVICE);
+ flags = DMA_CTRL_ACK |
+ DMA_COMPL_SRC_UNMAP_SINGLE |
+ DMA_COMPL_DEST_UNMAP_SINGLE;
+ tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, flags);
+
+ if (!tx) {
+ dma_unmap_single(dev->dev, dma_src, len, DMA_TO_DEVICE);
+ dma_unmap_single(dev->dev, dma_dest, len, DMA_FROM_DEVICE);
+ return -ENOMEM;
+ }
+
+ tx->callback = NULL;
+ cookie = tx->tx_submit(tx);
+
+ preempt_disable();
+ __this_cpu_add(chan->local->bytes_transferred, len);
+ __this_cpu_inc(chan->local->memcpy_count);
+ preempt_enable();
+
+ return cookie;
+}
+EXPORT_SYMBOL(dma_async_memcpy_buf_to_buf);
+
+/**
+ * dma_async_memcpy_buf_to_pg - offloaded copy from address to page
+ * @chan: DMA channel to offload copy to
+ * @page: destination page
+ * @offset: offset in page to copy to
+ * @kdata: source address (virtual)
+ * @len: length
+ *
+ * Both @page/@offset and @kdata must be mappable to a bus address according
+ * to the DMA mapping API rules for streaming mappings.
+ * Both @page/@offset and @kdata must stay memory resident (kernel memory or
+ * locked user space pages)
+ */
+dma_cookie_t
+dma_async_memcpy_buf_to_pg(struct dma_chan *chan, struct page *page,
+ unsigned int offset, void *kdata, size_t len)
+{
+ struct dma_device *dev = chan->device;
+ struct dma_async_tx_descriptor *tx;
+ dma_addr_t dma_dest, dma_src;
+ dma_cookie_t cookie;
+ unsigned long flags;
+
+ dma_src = dma_map_single(dev->dev, kdata, len, DMA_TO_DEVICE);
+ dma_dest = dma_map_page(dev->dev, page, offset, len, DMA_FROM_DEVICE);
+ flags = DMA_CTRL_ACK | DMA_COMPL_SRC_UNMAP_SINGLE;
+ tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, flags);
+
+ if (!tx) {
+ dma_unmap_single(dev->dev, dma_src, len, DMA_TO_DEVICE);
+ dma_unmap_page(dev->dev, dma_dest, len, DMA_FROM_DEVICE);
+ return -ENOMEM;
+ }
+
+ tx->callback = NULL;
+ cookie = tx->tx_submit(tx);
+
+ preempt_disable();
+ __this_cpu_add(chan->local->bytes_transferred, len);
+ __this_cpu_inc(chan->local->memcpy_count);
+ preempt_enable();
+
+ return cookie;
+}
+EXPORT_SYMBOL(dma_async_memcpy_buf_to_pg);
+
+/**
+ * dma_async_memcpy_pg_to_pg - offloaded copy from page to page
+ * @chan: DMA channel to offload copy to
+ * @dest_pg: destination page
+ * @dest_off: offset in page to copy to
+ * @src_pg: source page
+ * @src_off: offset in page to copy from
+ * @len: length
+ *
+ * Both @dest_page/@dest_off and @src_page/@src_off must be mappable to a bus
+ * address according to the DMA mapping API rules for streaming mappings.
+ * Both @dest_page/@dest_off and @src_page/@src_off must stay memory resident
+ * (kernel memory or locked user space pages).
+ */
+dma_cookie_t
+dma_async_memcpy_pg_to_pg(struct dma_chan *chan, struct page *dest_pg,
+ unsigned int dest_off, struct page *src_pg, unsigned int src_off,
+ size_t len)
+{
+ struct dma_device *dev = chan->device;
+ struct dma_async_tx_descriptor *tx;
+ dma_addr_t dma_dest, dma_src;
+ dma_cookie_t cookie;
+ unsigned long flags;
+
+ dma_src = dma_map_page(dev->dev, src_pg, src_off, len, DMA_TO_DEVICE);
+ dma_dest = dma_map_page(dev->dev, dest_pg, dest_off, len,
+ DMA_FROM_DEVICE);
+ flags = DMA_CTRL_ACK;
+ tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, flags);
+
+ if (!tx) {
+ dma_unmap_page(dev->dev, dma_src, len, DMA_TO_DEVICE);
+ dma_unmap_page(dev->dev, dma_dest, len, DMA_FROM_DEVICE);
+ return -ENOMEM;
+ }
+
+ tx->callback = NULL;
+ cookie = tx->tx_submit(tx);
+
+ preempt_disable();
+ __this_cpu_add(chan->local->bytes_transferred, len);
+ __this_cpu_inc(chan->local->memcpy_count);
+ preempt_enable();
+
+ return cookie;
+}
+EXPORT_SYMBOL(dma_async_memcpy_pg_to_pg);
+
+void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx,
+ struct dma_chan *chan)
+{
+ tx->chan = chan;
+ spin_lock_init(&tx->lock);
+}
+EXPORT_SYMBOL(dma_async_tx_descriptor_init);
+
+/* dma_wait_for_async_tx - spin wait for a transaction to complete
+ * @tx: in-flight transaction to wait on
+ */
+enum dma_status
+dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
+{
+ unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000);
+
+ if (!tx)
+ return DMA_SUCCESS;
+
+ while (tx->cookie == -EBUSY) {
+ if (time_after_eq(jiffies, dma_sync_wait_timeout)) {
+ pr_err("%s timeout waiting for descriptor submission\n",
+ __func__);
+ return DMA_ERROR;
+ }
+ cpu_relax();
+ }
+ return dma_sync_wait(tx->chan, tx->cookie);
+}
+EXPORT_SYMBOL_GPL(dma_wait_for_async_tx);
+
+/* dma_run_dependencies - helper routine for dma drivers to process
+ * (start) dependent operations on their target channel
+ * @tx: transaction with dependencies
+ */
+void dma_run_dependencies(struct dma_async_tx_descriptor *tx)
+{
+ struct dma_async_tx_descriptor *dep = tx->next;
+ struct dma_async_tx_descriptor *dep_next;
+ struct dma_chan *chan;
+
+ if (!dep)
+ return;
+
+ /* we'll submit tx->next now, so clear the link */
+ tx->next = NULL;
+ chan = dep->chan;
+
+ /* keep submitting up until a channel switch is detected
+ * in that case we will be called again as a result of
+ * processing the interrupt from async_tx_channel_switch
+ */
+ for (; dep; dep = dep_next) {
+ spin_lock_bh(&dep->lock);
+ dep->parent = NULL;
+ dep_next = dep->next;
+ if (dep_next && dep_next->chan == chan)
+ dep->next = NULL; /* ->next will be submitted */
+ else
+ dep_next = NULL; /* submit current dep and terminate */
+ spin_unlock_bh(&dep->lock);
+
+ dep->tx_submit(dep);
+ }
+
+ chan->device->device_issue_pending(chan);
+}
+EXPORT_SYMBOL_GPL(dma_run_dependencies);
+
+static int __init dma_bus_init(void)
+{
+ idr_init(&dma_idr);
+ mutex_init(&dma_list_mutex);
+ return class_register(&dma_devclass);
+}
+arch_initcall(dma_bus_init);
+
+
diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
new file mode 100644
index 00000000000..948d563941c
--- /dev/null
+++ b/drivers/dma/dmatest.c
@@ -0,0 +1,627 @@
+/*
+ * DMA Engine test module
+ *
+ * Copyright (C) 2007 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/init.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/random.h>
+#include <linux/wait.h>
+
+static unsigned int test_buf_size = 16384;
+module_param(test_buf_size, uint, S_IRUGO);
+MODULE_PARM_DESC(test_buf_size, "Size of the memcpy test buffer");
+
+static char test_channel[20];
+module_param_string(channel, test_channel, sizeof(test_channel), S_IRUGO);
+MODULE_PARM_DESC(channel, "Bus ID of the channel to test (default: any)");
+
+static char test_device[20];
+module_param_string(device, test_device, sizeof(test_device), S_IRUGO);
+MODULE_PARM_DESC(device, "Bus ID of the DMA Engine to test (default: any)");
+
+static unsigned int threads_per_chan = 1;
+module_param(threads_per_chan, uint, S_IRUGO);
+MODULE_PARM_DESC(threads_per_chan,
+ "Number of threads to start per channel (default: 1)");
+
+static unsigned int max_channels;
+module_param(max_channels, uint, S_IRUGO);
+MODULE_PARM_DESC(max_channels,
+ "Maximum number of channels to use (default: all)");
+
+static unsigned int iterations;
+module_param(iterations, uint, S_IRUGO);
+MODULE_PARM_DESC(iterations,
+ "Iterations before stopping test (default: infinite)");
+
+static unsigned int xor_sources = 3;
+module_param(xor_sources, uint, S_IRUGO);
+MODULE_PARM_DESC(xor_sources,
+ "Number of xor source buffers (default: 3)");
+
+static unsigned int pq_sources = 3;
+module_param(pq_sources, uint, S_IRUGO);
+MODULE_PARM_DESC(pq_sources,
+ "Number of p+q source buffers (default: 3)");
+
+/*
+ * Initialization patterns. All bytes in the source buffer has bit 7
+ * set, all bytes in the destination buffer has bit 7 cleared.
+ *
+ * Bit 6 is set for all bytes which are to be copied by the DMA
+ * engine. Bit 5 is set for all bytes which are to be overwritten by
+ * the DMA engine.
+ *
+ * The remaining bits are the inverse of a counter which increments by
+ * one for each byte address.
+ */
+#define PATTERN_SRC 0x80
+#define PATTERN_DST 0x00
+#define PATTERN_COPY 0x40
+#define PATTERN_OVERWRITE 0x20
+#define PATTERN_COUNT_MASK 0x1f
+
+struct dmatest_thread {
+ struct list_head node;
+ struct task_struct *task;
+ struct dma_chan *chan;
+ u8 **srcs;
+ u8 **dsts;
+ enum dma_transaction_type type;
+};
+
+struct dmatest_chan {
+ struct list_head node;
+ struct dma_chan *chan;
+ struct list_head threads;
+};
+
+/*
+ * These are protected by dma_list_mutex since they're only used by
+ * the DMA filter function callback
+ */
+static LIST_HEAD(dmatest_channels);
+static unsigned int nr_channels;
+
+static bool dmatest_match_channel(struct dma_chan *chan)
+{
+ if (test_channel[0] == '\0')
+ return true;
+ return strcmp(dma_chan_name(chan), test_channel) == 0;
+}
+
+static bool dmatest_match_device(struct dma_device *device)
+{
+ if (test_device[0] == '\0')
+ return true;
+ return strcmp(dev_name(device->dev), test_device) == 0;
+}
+
+static unsigned long dmatest_random(void)
+{
+ unsigned long buf;
+
+ get_random_bytes(&buf, sizeof(buf));
+ return buf;
+}
+
+static void dmatest_init_srcs(u8 **bufs, unsigned int start, unsigned int len)
+{
+ unsigned int i;
+ u8 *buf;
+
+ for (; (buf = *bufs); bufs++) {
+ for (i = 0; i < start; i++)
+ buf[i] = PATTERN_SRC | (~i & PATTERN_COUNT_MASK);
+ for ( ; i < start + len; i++)
+ buf[i] = PATTERN_SRC | PATTERN_COPY
+ | (~i & PATTERN_COUNT_MASK);
+ for ( ; i < test_buf_size; i++)
+ buf[i] = PATTERN_SRC | (~i & PATTERN_COUNT_MASK);
+ buf++;
+ }
+}
+
+static void dmatest_init_dsts(u8 **bufs, unsigned int start, unsigned int len)
+{
+ unsigned int i;
+ u8 *buf;
+
+ for (; (buf = *bufs); bufs++) {
+ for (i = 0; i < start; i++)
+ buf[i] = PATTERN_DST | (~i & PATTERN_COUNT_MASK);
+ for ( ; i < start + len; i++)
+ buf[i] = PATTERN_DST | PATTERN_OVERWRITE
+ | (~i & PATTERN_COUNT_MASK);
+ for ( ; i < test_buf_size; i++)
+ buf[i] = PATTERN_DST | (~i & PATTERN_COUNT_MASK);
+ }
+}
+
+static void dmatest_mismatch(u8 actual, u8 pattern, unsigned int index,
+ unsigned int counter, bool is_srcbuf)
+{
+ u8 diff = actual ^ pattern;
+ u8 expected = pattern | (~counter & PATTERN_COUNT_MASK);
+ const char *thread_name = current->comm;
+
+ if (is_srcbuf)
+ pr_warning("%s: srcbuf[0x%x] overwritten!"
+ " Expected %02x, got %02x\n",
+ thread_name, index, expected, actual);
+ else if ((pattern & PATTERN_COPY)
+ && (diff & (PATTERN_COPY | PATTERN_OVERWRITE)))
+ pr_warning("%s: dstbuf[0x%x] not copied!"
+ " Expected %02x, got %02x\n",
+ thread_name, index, expected, actual);
+ else if (diff & PATTERN_SRC)
+ pr_warning("%s: dstbuf[0x%x] was copied!"
+ " Expected %02x, got %02x\n",
+ thread_name, index, expected, actual);
+ else
+ pr_warning("%s: dstbuf[0x%x] mismatch!"
+ " Expected %02x, got %02x\n",
+ thread_name, index, expected, actual);
+}
+
+static unsigned int dmatest_verify(u8 **bufs, unsigned int start,
+ unsigned int end, unsigned int counter, u8 pattern,
+ bool is_srcbuf)
+{
+ unsigned int i;
+ unsigned int error_count = 0;
+ u8 actual;
+ u8 expected;
+ u8 *buf;
+ unsigned int counter_orig = counter;
+
+ for (; (buf = *bufs); bufs++) {
+ counter = counter_orig;
+ for (i = start; i < end; i++) {
+ actual = buf[i];
+ expected = pattern | (~counter & PATTERN_COUNT_MASK);
+ if (actual != expected) {
+ if (error_count < 32)
+ dmatest_mismatch(actual, pattern, i,
+ counter, is_srcbuf);
+ error_count++;
+ }
+ counter++;
+ }
+ }
+
+ if (error_count > 32)
+ pr_warning("%s: %u errors suppressed\n",
+ current->comm, error_count - 32);
+
+ return error_count;
+}
+
+static void dmatest_callback(void *completion)
+{
+ complete(completion);
+}
+
+/*
+ * This function repeatedly tests DMA transfers of various lengths and
+ * offsets for a given operation type until it is told to exit by
+ * kthread_stop(). There may be multiple threads running this function
+ * in parallel for a single channel, and there may be multiple channels
+ * being tested in parallel.
+ *
+ * Before each test, the source and destination buffer is initialized
+ * with a known pattern. This pattern is different depending on
+ * whether it's in an area which is supposed to be copied or
+ * overwritten, and different in the source and destination buffers.
+ * So if the DMA engine doesn't copy exactly what we tell it to copy,
+ * we'll notice.
+ */
+static int dmatest_func(void *data)
+{
+ struct dmatest_thread *thread = data;
+ struct dma_chan *chan;
+ const char *thread_name;
+ unsigned int src_off, dst_off, len;
+ unsigned int error_count;
+ unsigned int failed_tests = 0;
+ unsigned int total_tests = 0;
+ dma_cookie_t cookie;
+ enum dma_status status;
+ enum dma_ctrl_flags flags;
+ u8 pq_coefs[pq_sources];
+ int ret;
+ int src_cnt;
+ int dst_cnt;
+ int i;
+
+ thread_name = current->comm;
+
+ ret = -ENOMEM;
+
+ smp_rmb();
+ chan = thread->chan;
+ if (thread->type == DMA_MEMCPY)
+ src_cnt = dst_cnt = 1;
+ else if (thread->type == DMA_XOR) {
+ src_cnt = xor_sources | 1; /* force odd to ensure dst = src */
+ dst_cnt = 1;
+ } else if (thread->type == DMA_PQ) {
+ src_cnt = pq_sources | 1; /* force odd to ensure dst = src */
+ dst_cnt = 2;
+ for (i = 0; i < pq_sources; i++)
+ pq_coefs[i] = 1;
+ } else
+ goto err_srcs;
+
+ thread->srcs = kcalloc(src_cnt+1, sizeof(u8 *), GFP_KERNEL);
+ if (!thread->srcs)
+ goto err_srcs;
+ for (i = 0; i < src_cnt; i++) {
+ thread->srcs[i] = kmalloc(test_buf_size, GFP_KERNEL);
+ if (!thread->srcs[i])
+ goto err_srcbuf;
+ }
+ thread->srcs[i] = NULL;
+
+ thread->dsts = kcalloc(dst_cnt+1, sizeof(u8 *), GFP_KERNEL);
+ if (!thread->dsts)
+ goto err_dsts;
+ for (i = 0; i < dst_cnt; i++) {
+ thread->dsts[i] = kmalloc(test_buf_size, GFP_KERNEL);
+ if (!thread->dsts[i])
+ goto err_dstbuf;
+ }
+ thread->dsts[i] = NULL;
+
+ set_user_nice(current, 10);
+
+ flags = DMA_CTRL_ACK | DMA_COMPL_SKIP_DEST_UNMAP | DMA_PREP_INTERRUPT;
+
+ while (!kthread_should_stop()
+ && !(iterations && total_tests >= iterations)) {
+ struct dma_device *dev = chan->device;
+ struct dma_async_tx_descriptor *tx = NULL;
+ dma_addr_t dma_srcs[src_cnt];
+ dma_addr_t dma_dsts[dst_cnt];
+ struct completion cmp;
+ unsigned long tmo = msecs_to_jiffies(3000);
+ u8 align = 0;
+
+ total_tests++;
+
+ /* honor alignment restrictions */
+ if (thread->type == DMA_MEMCPY)
+ align = dev->copy_align;
+ else if (thread->type == DMA_XOR)
+ align = dev->xor_align;
+ else if (thread->type == DMA_PQ)
+ align = dev->pq_align;
+
+ if (1 << align > test_buf_size) {
+ pr_err("%u-byte buffer too small for %d-byte alignment\n",
+ test_buf_size, 1 << align);
+ break;
+ }
+
+ len = dmatest_random() % test_buf_size + 1;
+ len = (len >> align) << align;
+ if (!len)
+ len = 1 << align;
+ src_off = dmatest_random() % (test_buf_size - len + 1);
+ dst_off = dmatest_random() % (test_buf_size - len + 1);
+
+ src_off = (src_off >> align) << align;
+ dst_off = (dst_off >> align) << align;
+
+ dmatest_init_srcs(thread->srcs, src_off, len);
+ dmatest_init_dsts(thread->dsts, dst_off, len);
+
+ for (i = 0; i < src_cnt; i++) {
+ u8 *buf = thread->srcs[i] + src_off;
+
+ dma_srcs[i] = dma_map_single(dev->dev, buf, len,
+ DMA_TO_DEVICE);
+ }
+ /* map with DMA_BIDIRECTIONAL to force writeback/invalidate */
+ for (i = 0; i < dst_cnt; i++) {
+ dma_dsts[i] = dma_map_single(dev->dev, thread->dsts[i],
+ test_buf_size,
+ DMA_BIDIRECTIONAL);
+ }
+
+
+ if (thread->type == DMA_MEMCPY)
+ tx = dev->device_prep_dma_memcpy(chan,
+ dma_dsts[0] + dst_off,
+ dma_srcs[0], len,
+ flags);
+ else if (thread->type == DMA_XOR)
+ tx = dev->device_prep_dma_xor(chan,
+ dma_dsts[0] + dst_off,
+ dma_srcs, xor_sources,
+ len, flags);
+ else if (thread->type == DMA_PQ) {
+ dma_addr_t dma_pq[dst_cnt];
+
+ for (i = 0; i < dst_cnt; i++)
+ dma_pq[i] = dma_dsts[i] + dst_off;
+ tx = dev->device_prep_dma_pq(chan, dma_pq, dma_srcs,
+ pq_sources, pq_coefs,
+ len, flags);
+ }
+
+ if (!tx) {
+ for (i = 0; i < src_cnt; i++)
+ dma_unmap_single(dev->dev, dma_srcs[i], len,
+ DMA_TO_DEVICE);
+ for (i = 0; i < dst_cnt; i++)
+ dma_unmap_single(dev->dev, dma_dsts[i],
+ test_buf_size,
+ DMA_BIDIRECTIONAL);
+ pr_warning("%s: #%u: prep error with src_off=0x%x "
+ "dst_off=0x%x len=0x%x\n",
+ thread_name, total_tests - 1,
+ src_off, dst_off, len);
+ msleep(100);
+ failed_tests++;
+ continue;
+ }
+
+ init_completion(&cmp);
+ tx->callback = dmatest_callback;
+ tx->callback_param = &cmp;
+ cookie = tx->tx_submit(tx);
+
+ if (dma_submit_error(cookie)) {
+ pr_warning("%s: #%u: submit error %d with src_off=0x%x "
+ "dst_off=0x%x len=0x%x\n",
+ thread_name, total_tests - 1, cookie,
+ src_off, dst_off, len);
+ msleep(100);
+ failed_tests++;
+ continue;
+ }
+ dma_async_issue_pending(chan);
+
+ tmo = wait_for_completion_timeout(&cmp, tmo);
+ status = dma_async_is_tx_complete(chan, cookie, NULL, NULL);
+
+ if (tmo == 0) {
+ pr_warning("%s: #%u: test timed out\n",
+ thread_name, total_tests - 1);
+ failed_tests++;
+ continue;
+ } else if (status != DMA_SUCCESS) {
+ pr_warning("%s: #%u: got completion callback,"
+ " but status is \'%s\'\n",
+ thread_name, total_tests - 1,
+ status == DMA_ERROR ? "error" : "in progress");
+ failed_tests++;
+ continue;
+ }
+
+ /* Unmap by myself (see DMA_COMPL_SKIP_DEST_UNMAP above) */
+ for (i = 0; i < dst_cnt; i++)
+ dma_unmap_single(dev->dev, dma_dsts[i], test_buf_size,
+ DMA_BIDIRECTIONAL);
+
+ error_count = 0;
+
+ pr_debug("%s: verifying source buffer...\n", thread_name);
+ error_count += dmatest_verify(thread->srcs, 0, src_off,
+ 0, PATTERN_SRC, true);
+ error_count += dmatest_verify(thread->srcs, src_off,
+ src_off + len, src_off,
+ PATTERN_SRC | PATTERN_COPY, true);
+ error_count += dmatest_verify(thread->srcs, src_off + len,
+ test_buf_size, src_off + len,
+ PATTERN_SRC, true);
+
+ pr_debug("%s: verifying dest buffer...\n",
+ thread->task->comm);
+ error_count += dmatest_verify(thread->dsts, 0, dst_off,
+ 0, PATTERN_DST, false);
+ error_count += dmatest_verify(thread->dsts, dst_off,
+ dst_off + len, src_off,
+ PATTERN_SRC | PATTERN_COPY, false);
+ error_count += dmatest_verify(thread->dsts, dst_off + len,
+ test_buf_size, dst_off + len,
+ PATTERN_DST, false);
+
+ if (error_count) {
+ pr_warning("%s: #%u: %u errors with "
+ "src_off=0x%x dst_off=0x%x len=0x%x\n",
+ thread_name, total_tests - 1, error_count,
+ src_off, dst_off, len);
+ failed_tests++;
+ } else {
+ pr_debug("%s: #%u: No errors with "
+ "src_off=0x%x dst_off=0x%x len=0x%x\n",
+ thread_name, total_tests - 1,
+ src_off, dst_off, len);
+ }
+ }
+
+ ret = 0;
+ for (i = 0; thread->dsts[i]; i++)
+ kfree(thread->dsts[i]);
+err_dstbuf:
+ kfree(thread->dsts);
+err_dsts:
+ for (i = 0; thread->srcs[i]; i++)
+ kfree(thread->srcs[i]);
+err_srcbuf:
+ kfree(thread->srcs);
+err_srcs:
+ pr_notice("%s: terminating after %u tests, %u failures (status %d)\n",
+ thread_name, total_tests, failed_tests, ret);
+
+ if (iterations > 0)
+ while (!kthread_should_stop()) {
+ DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wait_dmatest_exit);
+ interruptible_sleep_on(&wait_dmatest_exit);
+ }
+
+ return ret;
+}
+
+static void dmatest_cleanup_channel(struct dmatest_chan *dtc)
+{
+ struct dmatest_thread *thread;
+ struct dmatest_thread *_thread;
+ int ret;
+
+ list_for_each_entry_safe(thread, _thread, &dtc->threads, node) {
+ ret = kthread_stop(thread->task);
+ pr_debug("dmatest: thread %s exited with status %d\n",
+ thread->task->comm, ret);
+ list_del(&thread->node);
+ kfree(thread);
+ }
+ kfree(dtc);
+}
+
+static int dmatest_add_threads(struct dmatest_chan *dtc, enum dma_transaction_type type)
+{
+ struct dmatest_thread *thread;
+ struct dma_chan *chan = dtc->chan;
+ char *op;
+ unsigned int i;
+
+ if (type == DMA_MEMCPY)
+ op = "copy";
+ else if (type == DMA_XOR)
+ op = "xor";
+ else if (type == DMA_PQ)
+ op = "pq";
+ else
+ return -EINVAL;
+
+ for (i = 0; i < threads_per_chan; i++) {
+ thread = kzalloc(sizeof(struct dmatest_thread), GFP_KERNEL);
+ if (!thread) {
+ pr_warning("dmatest: No memory for %s-%s%u\n",
+ dma_chan_name(chan), op, i);
+
+ break;
+ }
+ thread->chan = dtc->chan;
+ thread->type = type;
+ smp_wmb();
+ thread->task = kthread_run(dmatest_func, thread, "%s-%s%u",
+ dma_chan_name(chan), op, i);
+ if (IS_ERR(thread->task)) {
+ pr_warning("dmatest: Failed to run thread %s-%s%u\n",
+ dma_chan_name(chan), op, i);
+ kfree(thread);
+ break;
+ }
+
+ /* srcbuf and dstbuf are allocated by the thread itself */
+
+ list_add_tail(&thread->node, &dtc->threads);
+ }
+
+ return i;
+}
+
+static int dmatest_add_channel(struct dma_chan *chan)
+{
+ struct dmatest_chan *dtc;
+ struct dma_device *dma_dev = chan->device;
+ unsigned int thread_count = 0;
+ unsigned int cnt;
+
+ dtc = kmalloc(sizeof(struct dmatest_chan), GFP_KERNEL);
+ if (!dtc) {
+ pr_warning("dmatest: No memory for %s\n", dma_chan_name(chan));
+ return -ENOMEM;
+ }
+
+ dtc->chan = chan;
+ INIT_LIST_HEAD(&dtc->threads);
+
+ if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
+ cnt = dmatest_add_threads(dtc, DMA_MEMCPY);
+ thread_count += cnt > 0 ? cnt : 0;
+ }
+ if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
+ cnt = dmatest_add_threads(dtc, DMA_XOR);
+ thread_count += cnt > 0 ? cnt : 0;
+ }
+ if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) {
+ cnt = dmatest_add_threads(dtc, DMA_PQ);
+ thread_count += cnt > 0 ?: 0;
+ }
+
+ pr_info("dmatest: Started %u threads using %s\n",
+ thread_count, dma_chan_name(chan));
+
+ list_add_tail(&dtc->node, &dmatest_channels);
+ nr_channels++;
+
+ return 0;
+}
+
+static bool filter(struct dma_chan *chan, void *param)
+{
+ if (!dmatest_match_channel(chan) || !dmatest_match_device(chan->device))
+ return false;
+ else
+ return true;
+}
+
+static int __init dmatest_init(void)
+{
+ dma_cap_mask_t mask;
+ struct dma_chan *chan;
+ int err = 0;
+
+ dma_cap_zero(mask);
+ dma_cap_set(DMA_MEMCPY, mask);
+ for (;;) {
+ chan = dma_request_channel(mask, filter, NULL);
+ if (chan) {
+ err = dmatest_add_channel(chan);
+ if (err) {
+ dma_release_channel(chan);
+ break; /* add_channel failed, punt */
+ }
+ } else
+ break; /* no more channels available */
+ if (max_channels && nr_channels >= max_channels)
+ break; /* we have all we need */
+ }
+
+ return err;
+}
+/* when compiled-in wait for drivers to load first */
+late_initcall(dmatest_init);
+
+static void __exit dmatest_exit(void)
+{
+ struct dmatest_chan *dtc, *_dtc;
+ struct dma_chan *chan;
+
+ list_for_each_entry_safe(dtc, _dtc, &dmatest_channels, node) {
+ list_del(&dtc->node);
+ chan = dtc->chan;
+ dmatest_cleanup_channel(dtc);
+ pr_debug("dmatest: dropped channel %s\n",
+ dma_chan_name(chan));
+ dma_release_channel(chan);
+ }
+}
+module_exit(dmatest_exit);
+
+MODULE_AUTHOR("Haavard Skinnemoen <hskinnemoen@atmel.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c
new file mode 100644
index 00000000000..d28369f7afd
--- /dev/null
+++ b/drivers/dma/dw_dmac.c
@@ -0,0 +1,1456 @@
+/*
+ * Driver for the Synopsys DesignWare DMA Controller (aka DMACA on
+ * AVR32 systems.)
+ *
+ * Copyright (C) 2007-2008 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include "dw_dmac_regs.h"
+
+/*
+ * This supports the Synopsys "DesignWare AHB Central DMA Controller",
+ * (DW_ahb_dmac) which is used with various AMBA 2.0 systems (not all
+ * of which use ARM any more). See the "Databook" from Synopsys for
+ * information beyond what licensees probably provide.
+ *
+ * The driver has currently been tested only with the Atmel AT32AP7000,
+ * which does not support descriptor writeback.
+ */
+
+/* NOTE: DMS+SMS is system-specific. We should get this information
+ * from the platform code somehow.
+ */
+#define DWC_DEFAULT_CTLLO (DWC_CTLL_DST_MSIZE(0) \
+ | DWC_CTLL_SRC_MSIZE(0) \
+ | DWC_CTLL_DMS(0) \
+ | DWC_CTLL_SMS(1) \
+ | DWC_CTLL_LLP_D_EN \
+ | DWC_CTLL_LLP_S_EN)
+
+/*
+ * This is configuration-dependent and usually a funny size like 4095.
+ * Let's round it down to the nearest power of two.
+ *
+ * Note that this is a transfer count, i.e. if we transfer 32-bit
+ * words, we can do 8192 bytes per descriptor.
+ *
+ * This parameter is also system-specific.
+ */
+#define DWC_MAX_COUNT 2048U
+
+/*
+ * Number of descriptors to allocate for each channel. This should be
+ * made configurable somehow; preferably, the clients (at least the
+ * ones using slave transfers) should be able to give us a hint.
+ */
+#define NR_DESCS_PER_CHANNEL 64
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Because we're not relying on writeback from the controller (it may not
+ * even be configured into the core!) we don't need to use dma_pool. These
+ * descriptors -- and associated data -- are cacheable. We do need to make
+ * sure their dcache entries are written back before handing them off to
+ * the controller, though.
+ */
+
+static struct device *chan2dev(struct dma_chan *chan)
+{
+ return &chan->dev->device;
+}
+static struct device *chan2parent(struct dma_chan *chan)
+{
+ return chan->dev->device.parent;
+}
+
+static struct dw_desc *dwc_first_active(struct dw_dma_chan *dwc)
+{
+ return list_entry(dwc->active_list.next, struct dw_desc, desc_node);
+}
+
+static struct dw_desc *dwc_first_queued(struct dw_dma_chan *dwc)
+{
+ return list_entry(dwc->queue.next, struct dw_desc, desc_node);
+}
+
+static struct dw_desc *dwc_desc_get(struct dw_dma_chan *dwc)
+{
+ struct dw_desc *desc, *_desc;
+ struct dw_desc *ret = NULL;
+ unsigned int i = 0;
+
+ spin_lock_bh(&dwc->lock);
+ list_for_each_entry_safe(desc, _desc, &dwc->free_list, desc_node) {
+ if (async_tx_test_ack(&desc->txd)) {
+ list_del(&desc->desc_node);
+ ret = desc;
+ break;
+ }
+ dev_dbg(chan2dev(&dwc->chan), "desc %p not ACKed\n", desc);
+ i++;
+ }
+ spin_unlock_bh(&dwc->lock);
+
+ dev_vdbg(chan2dev(&dwc->chan), "scanned %u descriptors on freelist\n", i);
+
+ return ret;
+}
+
+static void dwc_sync_desc_for_cpu(struct dw_dma_chan *dwc, struct dw_desc *desc)
+{
+ struct dw_desc *child;
+
+ list_for_each_entry(child, &desc->tx_list, desc_node)
+ dma_sync_single_for_cpu(chan2parent(&dwc->chan),
+ child->txd.phys, sizeof(child->lli),
+ DMA_TO_DEVICE);
+ dma_sync_single_for_cpu(chan2parent(&dwc->chan),
+ desc->txd.phys, sizeof(desc->lli),
+ DMA_TO_DEVICE);
+}
+
+/*
+ * Move a descriptor, including any children, to the free list.
+ * `desc' must not be on any lists.
+ */
+static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc *desc)
+{
+ if (desc) {
+ struct dw_desc *child;
+
+ dwc_sync_desc_for_cpu(dwc, desc);
+
+ spin_lock_bh(&dwc->lock);
+ list_for_each_entry(child, &desc->tx_list, desc_node)
+ dev_vdbg(chan2dev(&dwc->chan),
+ "moving child desc %p to freelist\n",
+ child);
+ list_splice_init(&desc->tx_list, &dwc->free_list);
+ dev_vdbg(chan2dev(&dwc->chan), "moving desc %p to freelist\n", desc);
+ list_add(&desc->desc_node, &dwc->free_list);
+ spin_unlock_bh(&dwc->lock);
+ }
+}
+
+/* Called with dwc->lock held and bh disabled */
+static dma_cookie_t
+dwc_assign_cookie(struct dw_dma_chan *dwc, struct dw_desc *desc)
+{
+ dma_cookie_t cookie = dwc->chan.cookie;
+
+ if (++cookie < 0)
+ cookie = 1;
+
+ dwc->chan.cookie = cookie;
+ desc->txd.cookie = cookie;
+
+ return cookie;
+}
+
+/*----------------------------------------------------------------------*/
+
+/* Called with dwc->lock held and bh disabled */
+static void dwc_dostart(struct dw_dma_chan *dwc, struct dw_desc *first)
+{
+ struct dw_dma *dw = to_dw_dma(dwc->chan.device);
+
+ /* ASSERT: channel is idle */
+ if (dma_readl(dw, CH_EN) & dwc->mask) {
+ dev_err(chan2dev(&dwc->chan),
+ "BUG: Attempted to start non-idle channel\n");
+ dev_err(chan2dev(&dwc->chan),
+ " SAR: 0x%x DAR: 0x%x LLP: 0x%x CTL: 0x%x:%08x\n",
+ channel_readl(dwc, SAR),
+ channel_readl(dwc, DAR),
+ channel_readl(dwc, LLP),
+ channel_readl(dwc, CTL_HI),
+ channel_readl(dwc, CTL_LO));
+
+ /* The tasklet will hopefully advance the queue... */
+ return;
+ }
+
+ channel_writel(dwc, LLP, first->txd.phys);
+ channel_writel(dwc, CTL_LO,
+ DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN);
+ channel_writel(dwc, CTL_HI, 0);
+ channel_set_bit(dw, CH_EN, dwc->mask);
+}
+
+/*----------------------------------------------------------------------*/
+
+static void
+dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc)
+{
+ dma_async_tx_callback callback;
+ void *param;
+ struct dma_async_tx_descriptor *txd = &desc->txd;
+
+ dev_vdbg(chan2dev(&dwc->chan), "descriptor %u complete\n", txd->cookie);
+
+ dwc->completed = txd->cookie;
+ callback = txd->callback;
+ param = txd->callback_param;
+
+ dwc_sync_desc_for_cpu(dwc, desc);
+ list_splice_init(&desc->tx_list, &dwc->free_list);
+ list_move(&desc->desc_node, &dwc->free_list);
+
+ if (!dwc->chan.private) {
+ struct device *parent = chan2parent(&dwc->chan);
+ if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+ if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
+ dma_unmap_single(parent, desc->lli.dar,
+ desc->len, DMA_FROM_DEVICE);
+ else
+ dma_unmap_page(parent, desc->lli.dar,
+ desc->len, DMA_FROM_DEVICE);
+ }
+ if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+ if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
+ dma_unmap_single(parent, desc->lli.sar,
+ desc->len, DMA_TO_DEVICE);
+ else
+ dma_unmap_page(parent, desc->lli.sar,
+ desc->len, DMA_TO_DEVICE);
+ }
+ }
+
+ /*
+ * The API requires that no submissions are done from a
+ * callback, so we don't need to drop the lock here
+ */
+ if (callback)
+ callback(param);
+}
+
+static void dwc_complete_all(struct dw_dma *dw, struct dw_dma_chan *dwc)
+{
+ struct dw_desc *desc, *_desc;
+ LIST_HEAD(list);
+
+ if (dma_readl(dw, CH_EN) & dwc->mask) {
+ dev_err(chan2dev(&dwc->chan),
+ "BUG: XFER bit set, but channel not idle!\n");
+
+ /* Try to continue after resetting the channel... */
+ channel_clear_bit(dw, CH_EN, dwc->mask);
+ while (dma_readl(dw, CH_EN) & dwc->mask)
+ cpu_relax();
+ }
+
+ /*
+ * Submit queued descriptors ASAP, i.e. before we go through
+ * the completed ones.
+ */
+ if (!list_empty(&dwc->queue))
+ dwc_dostart(dwc, dwc_first_queued(dwc));
+ list_splice_init(&dwc->active_list, &list);
+ list_splice_init(&dwc->queue, &dwc->active_list);
+
+ list_for_each_entry_safe(desc, _desc, &list, desc_node)
+ dwc_descriptor_complete(dwc, desc);
+}
+
+static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc)
+{
+ dma_addr_t llp;
+ struct dw_desc *desc, *_desc;
+ struct dw_desc *child;
+ u32 status_xfer;
+
+ /*
+ * Clear block interrupt flag before scanning so that we don't
+ * miss any, and read LLP before RAW_XFER to ensure it is
+ * valid if we decide to scan the list.
+ */
+ dma_writel(dw, CLEAR.BLOCK, dwc->mask);
+ llp = channel_readl(dwc, LLP);
+ status_xfer = dma_readl(dw, RAW.XFER);
+
+ if (status_xfer & dwc->mask) {
+ /* Everything we've submitted is done */
+ dma_writel(dw, CLEAR.XFER, dwc->mask);
+ dwc_complete_all(dw, dwc);
+ return;
+ }
+
+ dev_vdbg(chan2dev(&dwc->chan), "scan_descriptors: llp=0x%x\n", llp);
+
+ list_for_each_entry_safe(desc, _desc, &dwc->active_list, desc_node) {
+ if (desc->lli.llp == llp)
+ /* This one is currently in progress */
+ return;
+
+ list_for_each_entry(child, &desc->tx_list, desc_node)
+ if (child->lli.llp == llp)
+ /* Currently in progress */
+ return;
+
+ /*
+ * No descriptors so far seem to be in progress, i.e.
+ * this one must be done.
+ */
+ dwc_descriptor_complete(dwc, desc);
+ }
+
+ dev_err(chan2dev(&dwc->chan),
+ "BUG: All descriptors done, but channel not idle!\n");
+
+ /* Try to continue after resetting the channel... */
+ channel_clear_bit(dw, CH_EN, dwc->mask);
+ while (dma_readl(dw, CH_EN) & dwc->mask)
+ cpu_relax();
+
+ if (!list_empty(&dwc->queue)) {
+ dwc_dostart(dwc, dwc_first_queued(dwc));
+ list_splice_init(&dwc->queue, &dwc->active_list);
+ }
+}
+
+static void dwc_dump_lli(struct dw_dma_chan *dwc, struct dw_lli *lli)
+{
+ dev_printk(KERN_CRIT, chan2dev(&dwc->chan),
+ " desc: s0x%x d0x%x l0x%x c0x%x:%x\n",
+ lli->sar, lli->dar, lli->llp,
+ lli->ctlhi, lli->ctllo);
+}
+
+static void dwc_handle_error(struct dw_dma *dw, struct dw_dma_chan *dwc)
+{
+ struct dw_desc *bad_desc;
+ struct dw_desc *child;
+
+ dwc_scan_descriptors(dw, dwc);
+
+ /*
+ * The descriptor currently at the head of the active list is
+ * borked. Since we don't have any way to report errors, we'll
+ * just have to scream loudly and try to carry on.
+ */
+ bad_desc = dwc_first_active(dwc);
+ list_del_init(&bad_desc->desc_node);
+ list_splice_init(&dwc->queue, dwc->active_list.prev);
+
+ /* Clear the error flag and try to restart the controller */
+ dma_writel(dw, CLEAR.ERROR, dwc->mask);
+ if (!list_empty(&dwc->active_list))
+ dwc_dostart(dwc, dwc_first_active(dwc));
+
+ /*
+ * KERN_CRITICAL may seem harsh, but since this only happens
+ * when someone submits a bad physical address in a
+ * descriptor, we should consider ourselves lucky that the
+ * controller flagged an error instead of scribbling over
+ * random memory locations.
+ */
+ dev_printk(KERN_CRIT, chan2dev(&dwc->chan),
+ "Bad descriptor submitted for DMA!\n");
+ dev_printk(KERN_CRIT, chan2dev(&dwc->chan),
+ " cookie: %d\n", bad_desc->txd.cookie);
+ dwc_dump_lli(dwc, &bad_desc->lli);
+ list_for_each_entry(child, &bad_desc->tx_list, desc_node)
+ dwc_dump_lli(dwc, &child->lli);
+
+ /* Pretend the descriptor completed successfully */
+ dwc_descriptor_complete(dwc, bad_desc);
+}
+
+/* --------------------- Cyclic DMA API extensions -------------------- */
+
+inline dma_addr_t dw_dma_get_src_addr(struct dma_chan *chan)
+{
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+ return channel_readl(dwc, SAR);
+}
+EXPORT_SYMBOL(dw_dma_get_src_addr);
+
+inline dma_addr_t dw_dma_get_dst_addr(struct dma_chan *chan)
+{
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+ return channel_readl(dwc, DAR);
+}
+EXPORT_SYMBOL(dw_dma_get_dst_addr);
+
+/* called with dwc->lock held and all DMAC interrupts disabled */
+static void dwc_handle_cyclic(struct dw_dma *dw, struct dw_dma_chan *dwc,
+ u32 status_block, u32 status_err, u32 status_xfer)
+{
+ if (status_block & dwc->mask) {
+ void (*callback)(void *param);
+ void *callback_param;
+
+ dev_vdbg(chan2dev(&dwc->chan), "new cyclic period llp 0x%08x\n",
+ channel_readl(dwc, LLP));
+ dma_writel(dw, CLEAR.BLOCK, dwc->mask);
+
+ callback = dwc->cdesc->period_callback;
+ callback_param = dwc->cdesc->period_callback_param;
+ if (callback) {
+ spin_unlock(&dwc->lock);
+ callback(callback_param);
+ spin_lock(&dwc->lock);
+ }
+ }
+
+ /*
+ * Error and transfer complete are highly unlikely, and will most
+ * likely be due to a configuration error by the user.
+ */
+ if (unlikely(status_err & dwc->mask) ||
+ unlikely(status_xfer & dwc->mask)) {
+ int i;
+
+ dev_err(chan2dev(&dwc->chan), "cyclic DMA unexpected %s "
+ "interrupt, stopping DMA transfer\n",
+ status_xfer ? "xfer" : "error");
+ dev_err(chan2dev(&dwc->chan),
+ " SAR: 0x%x DAR: 0x%x LLP: 0x%x CTL: 0x%x:%08x\n",
+ channel_readl(dwc, SAR),
+ channel_readl(dwc, DAR),
+ channel_readl(dwc, LLP),
+ channel_readl(dwc, CTL_HI),
+ channel_readl(dwc, CTL_LO));
+
+ channel_clear_bit(dw, CH_EN, dwc->mask);
+ while (dma_readl(dw, CH_EN) & dwc->mask)
+ cpu_relax();
+
+ /* make sure DMA does not restart by loading a new list */
+ channel_writel(dwc, LLP, 0);
+ channel_writel(dwc, CTL_LO, 0);
+ channel_writel(dwc, CTL_HI, 0);
+
+ dma_writel(dw, CLEAR.BLOCK, dwc->mask);
+ dma_writel(dw, CLEAR.ERROR, dwc->mask);
+ dma_writel(dw, CLEAR.XFER, dwc->mask);
+
+ for (i = 0; i < dwc->cdesc->periods; i++)
+ dwc_dump_lli(dwc, &dwc->cdesc->desc[i]->lli);
+ }
+}
+
+/* ------------------------------------------------------------------------- */
+
+static void dw_dma_tasklet(unsigned long data)
+{
+ struct dw_dma *dw = (struct dw_dma *)data;
+ struct dw_dma_chan *dwc;
+ u32 status_block;
+ u32 status_xfer;
+ u32 status_err;
+ int i;
+
+ status_block = dma_readl(dw, RAW.BLOCK);
+ status_xfer = dma_readl(dw, RAW.XFER);
+ status_err = dma_readl(dw, RAW.ERROR);
+
+ dev_vdbg(dw->dma.dev, "tasklet: status_block=%x status_err=%x\n",
+ status_block, status_err);
+
+ for (i = 0; i < dw->dma.chancnt; i++) {
+ dwc = &dw->chan[i];
+ spin_lock(&dwc->lock);
+ if (test_bit(DW_DMA_IS_CYCLIC, &dwc->flags))
+ dwc_handle_cyclic(dw, dwc, status_block, status_err,
+ status_xfer);
+ else if (status_err & (1 << i))
+ dwc_handle_error(dw, dwc);
+ else if ((status_block | status_xfer) & (1 << i))
+ dwc_scan_descriptors(dw, dwc);
+ spin_unlock(&dwc->lock);
+ }
+
+ /*
+ * Re-enable interrupts. Block Complete interrupts are only
+ * enabled if the INT_EN bit in the descriptor is set. This
+ * will trigger a scan before the whole list is done.
+ */
+ channel_set_bit(dw, MASK.XFER, dw->all_chan_mask);
+ channel_set_bit(dw, MASK.BLOCK, dw->all_chan_mask);
+ channel_set_bit(dw, MASK.ERROR, dw->all_chan_mask);
+}
+
+static irqreturn_t dw_dma_interrupt(int irq, void *dev_id)
+{
+ struct dw_dma *dw = dev_id;
+ u32 status;
+
+ dev_vdbg(dw->dma.dev, "interrupt: status=0x%x\n",
+ dma_readl(dw, STATUS_INT));
+
+ /*
+ * Just disable the interrupts. We'll turn them back on in the
+ * softirq handler.
+ */
+ channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask);
+ channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask);
+ channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask);
+
+ status = dma_readl(dw, STATUS_INT);
+ if (status) {
+ dev_err(dw->dma.dev,
+ "BUG: Unexpected interrupts pending: 0x%x\n",
+ status);
+
+ /* Try to recover */
+ channel_clear_bit(dw, MASK.XFER, (1 << 8) - 1);
+ channel_clear_bit(dw, MASK.BLOCK, (1 << 8) - 1);
+ channel_clear_bit(dw, MASK.SRC_TRAN, (1 << 8) - 1);
+ channel_clear_bit(dw, MASK.DST_TRAN, (1 << 8) - 1);
+ channel_clear_bit(dw, MASK.ERROR, (1 << 8) - 1);
+ }
+
+ tasklet_schedule(&dw->tasklet);
+
+ return IRQ_HANDLED;
+}
+
+/*----------------------------------------------------------------------*/
+
+static dma_cookie_t dwc_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct dw_desc *desc = txd_to_dw_desc(tx);
+ struct dw_dma_chan *dwc = to_dw_dma_chan(tx->chan);
+ dma_cookie_t cookie;
+
+ spin_lock_bh(&dwc->lock);
+ cookie = dwc_assign_cookie(dwc, desc);
+
+ /*
+ * REVISIT: We should attempt to chain as many descriptors as
+ * possible, perhaps even appending to those already submitted
+ * for DMA. But this is hard to do in a race-free manner.
+ */
+ if (list_empty(&dwc->active_list)) {
+ dev_vdbg(chan2dev(tx->chan), "tx_submit: started %u\n",
+ desc->txd.cookie);
+ dwc_dostart(dwc, desc);
+ list_add_tail(&desc->desc_node, &dwc->active_list);
+ } else {
+ dev_vdbg(chan2dev(tx->chan), "tx_submit: queued %u\n",
+ desc->txd.cookie);
+
+ list_add_tail(&desc->desc_node, &dwc->queue);
+ }
+
+ spin_unlock_bh(&dwc->lock);
+
+ return cookie;
+}
+
+static struct dma_async_tx_descriptor *
+dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+ size_t len, unsigned long flags)
+{
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+ struct dw_desc *desc;
+ struct dw_desc *first;
+ struct dw_desc *prev;
+ size_t xfer_count;
+ size_t offset;
+ unsigned int src_width;
+ unsigned int dst_width;
+ u32 ctllo;
+
+ dev_vdbg(chan2dev(chan), "prep_dma_memcpy d0x%x s0x%x l0x%zx f0x%lx\n",
+ dest, src, len, flags);
+
+ if (unlikely(!len)) {
+ dev_dbg(chan2dev(chan), "prep_dma_memcpy: length is zero!\n");
+ return NULL;
+ }
+
+ /*
+ * We can be a lot more clever here, but this should take care
+ * of the most common optimization.
+ */
+ if (!((src | dest | len) & 3))
+ src_width = dst_width = 2;
+ else if (!((src | dest | len) & 1))
+ src_width = dst_width = 1;
+ else
+ src_width = dst_width = 0;
+
+ ctllo = DWC_DEFAULT_CTLLO
+ | DWC_CTLL_DST_WIDTH(dst_width)
+ | DWC_CTLL_SRC_WIDTH(src_width)
+ | DWC_CTLL_DST_INC
+ | DWC_CTLL_SRC_INC
+ | DWC_CTLL_FC_M2M;
+ prev = first = NULL;
+
+ for (offset = 0; offset < len; offset += xfer_count << src_width) {
+ xfer_count = min_t(size_t, (len - offset) >> src_width,
+ DWC_MAX_COUNT);
+
+ desc = dwc_desc_get(dwc);
+ if (!desc)
+ goto err_desc_get;
+
+ desc->lli.sar = src + offset;
+ desc->lli.dar = dest + offset;
+ desc->lli.ctllo = ctllo;
+ desc->lli.ctlhi = xfer_count;
+
+ if (!first) {
+ first = desc;
+ } else {
+ prev->lli.llp = desc->txd.phys;
+ dma_sync_single_for_device(chan2parent(chan),
+ prev->txd.phys, sizeof(prev->lli),
+ DMA_TO_DEVICE);
+ list_add_tail(&desc->desc_node,
+ &first->tx_list);
+ }
+ prev = desc;
+ }
+
+
+ if (flags & DMA_PREP_INTERRUPT)
+ /* Trigger interrupt after last block */
+ prev->lli.ctllo |= DWC_CTLL_INT_EN;
+
+ prev->lli.llp = 0;
+ dma_sync_single_for_device(chan2parent(chan),
+ prev->txd.phys, sizeof(prev->lli),
+ DMA_TO_DEVICE);
+
+ first->txd.flags = flags;
+ first->len = len;
+
+ return &first->txd;
+
+err_desc_get:
+ dwc_desc_put(dwc, first);
+ return NULL;
+}
+
+static struct dma_async_tx_descriptor *
+dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+ unsigned int sg_len, enum dma_data_direction direction,
+ unsigned long flags)
+{
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+ struct dw_dma_slave *dws = chan->private;
+ struct dw_desc *prev;
+ struct dw_desc *first;
+ u32 ctllo;
+ dma_addr_t reg;
+ unsigned int reg_width;
+ unsigned int mem_width;
+ unsigned int i;
+ struct scatterlist *sg;
+ size_t total_len = 0;
+
+ dev_vdbg(chan2dev(chan), "prep_dma_slave\n");
+
+ if (unlikely(!dws || !sg_len))
+ return NULL;
+
+ reg_width = dws->reg_width;
+ prev = first = NULL;
+
+ switch (direction) {
+ case DMA_TO_DEVICE:
+ ctllo = (DWC_DEFAULT_CTLLO
+ | DWC_CTLL_DST_WIDTH(reg_width)
+ | DWC_CTLL_DST_FIX
+ | DWC_CTLL_SRC_INC
+ | DWC_CTLL_FC_M2P);
+ reg = dws->tx_reg;
+ for_each_sg(sgl, sg, sg_len, i) {
+ struct dw_desc *desc;
+ u32 len;
+ u32 mem;
+
+ desc = dwc_desc_get(dwc);
+ if (!desc) {
+ dev_err(chan2dev(chan),
+ "not enough descriptors available\n");
+ goto err_desc_get;
+ }
+
+ mem = sg_phys(sg);
+ len = sg_dma_len(sg);
+ mem_width = 2;
+ if (unlikely(mem & 3 || len & 3))
+ mem_width = 0;
+
+ desc->lli.sar = mem;
+ desc->lli.dar = reg;
+ desc->lli.ctllo = ctllo | DWC_CTLL_SRC_WIDTH(mem_width);
+ desc->lli.ctlhi = len >> mem_width;
+
+ if (!first) {
+ first = desc;
+ } else {
+ prev->lli.llp = desc->txd.phys;
+ dma_sync_single_for_device(chan2parent(chan),
+ prev->txd.phys,
+ sizeof(prev->lli),
+ DMA_TO_DEVICE);
+ list_add_tail(&desc->desc_node,
+ &first->tx_list);
+ }
+ prev = desc;
+ total_len += len;
+ }
+ break;
+ case DMA_FROM_DEVICE:
+ ctllo = (DWC_DEFAULT_CTLLO
+ | DWC_CTLL_SRC_WIDTH(reg_width)
+ | DWC_CTLL_DST_INC
+ | DWC_CTLL_SRC_FIX
+ | DWC_CTLL_FC_P2M);
+
+ reg = dws->rx_reg;
+ for_each_sg(sgl, sg, sg_len, i) {
+ struct dw_desc *desc;
+ u32 len;
+ u32 mem;
+
+ desc = dwc_desc_get(dwc);
+ if (!desc) {
+ dev_err(chan2dev(chan),
+ "not enough descriptors available\n");
+ goto err_desc_get;
+ }
+
+ mem = sg_phys(sg);
+ len = sg_dma_len(sg);
+ mem_width = 2;
+ if (unlikely(mem & 3 || len & 3))
+ mem_width = 0;
+
+ desc->lli.sar = reg;
+ desc->lli.dar = mem;
+ desc->lli.ctllo = ctllo | DWC_CTLL_DST_WIDTH(mem_width);
+ desc->lli.ctlhi = len >> reg_width;
+
+ if (!first) {
+ first = desc;
+ } else {
+ prev->lli.llp = desc->txd.phys;
+ dma_sync_single_for_device(chan2parent(chan),
+ prev->txd.phys,
+ sizeof(prev->lli),
+ DMA_TO_DEVICE);
+ list_add_tail(&desc->desc_node,
+ &first->tx_list);
+ }
+ prev = desc;
+ total_len += len;
+ }
+ break;
+ default:
+ return NULL;
+ }
+
+ if (flags & DMA_PREP_INTERRUPT)
+ /* Trigger interrupt after last block */
+ prev->lli.ctllo |= DWC_CTLL_INT_EN;
+
+ prev->lli.llp = 0;
+ dma_sync_single_for_device(chan2parent(chan),
+ prev->txd.phys, sizeof(prev->lli),
+ DMA_TO_DEVICE);
+
+ first->len = total_len;
+
+ return &first->txd;
+
+err_desc_get:
+ dwc_desc_put(dwc, first);
+ return NULL;
+}
+
+static void dwc_terminate_all(struct dma_chan *chan)
+{
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+ struct dw_dma *dw = to_dw_dma(chan->device);
+ struct dw_desc *desc, *_desc;
+ LIST_HEAD(list);
+
+ /*
+ * This is only called when something went wrong elsewhere, so
+ * we don't really care about the data. Just disable the
+ * channel. We still have to poll the channel enable bit due
+ * to AHB/HSB limitations.
+ */
+ spin_lock_bh(&dwc->lock);
+
+ channel_clear_bit(dw, CH_EN, dwc->mask);
+
+ while (dma_readl(dw, CH_EN) & dwc->mask)
+ cpu_relax();
+
+ /* active_list entries will end up before queued entries */
+ list_splice_init(&dwc->queue, &list);
+ list_splice_init(&dwc->active_list, &list);
+
+ spin_unlock_bh(&dwc->lock);
+
+ /* Flush all pending and queued descriptors */
+ list_for_each_entry_safe(desc, _desc, &list, desc_node)
+ dwc_descriptor_complete(dwc, desc);
+}
+
+static enum dma_status
+dwc_is_tx_complete(struct dma_chan *chan,
+ dma_cookie_t cookie,
+ dma_cookie_t *done, dma_cookie_t *used)
+{
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+ dma_cookie_t last_used;
+ dma_cookie_t last_complete;
+ int ret;
+
+ last_complete = dwc->completed;
+ last_used = chan->cookie;
+
+ ret = dma_async_is_complete(cookie, last_complete, last_used);
+ if (ret != DMA_SUCCESS) {
+ dwc_scan_descriptors(to_dw_dma(chan->device), dwc);
+
+ last_complete = dwc->completed;
+ last_used = chan->cookie;
+
+ ret = dma_async_is_complete(cookie, last_complete, last_used);
+ }
+
+ if (done)
+ *done = last_complete;
+ if (used)
+ *used = last_used;
+
+ return ret;
+}
+
+static void dwc_issue_pending(struct dma_chan *chan)
+{
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+
+ spin_lock_bh(&dwc->lock);
+ if (!list_empty(&dwc->queue))
+ dwc_scan_descriptors(to_dw_dma(chan->device), dwc);
+ spin_unlock_bh(&dwc->lock);
+}
+
+static int dwc_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+ struct dw_dma *dw = to_dw_dma(chan->device);
+ struct dw_desc *desc;
+ struct dw_dma_slave *dws;
+ int i;
+ u32 cfghi;
+ u32 cfglo;
+
+ dev_vdbg(chan2dev(chan), "alloc_chan_resources\n");
+
+ /* ASSERT: channel is idle */
+ if (dma_readl(dw, CH_EN) & dwc->mask) {
+ dev_dbg(chan2dev(chan), "DMA channel not idle?\n");
+ return -EIO;
+ }
+
+ dwc->completed = chan->cookie = 1;
+
+ cfghi = DWC_CFGH_FIFO_MODE;
+ cfglo = 0;
+
+ dws = chan->private;
+ if (dws) {
+ /*
+ * We need controller-specific data to set up slave
+ * transfers.
+ */
+ BUG_ON(!dws->dma_dev || dws->dma_dev != dw->dma.dev);
+
+ cfghi = dws->cfg_hi;
+ cfglo = dws->cfg_lo;
+ }
+ channel_writel(dwc, CFG_LO, cfglo);
+ channel_writel(dwc, CFG_HI, cfghi);
+
+ /*
+ * NOTE: some controllers may have additional features that we
+ * need to initialize here, like "scatter-gather" (which
+ * doesn't mean what you think it means), and status writeback.
+ */
+
+ spin_lock_bh(&dwc->lock);
+ i = dwc->descs_allocated;
+ while (dwc->descs_allocated < NR_DESCS_PER_CHANNEL) {
+ spin_unlock_bh(&dwc->lock);
+
+ desc = kzalloc(sizeof(struct dw_desc), GFP_KERNEL);
+ if (!desc) {
+ dev_info(chan2dev(chan),
+ "only allocated %d descriptors\n", i);
+ spin_lock_bh(&dwc->lock);
+ break;
+ }
+
+ INIT_LIST_HEAD(&desc->tx_list);
+ dma_async_tx_descriptor_init(&desc->txd, chan);
+ desc->txd.tx_submit = dwc_tx_submit;
+ desc->txd.flags = DMA_CTRL_ACK;
+ desc->txd.phys = dma_map_single(chan2parent(chan), &desc->lli,
+ sizeof(desc->lli), DMA_TO_DEVICE);
+ dwc_desc_put(dwc, desc);
+
+ spin_lock_bh(&dwc->lock);
+ i = ++dwc->descs_allocated;
+ }
+
+ /* Enable interrupts */
+ channel_set_bit(dw, MASK.XFER, dwc->mask);
+ channel_set_bit(dw, MASK.BLOCK, dwc->mask);
+ channel_set_bit(dw, MASK.ERROR, dwc->mask);
+
+ spin_unlock_bh(&dwc->lock);
+
+ dev_dbg(chan2dev(chan),
+ "alloc_chan_resources allocated %d descriptors\n", i);
+
+ return i;
+}
+
+static void dwc_free_chan_resources(struct dma_chan *chan)
+{
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+ struct dw_dma *dw = to_dw_dma(chan->device);
+ struct dw_desc *desc, *_desc;
+ LIST_HEAD(list);
+
+ dev_dbg(chan2dev(chan), "free_chan_resources (descs allocated=%u)\n",
+ dwc->descs_allocated);
+
+ /* ASSERT: channel is idle */
+ BUG_ON(!list_empty(&dwc->active_list));
+ BUG_ON(!list_empty(&dwc->queue));
+ BUG_ON(dma_readl(to_dw_dma(chan->device), CH_EN) & dwc->mask);
+
+ spin_lock_bh(&dwc->lock);
+ list_splice_init(&dwc->free_list, &list);
+ dwc->descs_allocated = 0;
+
+ /* Disable interrupts */
+ channel_clear_bit(dw, MASK.XFER, dwc->mask);
+ channel_clear_bit(dw, MASK.BLOCK, dwc->mask);
+ channel_clear_bit(dw, MASK.ERROR, dwc->mask);
+
+ spin_unlock_bh(&dwc->lock);
+
+ list_for_each_entry_safe(desc, _desc, &list, desc_node) {
+ dev_vdbg(chan2dev(chan), " freeing descriptor %p\n", desc);
+ dma_unmap_single(chan2parent(chan), desc->txd.phys,
+ sizeof(desc->lli), DMA_TO_DEVICE);
+ kfree(desc);
+ }
+
+ dev_vdbg(chan2dev(chan), "free_chan_resources done\n");
+}
+
+/* --------------------- Cyclic DMA API extensions -------------------- */
+
+/**
+ * dw_dma_cyclic_start - start the cyclic DMA transfer
+ * @chan: the DMA channel to start
+ *
+ * Must be called with soft interrupts disabled. Returns zero on success or
+ * -errno on failure.
+ */
+int dw_dma_cyclic_start(struct dma_chan *chan)
+{
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+ struct dw_dma *dw = to_dw_dma(dwc->chan.device);
+
+ if (!test_bit(DW_DMA_IS_CYCLIC, &dwc->flags)) {
+ dev_err(chan2dev(&dwc->chan), "missing prep for cyclic DMA\n");
+ return -ENODEV;
+ }
+
+ spin_lock(&dwc->lock);
+
+ /* assert channel is idle */
+ if (dma_readl(dw, CH_EN) & dwc->mask) {
+ dev_err(chan2dev(&dwc->chan),
+ "BUG: Attempted to start non-idle channel\n");
+ dev_err(chan2dev(&dwc->chan),
+ " SAR: 0x%x DAR: 0x%x LLP: 0x%x CTL: 0x%x:%08x\n",
+ channel_readl(dwc, SAR),
+ channel_readl(dwc, DAR),
+ channel_readl(dwc, LLP),
+ channel_readl(dwc, CTL_HI),
+ channel_readl(dwc, CTL_LO));
+ spin_unlock(&dwc->lock);
+ return -EBUSY;
+ }
+
+ dma_writel(dw, CLEAR.BLOCK, dwc->mask);
+ dma_writel(dw, CLEAR.ERROR, dwc->mask);
+ dma_writel(dw, CLEAR.XFER, dwc->mask);
+
+ /* setup DMAC channel registers */
+ channel_writel(dwc, LLP, dwc->cdesc->desc[0]->txd.phys);
+ channel_writel(dwc, CTL_LO, DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN);
+ channel_writel(dwc, CTL_HI, 0);
+
+ channel_set_bit(dw, CH_EN, dwc->mask);
+
+ spin_unlock(&dwc->lock);
+
+ return 0;
+}
+EXPORT_SYMBOL(dw_dma_cyclic_start);
+
+/**
+ * dw_dma_cyclic_stop - stop the cyclic DMA transfer
+ * @chan: the DMA channel to stop
+ *
+ * Must be called with soft interrupts disabled.
+ */
+void dw_dma_cyclic_stop(struct dma_chan *chan)
+{
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+ struct dw_dma *dw = to_dw_dma(dwc->chan.device);
+
+ spin_lock(&dwc->lock);
+
+ channel_clear_bit(dw, CH_EN, dwc->mask);
+ while (dma_readl(dw, CH_EN) & dwc->mask)
+ cpu_relax();
+
+ spin_unlock(&dwc->lock);
+}
+EXPORT_SYMBOL(dw_dma_cyclic_stop);
+
+/**
+ * dw_dma_cyclic_prep - prepare the cyclic DMA transfer
+ * @chan: the DMA channel to prepare
+ * @buf_addr: physical DMA address where the buffer starts
+ * @buf_len: total number of bytes for the entire buffer
+ * @period_len: number of bytes for each period
+ * @direction: transfer direction, to or from device
+ *
+ * Must be called before trying to start the transfer. Returns a valid struct
+ * dw_cyclic_desc if successful or an ERR_PTR(-errno) if not successful.
+ */
+struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan,
+ dma_addr_t buf_addr, size_t buf_len, size_t period_len,
+ enum dma_data_direction direction)
+{
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+ struct dw_cyclic_desc *cdesc;
+ struct dw_cyclic_desc *retval = NULL;
+ struct dw_desc *desc;
+ struct dw_desc *last = NULL;
+ struct dw_dma_slave *dws = chan->private;
+ unsigned long was_cyclic;
+ unsigned int reg_width;
+ unsigned int periods;
+ unsigned int i;
+
+ spin_lock_bh(&dwc->lock);
+ if (!list_empty(&dwc->queue) || !list_empty(&dwc->active_list)) {
+ spin_unlock_bh(&dwc->lock);
+ dev_dbg(chan2dev(&dwc->chan),
+ "queue and/or active list are not empty\n");
+ return ERR_PTR(-EBUSY);
+ }
+
+ was_cyclic = test_and_set_bit(DW_DMA_IS_CYCLIC, &dwc->flags);
+ spin_unlock_bh(&dwc->lock);
+ if (was_cyclic) {
+ dev_dbg(chan2dev(&dwc->chan),
+ "channel already prepared for cyclic DMA\n");
+ return ERR_PTR(-EBUSY);
+ }
+
+ retval = ERR_PTR(-EINVAL);
+ reg_width = dws->reg_width;
+ periods = buf_len / period_len;
+
+ /* Check for too big/unaligned periods and unaligned DMA buffer. */
+ if (period_len > (DWC_MAX_COUNT << reg_width))
+ goto out_err;
+ if (unlikely(period_len & ((1 << reg_width) - 1)))
+ goto out_err;
+ if (unlikely(buf_addr & ((1 << reg_width) - 1)))
+ goto out_err;
+ if (unlikely(!(direction & (DMA_TO_DEVICE | DMA_FROM_DEVICE))))
+ goto out_err;
+
+ retval = ERR_PTR(-ENOMEM);
+
+ if (periods > NR_DESCS_PER_CHANNEL)
+ goto out_err;
+
+ cdesc = kzalloc(sizeof(struct dw_cyclic_desc), GFP_KERNEL);
+ if (!cdesc)
+ goto out_err;
+
+ cdesc->desc = kzalloc(sizeof(struct dw_desc *) * periods, GFP_KERNEL);
+ if (!cdesc->desc)
+ goto out_err_alloc;
+
+ for (i = 0; i < periods; i++) {
+ desc = dwc_desc_get(dwc);
+ if (!desc)
+ goto out_err_desc_get;
+
+ switch (direction) {
+ case DMA_TO_DEVICE:
+ desc->lli.dar = dws->tx_reg;
+ desc->lli.sar = buf_addr + (period_len * i);
+ desc->lli.ctllo = (DWC_DEFAULT_CTLLO
+ | DWC_CTLL_DST_WIDTH(reg_width)
+ | DWC_CTLL_SRC_WIDTH(reg_width)
+ | DWC_CTLL_DST_FIX
+ | DWC_CTLL_SRC_INC
+ | DWC_CTLL_FC_M2P
+ | DWC_CTLL_INT_EN);
+ break;
+ case DMA_FROM_DEVICE:
+ desc->lli.dar = buf_addr + (period_len * i);
+ desc->lli.sar = dws->rx_reg;
+ desc->lli.ctllo = (DWC_DEFAULT_CTLLO
+ | DWC_CTLL_SRC_WIDTH(reg_width)
+ | DWC_CTLL_DST_WIDTH(reg_width)
+ | DWC_CTLL_DST_INC
+ | DWC_CTLL_SRC_FIX
+ | DWC_CTLL_FC_P2M
+ | DWC_CTLL_INT_EN);
+ break;
+ default:
+ break;
+ }
+
+ desc->lli.ctlhi = (period_len >> reg_width);
+ cdesc->desc[i] = desc;
+
+ if (last) {
+ last->lli.llp = desc->txd.phys;
+ dma_sync_single_for_device(chan2parent(chan),
+ last->txd.phys, sizeof(last->lli),
+ DMA_TO_DEVICE);
+ }
+
+ last = desc;
+ }
+
+ /* lets make a cyclic list */
+ last->lli.llp = cdesc->desc[0]->txd.phys;
+ dma_sync_single_for_device(chan2parent(chan), last->txd.phys,
+ sizeof(last->lli), DMA_TO_DEVICE);
+
+ dev_dbg(chan2dev(&dwc->chan), "cyclic prepared buf 0x%08x len %zu "
+ "period %zu periods %d\n", buf_addr, buf_len,
+ period_len, periods);
+
+ cdesc->periods = periods;
+ dwc->cdesc = cdesc;
+
+ return cdesc;
+
+out_err_desc_get:
+ while (i--)
+ dwc_desc_put(dwc, cdesc->desc[i]);
+out_err_alloc:
+ kfree(cdesc);
+out_err:
+ clear_bit(DW_DMA_IS_CYCLIC, &dwc->flags);
+ return (struct dw_cyclic_desc *)retval;
+}
+EXPORT_SYMBOL(dw_dma_cyclic_prep);
+
+/**
+ * dw_dma_cyclic_free - free a prepared cyclic DMA transfer
+ * @chan: the DMA channel to free
+ */
+void dw_dma_cyclic_free(struct dma_chan *chan)
+{
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+ struct dw_dma *dw = to_dw_dma(dwc->chan.device);
+ struct dw_cyclic_desc *cdesc = dwc->cdesc;
+ int i;
+
+ dev_dbg(chan2dev(&dwc->chan), "cyclic free\n");
+
+ if (!cdesc)
+ return;
+
+ spin_lock_bh(&dwc->lock);
+
+ channel_clear_bit(dw, CH_EN, dwc->mask);
+ while (dma_readl(dw, CH_EN) & dwc->mask)
+ cpu_relax();
+
+ dma_writel(dw, CLEAR.BLOCK, dwc->mask);
+ dma_writel(dw, CLEAR.ERROR, dwc->mask);
+ dma_writel(dw, CLEAR.XFER, dwc->mask);
+
+ spin_unlock_bh(&dwc->lock);
+
+ for (i = 0; i < cdesc->periods; i++)
+ dwc_desc_put(dwc, cdesc->desc[i]);
+
+ kfree(cdesc->desc);
+ kfree(cdesc);
+
+ clear_bit(DW_DMA_IS_CYCLIC, &dwc->flags);
+}
+EXPORT_SYMBOL(dw_dma_cyclic_free);
+
+/*----------------------------------------------------------------------*/
+
+static void dw_dma_off(struct dw_dma *dw)
+{
+ dma_writel(dw, CFG, 0);
+
+ channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask);
+ channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask);
+ channel_clear_bit(dw, MASK.SRC_TRAN, dw->all_chan_mask);
+ channel_clear_bit(dw, MASK.DST_TRAN, dw->all_chan_mask);
+ channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask);
+
+ while (dma_readl(dw, CFG) & DW_CFG_DMA_EN)
+ cpu_relax();
+}
+
+static int __init dw_probe(struct platform_device *pdev)
+{
+ struct dw_dma_platform_data *pdata;
+ struct resource *io;
+ struct dw_dma *dw;
+ size_t size;
+ int irq;
+ int err;
+ int i;
+
+ pdata = pdev->dev.platform_data;
+ if (!pdata || pdata->nr_channels > DW_DMA_MAX_NR_CHANNELS)
+ return -EINVAL;
+
+ io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!io)
+ return -EINVAL;
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0)
+ return irq;
+
+ size = sizeof(struct dw_dma);
+ size += pdata->nr_channels * sizeof(struct dw_dma_chan);
+ dw = kzalloc(size, GFP_KERNEL);
+ if (!dw)
+ return -ENOMEM;
+
+ if (!request_mem_region(io->start, DW_REGLEN, pdev->dev.driver->name)) {
+ err = -EBUSY;
+ goto err_kfree;
+ }
+
+ dw->regs = ioremap(io->start, DW_REGLEN);
+ if (!dw->regs) {
+ err = -ENOMEM;
+ goto err_release_r;
+ }
+
+ dw->clk = clk_get(&pdev->dev, "hclk");
+ if (IS_ERR(dw->clk)) {
+ err = PTR_ERR(dw->clk);
+ goto err_clk;
+ }
+ clk_enable(dw->clk);
+
+ /* force dma off, just in case */
+ dw_dma_off(dw);
+
+ err = request_irq(irq, dw_dma_interrupt, 0, "dw_dmac", dw);
+ if (err)
+ goto err_irq;
+
+ platform_set_drvdata(pdev, dw);
+
+ tasklet_init(&dw->tasklet, dw_dma_tasklet, (unsigned long)dw);
+
+ dw->all_chan_mask = (1 << pdata->nr_channels) - 1;
+
+ INIT_LIST_HEAD(&dw->dma.channels);
+ for (i = 0; i < pdata->nr_channels; i++, dw->dma.chancnt++) {
+ struct dw_dma_chan *dwc = &dw->chan[i];
+
+ dwc->chan.device = &dw->dma;
+ dwc->chan.cookie = dwc->completed = 1;
+ dwc->chan.chan_id = i;
+ list_add_tail(&dwc->chan.device_node, &dw->dma.channels);
+
+ dwc->ch_regs = &__dw_regs(dw)->CHAN[i];
+ spin_lock_init(&dwc->lock);
+ dwc->mask = 1 << i;
+
+ INIT_LIST_HEAD(&dwc->active_list);
+ INIT_LIST_HEAD(&dwc->queue);
+ INIT_LIST_HEAD(&dwc->free_list);
+
+ channel_clear_bit(dw, CH_EN, dwc->mask);
+ }
+
+ /* Clear/disable all interrupts on all channels. */
+ dma_writel(dw, CLEAR.XFER, dw->all_chan_mask);
+ dma_writel(dw, CLEAR.BLOCK, dw->all_chan_mask);
+ dma_writel(dw, CLEAR.SRC_TRAN, dw->all_chan_mask);
+ dma_writel(dw, CLEAR.DST_TRAN, dw->all_chan_mask);
+ dma_writel(dw, CLEAR.ERROR, dw->all_chan_mask);
+
+ channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask);
+ channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask);
+ channel_clear_bit(dw, MASK.SRC_TRAN, dw->all_chan_mask);
+ channel_clear_bit(dw, MASK.DST_TRAN, dw->all_chan_mask);
+ channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask);
+
+ dma_cap_set(DMA_MEMCPY, dw->dma.cap_mask);
+ dma_cap_set(DMA_SLAVE, dw->dma.cap_mask);
+ dw->dma.dev = &pdev->dev;
+ dw->dma.device_alloc_chan_resources = dwc_alloc_chan_resources;
+ dw->dma.device_free_chan_resources = dwc_free_chan_resources;
+
+ dw->dma.device_prep_dma_memcpy = dwc_prep_dma_memcpy;
+
+ dw->dma.device_prep_slave_sg = dwc_prep_slave_sg;
+ dw->dma.device_terminate_all = dwc_terminate_all;
+
+ dw->dma.device_is_tx_complete = dwc_is_tx_complete;
+ dw->dma.device_issue_pending = dwc_issue_pending;
+
+ dma_writel(dw, CFG, DW_CFG_DMA_EN);
+
+ printk(KERN_INFO "%s: DesignWare DMA Controller, %d channels\n",
+ dev_name(&pdev->dev), dw->dma.chancnt);
+
+ dma_async_device_register(&dw->dma);
+
+ return 0;
+
+err_irq:
+ clk_disable(dw->clk);
+ clk_put(dw->clk);
+err_clk:
+ iounmap(dw->regs);
+ dw->regs = NULL;
+err_release_r:
+ release_resource(io);
+err_kfree:
+ kfree(dw);
+ return err;
+}
+
+static int __exit dw_remove(struct platform_device *pdev)
+{
+ struct dw_dma *dw = platform_get_drvdata(pdev);
+ struct dw_dma_chan *dwc, *_dwc;
+ struct resource *io;
+
+ dw_dma_off(dw);
+ dma_async_device_unregister(&dw->dma);
+
+ free_irq(platform_get_irq(pdev, 0), dw);
+ tasklet_kill(&dw->tasklet);
+
+ list_for_each_entry_safe(dwc, _dwc, &dw->dma.channels,
+ chan.device_node) {
+ list_del(&dwc->chan.device_node);
+ channel_clear_bit(dw, CH_EN, dwc->mask);
+ }
+
+ clk_disable(dw->clk);
+ clk_put(dw->clk);
+
+ iounmap(dw->regs);
+ dw->regs = NULL;
+
+ io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ release_mem_region(io->start, DW_REGLEN);
+
+ kfree(dw);
+
+ return 0;
+}
+
+static void dw_shutdown(struct platform_device *pdev)
+{
+ struct dw_dma *dw = platform_get_drvdata(pdev);
+
+ dw_dma_off(platform_get_drvdata(pdev));
+ clk_disable(dw->clk);
+}
+
+static int dw_suspend_noirq(struct device *dev)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct dw_dma *dw = platform_get_drvdata(pdev);
+
+ dw_dma_off(platform_get_drvdata(pdev));
+ clk_disable(dw->clk);
+ return 0;
+}
+
+static int dw_resume_noirq(struct device *dev)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct dw_dma *dw = platform_get_drvdata(pdev);
+
+ clk_enable(dw->clk);
+ dma_writel(dw, CFG, DW_CFG_DMA_EN);
+ return 0;
+}
+
+static const struct dev_pm_ops dw_dev_pm_ops = {
+ .suspend_noirq = dw_suspend_noirq,
+ .resume_noirq = dw_resume_noirq,
+};
+
+static struct platform_driver dw_driver = {
+ .remove = __exit_p(dw_remove),
+ .shutdown = dw_shutdown,
+ .driver = {
+ .name = "dw_dmac",
+ .pm = &dw_dev_pm_ops,
+ },
+};
+
+static int __init dw_init(void)
+{
+ return platform_driver_probe(&dw_driver, dw_probe);
+}
+module_init(dw_init);
+
+static void __exit dw_exit(void)
+{
+ platform_driver_unregister(&dw_driver);
+}
+module_exit(dw_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Synopsys DesignWare DMA Controller driver");
+MODULE_AUTHOR("Haavard Skinnemoen <haavard.skinnemoen@atmel.com>");
diff --git a/drivers/dma/dw_dmac_regs.h b/drivers/dma/dw_dmac_regs.h
new file mode 100644
index 00000000000..d9a939f67f4
--- /dev/null
+++ b/drivers/dma/dw_dmac_regs.h
@@ -0,0 +1,229 @@
+/*
+ * Driver for the Synopsys DesignWare AHB DMA Controller
+ *
+ * Copyright (C) 2005-2007 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/dw_dmac.h>
+
+#define DW_DMA_MAX_NR_CHANNELS 8
+
+/*
+ * Redefine this macro to handle differences between 32- and 64-bit
+ * addressing, big vs. little endian, etc.
+ */
+#define DW_REG(name) u32 name; u32 __pad_##name
+
+/* Hardware register definitions. */
+struct dw_dma_chan_regs {
+ DW_REG(SAR); /* Source Address Register */
+ DW_REG(DAR); /* Destination Address Register */
+ DW_REG(LLP); /* Linked List Pointer */
+ u32 CTL_LO; /* Control Register Low */
+ u32 CTL_HI; /* Control Register High */
+ DW_REG(SSTAT);
+ DW_REG(DSTAT);
+ DW_REG(SSTATAR);
+ DW_REG(DSTATAR);
+ u32 CFG_LO; /* Configuration Register Low */
+ u32 CFG_HI; /* Configuration Register High */
+ DW_REG(SGR);
+ DW_REG(DSR);
+};
+
+struct dw_dma_irq_regs {
+ DW_REG(XFER);
+ DW_REG(BLOCK);
+ DW_REG(SRC_TRAN);
+ DW_REG(DST_TRAN);
+ DW_REG(ERROR);
+};
+
+struct dw_dma_regs {
+ /* per-channel registers */
+ struct dw_dma_chan_regs CHAN[DW_DMA_MAX_NR_CHANNELS];
+
+ /* irq handling */
+ struct dw_dma_irq_regs RAW; /* r */
+ struct dw_dma_irq_regs STATUS; /* r (raw & mask) */
+ struct dw_dma_irq_regs MASK; /* rw (set = irq enabled) */
+ struct dw_dma_irq_regs CLEAR; /* w (ack, affects "raw") */
+
+ DW_REG(STATUS_INT); /* r */
+
+ /* software handshaking */
+ DW_REG(REQ_SRC);
+ DW_REG(REQ_DST);
+ DW_REG(SGL_REQ_SRC);
+ DW_REG(SGL_REQ_DST);
+ DW_REG(LAST_SRC);
+ DW_REG(LAST_DST);
+
+ /* miscellaneous */
+ DW_REG(CFG);
+ DW_REG(CH_EN);
+ DW_REG(ID);
+ DW_REG(TEST);
+
+ /* optional encoded params, 0x3c8..0x3 */
+};
+
+/* Bitfields in CTL_LO */
+#define DWC_CTLL_INT_EN (1 << 0) /* irqs enabled? */
+#define DWC_CTLL_DST_WIDTH(n) ((n)<<1) /* bytes per element */
+#define DWC_CTLL_SRC_WIDTH(n) ((n)<<4)
+#define DWC_CTLL_DST_INC (0<<7) /* DAR update/not */
+#define DWC_CTLL_DST_DEC (1<<7)
+#define DWC_CTLL_DST_FIX (2<<7)
+#define DWC_CTLL_SRC_INC (0<<7) /* SAR update/not */
+#define DWC_CTLL_SRC_DEC (1<<9)
+#define DWC_CTLL_SRC_FIX (2<<9)
+#define DWC_CTLL_DST_MSIZE(n) ((n)<<11) /* burst, #elements */
+#define DWC_CTLL_SRC_MSIZE(n) ((n)<<14)
+#define DWC_CTLL_S_GATH_EN (1 << 17) /* src gather, !FIX */
+#define DWC_CTLL_D_SCAT_EN (1 << 18) /* dst scatter, !FIX */
+#define DWC_CTLL_FC_M2M (0 << 20) /* mem-to-mem */
+#define DWC_CTLL_FC_M2P (1 << 20) /* mem-to-periph */
+#define DWC_CTLL_FC_P2M (2 << 20) /* periph-to-mem */
+#define DWC_CTLL_FC_P2P (3 << 20) /* periph-to-periph */
+/* plus 4 transfer types for peripheral-as-flow-controller */
+#define DWC_CTLL_DMS(n) ((n)<<23) /* dst master select */
+#define DWC_CTLL_SMS(n) ((n)<<25) /* src master select */
+#define DWC_CTLL_LLP_D_EN (1 << 27) /* dest block chain */
+#define DWC_CTLL_LLP_S_EN (1 << 28) /* src block chain */
+
+/* Bitfields in CTL_HI */
+#define DWC_CTLH_DONE 0x00001000
+#define DWC_CTLH_BLOCK_TS_MASK 0x00000fff
+
+/* Bitfields in CFG_LO. Platform-configurable bits are in <linux/dw_dmac.h> */
+#define DWC_CFGL_CH_SUSP (1 << 8) /* pause xfer */
+#define DWC_CFGL_FIFO_EMPTY (1 << 9) /* pause xfer */
+#define DWC_CFGL_HS_DST (1 << 10) /* handshake w/dst */
+#define DWC_CFGL_HS_SRC (1 << 11) /* handshake w/src */
+#define DWC_CFGL_MAX_BURST(x) ((x) << 20)
+#define DWC_CFGL_RELOAD_SAR (1 << 30)
+#define DWC_CFGL_RELOAD_DAR (1 << 31)
+
+/* Bitfields in CFG_HI. Platform-configurable bits are in <linux/dw_dmac.h> */
+#define DWC_CFGH_DS_UPD_EN (1 << 5)
+#define DWC_CFGH_SS_UPD_EN (1 << 6)
+
+/* Bitfields in SGR */
+#define DWC_SGR_SGI(x) ((x) << 0)
+#define DWC_SGR_SGC(x) ((x) << 20)
+
+/* Bitfields in DSR */
+#define DWC_DSR_DSI(x) ((x) << 0)
+#define DWC_DSR_DSC(x) ((x) << 20)
+
+/* Bitfields in CFG */
+#define DW_CFG_DMA_EN (1 << 0)
+
+#define DW_REGLEN 0x400
+
+enum dw_dmac_flags {
+ DW_DMA_IS_CYCLIC = 0,
+};
+
+struct dw_dma_chan {
+ struct dma_chan chan;
+ void __iomem *ch_regs;
+ u8 mask;
+
+ spinlock_t lock;
+
+ /* these other elements are all protected by lock */
+ unsigned long flags;
+ dma_cookie_t completed;
+ struct list_head active_list;
+ struct list_head queue;
+ struct list_head free_list;
+ struct dw_cyclic_desc *cdesc;
+
+ unsigned int descs_allocated;
+};
+
+static inline struct dw_dma_chan_regs __iomem *
+__dwc_regs(struct dw_dma_chan *dwc)
+{
+ return dwc->ch_regs;
+}
+
+#define channel_readl(dwc, name) \
+ __raw_readl(&(__dwc_regs(dwc)->name))
+#define channel_writel(dwc, name, val) \
+ __raw_writel((val), &(__dwc_regs(dwc)->name))
+
+static inline struct dw_dma_chan *to_dw_dma_chan(struct dma_chan *chan)
+{
+ return container_of(chan, struct dw_dma_chan, chan);
+}
+
+struct dw_dma {
+ struct dma_device dma;
+ void __iomem *regs;
+ struct tasklet_struct tasklet;
+ struct clk *clk;
+
+ u8 all_chan_mask;
+
+ struct dw_dma_chan chan[0];
+};
+
+static inline struct dw_dma_regs __iomem *__dw_regs(struct dw_dma *dw)
+{
+ return dw->regs;
+}
+
+#define dma_readl(dw, name) \
+ __raw_readl(&(__dw_regs(dw)->name))
+#define dma_writel(dw, name, val) \
+ __raw_writel((val), &(__dw_regs(dw)->name))
+
+#define channel_set_bit(dw, reg, mask) \
+ dma_writel(dw, reg, ((mask) << 8) | (mask))
+#define channel_clear_bit(dw, reg, mask) \
+ dma_writel(dw, reg, ((mask) << 8) | 0)
+
+static inline struct dw_dma *to_dw_dma(struct dma_device *ddev)
+{
+ return container_of(ddev, struct dw_dma, dma);
+}
+
+/* LLI == Linked List Item; a.k.a. DMA block descriptor */
+struct dw_lli {
+ /* values that are not changed by hardware */
+ dma_addr_t sar;
+ dma_addr_t dar;
+ dma_addr_t llp; /* chain to next lli */
+ u32 ctllo;
+ /* values that may get written back: */
+ u32 ctlhi;
+ /* sstat and dstat can snapshot peripheral register state.
+ * silicon config may discard either or both...
+ */
+ u32 sstat;
+ u32 dstat;
+};
+
+struct dw_desc {
+ /* FIRST values the hardware uses */
+ struct dw_lli lli;
+
+ /* THEN values for driver housekeeping */
+ struct list_head desc_node;
+ struct list_head tx_list;
+ struct dma_async_tx_descriptor txd;
+ size_t len;
+};
+
+static inline struct dw_desc *
+txd_to_dw_desc(struct dma_async_tx_descriptor *txd)
+{
+ return container_of(txd, struct dw_desc, txd);
+}
diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
new file mode 100644
index 00000000000..296f9e747fa
--- /dev/null
+++ b/drivers/dma/fsldma.c
@@ -0,0 +1,1312 @@
+/*
+ * Freescale MPC85xx, MPC83xx DMA Engine support
+ *
+ * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author:
+ * Zhang Wei <wei.zhang@freescale.com>, Jul 2007
+ * Ebony Zhu <ebony.zhu@freescale.com>, May 2007
+ *
+ * Description:
+ * DMA engine driver for Freescale MPC8540 DMA controller, which is
+ * also fit for MPC8560, MPC8555, MPC8548, MPC8641, and etc.
+ * The support for MPC8349 DMA contorller is also added.
+ *
+ * This driver instructs the DMA controller to issue the PCI Read Multiple
+ * command for PCI read operations, instead of using the default PCI Read Line
+ * command. Please be aware that this setting may result in read pre-fetching
+ * on some platforms.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/of_platform.h>
+
+#include <asm/fsldma.h>
+#include "fsldma.h"
+
+static void dma_init(struct fsl_dma_chan *fsl_chan)
+{
+ /* Reset the channel */
+ DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr, 0, 32);
+
+ switch (fsl_chan->feature & FSL_DMA_IP_MASK) {
+ case FSL_DMA_IP_85XX:
+ /* Set the channel to below modes:
+ * EIE - Error interrupt enable
+ * EOSIE - End of segments interrupt enable (basic mode)
+ * EOLNIE - End of links interrupt enable
+ */
+ DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr, FSL_DMA_MR_EIE
+ | FSL_DMA_MR_EOLNIE | FSL_DMA_MR_EOSIE, 32);
+ break;
+ case FSL_DMA_IP_83XX:
+ /* Set the channel to below modes:
+ * EOTIE - End-of-transfer interrupt enable
+ * PRC_RM - PCI read multiple
+ */
+ DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr, FSL_DMA_MR_EOTIE
+ | FSL_DMA_MR_PRC_RM, 32);
+ break;
+ }
+
+}
+
+static void set_sr(struct fsl_dma_chan *fsl_chan, u32 val)
+{
+ DMA_OUT(fsl_chan, &fsl_chan->reg_base->sr, val, 32);
+}
+
+static u32 get_sr(struct fsl_dma_chan *fsl_chan)
+{
+ return DMA_IN(fsl_chan, &fsl_chan->reg_base->sr, 32);
+}
+
+static void set_desc_cnt(struct fsl_dma_chan *fsl_chan,
+ struct fsl_dma_ld_hw *hw, u32 count)
+{
+ hw->count = CPU_TO_DMA(fsl_chan, count, 32);
+}
+
+static void set_desc_src(struct fsl_dma_chan *fsl_chan,
+ struct fsl_dma_ld_hw *hw, dma_addr_t src)
+{
+ u64 snoop_bits;
+
+ snoop_bits = ((fsl_chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX)
+ ? ((u64)FSL_DMA_SATR_SREADTYPE_SNOOP_READ << 32) : 0;
+ hw->src_addr = CPU_TO_DMA(fsl_chan, snoop_bits | src, 64);
+}
+
+static void set_desc_dest(struct fsl_dma_chan *fsl_chan,
+ struct fsl_dma_ld_hw *hw, dma_addr_t dest)
+{
+ u64 snoop_bits;
+
+ snoop_bits = ((fsl_chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX)
+ ? ((u64)FSL_DMA_DATR_DWRITETYPE_SNOOP_WRITE << 32) : 0;
+ hw->dst_addr = CPU_TO_DMA(fsl_chan, snoop_bits | dest, 64);
+}
+
+static void set_desc_next(struct fsl_dma_chan *fsl_chan,
+ struct fsl_dma_ld_hw *hw, dma_addr_t next)
+{
+ u64 snoop_bits;
+
+ snoop_bits = ((fsl_chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_83XX)
+ ? FSL_DMA_SNEN : 0;
+ hw->next_ln_addr = CPU_TO_DMA(fsl_chan, snoop_bits | next, 64);
+}
+
+static void set_cdar(struct fsl_dma_chan *fsl_chan, dma_addr_t addr)
+{
+ DMA_OUT(fsl_chan, &fsl_chan->reg_base->cdar, addr | FSL_DMA_SNEN, 64);
+}
+
+static dma_addr_t get_cdar(struct fsl_dma_chan *fsl_chan)
+{
+ return DMA_IN(fsl_chan, &fsl_chan->reg_base->cdar, 64) & ~FSL_DMA_SNEN;
+}
+
+static void set_ndar(struct fsl_dma_chan *fsl_chan, dma_addr_t addr)
+{
+ DMA_OUT(fsl_chan, &fsl_chan->reg_base->ndar, addr, 64);
+}
+
+static dma_addr_t get_ndar(struct fsl_dma_chan *fsl_chan)
+{
+ return DMA_IN(fsl_chan, &fsl_chan->reg_base->ndar, 64);
+}
+
+static u32 get_bcr(struct fsl_dma_chan *fsl_chan)
+{
+ return DMA_IN(fsl_chan, &fsl_chan->reg_base->bcr, 32);
+}
+
+static int dma_is_idle(struct fsl_dma_chan *fsl_chan)
+{
+ u32 sr = get_sr(fsl_chan);
+ return (!(sr & FSL_DMA_SR_CB)) || (sr & FSL_DMA_SR_CH);
+}
+
+static void dma_start(struct fsl_dma_chan *fsl_chan)
+{
+ u32 mr_set = 0;
+
+ if (fsl_chan->feature & FSL_DMA_CHAN_PAUSE_EXT) {
+ DMA_OUT(fsl_chan, &fsl_chan->reg_base->bcr, 0, 32);
+ mr_set |= FSL_DMA_MR_EMP_EN;
+ } else if ((fsl_chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) {
+ DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr,
+ DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32)
+ & ~FSL_DMA_MR_EMP_EN, 32);
+ }
+
+ if (fsl_chan->feature & FSL_DMA_CHAN_START_EXT)
+ mr_set |= FSL_DMA_MR_EMS_EN;
+ else
+ mr_set |= FSL_DMA_MR_CS;
+
+ DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr,
+ DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32)
+ | mr_set, 32);
+}
+
+static void dma_halt(struct fsl_dma_chan *fsl_chan)
+{
+ int i;
+
+ DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr,
+ DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32) | FSL_DMA_MR_CA,
+ 32);
+ DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr,
+ DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32) & ~(FSL_DMA_MR_CS
+ | FSL_DMA_MR_EMS_EN | FSL_DMA_MR_CA), 32);
+
+ for (i = 0; i < 100; i++) {
+ if (dma_is_idle(fsl_chan))
+ break;
+ udelay(10);
+ }
+ if (i >= 100 && !dma_is_idle(fsl_chan))
+ dev_err(fsl_chan->dev, "DMA halt timeout!\n");
+}
+
+static void set_ld_eol(struct fsl_dma_chan *fsl_chan,
+ struct fsl_desc_sw *desc)
+{
+ u64 snoop_bits;
+
+ snoop_bits = ((fsl_chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_83XX)
+ ? FSL_DMA_SNEN : 0;
+
+ desc->hw.next_ln_addr = CPU_TO_DMA(fsl_chan,
+ DMA_TO_CPU(fsl_chan, desc->hw.next_ln_addr, 64) | FSL_DMA_EOL
+ | snoop_bits, 64);
+}
+
+static void append_ld_queue(struct fsl_dma_chan *fsl_chan,
+ struct fsl_desc_sw *new_desc)
+{
+ struct fsl_desc_sw *queue_tail = to_fsl_desc(fsl_chan->ld_queue.prev);
+
+ if (list_empty(&fsl_chan->ld_queue))
+ return;
+
+ /* Link to the new descriptor physical address and
+ * Enable End-of-segment interrupt for
+ * the last link descriptor.
+ * (the previous node's next link descriptor)
+ *
+ * For FSL_DMA_IP_83xx, the snoop enable bit need be set.
+ */
+ queue_tail->hw.next_ln_addr = CPU_TO_DMA(fsl_chan,
+ new_desc->async_tx.phys | FSL_DMA_EOSIE |
+ (((fsl_chan->feature & FSL_DMA_IP_MASK)
+ == FSL_DMA_IP_83XX) ? FSL_DMA_SNEN : 0), 64);
+}
+
+/**
+ * fsl_chan_set_src_loop_size - Set source address hold transfer size
+ * @fsl_chan : Freescale DMA channel
+ * @size : Address loop size, 0 for disable loop
+ *
+ * The set source address hold transfer size. The source
+ * address hold or loop transfer size is when the DMA transfer
+ * data from source address (SA), if the loop size is 4, the DMA will
+ * read data from SA, SA + 1, SA + 2, SA + 3, then loop back to SA,
+ * SA + 1 ... and so on.
+ */
+static void fsl_chan_set_src_loop_size(struct fsl_dma_chan *fsl_chan, int size)
+{
+ switch (size) {
+ case 0:
+ DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr,
+ DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32) &
+ (~FSL_DMA_MR_SAHE), 32);
+ break;
+ case 1:
+ case 2:
+ case 4:
+ case 8:
+ DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr,
+ DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32) |
+ FSL_DMA_MR_SAHE | (__ilog2(size) << 14),
+ 32);
+ break;
+ }
+}
+
+/**
+ * fsl_chan_set_dest_loop_size - Set destination address hold transfer size
+ * @fsl_chan : Freescale DMA channel
+ * @size : Address loop size, 0 for disable loop
+ *
+ * The set destination address hold transfer size. The destination
+ * address hold or loop transfer size is when the DMA transfer
+ * data to destination address (TA), if the loop size is 4, the DMA will
+ * write data to TA, TA + 1, TA + 2, TA + 3, then loop back to TA,
+ * TA + 1 ... and so on.
+ */
+static void fsl_chan_set_dest_loop_size(struct fsl_dma_chan *fsl_chan, int size)
+{
+ switch (size) {
+ case 0:
+ DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr,
+ DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32) &
+ (~FSL_DMA_MR_DAHE), 32);
+ break;
+ case 1:
+ case 2:
+ case 4:
+ case 8:
+ DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr,
+ DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32) |
+ FSL_DMA_MR_DAHE | (__ilog2(size) << 16),
+ 32);
+ break;
+ }
+}
+
+/**
+ * fsl_chan_set_request_count - Set DMA Request Count for external control
+ * @fsl_chan : Freescale DMA channel
+ * @size : Number of bytes to transfer in a single request
+ *
+ * The Freescale DMA channel can be controlled by the external signal DREQ#.
+ * The DMA request count is how many bytes are allowed to transfer before
+ * pausing the channel, after which a new assertion of DREQ# resumes channel
+ * operation.
+ *
+ * A size of 0 disables external pause control. The maximum size is 1024.
+ */
+static void fsl_chan_set_request_count(struct fsl_dma_chan *fsl_chan, int size)
+{
+ BUG_ON(size > 1024);
+ DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr,
+ DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32)
+ | ((__ilog2(size) << 24) & 0x0f000000),
+ 32);
+}
+
+/**
+ * fsl_chan_toggle_ext_pause - Toggle channel external pause status
+ * @fsl_chan : Freescale DMA channel
+ * @enable : 0 is disabled, 1 is enabled.
+ *
+ * The Freescale DMA channel can be controlled by the external signal DREQ#.
+ * The DMA Request Count feature should be used in addition to this feature
+ * to set the number of bytes to transfer before pausing the channel.
+ */
+static void fsl_chan_toggle_ext_pause(struct fsl_dma_chan *fsl_chan, int enable)
+{
+ if (enable)
+ fsl_chan->feature |= FSL_DMA_CHAN_PAUSE_EXT;
+ else
+ fsl_chan->feature &= ~FSL_DMA_CHAN_PAUSE_EXT;
+}
+
+/**
+ * fsl_chan_toggle_ext_start - Toggle channel external start status
+ * @fsl_chan : Freescale DMA channel
+ * @enable : 0 is disabled, 1 is enabled.
+ *
+ * If enable the external start, the channel can be started by an
+ * external DMA start pin. So the dma_start() does not start the
+ * transfer immediately. The DMA channel will wait for the
+ * control pin asserted.
+ */
+static void fsl_chan_toggle_ext_start(struct fsl_dma_chan *fsl_chan, int enable)
+{
+ if (enable)
+ fsl_chan->feature |= FSL_DMA_CHAN_START_EXT;
+ else
+ fsl_chan->feature &= ~FSL_DMA_CHAN_START_EXT;
+}
+
+static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct fsl_dma_chan *fsl_chan = to_fsl_chan(tx->chan);
+ struct fsl_desc_sw *desc = tx_to_fsl_desc(tx);
+ struct fsl_desc_sw *child;
+ unsigned long flags;
+ dma_cookie_t cookie;
+
+ /* cookie increment and adding to ld_queue must be atomic */
+ spin_lock_irqsave(&fsl_chan->desc_lock, flags);
+
+ cookie = fsl_chan->common.cookie;
+ list_for_each_entry(child, &desc->tx_list, node) {
+ cookie++;
+ if (cookie < 0)
+ cookie = 1;
+
+ desc->async_tx.cookie = cookie;
+ }
+
+ fsl_chan->common.cookie = cookie;
+ append_ld_queue(fsl_chan, desc);
+ list_splice_init(&desc->tx_list, fsl_chan->ld_queue.prev);
+
+ spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
+
+ return cookie;
+}
+
+/**
+ * fsl_dma_alloc_descriptor - Allocate descriptor from channel's DMA pool.
+ * @fsl_chan : Freescale DMA channel
+ *
+ * Return - The descriptor allocated. NULL for failed.
+ */
+static struct fsl_desc_sw *fsl_dma_alloc_descriptor(
+ struct fsl_dma_chan *fsl_chan)
+{
+ dma_addr_t pdesc;
+ struct fsl_desc_sw *desc_sw;
+
+ desc_sw = dma_pool_alloc(fsl_chan->desc_pool, GFP_ATOMIC, &pdesc);
+ if (desc_sw) {
+ memset(desc_sw, 0, sizeof(struct fsl_desc_sw));
+ INIT_LIST_HEAD(&desc_sw->tx_list);
+ dma_async_tx_descriptor_init(&desc_sw->async_tx,
+ &fsl_chan->common);
+ desc_sw->async_tx.tx_submit = fsl_dma_tx_submit;
+ desc_sw->async_tx.phys = pdesc;
+ }
+
+ return desc_sw;
+}
+
+
+/**
+ * fsl_dma_alloc_chan_resources - Allocate resources for DMA channel.
+ * @fsl_chan : Freescale DMA channel
+ *
+ * This function will create a dma pool for descriptor allocation.
+ *
+ * Return - The number of descriptors allocated.
+ */
+static int fsl_dma_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct fsl_dma_chan *fsl_chan = to_fsl_chan(chan);
+
+ /* Has this channel already been allocated? */
+ if (fsl_chan->desc_pool)
+ return 1;
+
+ /* We need the descriptor to be aligned to 32bytes
+ * for meeting FSL DMA specification requirement.
+ */
+ fsl_chan->desc_pool = dma_pool_create("fsl_dma_engine_desc_pool",
+ fsl_chan->dev, sizeof(struct fsl_desc_sw),
+ 32, 0);
+ if (!fsl_chan->desc_pool) {
+ dev_err(fsl_chan->dev, "No memory for channel %d "
+ "descriptor dma pool.\n", fsl_chan->id);
+ return 0;
+ }
+
+ return 1;
+}
+
+/**
+ * fsl_dma_free_chan_resources - Free all resources of the channel.
+ * @fsl_chan : Freescale DMA channel
+ */
+static void fsl_dma_free_chan_resources(struct dma_chan *chan)
+{
+ struct fsl_dma_chan *fsl_chan = to_fsl_chan(chan);
+ struct fsl_desc_sw *desc, *_desc;
+ unsigned long flags;
+
+ dev_dbg(fsl_chan->dev, "Free all channel resources.\n");
+ spin_lock_irqsave(&fsl_chan->desc_lock, flags);
+ list_for_each_entry_safe(desc, _desc, &fsl_chan->ld_queue, node) {
+#ifdef FSL_DMA_LD_DEBUG
+ dev_dbg(fsl_chan->dev,
+ "LD %p will be released.\n", desc);
+#endif
+ list_del(&desc->node);
+ /* free link descriptor */
+ dma_pool_free(fsl_chan->desc_pool, desc, desc->async_tx.phys);
+ }
+ spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
+ dma_pool_destroy(fsl_chan->desc_pool);
+
+ fsl_chan->desc_pool = NULL;
+}
+
+static struct dma_async_tx_descriptor *
+fsl_dma_prep_interrupt(struct dma_chan *chan, unsigned long flags)
+{
+ struct fsl_dma_chan *fsl_chan;
+ struct fsl_desc_sw *new;
+
+ if (!chan)
+ return NULL;
+
+ fsl_chan = to_fsl_chan(chan);
+
+ new = fsl_dma_alloc_descriptor(fsl_chan);
+ if (!new) {
+ dev_err(fsl_chan->dev, "No free memory for link descriptor\n");
+ return NULL;
+ }
+
+ new->async_tx.cookie = -EBUSY;
+ new->async_tx.flags = flags;
+
+ /* Insert the link descriptor to the LD ring */
+ list_add_tail(&new->node, &new->tx_list);
+
+ /* Set End-of-link to the last link descriptor of new list*/
+ set_ld_eol(fsl_chan, new);
+
+ return &new->async_tx;
+}
+
+static struct dma_async_tx_descriptor *fsl_dma_prep_memcpy(
+ struct dma_chan *chan, dma_addr_t dma_dest, dma_addr_t dma_src,
+ size_t len, unsigned long flags)
+{
+ struct fsl_dma_chan *fsl_chan;
+ struct fsl_desc_sw *first = NULL, *prev = NULL, *new;
+ struct list_head *list;
+ size_t copy;
+
+ if (!chan)
+ return NULL;
+
+ if (!len)
+ return NULL;
+
+ fsl_chan = to_fsl_chan(chan);
+
+ do {
+
+ /* Allocate the link descriptor from DMA pool */
+ new = fsl_dma_alloc_descriptor(fsl_chan);
+ if (!new) {
+ dev_err(fsl_chan->dev,
+ "No free memory for link descriptor\n");
+ goto fail;
+ }
+#ifdef FSL_DMA_LD_DEBUG
+ dev_dbg(fsl_chan->dev, "new link desc alloc %p\n", new);
+#endif
+
+ copy = min(len, (size_t)FSL_DMA_BCR_MAX_CNT);
+
+ set_desc_cnt(fsl_chan, &new->hw, copy);
+ set_desc_src(fsl_chan, &new->hw, dma_src);
+ set_desc_dest(fsl_chan, &new->hw, dma_dest);
+
+ if (!first)
+ first = new;
+ else
+ set_desc_next(fsl_chan, &prev->hw, new->async_tx.phys);
+
+ new->async_tx.cookie = 0;
+ async_tx_ack(&new->async_tx);
+
+ prev = new;
+ len -= copy;
+ dma_src += copy;
+ dma_dest += copy;
+
+ /* Insert the link descriptor to the LD ring */
+ list_add_tail(&new->node, &first->tx_list);
+ } while (len);
+
+ new->async_tx.flags = flags; /* client is in control of this ack */
+ new->async_tx.cookie = -EBUSY;
+
+ /* Set End-of-link to the last link descriptor of new list*/
+ set_ld_eol(fsl_chan, new);
+
+ return &first->async_tx;
+
+fail:
+ if (!first)
+ return NULL;
+
+ list = &first->tx_list;
+ list_for_each_entry_safe_reverse(new, prev, list, node) {
+ list_del(&new->node);
+ dma_pool_free(fsl_chan->desc_pool, new, new->async_tx.phys);
+ }
+
+ return NULL;
+}
+
+/**
+ * fsl_dma_prep_slave_sg - prepare descriptors for a DMA_SLAVE transaction
+ * @chan: DMA channel
+ * @sgl: scatterlist to transfer to/from
+ * @sg_len: number of entries in @scatterlist
+ * @direction: DMA direction
+ * @flags: DMAEngine flags
+ *
+ * Prepare a set of descriptors for a DMA_SLAVE transaction. Following the
+ * DMA_SLAVE API, this gets the device-specific information from the
+ * chan->private variable.
+ */
+static struct dma_async_tx_descriptor *fsl_dma_prep_slave_sg(
+ struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len,
+ enum dma_data_direction direction, unsigned long flags)
+{
+ struct fsl_dma_chan *fsl_chan;
+ struct fsl_desc_sw *first = NULL, *prev = NULL, *new = NULL;
+ struct fsl_dma_slave *slave;
+ struct list_head *tx_list;
+ size_t copy;
+
+ int i;
+ struct scatterlist *sg;
+ size_t sg_used;
+ size_t hw_used;
+ struct fsl_dma_hw_addr *hw;
+ dma_addr_t dma_dst, dma_src;
+
+ if (!chan)
+ return NULL;
+
+ if (!chan->private)
+ return NULL;
+
+ fsl_chan = to_fsl_chan(chan);
+ slave = chan->private;
+
+ if (list_empty(&slave->addresses))
+ return NULL;
+
+ hw = list_first_entry(&slave->addresses, struct fsl_dma_hw_addr, entry);
+ hw_used = 0;
+
+ /*
+ * Build the hardware transaction to copy from the scatterlist to
+ * the hardware, or from the hardware to the scatterlist
+ *
+ * If you are copying from the hardware to the scatterlist and it
+ * takes two hardware entries to fill an entire page, then both
+ * hardware entries will be coalesced into the same page
+ *
+ * If you are copying from the scatterlist to the hardware and a
+ * single page can fill two hardware entries, then the data will
+ * be read out of the page into the first hardware entry, and so on
+ */
+ for_each_sg(sgl, sg, sg_len, i) {
+ sg_used = 0;
+
+ /* Loop until the entire scatterlist entry is used */
+ while (sg_used < sg_dma_len(sg)) {
+
+ /*
+ * If we've used up the current hardware address/length
+ * pair, we need to load a new one
+ *
+ * This is done in a while loop so that descriptors with
+ * length == 0 will be skipped
+ */
+ while (hw_used >= hw->length) {
+
+ /*
+ * If the current hardware entry is the last
+ * entry in the list, we're finished
+ */
+ if (list_is_last(&hw->entry, &slave->addresses))
+ goto finished;
+
+ /* Get the next hardware address/length pair */
+ hw = list_entry(hw->entry.next,
+ struct fsl_dma_hw_addr, entry);
+ hw_used = 0;
+ }
+
+ /* Allocate the link descriptor from DMA pool */
+ new = fsl_dma_alloc_descriptor(fsl_chan);
+ if (!new) {
+ dev_err(fsl_chan->dev, "No free memory for "
+ "link descriptor\n");
+ goto fail;
+ }
+#ifdef FSL_DMA_LD_DEBUG
+ dev_dbg(fsl_chan->dev, "new link desc alloc %p\n", new);
+#endif
+
+ /*
+ * Calculate the maximum number of bytes to transfer,
+ * making sure it is less than the DMA controller limit
+ */
+ copy = min_t(size_t, sg_dma_len(sg) - sg_used,
+ hw->length - hw_used);
+ copy = min_t(size_t, copy, FSL_DMA_BCR_MAX_CNT);
+
+ /*
+ * DMA_FROM_DEVICE
+ * from the hardware to the scatterlist
+ *
+ * DMA_TO_DEVICE
+ * from the scatterlist to the hardware
+ */
+ if (direction == DMA_FROM_DEVICE) {
+ dma_src = hw->address + hw_used;
+ dma_dst = sg_dma_address(sg) + sg_used;
+ } else {
+ dma_src = sg_dma_address(sg) + sg_used;
+ dma_dst = hw->address + hw_used;
+ }
+
+ /* Fill in the descriptor */
+ set_desc_cnt(fsl_chan, &new->hw, copy);
+ set_desc_src(fsl_chan, &new->hw, dma_src);
+ set_desc_dest(fsl_chan, &new->hw, dma_dst);
+
+ /*
+ * If this is not the first descriptor, chain the
+ * current descriptor after the previous descriptor
+ */
+ if (!first) {
+ first = new;
+ } else {
+ set_desc_next(fsl_chan, &prev->hw,
+ new->async_tx.phys);
+ }
+
+ new->async_tx.cookie = 0;
+ async_tx_ack(&new->async_tx);
+
+ prev = new;
+ sg_used += copy;
+ hw_used += copy;
+
+ /* Insert the link descriptor into the LD ring */
+ list_add_tail(&new->node, &first->tx_list);
+ }
+ }
+
+finished:
+
+ /* All of the hardware address/length pairs had length == 0 */
+ if (!first || !new)
+ return NULL;
+
+ new->async_tx.flags = flags;
+ new->async_tx.cookie = -EBUSY;
+
+ /* Set End-of-link to the last link descriptor of new list */
+ set_ld_eol(fsl_chan, new);
+
+ /* Enable extra controller features */
+ if (fsl_chan->set_src_loop_size)
+ fsl_chan->set_src_loop_size(fsl_chan, slave->src_loop_size);
+
+ if (fsl_chan->set_dest_loop_size)
+ fsl_chan->set_dest_loop_size(fsl_chan, slave->dst_loop_size);
+
+ if (fsl_chan->toggle_ext_start)
+ fsl_chan->toggle_ext_start(fsl_chan, slave->external_start);
+
+ if (fsl_chan->toggle_ext_pause)
+ fsl_chan->toggle_ext_pause(fsl_chan, slave->external_pause);
+
+ if (fsl_chan->set_request_count)
+ fsl_chan->set_request_count(fsl_chan, slave->request_count);
+
+ return &first->async_tx;
+
+fail:
+ /* If first was not set, then we failed to allocate the very first
+ * descriptor, and we're done */
+ if (!first)
+ return NULL;
+
+ /*
+ * First is set, so all of the descriptors we allocated have been added
+ * to first->tx_list, INCLUDING "first" itself. Therefore we
+ * must traverse the list backwards freeing each descriptor in turn
+ *
+ * We're re-using variables for the loop, oh well
+ */
+ tx_list = &first->tx_list;
+ list_for_each_entry_safe_reverse(new, prev, tx_list, node) {
+ list_del_init(&new->node);
+ dma_pool_free(fsl_chan->desc_pool, new, new->async_tx.phys);
+ }
+
+ return NULL;
+}
+
+static void fsl_dma_device_terminate_all(struct dma_chan *chan)
+{
+ struct fsl_dma_chan *fsl_chan;
+ struct fsl_desc_sw *desc, *tmp;
+ unsigned long flags;
+
+ if (!chan)
+ return;
+
+ fsl_chan = to_fsl_chan(chan);
+
+ /* Halt the DMA engine */
+ dma_halt(fsl_chan);
+
+ spin_lock_irqsave(&fsl_chan->desc_lock, flags);
+
+ /* Remove and free all of the descriptors in the LD queue */
+ list_for_each_entry_safe(desc, tmp, &fsl_chan->ld_queue, node) {
+ list_del(&desc->node);
+ dma_pool_free(fsl_chan->desc_pool, desc, desc->async_tx.phys);
+ }
+
+ spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
+}
+
+/**
+ * fsl_dma_update_completed_cookie - Update the completed cookie.
+ * @fsl_chan : Freescale DMA channel
+ */
+static void fsl_dma_update_completed_cookie(struct fsl_dma_chan *fsl_chan)
+{
+ struct fsl_desc_sw *cur_desc, *desc;
+ dma_addr_t ld_phy;
+
+ ld_phy = get_cdar(fsl_chan) & FSL_DMA_NLDA_MASK;
+
+ if (ld_phy) {
+ cur_desc = NULL;
+ list_for_each_entry(desc, &fsl_chan->ld_queue, node)
+ if (desc->async_tx.phys == ld_phy) {
+ cur_desc = desc;
+ break;
+ }
+
+ if (cur_desc && cur_desc->async_tx.cookie) {
+ if (dma_is_idle(fsl_chan))
+ fsl_chan->completed_cookie =
+ cur_desc->async_tx.cookie;
+ else
+ fsl_chan->completed_cookie =
+ cur_desc->async_tx.cookie - 1;
+ }
+ }
+}
+
+/**
+ * fsl_chan_ld_cleanup - Clean up link descriptors
+ * @fsl_chan : Freescale DMA channel
+ *
+ * This function clean up the ld_queue of DMA channel.
+ * If 'in_intr' is set, the function will move the link descriptor to
+ * the recycle list. Otherwise, free it directly.
+ */
+static void fsl_chan_ld_cleanup(struct fsl_dma_chan *fsl_chan)
+{
+ struct fsl_desc_sw *desc, *_desc;
+ unsigned long flags;
+
+ spin_lock_irqsave(&fsl_chan->desc_lock, flags);
+
+ dev_dbg(fsl_chan->dev, "chan completed_cookie = %d\n",
+ fsl_chan->completed_cookie);
+ list_for_each_entry_safe(desc, _desc, &fsl_chan->ld_queue, node) {
+ dma_async_tx_callback callback;
+ void *callback_param;
+
+ if (dma_async_is_complete(desc->async_tx.cookie,
+ fsl_chan->completed_cookie, fsl_chan->common.cookie)
+ == DMA_IN_PROGRESS)
+ break;
+
+ callback = desc->async_tx.callback;
+ callback_param = desc->async_tx.callback_param;
+
+ /* Remove from ld_queue list */
+ list_del(&desc->node);
+
+ dev_dbg(fsl_chan->dev, "link descriptor %p will be recycle.\n",
+ desc);
+ dma_pool_free(fsl_chan->desc_pool, desc, desc->async_tx.phys);
+
+ /* Run the link descriptor callback function */
+ if (callback) {
+ spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
+ dev_dbg(fsl_chan->dev, "link descriptor %p callback\n",
+ desc);
+ callback(callback_param);
+ spin_lock_irqsave(&fsl_chan->desc_lock, flags);
+ }
+ }
+ spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
+}
+
+/**
+ * fsl_chan_xfer_ld_queue - Transfer link descriptors in channel ld_queue.
+ * @fsl_chan : Freescale DMA channel
+ */
+static void fsl_chan_xfer_ld_queue(struct fsl_dma_chan *fsl_chan)
+{
+ struct list_head *ld_node;
+ dma_addr_t next_dest_addr;
+ unsigned long flags;
+
+ spin_lock_irqsave(&fsl_chan->desc_lock, flags);
+
+ if (!dma_is_idle(fsl_chan))
+ goto out_unlock;
+
+ dma_halt(fsl_chan);
+
+ /* If there are some link descriptors
+ * not transfered in queue. We need to start it.
+ */
+
+ /* Find the first un-transfer desciptor */
+ for (ld_node = fsl_chan->ld_queue.next;
+ (ld_node != &fsl_chan->ld_queue)
+ && (dma_async_is_complete(
+ to_fsl_desc(ld_node)->async_tx.cookie,
+ fsl_chan->completed_cookie,
+ fsl_chan->common.cookie) == DMA_SUCCESS);
+ ld_node = ld_node->next);
+
+ if (ld_node != &fsl_chan->ld_queue) {
+ /* Get the ld start address from ld_queue */
+ next_dest_addr = to_fsl_desc(ld_node)->async_tx.phys;
+ dev_dbg(fsl_chan->dev, "xfer LDs staring from 0x%llx\n",
+ (unsigned long long)next_dest_addr);
+ set_cdar(fsl_chan, next_dest_addr);
+ dma_start(fsl_chan);
+ } else {
+ set_cdar(fsl_chan, 0);
+ set_ndar(fsl_chan, 0);
+ }
+
+out_unlock:
+ spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
+}
+
+/**
+ * fsl_dma_memcpy_issue_pending - Issue the DMA start command
+ * @fsl_chan : Freescale DMA channel
+ */
+static void fsl_dma_memcpy_issue_pending(struct dma_chan *chan)
+{
+ struct fsl_dma_chan *fsl_chan = to_fsl_chan(chan);
+
+#ifdef FSL_DMA_LD_DEBUG
+ struct fsl_desc_sw *ld;
+ unsigned long flags;
+
+ spin_lock_irqsave(&fsl_chan->desc_lock, flags);
+ if (list_empty(&fsl_chan->ld_queue)) {
+ spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
+ return;
+ }
+
+ dev_dbg(fsl_chan->dev, "--memcpy issue--\n");
+ list_for_each_entry(ld, &fsl_chan->ld_queue, node) {
+ int i;
+ dev_dbg(fsl_chan->dev, "Ch %d, LD %08x\n",
+ fsl_chan->id, ld->async_tx.phys);
+ for (i = 0; i < 8; i++)
+ dev_dbg(fsl_chan->dev, "LD offset %d: %08x\n",
+ i, *(((u32 *)&ld->hw) + i));
+ }
+ dev_dbg(fsl_chan->dev, "----------------\n");
+ spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
+#endif
+
+ fsl_chan_xfer_ld_queue(fsl_chan);
+}
+
+/**
+ * fsl_dma_is_complete - Determine the DMA status
+ * @fsl_chan : Freescale DMA channel
+ */
+static enum dma_status fsl_dma_is_complete(struct dma_chan *chan,
+ dma_cookie_t cookie,
+ dma_cookie_t *done,
+ dma_cookie_t *used)
+{
+ struct fsl_dma_chan *fsl_chan = to_fsl_chan(chan);
+ dma_cookie_t last_used;
+ dma_cookie_t last_complete;
+
+ fsl_chan_ld_cleanup(fsl_chan);
+
+ last_used = chan->cookie;
+ last_complete = fsl_chan->completed_cookie;
+
+ if (done)
+ *done = last_complete;
+
+ if (used)
+ *used = last_used;
+
+ return dma_async_is_complete(cookie, last_complete, last_used);
+}
+
+static irqreturn_t fsl_dma_chan_do_interrupt(int irq, void *data)
+{
+ struct fsl_dma_chan *fsl_chan = (struct fsl_dma_chan *)data;
+ u32 stat;
+ int update_cookie = 0;
+ int xfer_ld_q = 0;
+
+ stat = get_sr(fsl_chan);
+ dev_dbg(fsl_chan->dev, "event: channel %d, stat = 0x%x\n",
+ fsl_chan->id, stat);
+ set_sr(fsl_chan, stat); /* Clear the event register */
+
+ stat &= ~(FSL_DMA_SR_CB | FSL_DMA_SR_CH);
+ if (!stat)
+ return IRQ_NONE;
+
+ if (stat & FSL_DMA_SR_TE)
+ dev_err(fsl_chan->dev, "Transfer Error!\n");
+
+ /* Programming Error
+ * The DMA_INTERRUPT async_tx is a NULL transfer, which will
+ * triger a PE interrupt.
+ */
+ if (stat & FSL_DMA_SR_PE) {
+ dev_dbg(fsl_chan->dev, "event: Programming Error INT\n");
+ if (get_bcr(fsl_chan) == 0) {
+ /* BCR register is 0, this is a DMA_INTERRUPT async_tx.
+ * Now, update the completed cookie, and continue the
+ * next uncompleted transfer.
+ */
+ update_cookie = 1;
+ xfer_ld_q = 1;
+ }
+ stat &= ~FSL_DMA_SR_PE;
+ }
+
+ /* If the link descriptor segment transfer finishes,
+ * we will recycle the used descriptor.
+ */
+ if (stat & FSL_DMA_SR_EOSI) {
+ dev_dbg(fsl_chan->dev, "event: End-of-segments INT\n");
+ dev_dbg(fsl_chan->dev, "event: clndar 0x%llx, nlndar 0x%llx\n",
+ (unsigned long long)get_cdar(fsl_chan),
+ (unsigned long long)get_ndar(fsl_chan));
+ stat &= ~FSL_DMA_SR_EOSI;
+ update_cookie = 1;
+ }
+
+ /* For MPC8349, EOCDI event need to update cookie
+ * and start the next transfer if it exist.
+ */
+ if (stat & FSL_DMA_SR_EOCDI) {
+ dev_dbg(fsl_chan->dev, "event: End-of-Chain link INT\n");
+ stat &= ~FSL_DMA_SR_EOCDI;
+ update_cookie = 1;
+ xfer_ld_q = 1;
+ }
+
+ /* If it current transfer is the end-of-transfer,
+ * we should clear the Channel Start bit for
+ * prepare next transfer.
+ */
+ if (stat & FSL_DMA_SR_EOLNI) {
+ dev_dbg(fsl_chan->dev, "event: End-of-link INT\n");
+ stat &= ~FSL_DMA_SR_EOLNI;
+ xfer_ld_q = 1;
+ }
+
+ if (update_cookie)
+ fsl_dma_update_completed_cookie(fsl_chan);
+ if (xfer_ld_q)
+ fsl_chan_xfer_ld_queue(fsl_chan);
+ if (stat)
+ dev_dbg(fsl_chan->dev, "event: unhandled sr 0x%02x\n",
+ stat);
+
+ dev_dbg(fsl_chan->dev, "event: Exit\n");
+ tasklet_schedule(&fsl_chan->tasklet);
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t fsl_dma_do_interrupt(int irq, void *data)
+{
+ struct fsl_dma_device *fdev = (struct fsl_dma_device *)data;
+ u32 gsr;
+ int ch_nr;
+
+ gsr = (fdev->feature & FSL_DMA_BIG_ENDIAN) ? in_be32(fdev->reg_base)
+ : in_le32(fdev->reg_base);
+ ch_nr = (32 - ffs(gsr)) / 8;
+
+ return fdev->chan[ch_nr] ? fsl_dma_chan_do_interrupt(irq,
+ fdev->chan[ch_nr]) : IRQ_NONE;
+}
+
+static void dma_do_tasklet(unsigned long data)
+{
+ struct fsl_dma_chan *fsl_chan = (struct fsl_dma_chan *)data;
+ fsl_chan_ld_cleanup(fsl_chan);
+}
+
+static int __devinit fsl_dma_chan_probe(struct fsl_dma_device *fdev,
+ struct device_node *node, u32 feature, const char *compatible)
+{
+ struct fsl_dma_chan *new_fsl_chan;
+ int err;
+
+ /* alloc channel */
+ new_fsl_chan = kzalloc(sizeof(struct fsl_dma_chan), GFP_KERNEL);
+ if (!new_fsl_chan) {
+ dev_err(fdev->dev, "No free memory for allocating "
+ "dma channels!\n");
+ return -ENOMEM;
+ }
+
+ /* get dma channel register base */
+ err = of_address_to_resource(node, 0, &new_fsl_chan->reg);
+ if (err) {
+ dev_err(fdev->dev, "Can't get %s property 'reg'\n",
+ node->full_name);
+ goto err_no_reg;
+ }
+
+ new_fsl_chan->feature = feature;
+
+ if (!fdev->feature)
+ fdev->feature = new_fsl_chan->feature;
+
+ /* If the DMA device's feature is different than its channels',
+ * report the bug.
+ */
+ WARN_ON(fdev->feature != new_fsl_chan->feature);
+
+ new_fsl_chan->dev = fdev->dev;
+ new_fsl_chan->reg_base = ioremap(new_fsl_chan->reg.start,
+ new_fsl_chan->reg.end - new_fsl_chan->reg.start + 1);
+
+ new_fsl_chan->id = ((new_fsl_chan->reg.start - 0x100) & 0xfff) >> 7;
+ if (new_fsl_chan->id >= FSL_DMA_MAX_CHANS_PER_DEVICE) {
+ dev_err(fdev->dev, "There is no %d channel!\n",
+ new_fsl_chan->id);
+ err = -EINVAL;
+ goto err_no_chan;
+ }
+ fdev->chan[new_fsl_chan->id] = new_fsl_chan;
+ tasklet_init(&new_fsl_chan->tasklet, dma_do_tasklet,
+ (unsigned long)new_fsl_chan);
+
+ /* Init the channel */
+ dma_init(new_fsl_chan);
+
+ /* Clear cdar registers */
+ set_cdar(new_fsl_chan, 0);
+
+ switch (new_fsl_chan->feature & FSL_DMA_IP_MASK) {
+ case FSL_DMA_IP_85XX:
+ new_fsl_chan->toggle_ext_pause = fsl_chan_toggle_ext_pause;
+ case FSL_DMA_IP_83XX:
+ new_fsl_chan->toggle_ext_start = fsl_chan_toggle_ext_start;
+ new_fsl_chan->set_src_loop_size = fsl_chan_set_src_loop_size;
+ new_fsl_chan->set_dest_loop_size = fsl_chan_set_dest_loop_size;
+ new_fsl_chan->set_request_count = fsl_chan_set_request_count;
+ }
+
+ spin_lock_init(&new_fsl_chan->desc_lock);
+ INIT_LIST_HEAD(&new_fsl_chan->ld_queue);
+
+ new_fsl_chan->common.device = &fdev->common;
+
+ /* Add the channel to DMA device channel list */
+ list_add_tail(&new_fsl_chan->common.device_node,
+ &fdev->common.channels);
+ fdev->common.chancnt++;
+
+ new_fsl_chan->irq = irq_of_parse_and_map(node, 0);
+ if (new_fsl_chan->irq != NO_IRQ) {
+ err = request_irq(new_fsl_chan->irq,
+ &fsl_dma_chan_do_interrupt, IRQF_SHARED,
+ "fsldma-channel", new_fsl_chan);
+ if (err) {
+ dev_err(fdev->dev, "DMA channel %s request_irq error "
+ "with return %d\n", node->full_name, err);
+ goto err_no_irq;
+ }
+ }
+
+ dev_info(fdev->dev, "#%d (%s), irq %d\n", new_fsl_chan->id,
+ compatible,
+ new_fsl_chan->irq != NO_IRQ ? new_fsl_chan->irq : fdev->irq);
+
+ return 0;
+
+err_no_irq:
+ list_del(&new_fsl_chan->common.device_node);
+err_no_chan:
+ iounmap(new_fsl_chan->reg_base);
+err_no_reg:
+ kfree(new_fsl_chan);
+ return err;
+}
+
+static void fsl_dma_chan_remove(struct fsl_dma_chan *fchan)
+{
+ if (fchan->irq != NO_IRQ)
+ free_irq(fchan->irq, fchan);
+ list_del(&fchan->common.device_node);
+ iounmap(fchan->reg_base);
+ kfree(fchan);
+}
+
+static int __devinit of_fsl_dma_probe(struct of_device *dev,
+ const struct of_device_id *match)
+{
+ int err;
+ struct fsl_dma_device *fdev;
+ struct device_node *child;
+
+ fdev = kzalloc(sizeof(struct fsl_dma_device), GFP_KERNEL);
+ if (!fdev) {
+ dev_err(&dev->dev, "No enough memory for 'priv'\n");
+ return -ENOMEM;
+ }
+ fdev->dev = &dev->dev;
+ INIT_LIST_HEAD(&fdev->common.channels);
+
+ /* get DMA controller register base */
+ err = of_address_to_resource(dev->node, 0, &fdev->reg);
+ if (err) {
+ dev_err(&dev->dev, "Can't get %s property 'reg'\n",
+ dev->node->full_name);
+ goto err_no_reg;
+ }
+
+ dev_info(&dev->dev, "Probe the Freescale DMA driver for %s "
+ "controller at 0x%llx...\n",
+ match->compatible, (unsigned long long)fdev->reg.start);
+ fdev->reg_base = ioremap(fdev->reg.start, fdev->reg.end
+ - fdev->reg.start + 1);
+
+ dma_cap_set(DMA_MEMCPY, fdev->common.cap_mask);
+ dma_cap_set(DMA_INTERRUPT, fdev->common.cap_mask);
+ dma_cap_set(DMA_SLAVE, fdev->common.cap_mask);
+ fdev->common.device_alloc_chan_resources = fsl_dma_alloc_chan_resources;
+ fdev->common.device_free_chan_resources = fsl_dma_free_chan_resources;
+ fdev->common.device_prep_dma_interrupt = fsl_dma_prep_interrupt;
+ fdev->common.device_prep_dma_memcpy = fsl_dma_prep_memcpy;
+ fdev->common.device_is_tx_complete = fsl_dma_is_complete;
+ fdev->common.device_issue_pending = fsl_dma_memcpy_issue_pending;
+ fdev->common.device_prep_slave_sg = fsl_dma_prep_slave_sg;
+ fdev->common.device_terminate_all = fsl_dma_device_terminate_all;
+ fdev->common.dev = &dev->dev;
+
+ fdev->irq = irq_of_parse_and_map(dev->node, 0);
+ if (fdev->irq != NO_IRQ) {
+ err = request_irq(fdev->irq, &fsl_dma_do_interrupt, IRQF_SHARED,
+ "fsldma-device", fdev);
+ if (err) {
+ dev_err(&dev->dev, "DMA device request_irq error "
+ "with return %d\n", err);
+ goto err;
+ }
+ }
+
+ dev_set_drvdata(&(dev->dev), fdev);
+
+ /* We cannot use of_platform_bus_probe() because there is no
+ * of_platform_bus_remove. Instead, we manually instantiate every DMA
+ * channel object.
+ */
+ for_each_child_of_node(dev->node, child) {
+ if (of_device_is_compatible(child, "fsl,eloplus-dma-channel"))
+ fsl_dma_chan_probe(fdev, child,
+ FSL_DMA_IP_85XX | FSL_DMA_BIG_ENDIAN,
+ "fsl,eloplus-dma-channel");
+ if (of_device_is_compatible(child, "fsl,elo-dma-channel"))
+ fsl_dma_chan_probe(fdev, child,
+ FSL_DMA_IP_83XX | FSL_DMA_LITTLE_ENDIAN,
+ "fsl,elo-dma-channel");
+ }
+
+ dma_async_device_register(&fdev->common);
+ return 0;
+
+err:
+ iounmap(fdev->reg_base);
+err_no_reg:
+ kfree(fdev);
+ return err;
+}
+
+static int of_fsl_dma_remove(struct of_device *of_dev)
+{
+ struct fsl_dma_device *fdev;
+ unsigned int i;
+
+ fdev = dev_get_drvdata(&of_dev->dev);
+
+ dma_async_device_unregister(&fdev->common);
+
+ for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++)
+ if (fdev->chan[i])
+ fsl_dma_chan_remove(fdev->chan[i]);
+
+ if (fdev->irq != NO_IRQ)
+ free_irq(fdev->irq, fdev);
+
+ iounmap(fdev->reg_base);
+
+ kfree(fdev);
+ dev_set_drvdata(&of_dev->dev, NULL);
+
+ return 0;
+}
+
+static struct of_device_id of_fsl_dma_ids[] = {
+ { .compatible = "fsl,eloplus-dma", },
+ { .compatible = "fsl,elo-dma", },
+ {}
+};
+
+static struct of_platform_driver of_fsl_dma_driver = {
+ .name = "fsl-elo-dma",
+ .match_table = of_fsl_dma_ids,
+ .probe = of_fsl_dma_probe,
+ .remove = of_fsl_dma_remove,
+};
+
+static __init int of_fsl_dma_init(void)
+{
+ int ret;
+
+ pr_info("Freescale Elo / Elo Plus DMA driver\n");
+
+ ret = of_register_platform_driver(&of_fsl_dma_driver);
+ if (ret)
+ pr_err("fsldma: failed to register platform driver\n");
+
+ return ret;
+}
+
+static void __exit of_fsl_dma_exit(void)
+{
+ of_unregister_platform_driver(&of_fsl_dma_driver);
+}
+
+subsys_initcall(of_fsl_dma_init);
+module_exit(of_fsl_dma_exit);
+
+MODULE_DESCRIPTION("Freescale Elo / Elo Plus DMA driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/fsldma.h b/drivers/dma/fsldma.h
new file mode 100644
index 00000000000..0df14cbb8ca
--- /dev/null
+++ b/drivers/dma/fsldma.h
@@ -0,0 +1,201 @@
+/*
+ * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author:
+ * Zhang Wei <wei.zhang@freescale.com>, Jul 2007
+ * Ebony Zhu <ebony.zhu@freescale.com>, May 2007
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+#ifndef __DMA_FSLDMA_H
+#define __DMA_FSLDMA_H
+
+#include <linux/device.h>
+#include <linux/dmapool.h>
+#include <linux/dmaengine.h>
+
+/* Define data structures needed by Freescale
+ * MPC8540 and MPC8349 DMA controller.
+ */
+#define FSL_DMA_MR_CS 0x00000001
+#define FSL_DMA_MR_CC 0x00000002
+#define FSL_DMA_MR_CA 0x00000008
+#define FSL_DMA_MR_EIE 0x00000040
+#define FSL_DMA_MR_XFE 0x00000020
+#define FSL_DMA_MR_EOLNIE 0x00000100
+#define FSL_DMA_MR_EOLSIE 0x00000080
+#define FSL_DMA_MR_EOSIE 0x00000200
+#define FSL_DMA_MR_CDSM 0x00000010
+#define FSL_DMA_MR_CTM 0x00000004
+#define FSL_DMA_MR_EMP_EN 0x00200000
+#define FSL_DMA_MR_EMS_EN 0x00040000
+#define FSL_DMA_MR_DAHE 0x00002000
+#define FSL_DMA_MR_SAHE 0x00001000
+
+/* Special MR definition for MPC8349 */
+#define FSL_DMA_MR_EOTIE 0x00000080
+#define FSL_DMA_MR_PRC_RM 0x00000800
+
+#define FSL_DMA_SR_CH 0x00000020
+#define FSL_DMA_SR_PE 0x00000010
+#define FSL_DMA_SR_CB 0x00000004
+#define FSL_DMA_SR_TE 0x00000080
+#define FSL_DMA_SR_EOSI 0x00000002
+#define FSL_DMA_SR_EOLSI 0x00000001
+#define FSL_DMA_SR_EOCDI 0x00000001
+#define FSL_DMA_SR_EOLNI 0x00000008
+
+#define FSL_DMA_SATR_SBPATMU 0x20000000
+#define FSL_DMA_SATR_STRANSINT_RIO 0x00c00000
+#define FSL_DMA_SATR_SREADTYPE_SNOOP_READ 0x00050000
+#define FSL_DMA_SATR_SREADTYPE_BP_IORH 0x00020000
+#define FSL_DMA_SATR_SREADTYPE_BP_NREAD 0x00040000
+#define FSL_DMA_SATR_SREADTYPE_BP_MREAD 0x00070000
+
+#define FSL_DMA_DATR_DBPATMU 0x20000000
+#define FSL_DMA_DATR_DTRANSINT_RIO 0x00c00000
+#define FSL_DMA_DATR_DWRITETYPE_SNOOP_WRITE 0x00050000
+#define FSL_DMA_DATR_DWRITETYPE_BP_FLUSH 0x00010000
+
+#define FSL_DMA_EOL ((u64)0x1)
+#define FSL_DMA_SNEN ((u64)0x10)
+#define FSL_DMA_EOSIE 0x8
+#define FSL_DMA_NLDA_MASK (~(u64)0x1f)
+
+#define FSL_DMA_BCR_MAX_CNT 0x03ffffffu
+
+#define FSL_DMA_DGSR_TE 0x80
+#define FSL_DMA_DGSR_CH 0x20
+#define FSL_DMA_DGSR_PE 0x10
+#define FSL_DMA_DGSR_EOLNI 0x08
+#define FSL_DMA_DGSR_CB 0x04
+#define FSL_DMA_DGSR_EOSI 0x02
+#define FSL_DMA_DGSR_EOLSI 0x01
+
+typedef u64 __bitwise v64;
+typedef u32 __bitwise v32;
+
+struct fsl_dma_ld_hw {
+ v64 src_addr;
+ v64 dst_addr;
+ v64 next_ln_addr;
+ v32 count;
+ v32 reserve;
+} __attribute__((aligned(32)));
+
+struct fsl_desc_sw {
+ struct fsl_dma_ld_hw hw;
+ struct list_head node;
+ struct list_head tx_list;
+ struct dma_async_tx_descriptor async_tx;
+ struct list_head *ld;
+ void *priv;
+} __attribute__((aligned(32)));
+
+struct fsl_dma_chan_regs {
+ u32 mr; /* 0x00 - Mode Register */
+ u32 sr; /* 0x04 - Status Register */
+ u64 cdar; /* 0x08 - Current descriptor address register */
+ u64 sar; /* 0x10 - Source Address Register */
+ u64 dar; /* 0x18 - Destination Address Register */
+ u32 bcr; /* 0x20 - Byte Count Register */
+ u64 ndar; /* 0x24 - Next Descriptor Address Register */
+};
+
+struct fsl_dma_chan;
+#define FSL_DMA_MAX_CHANS_PER_DEVICE 4
+
+struct fsl_dma_device {
+ void __iomem *reg_base; /* DGSR register base */
+ struct resource reg; /* Resource for register */
+ struct device *dev;
+ struct dma_device common;
+ struct fsl_dma_chan *chan[FSL_DMA_MAX_CHANS_PER_DEVICE];
+ u32 feature; /* The same as DMA channels */
+ int irq; /* Channel IRQ */
+};
+
+/* Define macros for fsl_dma_chan->feature property */
+#define FSL_DMA_LITTLE_ENDIAN 0x00000000
+#define FSL_DMA_BIG_ENDIAN 0x00000001
+
+#define FSL_DMA_IP_MASK 0x00000ff0
+#define FSL_DMA_IP_85XX 0x00000010
+#define FSL_DMA_IP_83XX 0x00000020
+
+#define FSL_DMA_CHAN_PAUSE_EXT 0x00001000
+#define FSL_DMA_CHAN_START_EXT 0x00002000
+
+struct fsl_dma_chan {
+ struct fsl_dma_chan_regs __iomem *reg_base;
+ dma_cookie_t completed_cookie; /* The maximum cookie completed */
+ spinlock_t desc_lock; /* Descriptor operation lock */
+ struct list_head ld_queue; /* Link descriptors queue */
+ struct dma_chan common; /* DMA common channel */
+ struct dma_pool *desc_pool; /* Descriptors pool */
+ struct device *dev; /* Channel device */
+ struct resource reg; /* Resource for register */
+ int irq; /* Channel IRQ */
+ int id; /* Raw id of this channel */
+ struct tasklet_struct tasklet;
+ u32 feature;
+
+ void (*toggle_ext_pause)(struct fsl_dma_chan *fsl_chan, int enable);
+ void (*toggle_ext_start)(struct fsl_dma_chan *fsl_chan, int enable);
+ void (*set_src_loop_size)(struct fsl_dma_chan *fsl_chan, int size);
+ void (*set_dest_loop_size)(struct fsl_dma_chan *fsl_chan, int size);
+ void (*set_request_count)(struct fsl_dma_chan *fsl_chan, int size);
+};
+
+#define to_fsl_chan(chan) container_of(chan, struct fsl_dma_chan, common)
+#define to_fsl_desc(lh) container_of(lh, struct fsl_desc_sw, node)
+#define tx_to_fsl_desc(tx) container_of(tx, struct fsl_desc_sw, async_tx)
+
+#ifndef __powerpc64__
+static u64 in_be64(const u64 __iomem *addr)
+{
+ return ((u64)in_be32((u32 __iomem *)addr) << 32) |
+ (in_be32((u32 __iomem *)addr + 1));
+}
+
+static void out_be64(u64 __iomem *addr, u64 val)
+{
+ out_be32((u32 __iomem *)addr, val >> 32);
+ out_be32((u32 __iomem *)addr + 1, (u32)val);
+}
+
+/* There is no asm instructions for 64 bits reverse loads and stores */
+static u64 in_le64(const u64 __iomem *addr)
+{
+ return ((u64)in_le32((u32 __iomem *)addr + 1) << 32) |
+ (in_le32((u32 __iomem *)addr));
+}
+
+static void out_le64(u64 __iomem *addr, u64 val)
+{
+ out_le32((u32 __iomem *)addr + 1, val >> 32);
+ out_le32((u32 __iomem *)addr, (u32)val);
+}
+#endif
+
+#define DMA_IN(fsl_chan, addr, width) \
+ (((fsl_chan)->feature & FSL_DMA_BIG_ENDIAN) ? \
+ in_be##width(addr) : in_le##width(addr))
+#define DMA_OUT(fsl_chan, addr, val, width) \
+ (((fsl_chan)->feature & FSL_DMA_BIG_ENDIAN) ? \
+ out_be##width(addr, val) : out_le##width(addr, val))
+
+#define DMA_TO_CPU(fsl_chan, d, width) \
+ (((fsl_chan)->feature & FSL_DMA_BIG_ENDIAN) ? \
+ be##width##_to_cpu((__force __be##width)(v##width)d) : \
+ le##width##_to_cpu((__force __le##width)(v##width)d))
+#define CPU_TO_DMA(fsl_chan, c, width) \
+ (((fsl_chan)->feature & FSL_DMA_BIG_ENDIAN) ? \
+ (__force v##width)cpu_to_be##width(c) : \
+ (__force v##width)cpu_to_le##width(c))
+
+#endif /* __DMA_FSLDMA_H */
diff --git a/drivers/dma/ioat/Makefile b/drivers/dma/ioat/Makefile
new file mode 100644
index 00000000000..8997d3fb905
--- /dev/null
+++ b/drivers/dma/ioat/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o
+ioatdma-objs := pci.o dma.o dma_v2.o dma_v3.o dca.o
diff --git a/drivers/dma/ioat/dca.c b/drivers/dma/ioat/dca.c
new file mode 100644
index 00000000000..abd9038e06b
--- /dev/null
+++ b/drivers/dma/ioat/dca.c
@@ -0,0 +1,684 @@
+/*
+ * Intel I/OAT DMA Linux driver
+ * Copyright(c) 2007 - 2009 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/dca.h>
+
+/* either a kernel change is needed, or we need something like this in kernel */
+#ifndef CONFIG_SMP
+#include <asm/smp.h>
+#undef cpu_physical_id
+#define cpu_physical_id(cpu) (cpuid_ebx(1) >> 24)
+#endif
+
+#include "dma.h"
+#include "registers.h"
+
+/*
+ * Bit 7 of a tag map entry is the "valid" bit, if it is set then bits 0:6
+ * contain the bit number of the APIC ID to map into the DCA tag. If the valid
+ * bit is not set, then the value must be 0 or 1 and defines the bit in the tag.
+ */
+#define DCA_TAG_MAP_VALID 0x80
+
+#define DCA3_TAG_MAP_BIT_TO_INV 0x80
+#define DCA3_TAG_MAP_BIT_TO_SEL 0x40
+#define DCA3_TAG_MAP_LITERAL_VAL 0x1
+
+#define DCA_TAG_MAP_MASK 0xDF
+
+/* expected tag map bytes for I/OAT ver.2 */
+#define DCA2_TAG_MAP_BYTE0 0x80
+#define DCA2_TAG_MAP_BYTE1 0x0
+#define DCA2_TAG_MAP_BYTE2 0x81
+#define DCA2_TAG_MAP_BYTE3 0x82
+#define DCA2_TAG_MAP_BYTE4 0x82
+
+/* verify if tag map matches expected values */
+static inline int dca2_tag_map_valid(u8 *tag_map)
+{
+ return ((tag_map[0] == DCA2_TAG_MAP_BYTE0) &&
+ (tag_map[1] == DCA2_TAG_MAP_BYTE1) &&
+ (tag_map[2] == DCA2_TAG_MAP_BYTE2) &&
+ (tag_map[3] == DCA2_TAG_MAP_BYTE3) &&
+ (tag_map[4] == DCA2_TAG_MAP_BYTE4));
+}
+
+/*
+ * "Legacy" DCA systems do not implement the DCA register set in the
+ * I/OAT device. Software needs direct support for their tag mappings.
+ */
+
+#define APICID_BIT(x) (DCA_TAG_MAP_VALID | (x))
+#define IOAT_TAG_MAP_LEN 8
+
+static u8 ioat_tag_map_BNB[IOAT_TAG_MAP_LEN] = {
+ 1, APICID_BIT(1), APICID_BIT(2), APICID_BIT(2), };
+static u8 ioat_tag_map_SCNB[IOAT_TAG_MAP_LEN] = {
+ 1, APICID_BIT(1), APICID_BIT(2), APICID_BIT(2), };
+static u8 ioat_tag_map_CNB[IOAT_TAG_MAP_LEN] = {
+ 1, APICID_BIT(1), APICID_BIT(3), APICID_BIT(4), APICID_BIT(2), };
+static u8 ioat_tag_map_UNISYS[IOAT_TAG_MAP_LEN] = { 0 };
+
+/* pack PCI B/D/F into a u16 */
+static inline u16 dcaid_from_pcidev(struct pci_dev *pci)
+{
+ return (pci->bus->number << 8) | pci->devfn;
+}
+
+static int dca_enabled_in_bios(struct pci_dev *pdev)
+{
+ /* CPUID level 9 returns DCA configuration */
+ /* Bit 0 indicates DCA enabled by the BIOS */
+ unsigned long cpuid_level_9;
+ int res;
+
+ cpuid_level_9 = cpuid_eax(9);
+ res = test_bit(0, &cpuid_level_9);
+ if (!res)
+ dev_dbg(&pdev->dev, "DCA is disabled in BIOS\n");
+
+ return res;
+}
+
+int system_has_dca_enabled(struct pci_dev *pdev)
+{
+ if (boot_cpu_has(X86_FEATURE_DCA))
+ return dca_enabled_in_bios(pdev);
+
+ dev_dbg(&pdev->dev, "boot cpu doesn't have X86_FEATURE_DCA\n");
+ return 0;
+}
+
+struct ioat_dca_slot {
+ struct pci_dev *pdev; /* requester device */
+ u16 rid; /* requester id, as used by IOAT */
+};
+
+#define IOAT_DCA_MAX_REQ 6
+#define IOAT3_DCA_MAX_REQ 2
+
+struct ioat_dca_priv {
+ void __iomem *iobase;
+ void __iomem *dca_base;
+ int max_requesters;
+ int requester_count;
+ u8 tag_map[IOAT_TAG_MAP_LEN];
+ struct ioat_dca_slot req_slots[0];
+};
+
+/* 5000 series chipset DCA Port Requester ID Table Entry Format
+ * [15:8] PCI-Express Bus Number
+ * [7:3] PCI-Express Device Number
+ * [2:0] PCI-Express Function Number
+ *
+ * 5000 series chipset DCA control register format
+ * [7:1] Reserved (0)
+ * [0] Ignore Function Number
+ */
+
+static int ioat_dca_add_requester(struct dca_provider *dca, struct device *dev)
+{
+ struct ioat_dca_priv *ioatdca = dca_priv(dca);
+ struct pci_dev *pdev;
+ int i;
+ u16 id;
+
+ /* This implementation only supports PCI-Express */
+ if (dev->bus != &pci_bus_type)
+ return -ENODEV;
+ pdev = to_pci_dev(dev);
+ id = dcaid_from_pcidev(pdev);
+
+ if (ioatdca->requester_count == ioatdca->max_requesters)
+ return -ENODEV;
+
+ for (i = 0; i < ioatdca->max_requesters; i++) {
+ if (ioatdca->req_slots[i].pdev == NULL) {
+ /* found an empty slot */
+ ioatdca->requester_count++;
+ ioatdca->req_slots[i].pdev = pdev;
+ ioatdca->req_slots[i].rid = id;
+ writew(id, ioatdca->dca_base + (i * 4));
+ /* make sure the ignore function bit is off */
+ writeb(0, ioatdca->dca_base + (i * 4) + 2);
+ return i;
+ }
+ }
+ /* Error, ioatdma->requester_count is out of whack */
+ return -EFAULT;
+}
+
+static int ioat_dca_remove_requester(struct dca_provider *dca,
+ struct device *dev)
+{
+ struct ioat_dca_priv *ioatdca = dca_priv(dca);
+ struct pci_dev *pdev;
+ int i;
+
+ /* This implementation only supports PCI-Express */
+ if (dev->bus != &pci_bus_type)
+ return -ENODEV;
+ pdev = to_pci_dev(dev);
+
+ for (i = 0; i < ioatdca->max_requesters; i++) {
+ if (ioatdca->req_slots[i].pdev == pdev) {
+ writew(0, ioatdca->dca_base + (i * 4));
+ ioatdca->req_slots[i].pdev = NULL;
+ ioatdca->req_slots[i].rid = 0;
+ ioatdca->requester_count--;
+ return i;
+ }
+ }
+ return -ENODEV;
+}
+
+static u8 ioat_dca_get_tag(struct dca_provider *dca,
+ struct device *dev,
+ int cpu)
+{
+ struct ioat_dca_priv *ioatdca = dca_priv(dca);
+ int i, apic_id, bit, value;
+ u8 entry, tag;
+
+ tag = 0;
+ apic_id = cpu_physical_id(cpu);
+
+ for (i = 0; i < IOAT_TAG_MAP_LEN; i++) {
+ entry = ioatdca->tag_map[i];
+ if (entry & DCA_TAG_MAP_VALID) {
+ bit = entry & ~DCA_TAG_MAP_VALID;
+ value = (apic_id & (1 << bit)) ? 1 : 0;
+ } else {
+ value = entry ? 1 : 0;
+ }
+ tag |= (value << i);
+ }
+ return tag;
+}
+
+static int ioat_dca_dev_managed(struct dca_provider *dca,
+ struct device *dev)
+{
+ struct ioat_dca_priv *ioatdca = dca_priv(dca);
+ struct pci_dev *pdev;
+ int i;
+
+ pdev = to_pci_dev(dev);
+ for (i = 0; i < ioatdca->max_requesters; i++) {
+ if (ioatdca->req_slots[i].pdev == pdev)
+ return 1;
+ }
+ return 0;
+}
+
+static struct dca_ops ioat_dca_ops = {
+ .add_requester = ioat_dca_add_requester,
+ .remove_requester = ioat_dca_remove_requester,
+ .get_tag = ioat_dca_get_tag,
+ .dev_managed = ioat_dca_dev_managed,
+};
+
+
+struct dca_provider * __devinit
+ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase)
+{
+ struct dca_provider *dca;
+ struct ioat_dca_priv *ioatdca;
+ u8 *tag_map = NULL;
+ int i;
+ int err;
+ u8 version;
+ u8 max_requesters;
+
+ if (!system_has_dca_enabled(pdev))
+ return NULL;
+
+ /* I/OAT v1 systems must have a known tag_map to support DCA */
+ switch (pdev->vendor) {
+ case PCI_VENDOR_ID_INTEL:
+ switch (pdev->device) {
+ case PCI_DEVICE_ID_INTEL_IOAT:
+ tag_map = ioat_tag_map_BNB;
+ break;
+ case PCI_DEVICE_ID_INTEL_IOAT_CNB:
+ tag_map = ioat_tag_map_CNB;
+ break;
+ case PCI_DEVICE_ID_INTEL_IOAT_SCNB:
+ tag_map = ioat_tag_map_SCNB;
+ break;
+ }
+ break;
+ case PCI_VENDOR_ID_UNISYS:
+ switch (pdev->device) {
+ case PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR:
+ tag_map = ioat_tag_map_UNISYS;
+ break;
+ }
+ break;
+ }
+ if (tag_map == NULL)
+ return NULL;
+
+ version = readb(iobase + IOAT_VER_OFFSET);
+ if (version == IOAT_VER_3_0)
+ max_requesters = IOAT3_DCA_MAX_REQ;
+ else
+ max_requesters = IOAT_DCA_MAX_REQ;
+
+ dca = alloc_dca_provider(&ioat_dca_ops,
+ sizeof(*ioatdca) +
+ (sizeof(struct ioat_dca_slot) * max_requesters));
+ if (!dca)
+ return NULL;
+
+ ioatdca = dca_priv(dca);
+ ioatdca->max_requesters = max_requesters;
+ ioatdca->dca_base = iobase + 0x54;
+
+ /* copy over the APIC ID to DCA tag mapping */
+ for (i = 0; i < IOAT_TAG_MAP_LEN; i++)
+ ioatdca->tag_map[i] = tag_map[i];
+
+ err = register_dca_provider(dca, &pdev->dev);
+ if (err) {
+ free_dca_provider(dca);
+ return NULL;
+ }
+
+ return dca;
+}
+
+
+static int ioat2_dca_add_requester(struct dca_provider *dca, struct device *dev)
+{
+ struct ioat_dca_priv *ioatdca = dca_priv(dca);
+ struct pci_dev *pdev;
+ int i;
+ u16 id;
+ u16 global_req_table;
+
+ /* This implementation only supports PCI-Express */
+ if (dev->bus != &pci_bus_type)
+ return -ENODEV;
+ pdev = to_pci_dev(dev);
+ id = dcaid_from_pcidev(pdev);
+
+ if (ioatdca->requester_count == ioatdca->max_requesters)
+ return -ENODEV;
+
+ for (i = 0; i < ioatdca->max_requesters; i++) {
+ if (ioatdca->req_slots[i].pdev == NULL) {
+ /* found an empty slot */
+ ioatdca->requester_count++;
+ ioatdca->req_slots[i].pdev = pdev;
+ ioatdca->req_slots[i].rid = id;
+ global_req_table =
+ readw(ioatdca->dca_base + IOAT_DCA_GREQID_OFFSET);
+ writel(id | IOAT_DCA_GREQID_VALID,
+ ioatdca->iobase + global_req_table + (i * 4));
+ return i;
+ }
+ }
+ /* Error, ioatdma->requester_count is out of whack */
+ return -EFAULT;
+}
+
+static int ioat2_dca_remove_requester(struct dca_provider *dca,
+ struct device *dev)
+{
+ struct ioat_dca_priv *ioatdca = dca_priv(dca);
+ struct pci_dev *pdev;
+ int i;
+ u16 global_req_table;
+
+ /* This implementation only supports PCI-Express */
+ if (dev->bus != &pci_bus_type)
+ return -ENODEV;
+ pdev = to_pci_dev(dev);
+
+ for (i = 0; i < ioatdca->max_requesters; i++) {
+ if (ioatdca->req_slots[i].pdev == pdev) {
+ global_req_table =
+ readw(ioatdca->dca_base + IOAT_DCA_GREQID_OFFSET);
+ writel(0, ioatdca->iobase + global_req_table + (i * 4));
+ ioatdca->req_slots[i].pdev = NULL;
+ ioatdca->req_slots[i].rid = 0;
+ ioatdca->requester_count--;
+ return i;
+ }
+ }
+ return -ENODEV;
+}
+
+static u8 ioat2_dca_get_tag(struct dca_provider *dca,
+ struct device *dev,
+ int cpu)
+{
+ u8 tag;
+
+ tag = ioat_dca_get_tag(dca, dev, cpu);
+ tag = (~tag) & 0x1F;
+ return tag;
+}
+
+static struct dca_ops ioat2_dca_ops = {
+ .add_requester = ioat2_dca_add_requester,
+ .remove_requester = ioat2_dca_remove_requester,
+ .get_tag = ioat2_dca_get_tag,
+ .dev_managed = ioat_dca_dev_managed,
+};
+
+static int ioat2_dca_count_dca_slots(void __iomem *iobase, u16 dca_offset)
+{
+ int slots = 0;
+ u32 req;
+ u16 global_req_table;
+
+ global_req_table = readw(iobase + dca_offset + IOAT_DCA_GREQID_OFFSET);
+ if (global_req_table == 0)
+ return 0;
+ do {
+ req = readl(iobase + global_req_table + (slots * sizeof(u32)));
+ slots++;
+ } while ((req & IOAT_DCA_GREQID_LASTID) == 0);
+
+ return slots;
+}
+
+struct dca_provider * __devinit
+ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase)
+{
+ struct dca_provider *dca;
+ struct ioat_dca_priv *ioatdca;
+ int slots;
+ int i;
+ int err;
+ u32 tag_map;
+ u16 dca_offset;
+ u16 csi_fsb_control;
+ u16 pcie_control;
+ u8 bit;
+
+ if (!system_has_dca_enabled(pdev))
+ return NULL;
+
+ dca_offset = readw(iobase + IOAT_DCAOFFSET_OFFSET);
+ if (dca_offset == 0)
+ return NULL;
+
+ slots = ioat2_dca_count_dca_slots(iobase, dca_offset);
+ if (slots == 0)
+ return NULL;
+
+ dca = alloc_dca_provider(&ioat2_dca_ops,
+ sizeof(*ioatdca)
+ + (sizeof(struct ioat_dca_slot) * slots));
+ if (!dca)
+ return NULL;
+
+ ioatdca = dca_priv(dca);
+ ioatdca->iobase = iobase;
+ ioatdca->dca_base = iobase + dca_offset;
+ ioatdca->max_requesters = slots;
+
+ /* some bios might not know to turn these on */
+ csi_fsb_control = readw(ioatdca->dca_base + IOAT_FSB_CAP_ENABLE_OFFSET);
+ if ((csi_fsb_control & IOAT_FSB_CAP_ENABLE_PREFETCH) == 0) {
+ csi_fsb_control |= IOAT_FSB_CAP_ENABLE_PREFETCH;
+ writew(csi_fsb_control,
+ ioatdca->dca_base + IOAT_FSB_CAP_ENABLE_OFFSET);
+ }
+ pcie_control = readw(ioatdca->dca_base + IOAT_PCI_CAP_ENABLE_OFFSET);
+ if ((pcie_control & IOAT_PCI_CAP_ENABLE_MEMWR) == 0) {
+ pcie_control |= IOAT_PCI_CAP_ENABLE_MEMWR;
+ writew(pcie_control,
+ ioatdca->dca_base + IOAT_PCI_CAP_ENABLE_OFFSET);
+ }
+
+
+ /* TODO version, compatibility and configuration checks */
+
+ /* copy out the APIC to DCA tag map */
+ tag_map = readl(ioatdca->dca_base + IOAT_APICID_TAG_MAP_OFFSET);
+ for (i = 0; i < 5; i++) {
+ bit = (tag_map >> (4 * i)) & 0x0f;
+ if (bit < 8)
+ ioatdca->tag_map[i] = bit | DCA_TAG_MAP_VALID;
+ else
+ ioatdca->tag_map[i] = 0;
+ }
+
+ if (!dca2_tag_map_valid(ioatdca->tag_map)) {
+ dev_err(&pdev->dev, "APICID_TAG_MAP set incorrectly by BIOS, "
+ "disabling DCA\n");
+ free_dca_provider(dca);
+ return NULL;
+ }
+
+ err = register_dca_provider(dca, &pdev->dev);
+ if (err) {
+ free_dca_provider(dca);
+ return NULL;
+ }
+
+ return dca;
+}
+
+static int ioat3_dca_add_requester(struct dca_provider *dca, struct device *dev)
+{
+ struct ioat_dca_priv *ioatdca = dca_priv(dca);
+ struct pci_dev *pdev;
+ int i;
+ u16 id;
+ u16 global_req_table;
+
+ /* This implementation only supports PCI-Express */
+ if (dev->bus != &pci_bus_type)
+ return -ENODEV;
+ pdev = to_pci_dev(dev);
+ id = dcaid_from_pcidev(pdev);
+
+ if (ioatdca->requester_count == ioatdca->max_requesters)
+ return -ENODEV;
+
+ for (i = 0; i < ioatdca->max_requesters; i++) {
+ if (ioatdca->req_slots[i].pdev == NULL) {
+ /* found an empty slot */
+ ioatdca->requester_count++;
+ ioatdca->req_slots[i].pdev = pdev;
+ ioatdca->req_slots[i].rid = id;
+ global_req_table =
+ readw(ioatdca->dca_base + IOAT3_DCA_GREQID_OFFSET);
+ writel(id | IOAT_DCA_GREQID_VALID,
+ ioatdca->iobase + global_req_table + (i * 4));
+ return i;
+ }
+ }
+ /* Error, ioatdma->requester_count is out of whack */
+ return -EFAULT;
+}
+
+static int ioat3_dca_remove_requester(struct dca_provider *dca,
+ struct device *dev)
+{
+ struct ioat_dca_priv *ioatdca = dca_priv(dca);
+ struct pci_dev *pdev;
+ int i;
+ u16 global_req_table;
+
+ /* This implementation only supports PCI-Express */
+ if (dev->bus != &pci_bus_type)
+ return -ENODEV;
+ pdev = to_pci_dev(dev);
+
+ for (i = 0; i < ioatdca->max_requesters; i++) {
+ if (ioatdca->req_slots[i].pdev == pdev) {
+ global_req_table =
+ readw(ioatdca->dca_base + IOAT3_DCA_GREQID_OFFSET);
+ writel(0, ioatdca->iobase + global_req_table + (i * 4));
+ ioatdca->req_slots[i].pdev = NULL;
+ ioatdca->req_slots[i].rid = 0;
+ ioatdca->requester_count--;
+ return i;
+ }
+ }
+ return -ENODEV;
+}
+
+static u8 ioat3_dca_get_tag(struct dca_provider *dca,
+ struct device *dev,
+ int cpu)
+{
+ u8 tag;
+
+ struct ioat_dca_priv *ioatdca = dca_priv(dca);
+ int i, apic_id, bit, value;
+ u8 entry;
+
+ tag = 0;
+ apic_id = cpu_physical_id(cpu);
+
+ for (i = 0; i < IOAT_TAG_MAP_LEN; i++) {
+ entry = ioatdca->tag_map[i];
+ if (entry & DCA3_TAG_MAP_BIT_TO_SEL) {
+ bit = entry &
+ ~(DCA3_TAG_MAP_BIT_TO_SEL | DCA3_TAG_MAP_BIT_TO_INV);
+ value = (apic_id & (1 << bit)) ? 1 : 0;
+ } else if (entry & DCA3_TAG_MAP_BIT_TO_INV) {
+ bit = entry & ~DCA3_TAG_MAP_BIT_TO_INV;
+ value = (apic_id & (1 << bit)) ? 0 : 1;
+ } else {
+ value = (entry & DCA3_TAG_MAP_LITERAL_VAL) ? 1 : 0;
+ }
+ tag |= (value << i);
+ }
+
+ return tag;
+}
+
+static struct dca_ops ioat3_dca_ops = {
+ .add_requester = ioat3_dca_add_requester,
+ .remove_requester = ioat3_dca_remove_requester,
+ .get_tag = ioat3_dca_get_tag,
+ .dev_managed = ioat_dca_dev_managed,
+};
+
+static int ioat3_dca_count_dca_slots(void *iobase, u16 dca_offset)
+{
+ int slots = 0;
+ u32 req;
+ u16 global_req_table;
+
+ global_req_table = readw(iobase + dca_offset + IOAT3_DCA_GREQID_OFFSET);
+ if (global_req_table == 0)
+ return 0;
+
+ do {
+ req = readl(iobase + global_req_table + (slots * sizeof(u32)));
+ slots++;
+ } while ((req & IOAT_DCA_GREQID_LASTID) == 0);
+
+ return slots;
+}
+
+struct dca_provider * __devinit
+ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase)
+{
+ struct dca_provider *dca;
+ struct ioat_dca_priv *ioatdca;
+ int slots;
+ int i;
+ int err;
+ u16 dca_offset;
+ u16 csi_fsb_control;
+ u16 pcie_control;
+ u8 bit;
+
+ union {
+ u64 full;
+ struct {
+ u32 low;
+ u32 high;
+ };
+ } tag_map;
+
+ if (!system_has_dca_enabled(pdev))
+ return NULL;
+
+ dca_offset = readw(iobase + IOAT_DCAOFFSET_OFFSET);
+ if (dca_offset == 0)
+ return NULL;
+
+ slots = ioat3_dca_count_dca_slots(iobase, dca_offset);
+ if (slots == 0)
+ return NULL;
+
+ dca = alloc_dca_provider(&ioat3_dca_ops,
+ sizeof(*ioatdca)
+ + (sizeof(struct ioat_dca_slot) * slots));
+ if (!dca)
+ return NULL;
+
+ ioatdca = dca_priv(dca);
+ ioatdca->iobase = iobase;
+ ioatdca->dca_base = iobase + dca_offset;
+ ioatdca->max_requesters = slots;
+
+ /* some bios might not know to turn these on */
+ csi_fsb_control = readw(ioatdca->dca_base + IOAT3_CSI_CONTROL_OFFSET);
+ if ((csi_fsb_control & IOAT3_CSI_CONTROL_PREFETCH) == 0) {
+ csi_fsb_control |= IOAT3_CSI_CONTROL_PREFETCH;
+ writew(csi_fsb_control,
+ ioatdca->dca_base + IOAT3_CSI_CONTROL_OFFSET);
+ }
+ pcie_control = readw(ioatdca->dca_base + IOAT3_PCI_CONTROL_OFFSET);
+ if ((pcie_control & IOAT3_PCI_CONTROL_MEMWR) == 0) {
+ pcie_control |= IOAT3_PCI_CONTROL_MEMWR;
+ writew(pcie_control,
+ ioatdca->dca_base + IOAT3_PCI_CONTROL_OFFSET);
+ }
+
+
+ /* TODO version, compatibility and configuration checks */
+
+ /* copy out the APIC to DCA tag map */
+ tag_map.low =
+ readl(ioatdca->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_LOW);
+ tag_map.high =
+ readl(ioatdca->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_HIGH);
+ for (i = 0; i < 8; i++) {
+ bit = tag_map.full >> (8 * i);
+ ioatdca->tag_map[i] = bit & DCA_TAG_MAP_MASK;
+ }
+
+ err = register_dca_provider(dca, &pdev->dev);
+ if (err) {
+ free_dca_provider(dca);
+ return NULL;
+ }
+
+ return dca;
+}
diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c
new file mode 100644
index 00000000000..dcc4ab78b32
--- /dev/null
+++ b/drivers/dma/ioat/dma.c
@@ -0,0 +1,1238 @@
+/*
+ * Intel I/OAT DMA Linux driver
+ * Copyright(c) 2004 - 2009 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+/*
+ * This driver supports an Intel I/OAT DMA engine, which does asynchronous
+ * copy operations.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/workqueue.h>
+#include <linux/i7300_idle.h>
+#include "dma.h"
+#include "registers.h"
+#include "hw.h"
+
+int ioat_pending_level = 4;
+module_param(ioat_pending_level, int, 0644);
+MODULE_PARM_DESC(ioat_pending_level,
+ "high-water mark for pushing ioat descriptors (default: 4)");
+
+/* internal functions */
+static void ioat1_cleanup(struct ioat_dma_chan *ioat);
+static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat);
+
+/**
+ * ioat_dma_do_interrupt - handler used for single vector interrupt mode
+ * @irq: interrupt id
+ * @data: interrupt data
+ */
+static irqreturn_t ioat_dma_do_interrupt(int irq, void *data)
+{
+ struct ioatdma_device *instance = data;
+ struct ioat_chan_common *chan;
+ unsigned long attnstatus;
+ int bit;
+ u8 intrctrl;
+
+ intrctrl = readb(instance->reg_base + IOAT_INTRCTRL_OFFSET);
+
+ if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN))
+ return IRQ_NONE;
+
+ if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) {
+ writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
+ return IRQ_NONE;
+ }
+
+ attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET);
+ for_each_bit(bit, &attnstatus, BITS_PER_LONG) {
+ chan = ioat_chan_by_index(instance, bit);
+ tasklet_schedule(&chan->cleanup_task);
+ }
+
+ writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
+ return IRQ_HANDLED;
+}
+
+/**
+ * ioat_dma_do_interrupt_msix - handler used for vector-per-channel interrupt mode
+ * @irq: interrupt id
+ * @data: interrupt data
+ */
+static irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data)
+{
+ struct ioat_chan_common *chan = data;
+
+ tasklet_schedule(&chan->cleanup_task);
+
+ return IRQ_HANDLED;
+}
+
+static void ioat1_cleanup_tasklet(unsigned long data);
+
+/* common channel initialization */
+void ioat_init_channel(struct ioatdma_device *device,
+ struct ioat_chan_common *chan, int idx,
+ void (*timer_fn)(unsigned long),
+ void (*tasklet)(unsigned long),
+ unsigned long ioat)
+{
+ struct dma_device *dma = &device->common;
+
+ chan->device = device;
+ chan->reg_base = device->reg_base + (0x80 * (idx + 1));
+ spin_lock_init(&chan->cleanup_lock);
+ chan->common.device = dma;
+ list_add_tail(&chan->common.device_node, &dma->channels);
+ device->idx[idx] = chan;
+ init_timer(&chan->timer);
+ chan->timer.function = timer_fn;
+ chan->timer.data = ioat;
+ tasklet_init(&chan->cleanup_task, tasklet, ioat);
+ tasklet_disable(&chan->cleanup_task);
+}
+
+static void ioat1_timer_event(unsigned long data);
+
+/**
+ * ioat1_dma_enumerate_channels - find and initialize the device's channels
+ * @device: the device to be enumerated
+ */
+static int ioat1_enumerate_channels(struct ioatdma_device *device)
+{
+ u8 xfercap_scale;
+ u32 xfercap;
+ int i;
+ struct ioat_dma_chan *ioat;
+ struct device *dev = &device->pdev->dev;
+ struct dma_device *dma = &device->common;
+
+ INIT_LIST_HEAD(&dma->channels);
+ dma->chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET);
+ dma->chancnt &= 0x1f; /* bits [4:0] valid */
+ if (dma->chancnt > ARRAY_SIZE(device->idx)) {
+ dev_warn(dev, "(%d) exceeds max supported channels (%zu)\n",
+ dma->chancnt, ARRAY_SIZE(device->idx));
+ dma->chancnt = ARRAY_SIZE(device->idx);
+ }
+ xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET);
+ xfercap_scale &= 0x1f; /* bits [4:0] valid */
+ xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale));
+ dev_dbg(dev, "%s: xfercap = %d\n", __func__, xfercap);
+
+#ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL
+ if (i7300_idle_platform_probe(NULL, NULL, 1) == 0)
+ dma->chancnt--;
+#endif
+ for (i = 0; i < dma->chancnt; i++) {
+ ioat = devm_kzalloc(dev, sizeof(*ioat), GFP_KERNEL);
+ if (!ioat)
+ break;
+
+ ioat_init_channel(device, &ioat->base, i,
+ ioat1_timer_event,
+ ioat1_cleanup_tasklet,
+ (unsigned long) ioat);
+ ioat->xfercap = xfercap;
+ spin_lock_init(&ioat->desc_lock);
+ INIT_LIST_HEAD(&ioat->free_desc);
+ INIT_LIST_HEAD(&ioat->used_desc);
+ }
+ dma->chancnt = i;
+ return i;
+}
+
+/**
+ * ioat_dma_memcpy_issue_pending - push potentially unrecognized appended
+ * descriptors to hw
+ * @chan: DMA channel handle
+ */
+static inline void
+__ioat1_dma_memcpy_issue_pending(struct ioat_dma_chan *ioat)
+{
+ void __iomem *reg_base = ioat->base.reg_base;
+
+ dev_dbg(to_dev(&ioat->base), "%s: pending: %d\n",
+ __func__, ioat->pending);
+ ioat->pending = 0;
+ writeb(IOAT_CHANCMD_APPEND, reg_base + IOAT1_CHANCMD_OFFSET);
+}
+
+static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan)
+{
+ struct ioat_dma_chan *ioat = to_ioat_chan(chan);
+
+ if (ioat->pending > 0) {
+ spin_lock_bh(&ioat->desc_lock);
+ __ioat1_dma_memcpy_issue_pending(ioat);
+ spin_unlock_bh(&ioat->desc_lock);
+ }
+}
+
+/**
+ * ioat1_reset_channel - restart a channel
+ * @ioat: IOAT DMA channel handle
+ */
+static void ioat1_reset_channel(struct ioat_dma_chan *ioat)
+{
+ struct ioat_chan_common *chan = &ioat->base;
+ void __iomem *reg_base = chan->reg_base;
+ u32 chansts, chanerr;
+
+ dev_warn(to_dev(chan), "reset\n");
+ chanerr = readl(reg_base + IOAT_CHANERR_OFFSET);
+ chansts = *chan->completion & IOAT_CHANSTS_STATUS;
+ if (chanerr) {
+ dev_err(to_dev(chan),
+ "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n",
+ chan_num(chan), chansts, chanerr);
+ writel(chanerr, reg_base + IOAT_CHANERR_OFFSET);
+ }
+
+ /*
+ * whack it upside the head with a reset
+ * and wait for things to settle out.
+ * force the pending count to a really big negative
+ * to make sure no one forces an issue_pending
+ * while we're waiting.
+ */
+
+ ioat->pending = INT_MIN;
+ writeb(IOAT_CHANCMD_RESET,
+ reg_base + IOAT_CHANCMD_OFFSET(chan->device->version));
+ set_bit(IOAT_RESET_PENDING, &chan->state);
+ mod_timer(&chan->timer, jiffies + RESET_DELAY);
+}
+
+static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct dma_chan *c = tx->chan;
+ struct ioat_dma_chan *ioat = to_ioat_chan(c);
+ struct ioat_desc_sw *desc = tx_to_ioat_desc(tx);
+ struct ioat_chan_common *chan = &ioat->base;
+ struct ioat_desc_sw *first;
+ struct ioat_desc_sw *chain_tail;
+ dma_cookie_t cookie;
+
+ spin_lock_bh(&ioat->desc_lock);
+ /* cookie incr and addition to used_list must be atomic */
+ cookie = c->cookie;
+ cookie++;
+ if (cookie < 0)
+ cookie = 1;
+ c->cookie = cookie;
+ tx->cookie = cookie;
+ dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie);
+
+ /* write address into NextDescriptor field of last desc in chain */
+ first = to_ioat_desc(desc->tx_list.next);
+ chain_tail = to_ioat_desc(ioat->used_desc.prev);
+ /* make descriptor updates globally visible before chaining */
+ wmb();
+ chain_tail->hw->next = first->txd.phys;
+ list_splice_tail_init(&desc->tx_list, &ioat->used_desc);
+ dump_desc_dbg(ioat, chain_tail);
+ dump_desc_dbg(ioat, first);
+
+ if (!test_and_set_bit(IOAT_COMPLETION_PENDING, &chan->state))
+ mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+
+ ioat->active += desc->hw->tx_cnt;
+ ioat->pending += desc->hw->tx_cnt;
+ if (ioat->pending >= ioat_pending_level)
+ __ioat1_dma_memcpy_issue_pending(ioat);
+ spin_unlock_bh(&ioat->desc_lock);
+
+ return cookie;
+}
+
+/**
+ * ioat_dma_alloc_descriptor - allocate and return a sw and hw descriptor pair
+ * @ioat: the channel supplying the memory pool for the descriptors
+ * @flags: allocation flags
+ */
+static struct ioat_desc_sw *
+ioat_dma_alloc_descriptor(struct ioat_dma_chan *ioat, gfp_t flags)
+{
+ struct ioat_dma_descriptor *desc;
+ struct ioat_desc_sw *desc_sw;
+ struct ioatdma_device *ioatdma_device;
+ dma_addr_t phys;
+
+ ioatdma_device = ioat->base.device;
+ desc = pci_pool_alloc(ioatdma_device->dma_pool, flags, &phys);
+ if (unlikely(!desc))
+ return NULL;
+
+ desc_sw = kzalloc(sizeof(*desc_sw), flags);
+ if (unlikely(!desc_sw)) {
+ pci_pool_free(ioatdma_device->dma_pool, desc, phys);
+ return NULL;
+ }
+
+ memset(desc, 0, sizeof(*desc));
+
+ INIT_LIST_HEAD(&desc_sw->tx_list);
+ dma_async_tx_descriptor_init(&desc_sw->txd, &ioat->base.common);
+ desc_sw->txd.tx_submit = ioat1_tx_submit;
+ desc_sw->hw = desc;
+ desc_sw->txd.phys = phys;
+ set_desc_id(desc_sw, -1);
+
+ return desc_sw;
+}
+
+static int ioat_initial_desc_count = 256;
+module_param(ioat_initial_desc_count, int, 0644);
+MODULE_PARM_DESC(ioat_initial_desc_count,
+ "ioat1: initial descriptors per channel (default: 256)");
+/**
+ * ioat1_dma_alloc_chan_resources - returns the number of allocated descriptors
+ * @chan: the channel to be filled out
+ */
+static int ioat1_dma_alloc_chan_resources(struct dma_chan *c)
+{
+ struct ioat_dma_chan *ioat = to_ioat_chan(c);
+ struct ioat_chan_common *chan = &ioat->base;
+ struct ioat_desc_sw *desc;
+ u32 chanerr;
+ int i;
+ LIST_HEAD(tmp_list);
+
+ /* have we already been set up? */
+ if (!list_empty(&ioat->free_desc))
+ return ioat->desccount;
+
+ /* Setup register to interrupt and write completion status on error */
+ writew(IOAT_CHANCTRL_RUN, chan->reg_base + IOAT_CHANCTRL_OFFSET);
+
+ chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
+ if (chanerr) {
+ dev_err(to_dev(chan), "CHANERR = %x, clearing\n", chanerr);
+ writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET);
+ }
+
+ /* Allocate descriptors */
+ for (i = 0; i < ioat_initial_desc_count; i++) {
+ desc = ioat_dma_alloc_descriptor(ioat, GFP_KERNEL);
+ if (!desc) {
+ dev_err(to_dev(chan), "Only %d initial descriptors\n", i);
+ break;
+ }
+ set_desc_id(desc, i);
+ list_add_tail(&desc->node, &tmp_list);
+ }
+ spin_lock_bh(&ioat->desc_lock);
+ ioat->desccount = i;
+ list_splice(&tmp_list, &ioat->free_desc);
+ spin_unlock_bh(&ioat->desc_lock);
+
+ /* allocate a completion writeback area */
+ /* doing 2 32bit writes to mmio since 1 64b write doesn't work */
+ chan->completion = pci_pool_alloc(chan->device->completion_pool,
+ GFP_KERNEL, &chan->completion_dma);
+ memset(chan->completion, 0, sizeof(*chan->completion));
+ writel(((u64) chan->completion_dma) & 0x00000000FFFFFFFF,
+ chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
+ writel(((u64) chan->completion_dma) >> 32,
+ chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);
+
+ tasklet_enable(&chan->cleanup_task);
+ ioat1_dma_start_null_desc(ioat); /* give chain to dma device */
+ dev_dbg(to_dev(chan), "%s: allocated %d descriptors\n",
+ __func__, ioat->desccount);
+ return ioat->desccount;
+}
+
+/**
+ * ioat1_dma_free_chan_resources - release all the descriptors
+ * @chan: the channel to be cleaned
+ */
+static void ioat1_dma_free_chan_resources(struct dma_chan *c)
+{
+ struct ioat_dma_chan *ioat = to_ioat_chan(c);
+ struct ioat_chan_common *chan = &ioat->base;
+ struct ioatdma_device *ioatdma_device = chan->device;
+ struct ioat_desc_sw *desc, *_desc;
+ int in_use_descs = 0;
+
+ /* Before freeing channel resources first check
+ * if they have been previously allocated for this channel.
+ */
+ if (ioat->desccount == 0)
+ return;
+
+ tasklet_disable(&chan->cleanup_task);
+ del_timer_sync(&chan->timer);
+ ioat1_cleanup(ioat);
+
+ /* Delay 100ms after reset to allow internal DMA logic to quiesce
+ * before removing DMA descriptor resources.
+ */
+ writeb(IOAT_CHANCMD_RESET,
+ chan->reg_base + IOAT_CHANCMD_OFFSET(chan->device->version));
+ mdelay(100);
+
+ spin_lock_bh(&ioat->desc_lock);
+ list_for_each_entry_safe(desc, _desc, &ioat->used_desc, node) {
+ dev_dbg(to_dev(chan), "%s: freeing %d from used list\n",
+ __func__, desc_id(desc));
+ dump_desc_dbg(ioat, desc);
+ in_use_descs++;
+ list_del(&desc->node);
+ pci_pool_free(ioatdma_device->dma_pool, desc->hw,
+ desc->txd.phys);
+ kfree(desc);
+ }
+ list_for_each_entry_safe(desc, _desc,
+ &ioat->free_desc, node) {
+ list_del(&desc->node);
+ pci_pool_free(ioatdma_device->dma_pool, desc->hw,
+ desc->txd.phys);
+ kfree(desc);
+ }
+ spin_unlock_bh(&ioat->desc_lock);
+
+ pci_pool_free(ioatdma_device->completion_pool,
+ chan->completion,
+ chan->completion_dma);
+
+ /* one is ok since we left it on there on purpose */
+ if (in_use_descs > 1)
+ dev_err(to_dev(chan), "Freeing %d in use descriptors!\n",
+ in_use_descs - 1);
+
+ chan->last_completion = 0;
+ chan->completion_dma = 0;
+ ioat->pending = 0;
+ ioat->desccount = 0;
+}
+
+/**
+ * ioat1_dma_get_next_descriptor - return the next available descriptor
+ * @ioat: IOAT DMA channel handle
+ *
+ * Gets the next descriptor from the chain, and must be called with the
+ * channel's desc_lock held. Allocates more descriptors if the channel
+ * has run out.
+ */
+static struct ioat_desc_sw *
+ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat)
+{
+ struct ioat_desc_sw *new;
+
+ if (!list_empty(&ioat->free_desc)) {
+ new = to_ioat_desc(ioat->free_desc.next);
+ list_del(&new->node);
+ } else {
+ /* try to get another desc */
+ new = ioat_dma_alloc_descriptor(ioat, GFP_ATOMIC);
+ if (!new) {
+ dev_err(to_dev(&ioat->base), "alloc failed\n");
+ return NULL;
+ }
+ }
+ dev_dbg(to_dev(&ioat->base), "%s: allocated: %d\n",
+ __func__, desc_id(new));
+ prefetch(new->hw);
+ return new;
+}
+
+static struct dma_async_tx_descriptor *
+ioat1_dma_prep_memcpy(struct dma_chan *c, dma_addr_t dma_dest,
+ dma_addr_t dma_src, size_t len, unsigned long flags)
+{
+ struct ioat_dma_chan *ioat = to_ioat_chan(c);
+ struct ioat_desc_sw *desc;
+ size_t copy;
+ LIST_HEAD(chain);
+ dma_addr_t src = dma_src;
+ dma_addr_t dest = dma_dest;
+ size_t total_len = len;
+ struct ioat_dma_descriptor *hw = NULL;
+ int tx_cnt = 0;
+
+ spin_lock_bh(&ioat->desc_lock);
+ desc = ioat1_dma_get_next_descriptor(ioat);
+ do {
+ if (!desc)
+ break;
+
+ tx_cnt++;
+ copy = min_t(size_t, len, ioat->xfercap);
+
+ hw = desc->hw;
+ hw->size = copy;
+ hw->ctl = 0;
+ hw->src_addr = src;
+ hw->dst_addr = dest;
+
+ list_add_tail(&desc->node, &chain);
+
+ len -= copy;
+ dest += copy;
+ src += copy;
+ if (len) {
+ struct ioat_desc_sw *next;
+
+ async_tx_ack(&desc->txd);
+ next = ioat1_dma_get_next_descriptor(ioat);
+ hw->next = next ? next->txd.phys : 0;
+ dump_desc_dbg(ioat, desc);
+ desc = next;
+ } else
+ hw->next = 0;
+ } while (len);
+
+ if (!desc) {
+ struct ioat_chan_common *chan = &ioat->base;
+
+ dev_err(to_dev(chan),
+ "chan%d - get_next_desc failed\n", chan_num(chan));
+ list_splice(&chain, &ioat->free_desc);
+ spin_unlock_bh(&ioat->desc_lock);
+ return NULL;
+ }
+ spin_unlock_bh(&ioat->desc_lock);
+
+ desc->txd.flags = flags;
+ desc->len = total_len;
+ list_splice(&chain, &desc->tx_list);
+ hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+ hw->ctl_f.compl_write = 1;
+ hw->tx_cnt = tx_cnt;
+ dump_desc_dbg(ioat, desc);
+
+ return &desc->txd;
+}
+
+static void ioat1_cleanup_tasklet(unsigned long data)
+{
+ struct ioat_dma_chan *chan = (void *)data;
+
+ ioat1_cleanup(chan);
+ writew(IOAT_CHANCTRL_RUN, chan->base.reg_base + IOAT_CHANCTRL_OFFSET);
+}
+
+void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags,
+ size_t len, struct ioat_dma_descriptor *hw)
+{
+ struct pci_dev *pdev = chan->device->pdev;
+ size_t offset = len - hw->size;
+
+ if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
+ ioat_unmap(pdev, hw->dst_addr - offset, len,
+ PCI_DMA_FROMDEVICE, flags, 1);
+
+ if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP))
+ ioat_unmap(pdev, hw->src_addr - offset, len,
+ PCI_DMA_TODEVICE, flags, 0);
+}
+
+unsigned long ioat_get_current_completion(struct ioat_chan_common *chan)
+{
+ unsigned long phys_complete;
+ u64 completion;
+
+ completion = *chan->completion;
+ phys_complete = ioat_chansts_to_addr(completion);
+
+ dev_dbg(to_dev(chan), "%s: phys_complete: %#llx\n", __func__,
+ (unsigned long long) phys_complete);
+
+ if (is_ioat_halted(completion)) {
+ u32 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
+ dev_err(to_dev(chan), "Channel halted, chanerr = %x\n",
+ chanerr);
+
+ /* TODO do something to salvage the situation */
+ }
+
+ return phys_complete;
+}
+
+bool ioat_cleanup_preamble(struct ioat_chan_common *chan,
+ unsigned long *phys_complete)
+{
+ *phys_complete = ioat_get_current_completion(chan);
+ if (*phys_complete == chan->last_completion)
+ return false;
+ clear_bit(IOAT_COMPLETION_ACK, &chan->state);
+ mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+
+ return true;
+}
+
+static void __cleanup(struct ioat_dma_chan *ioat, unsigned long phys_complete)
+{
+ struct ioat_chan_common *chan = &ioat->base;
+ struct list_head *_desc, *n;
+ struct dma_async_tx_descriptor *tx;
+
+ dev_dbg(to_dev(chan), "%s: phys_complete: %lx\n",
+ __func__, phys_complete);
+ list_for_each_safe(_desc, n, &ioat->used_desc) {
+ struct ioat_desc_sw *desc;
+
+ prefetch(n);
+ desc = list_entry(_desc, typeof(*desc), node);
+ tx = &desc->txd;
+ /*
+ * Incoming DMA requests may use multiple descriptors,
+ * due to exceeding xfercap, perhaps. If so, only the
+ * last one will have a cookie, and require unmapping.
+ */
+ dump_desc_dbg(ioat, desc);
+ if (tx->cookie) {
+ chan->completed_cookie = tx->cookie;
+ tx->cookie = 0;
+ ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw);
+ ioat->active -= desc->hw->tx_cnt;
+ if (tx->callback) {
+ tx->callback(tx->callback_param);
+ tx->callback = NULL;
+ }
+ }
+
+ if (tx->phys != phys_complete) {
+ /*
+ * a completed entry, but not the last, so clean
+ * up if the client is done with the descriptor
+ */
+ if (async_tx_test_ack(tx))
+ list_move_tail(&desc->node, &ioat->free_desc);
+ } else {
+ /*
+ * last used desc. Do not remove, so we can
+ * append from it.
+ */
+
+ /* if nothing else is pending, cancel the
+ * completion timeout
+ */
+ if (n == &ioat->used_desc) {
+ dev_dbg(to_dev(chan),
+ "%s cancel completion timeout\n",
+ __func__);
+ clear_bit(IOAT_COMPLETION_PENDING, &chan->state);
+ }
+
+ /* TODO check status bits? */
+ break;
+ }
+ }
+
+ chan->last_completion = phys_complete;
+}
+
+/**
+ * ioat1_cleanup - cleanup up finished descriptors
+ * @chan: ioat channel to be cleaned up
+ *
+ * To prevent lock contention we defer cleanup when the locks are
+ * contended with a terminal timeout that forces cleanup and catches
+ * completion notification errors.
+ */
+static void ioat1_cleanup(struct ioat_dma_chan *ioat)
+{
+ struct ioat_chan_common *chan = &ioat->base;
+ unsigned long phys_complete;
+
+ prefetch(chan->completion);
+
+ if (!spin_trylock_bh(&chan->cleanup_lock))
+ return;
+
+ if (!ioat_cleanup_preamble(chan, &phys_complete)) {
+ spin_unlock_bh(&chan->cleanup_lock);
+ return;
+ }
+
+ if (!spin_trylock_bh(&ioat->desc_lock)) {
+ spin_unlock_bh(&chan->cleanup_lock);
+ return;
+ }
+
+ __cleanup(ioat, phys_complete);
+
+ spin_unlock_bh(&ioat->desc_lock);
+ spin_unlock_bh(&chan->cleanup_lock);
+}
+
+static void ioat1_timer_event(unsigned long data)
+{
+ struct ioat_dma_chan *ioat = (void *) data;
+ struct ioat_chan_common *chan = &ioat->base;
+
+ dev_dbg(to_dev(chan), "%s: state: %lx\n", __func__, chan->state);
+
+ spin_lock_bh(&chan->cleanup_lock);
+ if (test_and_clear_bit(IOAT_RESET_PENDING, &chan->state)) {
+ struct ioat_desc_sw *desc;
+
+ spin_lock_bh(&ioat->desc_lock);
+
+ /* restart active descriptors */
+ desc = to_ioat_desc(ioat->used_desc.prev);
+ ioat_set_chainaddr(ioat, desc->txd.phys);
+ ioat_start(chan);
+
+ ioat->pending = 0;
+ set_bit(IOAT_COMPLETION_PENDING, &chan->state);
+ mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+ spin_unlock_bh(&ioat->desc_lock);
+ } else if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) {
+ unsigned long phys_complete;
+
+ spin_lock_bh(&ioat->desc_lock);
+ /* if we haven't made progress and we have already
+ * acknowledged a pending completion once, then be more
+ * forceful with a restart
+ */
+ if (ioat_cleanup_preamble(chan, &phys_complete))
+ __cleanup(ioat, phys_complete);
+ else if (test_bit(IOAT_COMPLETION_ACK, &chan->state))
+ ioat1_reset_channel(ioat);
+ else {
+ u64 status = ioat_chansts(chan);
+
+ /* manually update the last completion address */
+ if (ioat_chansts_to_addr(status) != 0)
+ *chan->completion = status;
+
+ set_bit(IOAT_COMPLETION_ACK, &chan->state);
+ mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+ }
+ spin_unlock_bh(&ioat->desc_lock);
+ }
+ spin_unlock_bh(&chan->cleanup_lock);
+}
+
+static enum dma_status
+ioat1_dma_is_complete(struct dma_chan *c, dma_cookie_t cookie,
+ dma_cookie_t *done, dma_cookie_t *used)
+{
+ struct ioat_dma_chan *ioat = to_ioat_chan(c);
+
+ if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS)
+ return DMA_SUCCESS;
+
+ ioat1_cleanup(ioat);
+
+ return ioat_is_complete(c, cookie, done, used);
+}
+
+static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat)
+{
+ struct ioat_chan_common *chan = &ioat->base;
+ struct ioat_desc_sw *desc;
+ struct ioat_dma_descriptor *hw;
+
+ spin_lock_bh(&ioat->desc_lock);
+
+ desc = ioat1_dma_get_next_descriptor(ioat);
+
+ if (!desc) {
+ dev_err(to_dev(chan),
+ "Unable to start null desc - get next desc failed\n");
+ spin_unlock_bh(&ioat->desc_lock);
+ return;
+ }
+
+ hw = desc->hw;
+ hw->ctl = 0;
+ hw->ctl_f.null = 1;
+ hw->ctl_f.int_en = 1;
+ hw->ctl_f.compl_write = 1;
+ /* set size to non-zero value (channel returns error when size is 0) */
+ hw->size = NULL_DESC_BUFFER_SIZE;
+ hw->src_addr = 0;
+ hw->dst_addr = 0;
+ async_tx_ack(&desc->txd);
+ hw->next = 0;
+ list_add_tail(&desc->node, &ioat->used_desc);
+ dump_desc_dbg(ioat, desc);
+
+ ioat_set_chainaddr(ioat, desc->txd.phys);
+ ioat_start(chan);
+ spin_unlock_bh(&ioat->desc_lock);
+}
+
+/*
+ * Perform a IOAT transaction to verify the HW works.
+ */
+#define IOAT_TEST_SIZE 2000
+
+static void __devinit ioat_dma_test_callback(void *dma_async_param)
+{
+ struct completion *cmp = dma_async_param;
+
+ complete(cmp);
+}
+
+/**
+ * ioat_dma_self_test - Perform a IOAT transaction to verify the HW works.
+ * @device: device to be tested
+ */
+int __devinit ioat_dma_self_test(struct ioatdma_device *device)
+{
+ int i;
+ u8 *src;
+ u8 *dest;
+ struct dma_device *dma = &device->common;
+ struct device *dev = &device->pdev->dev;
+ struct dma_chan *dma_chan;
+ struct dma_async_tx_descriptor *tx;
+ dma_addr_t dma_dest, dma_src;
+ dma_cookie_t cookie;
+ int err = 0;
+ struct completion cmp;
+ unsigned long tmo;
+ unsigned long flags;
+
+ src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
+ if (!src)
+ return -ENOMEM;
+ dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
+ if (!dest) {
+ kfree(src);
+ return -ENOMEM;
+ }
+
+ /* Fill in src buffer */
+ for (i = 0; i < IOAT_TEST_SIZE; i++)
+ src[i] = (u8)i;
+
+ /* Start copy, using first DMA channel */
+ dma_chan = container_of(dma->channels.next, struct dma_chan,
+ device_node);
+ if (dma->device_alloc_chan_resources(dma_chan) < 1) {
+ dev_err(dev, "selftest cannot allocate chan resource\n");
+ err = -ENODEV;
+ goto out;
+ }
+
+ dma_src = dma_map_single(dev, src, IOAT_TEST_SIZE, DMA_TO_DEVICE);
+ dma_dest = dma_map_single(dev, dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE);
+ flags = DMA_COMPL_SRC_UNMAP_SINGLE | DMA_COMPL_DEST_UNMAP_SINGLE |
+ DMA_PREP_INTERRUPT;
+ tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src,
+ IOAT_TEST_SIZE, flags);
+ if (!tx) {
+ dev_err(dev, "Self-test prep failed, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ async_tx_ack(tx);
+ init_completion(&cmp);
+ tx->callback = ioat_dma_test_callback;
+ tx->callback_param = &cmp;
+ cookie = tx->tx_submit(tx);
+ if (cookie < 0) {
+ dev_err(dev, "Self-test setup failed, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+ dma->device_issue_pending(dma_chan);
+
+ tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
+
+ if (tmo == 0 ||
+ dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL)
+ != DMA_SUCCESS) {
+ dev_err(dev, "Self-test copy timed out, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+ if (memcmp(src, dest, IOAT_TEST_SIZE)) {
+ dev_err(dev, "Self-test copy failed compare, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+free_resources:
+ dma->device_free_chan_resources(dma_chan);
+out:
+ kfree(src);
+ kfree(dest);
+ return err;
+}
+
+static char ioat_interrupt_style[32] = "msix";
+module_param_string(ioat_interrupt_style, ioat_interrupt_style,
+ sizeof(ioat_interrupt_style), 0644);
+MODULE_PARM_DESC(ioat_interrupt_style,
+ "set ioat interrupt style: msix (default), "
+ "msix-single-vector, msi, intx)");
+
+/**
+ * ioat_dma_setup_interrupts - setup interrupt handler
+ * @device: ioat device
+ */
+static int ioat_dma_setup_interrupts(struct ioatdma_device *device)
+{
+ struct ioat_chan_common *chan;
+ struct pci_dev *pdev = device->pdev;
+ struct device *dev = &pdev->dev;
+ struct msix_entry *msix;
+ int i, j, msixcnt;
+ int err = -EINVAL;
+ u8 intrctrl = 0;
+
+ if (!strcmp(ioat_interrupt_style, "msix"))
+ goto msix;
+ if (!strcmp(ioat_interrupt_style, "msix-single-vector"))
+ goto msix_single_vector;
+ if (!strcmp(ioat_interrupt_style, "msi"))
+ goto msi;
+ if (!strcmp(ioat_interrupt_style, "intx"))
+ goto intx;
+ dev_err(dev, "invalid ioat_interrupt_style %s\n", ioat_interrupt_style);
+ goto err_no_irq;
+
+msix:
+ /* The number of MSI-X vectors should equal the number of channels */
+ msixcnt = device->common.chancnt;
+ for (i = 0; i < msixcnt; i++)
+ device->msix_entries[i].entry = i;
+
+ err = pci_enable_msix(pdev, device->msix_entries, msixcnt);
+ if (err < 0)
+ goto msi;
+ if (err > 0)
+ goto msix_single_vector;
+
+ for (i = 0; i < msixcnt; i++) {
+ msix = &device->msix_entries[i];
+ chan = ioat_chan_by_index(device, i);
+ err = devm_request_irq(dev, msix->vector,
+ ioat_dma_do_interrupt_msix, 0,
+ "ioat-msix", chan);
+ if (err) {
+ for (j = 0; j < i; j++) {
+ msix = &device->msix_entries[j];
+ chan = ioat_chan_by_index(device, j);
+ devm_free_irq(dev, msix->vector, chan);
+ }
+ goto msix_single_vector;
+ }
+ }
+ intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL;
+ goto done;
+
+msix_single_vector:
+ msix = &device->msix_entries[0];
+ msix->entry = 0;
+ err = pci_enable_msix(pdev, device->msix_entries, 1);
+ if (err)
+ goto msi;
+
+ err = devm_request_irq(dev, msix->vector, ioat_dma_do_interrupt, 0,
+ "ioat-msix", device);
+ if (err) {
+ pci_disable_msix(pdev);
+ goto msi;
+ }
+ goto done;
+
+msi:
+ err = pci_enable_msi(pdev);
+ if (err)
+ goto intx;
+
+ err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt, 0,
+ "ioat-msi", device);
+ if (err) {
+ pci_disable_msi(pdev);
+ goto intx;
+ }
+ goto done;
+
+intx:
+ err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt,
+ IRQF_SHARED, "ioat-intx", device);
+ if (err)
+ goto err_no_irq;
+
+done:
+ if (device->intr_quirk)
+ device->intr_quirk(device);
+ intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN;
+ writeb(intrctrl, device->reg_base + IOAT_INTRCTRL_OFFSET);
+ return 0;
+
+err_no_irq:
+ /* Disable all interrupt generation */
+ writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);
+ dev_err(dev, "no usable interrupts\n");
+ return err;
+}
+
+static void ioat_disable_interrupts(struct ioatdma_device *device)
+{
+ /* Disable all interrupt generation */
+ writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);
+}
+
+int __devinit ioat_probe(struct ioatdma_device *device)
+{
+ int err = -ENODEV;
+ struct dma_device *dma = &device->common;
+ struct pci_dev *pdev = device->pdev;
+ struct device *dev = &pdev->dev;
+
+ /* DMA coherent memory pool for DMA descriptor allocations */
+ device->dma_pool = pci_pool_create("dma_desc_pool", pdev,
+ sizeof(struct ioat_dma_descriptor),
+ 64, 0);
+ if (!device->dma_pool) {
+ err = -ENOMEM;
+ goto err_dma_pool;
+ }
+
+ device->completion_pool = pci_pool_create("completion_pool", pdev,
+ sizeof(u64), SMP_CACHE_BYTES,
+ SMP_CACHE_BYTES);
+
+ if (!device->completion_pool) {
+ err = -ENOMEM;
+ goto err_completion_pool;
+ }
+
+ device->enumerate_channels(device);
+
+ dma_cap_set(DMA_MEMCPY, dma->cap_mask);
+ dma->dev = &pdev->dev;
+
+ if (!dma->chancnt) {
+ dev_err(dev, "channel enumeration error\n");
+ goto err_setup_interrupts;
+ }
+
+ err = ioat_dma_setup_interrupts(device);
+ if (err)
+ goto err_setup_interrupts;
+
+ err = device->self_test(device);
+ if (err)
+ goto err_self_test;
+
+ return 0;
+
+err_self_test:
+ ioat_disable_interrupts(device);
+err_setup_interrupts:
+ pci_pool_destroy(device->completion_pool);
+err_completion_pool:
+ pci_pool_destroy(device->dma_pool);
+err_dma_pool:
+ return err;
+}
+
+int __devinit ioat_register(struct ioatdma_device *device)
+{
+ int err = dma_async_device_register(&device->common);
+
+ if (err) {
+ ioat_disable_interrupts(device);
+ pci_pool_destroy(device->completion_pool);
+ pci_pool_destroy(device->dma_pool);
+ }
+
+ return err;
+}
+
+/* ioat1_intr_quirk - fix up dma ctrl register to enable / disable msi */
+static void ioat1_intr_quirk(struct ioatdma_device *device)
+{
+ struct pci_dev *pdev = device->pdev;
+ u32 dmactrl;
+
+ pci_read_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, &dmactrl);
+ if (pdev->msi_enabled)
+ dmactrl |= IOAT_PCI_DMACTRL_MSI_EN;
+ else
+ dmactrl &= ~IOAT_PCI_DMACTRL_MSI_EN;
+ pci_write_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, dmactrl);
+}
+
+static ssize_t ring_size_show(struct dma_chan *c, char *page)
+{
+ struct ioat_dma_chan *ioat = to_ioat_chan(c);
+
+ return sprintf(page, "%d\n", ioat->desccount);
+}
+static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size);
+
+static ssize_t ring_active_show(struct dma_chan *c, char *page)
+{
+ struct ioat_dma_chan *ioat = to_ioat_chan(c);
+
+ return sprintf(page, "%d\n", ioat->active);
+}
+static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active);
+
+static ssize_t cap_show(struct dma_chan *c, char *page)
+{
+ struct dma_device *dma = c->device;
+
+ return sprintf(page, "copy%s%s%s%s%s%s\n",
+ dma_has_cap(DMA_PQ, dma->cap_mask) ? " pq" : "",
+ dma_has_cap(DMA_PQ_VAL, dma->cap_mask) ? " pq_val" : "",
+ dma_has_cap(DMA_XOR, dma->cap_mask) ? " xor" : "",
+ dma_has_cap(DMA_XOR_VAL, dma->cap_mask) ? " xor_val" : "",
+ dma_has_cap(DMA_MEMSET, dma->cap_mask) ? " fill" : "",
+ dma_has_cap(DMA_INTERRUPT, dma->cap_mask) ? " intr" : "");
+
+}
+struct ioat_sysfs_entry ioat_cap_attr = __ATTR_RO(cap);
+
+static ssize_t version_show(struct dma_chan *c, char *page)
+{
+ struct dma_device *dma = c->device;
+ struct ioatdma_device *device = to_ioatdma_device(dma);
+
+ return sprintf(page, "%d.%d\n",
+ device->version >> 4, device->version & 0xf);
+}
+struct ioat_sysfs_entry ioat_version_attr = __ATTR_RO(version);
+
+static struct attribute *ioat1_attrs[] = {
+ &ring_size_attr.attr,
+ &ring_active_attr.attr,
+ &ioat_cap_attr.attr,
+ &ioat_version_attr.attr,
+ NULL,
+};
+
+static ssize_t
+ioat_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
+{
+ struct ioat_sysfs_entry *entry;
+ struct ioat_chan_common *chan;
+
+ entry = container_of(attr, struct ioat_sysfs_entry, attr);
+ chan = container_of(kobj, struct ioat_chan_common, kobj);
+
+ if (!entry->show)
+ return -EIO;
+ return entry->show(&chan->common, page);
+}
+
+struct sysfs_ops ioat_sysfs_ops = {
+ .show = ioat_attr_show,
+};
+
+static struct kobj_type ioat1_ktype = {
+ .sysfs_ops = &ioat_sysfs_ops,
+ .default_attrs = ioat1_attrs,
+};
+
+void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type)
+{
+ struct dma_device *dma = &device->common;
+ struct dma_chan *c;
+
+ list_for_each_entry(c, &dma->channels, device_node) {
+ struct ioat_chan_common *chan = to_chan_common(c);
+ struct kobject *parent = &c->dev->device.kobj;
+ int err;
+
+ err = kobject_init_and_add(&chan->kobj, type, parent, "quickdata");
+ if (err) {
+ dev_warn(to_dev(chan),
+ "sysfs init error (%d), continuing...\n", err);
+ kobject_put(&chan->kobj);
+ set_bit(IOAT_KOBJ_INIT_FAIL, &chan->state);
+ }
+ }
+}
+
+void ioat_kobject_del(struct ioatdma_device *device)
+{
+ struct dma_device *dma = &device->common;
+ struct dma_chan *c;
+
+ list_for_each_entry(c, &dma->channels, device_node) {
+ struct ioat_chan_common *chan = to_chan_common(c);
+
+ if (!test_bit(IOAT_KOBJ_INIT_FAIL, &chan->state)) {
+ kobject_del(&chan->kobj);
+ kobject_put(&chan->kobj);
+ }
+ }
+}
+
+int __devinit ioat1_dma_probe(struct ioatdma_device *device, int dca)
+{
+ struct pci_dev *pdev = device->pdev;
+ struct dma_device *dma;
+ int err;
+
+ device->intr_quirk = ioat1_intr_quirk;
+ device->enumerate_channels = ioat1_enumerate_channels;
+ device->self_test = ioat_dma_self_test;
+ dma = &device->common;
+ dma->device_prep_dma_memcpy = ioat1_dma_prep_memcpy;
+ dma->device_issue_pending = ioat1_dma_memcpy_issue_pending;
+ dma->device_alloc_chan_resources = ioat1_dma_alloc_chan_resources;
+ dma->device_free_chan_resources = ioat1_dma_free_chan_resources;
+ dma->device_is_tx_complete = ioat1_dma_is_complete;
+
+ err = ioat_probe(device);
+ if (err)
+ return err;
+ ioat_set_tcp_copy_break(4096);
+ err = ioat_register(device);
+ if (err)
+ return err;
+ ioat_kobject_add(device, &ioat1_ktype);
+
+ if (dca)
+ device->dca = ioat_dca_init(pdev, device->reg_base);
+
+ return err;
+}
+
+void __devexit ioat_dma_remove(struct ioatdma_device *device)
+{
+ struct dma_device *dma = &device->common;
+
+ ioat_disable_interrupts(device);
+
+ ioat_kobject_del(device);
+
+ dma_async_device_unregister(dma);
+
+ pci_pool_destroy(device->dma_pool);
+ pci_pool_destroy(device->completion_pool);
+
+ INIT_LIST_HEAD(&dma->channels);
+}
diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h
new file mode 100644
index 00000000000..bbc3e78ef33
--- /dev/null
+++ b/drivers/dma/ioat/dma.h
@@ -0,0 +1,353 @@
+/*
+ * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+#ifndef IOATDMA_H
+#define IOATDMA_H
+
+#include <linux/dmaengine.h>
+#include "hw.h"
+#include "registers.h"
+#include <linux/init.h>
+#include <linux/dmapool.h>
+#include <linux/cache.h>
+#include <linux/pci_ids.h>
+#include <net/tcp.h>
+
+#define IOAT_DMA_VERSION "4.00"
+
+#define IOAT_LOW_COMPLETION_MASK 0xffffffc0
+#define IOAT_DMA_DCA_ANY_CPU ~0
+
+#define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, common)
+#define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node)
+#define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, txd)
+#define to_dev(ioat_chan) (&(ioat_chan)->device->pdev->dev)
+
+#define chan_num(ch) ((int)((ch)->reg_base - (ch)->device->reg_base) / 0x80)
+
+/*
+ * workaround for IOAT ver.3.0 null descriptor issue
+ * (channel returns error when size is 0)
+ */
+#define NULL_DESC_BUFFER_SIZE 1
+
+/**
+ * struct ioatdma_device - internal representation of a IOAT device
+ * @pdev: PCI-Express device
+ * @reg_base: MMIO register space base address
+ * @dma_pool: for allocating DMA descriptors
+ * @common: embedded struct dma_device
+ * @version: version of ioatdma device
+ * @msix_entries: irq handlers
+ * @idx: per channel data
+ * @dca: direct cache access context
+ * @intr_quirk: interrupt setup quirk (for ioat_v1 devices)
+ * @enumerate_channels: hw version specific channel enumeration
+ * @reset_hw: hw version specific channel (re)initialization
+ * @cleanup_tasklet: select between the v2 and v3 cleanup routines
+ * @timer_fn: select between the v2 and v3 timer watchdog routines
+ * @self_test: hardware version specific self test for each supported op type
+ *
+ * Note: the v3 cleanup routine supports raid operations
+ */
+struct ioatdma_device {
+ struct pci_dev *pdev;
+ void __iomem *reg_base;
+ struct pci_pool *dma_pool;
+ struct pci_pool *completion_pool;
+ struct dma_device common;
+ u8 version;
+ struct msix_entry msix_entries[4];
+ struct ioat_chan_common *idx[4];
+ struct dca_provider *dca;
+ void (*intr_quirk)(struct ioatdma_device *device);
+ int (*enumerate_channels)(struct ioatdma_device *device);
+ int (*reset_hw)(struct ioat_chan_common *chan);
+ void (*cleanup_tasklet)(unsigned long data);
+ void (*timer_fn)(unsigned long data);
+ int (*self_test)(struct ioatdma_device *device);
+};
+
+struct ioat_chan_common {
+ struct dma_chan common;
+ void __iomem *reg_base;
+ unsigned long last_completion;
+ spinlock_t cleanup_lock;
+ dma_cookie_t completed_cookie;
+ unsigned long state;
+ #define IOAT_COMPLETION_PENDING 0
+ #define IOAT_COMPLETION_ACK 1
+ #define IOAT_RESET_PENDING 2
+ #define IOAT_KOBJ_INIT_FAIL 3
+ struct timer_list timer;
+ #define COMPLETION_TIMEOUT msecs_to_jiffies(100)
+ #define IDLE_TIMEOUT msecs_to_jiffies(2000)
+ #define RESET_DELAY msecs_to_jiffies(100)
+ struct ioatdma_device *device;
+ dma_addr_t completion_dma;
+ u64 *completion;
+ struct tasklet_struct cleanup_task;
+ struct kobject kobj;
+};
+
+struct ioat_sysfs_entry {
+ struct attribute attr;
+ ssize_t (*show)(struct dma_chan *, char *);
+};
+
+/**
+ * struct ioat_dma_chan - internal representation of a DMA channel
+ */
+struct ioat_dma_chan {
+ struct ioat_chan_common base;
+
+ size_t xfercap; /* XFERCAP register value expanded out */
+
+ spinlock_t desc_lock;
+ struct list_head free_desc;
+ struct list_head used_desc;
+
+ int pending;
+ u16 desccount;
+ u16 active;
+};
+
+static inline struct ioat_chan_common *to_chan_common(struct dma_chan *c)
+{
+ return container_of(c, struct ioat_chan_common, common);
+}
+
+static inline struct ioat_dma_chan *to_ioat_chan(struct dma_chan *c)
+{
+ struct ioat_chan_common *chan = to_chan_common(c);
+
+ return container_of(chan, struct ioat_dma_chan, base);
+}
+
+/**
+ * ioat_is_complete - poll the status of an ioat transaction
+ * @c: channel handle
+ * @cookie: transaction identifier
+ * @done: if set, updated with last completed transaction
+ * @used: if set, updated with last used transaction
+ */
+static inline enum dma_status
+ioat_is_complete(struct dma_chan *c, dma_cookie_t cookie,
+ dma_cookie_t *done, dma_cookie_t *used)
+{
+ struct ioat_chan_common *chan = to_chan_common(c);
+ dma_cookie_t last_used;
+ dma_cookie_t last_complete;
+
+ last_used = c->cookie;
+ last_complete = chan->completed_cookie;
+
+ if (done)
+ *done = last_complete;
+ if (used)
+ *used = last_used;
+
+ return dma_async_is_complete(cookie, last_complete, last_used);
+}
+
+/* wrapper around hardware descriptor format + additional software fields */
+
+/**
+ * struct ioat_desc_sw - wrapper around hardware descriptor
+ * @hw: hardware DMA descriptor (for memcpy)
+ * @node: this descriptor will either be on the free list,
+ * or attached to a transaction list (tx_list)
+ * @txd: the generic software descriptor for all engines
+ * @id: identifier for debug
+ */
+struct ioat_desc_sw {
+ struct ioat_dma_descriptor *hw;
+ struct list_head node;
+ size_t len;
+ struct list_head tx_list;
+ struct dma_async_tx_descriptor txd;
+ #ifdef DEBUG
+ int id;
+ #endif
+};
+
+#ifdef DEBUG
+#define set_desc_id(desc, i) ((desc)->id = (i))
+#define desc_id(desc) ((desc)->id)
+#else
+#define set_desc_id(desc, i)
+#define desc_id(desc) (0)
+#endif
+
+static inline void
+__dump_desc_dbg(struct ioat_chan_common *chan, struct ioat_dma_descriptor *hw,
+ struct dma_async_tx_descriptor *tx, int id)
+{
+ struct device *dev = to_dev(chan);
+
+ dev_dbg(dev, "desc[%d]: (%#llx->%#llx) cookie: %d flags: %#x"
+ " ctl: %#x (op: %d int_en: %d compl: %d)\n", id,
+ (unsigned long long) tx->phys,
+ (unsigned long long) hw->next, tx->cookie, tx->flags,
+ hw->ctl, hw->ctl_f.op, hw->ctl_f.int_en, hw->ctl_f.compl_write);
+}
+
+#define dump_desc_dbg(c, d) \
+ ({ if (d) __dump_desc_dbg(&c->base, d->hw, &d->txd, desc_id(d)); 0; })
+
+static inline void ioat_set_tcp_copy_break(unsigned long copybreak)
+{
+ #ifdef CONFIG_NET_DMA
+ sysctl_tcp_dma_copybreak = copybreak;
+ #endif
+}
+
+static inline struct ioat_chan_common *
+ioat_chan_by_index(struct ioatdma_device *device, int index)
+{
+ return device->idx[index];
+}
+
+static inline u64 ioat_chansts(struct ioat_chan_common *chan)
+{
+ u8 ver = chan->device->version;
+ u64 status;
+ u32 status_lo;
+
+ /* We need to read the low address first as this causes the
+ * chipset to latch the upper bits for the subsequent read
+ */
+ status_lo = readl(chan->reg_base + IOAT_CHANSTS_OFFSET_LOW(ver));
+ status = readl(chan->reg_base + IOAT_CHANSTS_OFFSET_HIGH(ver));
+ status <<= 32;
+ status |= status_lo;
+
+ return status;
+}
+
+static inline void ioat_start(struct ioat_chan_common *chan)
+{
+ u8 ver = chan->device->version;
+
+ writeb(IOAT_CHANCMD_START, chan->reg_base + IOAT_CHANCMD_OFFSET(ver));
+}
+
+static inline u64 ioat_chansts_to_addr(u64 status)
+{
+ return status & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
+}
+
+static inline u32 ioat_chanerr(struct ioat_chan_common *chan)
+{
+ return readl(chan->reg_base + IOAT_CHANERR_OFFSET);
+}
+
+static inline void ioat_suspend(struct ioat_chan_common *chan)
+{
+ u8 ver = chan->device->version;
+
+ writeb(IOAT_CHANCMD_SUSPEND, chan->reg_base + IOAT_CHANCMD_OFFSET(ver));
+}
+
+static inline void ioat_reset(struct ioat_chan_common *chan)
+{
+ u8 ver = chan->device->version;
+
+ writeb(IOAT_CHANCMD_RESET, chan->reg_base + IOAT_CHANCMD_OFFSET(ver));
+}
+
+static inline bool ioat_reset_pending(struct ioat_chan_common *chan)
+{
+ u8 ver = chan->device->version;
+ u8 cmd;
+
+ cmd = readb(chan->reg_base + IOAT_CHANCMD_OFFSET(ver));
+ return (cmd & IOAT_CHANCMD_RESET) == IOAT_CHANCMD_RESET;
+}
+
+static inline void ioat_set_chainaddr(struct ioat_dma_chan *ioat, u64 addr)
+{
+ struct ioat_chan_common *chan = &ioat->base;
+
+ writel(addr & 0x00000000FFFFFFFF,
+ chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW);
+ writel(addr >> 32,
+ chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH);
+}
+
+static inline bool is_ioat_active(unsigned long status)
+{
+ return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_ACTIVE);
+}
+
+static inline bool is_ioat_idle(unsigned long status)
+{
+ return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_DONE);
+}
+
+static inline bool is_ioat_halted(unsigned long status)
+{
+ return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_HALTED);
+}
+
+static inline bool is_ioat_suspended(unsigned long status)
+{
+ return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_SUSPENDED);
+}
+
+/* channel was fatally programmed */
+static inline bool is_ioat_bug(unsigned long err)
+{
+ return !!err;
+}
+
+static inline void ioat_unmap(struct pci_dev *pdev, dma_addr_t addr, size_t len,
+ int direction, enum dma_ctrl_flags flags, bool dst)
+{
+ if ((dst && (flags & DMA_COMPL_DEST_UNMAP_SINGLE)) ||
+ (!dst && (flags & DMA_COMPL_SRC_UNMAP_SINGLE)))
+ pci_unmap_single(pdev, addr, len, direction);
+ else
+ pci_unmap_page(pdev, addr, len, direction);
+}
+
+int __devinit ioat_probe(struct ioatdma_device *device);
+int __devinit ioat_register(struct ioatdma_device *device);
+int __devinit ioat1_dma_probe(struct ioatdma_device *dev, int dca);
+int __devinit ioat_dma_self_test(struct ioatdma_device *device);
+void __devexit ioat_dma_remove(struct ioatdma_device *device);
+struct dca_provider * __devinit ioat_dca_init(struct pci_dev *pdev,
+ void __iomem *iobase);
+unsigned long ioat_get_current_completion(struct ioat_chan_common *chan);
+void ioat_init_channel(struct ioatdma_device *device,
+ struct ioat_chan_common *chan, int idx,
+ void (*timer_fn)(unsigned long),
+ void (*tasklet)(unsigned long),
+ unsigned long ioat);
+void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags,
+ size_t len, struct ioat_dma_descriptor *hw);
+bool ioat_cleanup_preamble(struct ioat_chan_common *chan,
+ unsigned long *phys_complete);
+void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type);
+void ioat_kobject_del(struct ioatdma_device *device);
+extern struct sysfs_ops ioat_sysfs_ops;
+extern struct ioat_sysfs_entry ioat_version_attr;
+extern struct ioat_sysfs_entry ioat_cap_attr;
+#endif /* IOATDMA_H */
diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c
new file mode 100644
index 00000000000..5cc37afe2bc
--- /dev/null
+++ b/drivers/dma/ioat/dma_v2.c
@@ -0,0 +1,908 @@
+/*
+ * Intel I/OAT DMA Linux driver
+ * Copyright(c) 2004 - 2009 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+/*
+ * This driver supports an Intel I/OAT DMA engine (versions >= 2), which
+ * does asynchronous data movement and checksumming operations.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/workqueue.h>
+#include <linux/i7300_idle.h>
+#include "dma.h"
+#include "dma_v2.h"
+#include "registers.h"
+#include "hw.h"
+
+int ioat_ring_alloc_order = 8;
+module_param(ioat_ring_alloc_order, int, 0644);
+MODULE_PARM_DESC(ioat_ring_alloc_order,
+ "ioat2+: allocate 2^n descriptors per channel"
+ " (default: 8 max: 16)");
+static int ioat_ring_max_alloc_order = IOAT_MAX_ORDER;
+module_param(ioat_ring_max_alloc_order, int, 0644);
+MODULE_PARM_DESC(ioat_ring_max_alloc_order,
+ "ioat2+: upper limit for ring size (default: 16)");
+
+void __ioat2_issue_pending(struct ioat2_dma_chan *ioat)
+{
+ void * __iomem reg_base = ioat->base.reg_base;
+
+ ioat->pending = 0;
+ ioat->dmacount += ioat2_ring_pending(ioat);
+ ioat->issued = ioat->head;
+ /* make descriptor updates globally visible before notifying channel */
+ wmb();
+ writew(ioat->dmacount, reg_base + IOAT_CHAN_DMACOUNT_OFFSET);
+ dev_dbg(to_dev(&ioat->base),
+ "%s: head: %#x tail: %#x issued: %#x count: %#x\n",
+ __func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount);
+}
+
+void ioat2_issue_pending(struct dma_chan *chan)
+{
+ struct ioat2_dma_chan *ioat = to_ioat2_chan(chan);
+
+ spin_lock_bh(&ioat->ring_lock);
+ if (ioat->pending == 1)
+ __ioat2_issue_pending(ioat);
+ spin_unlock_bh(&ioat->ring_lock);
+}
+
+/**
+ * ioat2_update_pending - log pending descriptors
+ * @ioat: ioat2+ channel
+ *
+ * set pending to '1' unless pending is already set to '2', pending == 2
+ * indicates that submission is temporarily blocked due to an in-flight
+ * reset. If we are already above the ioat_pending_level threshold then
+ * just issue pending.
+ *
+ * called with ring_lock held
+ */
+static void ioat2_update_pending(struct ioat2_dma_chan *ioat)
+{
+ if (unlikely(ioat->pending == 2))
+ return;
+ else if (ioat2_ring_pending(ioat) > ioat_pending_level)
+ __ioat2_issue_pending(ioat);
+ else
+ ioat->pending = 1;
+}
+
+static void __ioat2_start_null_desc(struct ioat2_dma_chan *ioat)
+{
+ struct ioat_ring_ent *desc;
+ struct ioat_dma_descriptor *hw;
+ int idx;
+
+ if (ioat2_ring_space(ioat) < 1) {
+ dev_err(to_dev(&ioat->base),
+ "Unable to start null desc - ring full\n");
+ return;
+ }
+
+ dev_dbg(to_dev(&ioat->base), "%s: head: %#x tail: %#x issued: %#x\n",
+ __func__, ioat->head, ioat->tail, ioat->issued);
+ idx = ioat2_desc_alloc(ioat, 1);
+ desc = ioat2_get_ring_ent(ioat, idx);
+
+ hw = desc->hw;
+ hw->ctl = 0;
+ hw->ctl_f.null = 1;
+ hw->ctl_f.int_en = 1;
+ hw->ctl_f.compl_write = 1;
+ /* set size to non-zero value (channel returns error when size is 0) */
+ hw->size = NULL_DESC_BUFFER_SIZE;
+ hw->src_addr = 0;
+ hw->dst_addr = 0;
+ async_tx_ack(&desc->txd);
+ ioat2_set_chainaddr(ioat, desc->txd.phys);
+ dump_desc_dbg(ioat, desc);
+ __ioat2_issue_pending(ioat);
+}
+
+static void ioat2_start_null_desc(struct ioat2_dma_chan *ioat)
+{
+ spin_lock_bh(&ioat->ring_lock);
+ __ioat2_start_null_desc(ioat);
+ spin_unlock_bh(&ioat->ring_lock);
+}
+
+static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
+{
+ struct ioat_chan_common *chan = &ioat->base;
+ struct dma_async_tx_descriptor *tx;
+ struct ioat_ring_ent *desc;
+ bool seen_current = false;
+ u16 active;
+ int i;
+
+ dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n",
+ __func__, ioat->head, ioat->tail, ioat->issued);
+
+ active = ioat2_ring_active(ioat);
+ for (i = 0; i < active && !seen_current; i++) {
+ prefetch(ioat2_get_ring_ent(ioat, ioat->tail + i + 1));
+ desc = ioat2_get_ring_ent(ioat, ioat->tail + i);
+ tx = &desc->txd;
+ dump_desc_dbg(ioat, desc);
+ if (tx->cookie) {
+ ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw);
+ chan->completed_cookie = tx->cookie;
+ tx->cookie = 0;
+ if (tx->callback) {
+ tx->callback(tx->callback_param);
+ tx->callback = NULL;
+ }
+ }
+
+ if (tx->phys == phys_complete)
+ seen_current = true;
+ }
+ ioat->tail += i;
+ BUG_ON(!seen_current); /* no active descs have written a completion? */
+
+ chan->last_completion = phys_complete;
+ if (ioat->head == ioat->tail) {
+ dev_dbg(to_dev(chan), "%s: cancel completion timeout\n",
+ __func__);
+ clear_bit(IOAT_COMPLETION_PENDING, &chan->state);
+ mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
+ }
+}
+
+/**
+ * ioat2_cleanup - clean finished descriptors (advance tail pointer)
+ * @chan: ioat channel to be cleaned up
+ */
+static void ioat2_cleanup(struct ioat2_dma_chan *ioat)
+{
+ struct ioat_chan_common *chan = &ioat->base;
+ unsigned long phys_complete;
+
+ prefetch(chan->completion);
+
+ if (!spin_trylock_bh(&chan->cleanup_lock))
+ return;
+
+ if (!ioat_cleanup_preamble(chan, &phys_complete)) {
+ spin_unlock_bh(&chan->cleanup_lock);
+ return;
+ }
+
+ if (!spin_trylock_bh(&ioat->ring_lock)) {
+ spin_unlock_bh(&chan->cleanup_lock);
+ return;
+ }
+
+ __cleanup(ioat, phys_complete);
+
+ spin_unlock_bh(&ioat->ring_lock);
+ spin_unlock_bh(&chan->cleanup_lock);
+}
+
+void ioat2_cleanup_tasklet(unsigned long data)
+{
+ struct ioat2_dma_chan *ioat = (void *) data;
+
+ ioat2_cleanup(ioat);
+ writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET);
+}
+
+void __ioat2_restart_chan(struct ioat2_dma_chan *ioat)
+{
+ struct ioat_chan_common *chan = &ioat->base;
+
+ /* set the tail to be re-issued */
+ ioat->issued = ioat->tail;
+ ioat->dmacount = 0;
+ set_bit(IOAT_COMPLETION_PENDING, &chan->state);
+ mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+
+ dev_dbg(to_dev(chan),
+ "%s: head: %#x tail: %#x issued: %#x count: %#x\n",
+ __func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount);
+
+ if (ioat2_ring_pending(ioat)) {
+ struct ioat_ring_ent *desc;
+
+ desc = ioat2_get_ring_ent(ioat, ioat->tail);
+ ioat2_set_chainaddr(ioat, desc->txd.phys);
+ __ioat2_issue_pending(ioat);
+ } else
+ __ioat2_start_null_desc(ioat);
+}
+
+int ioat2_quiesce(struct ioat_chan_common *chan, unsigned long tmo)
+{
+ unsigned long end = jiffies + tmo;
+ int err = 0;
+ u32 status;
+
+ status = ioat_chansts(chan);
+ if (is_ioat_active(status) || is_ioat_idle(status))
+ ioat_suspend(chan);
+ while (is_ioat_active(status) || is_ioat_idle(status)) {
+ if (tmo && time_after(jiffies, end)) {
+ err = -ETIMEDOUT;
+ break;
+ }
+ status = ioat_chansts(chan);
+ cpu_relax();
+ }
+
+ return err;
+}
+
+int ioat2_reset_sync(struct ioat_chan_common *chan, unsigned long tmo)
+{
+ unsigned long end = jiffies + tmo;
+ int err = 0;
+
+ ioat_reset(chan);
+ while (ioat_reset_pending(chan)) {
+ if (end && time_after(jiffies, end)) {
+ err = -ETIMEDOUT;
+ break;
+ }
+ cpu_relax();
+ }
+
+ return err;
+}
+
+static void ioat2_restart_channel(struct ioat2_dma_chan *ioat)
+{
+ struct ioat_chan_common *chan = &ioat->base;
+ unsigned long phys_complete;
+
+ ioat2_quiesce(chan, 0);
+ if (ioat_cleanup_preamble(chan, &phys_complete))
+ __cleanup(ioat, phys_complete);
+
+ __ioat2_restart_chan(ioat);
+}
+
+void ioat2_timer_event(unsigned long data)
+{
+ struct ioat2_dma_chan *ioat = (void *) data;
+ struct ioat_chan_common *chan = &ioat->base;
+
+ spin_lock_bh(&chan->cleanup_lock);
+ if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) {
+ unsigned long phys_complete;
+ u64 status;
+
+ spin_lock_bh(&ioat->ring_lock);
+ status = ioat_chansts(chan);
+
+ /* when halted due to errors check for channel
+ * programming errors before advancing the completion state
+ */
+ if (is_ioat_halted(status)) {
+ u32 chanerr;
+
+ chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
+ dev_err(to_dev(chan), "%s: Channel halted (%x)\n",
+ __func__, chanerr);
+ BUG_ON(is_ioat_bug(chanerr));
+ }
+
+ /* if we haven't made progress and we have already
+ * acknowledged a pending completion once, then be more
+ * forceful with a restart
+ */
+ if (ioat_cleanup_preamble(chan, &phys_complete))
+ __cleanup(ioat, phys_complete);
+ else if (test_bit(IOAT_COMPLETION_ACK, &chan->state))
+ ioat2_restart_channel(ioat);
+ else {
+ set_bit(IOAT_COMPLETION_ACK, &chan->state);
+ mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+ }
+ spin_unlock_bh(&ioat->ring_lock);
+ } else {
+ u16 active;
+
+ /* if the ring is idle, empty, and oversized try to step
+ * down the size
+ */
+ spin_lock_bh(&ioat->ring_lock);
+ active = ioat2_ring_active(ioat);
+ if (active == 0 && ioat->alloc_order > ioat_get_alloc_order())
+ reshape_ring(ioat, ioat->alloc_order-1);
+ spin_unlock_bh(&ioat->ring_lock);
+
+ /* keep shrinking until we get back to our minimum
+ * default size
+ */
+ if (ioat->alloc_order > ioat_get_alloc_order())
+ mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
+ }
+ spin_unlock_bh(&chan->cleanup_lock);
+}
+
+static int ioat2_reset_hw(struct ioat_chan_common *chan)
+{
+ /* throw away whatever the channel was doing and get it initialized */
+ u32 chanerr;
+
+ ioat2_quiesce(chan, msecs_to_jiffies(100));
+
+ chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
+ writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET);
+
+ return ioat2_reset_sync(chan, msecs_to_jiffies(200));
+}
+
+/**
+ * ioat2_enumerate_channels - find and initialize the device's channels
+ * @device: the device to be enumerated
+ */
+int ioat2_enumerate_channels(struct ioatdma_device *device)
+{
+ struct ioat2_dma_chan *ioat;
+ struct device *dev = &device->pdev->dev;
+ struct dma_device *dma = &device->common;
+ u8 xfercap_log;
+ int i;
+
+ INIT_LIST_HEAD(&dma->channels);
+ dma->chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET);
+ dma->chancnt &= 0x1f; /* bits [4:0] valid */
+ if (dma->chancnt > ARRAY_SIZE(device->idx)) {
+ dev_warn(dev, "(%d) exceeds max supported channels (%zu)\n",
+ dma->chancnt, ARRAY_SIZE(device->idx));
+ dma->chancnt = ARRAY_SIZE(device->idx);
+ }
+ xfercap_log = readb(device->reg_base + IOAT_XFERCAP_OFFSET);
+ xfercap_log &= 0x1f; /* bits [4:0] valid */
+ if (xfercap_log == 0)
+ return 0;
+ dev_dbg(dev, "%s: xfercap = %d\n", __func__, 1 << xfercap_log);
+
+ /* FIXME which i/oat version is i7300? */
+#ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL
+ if (i7300_idle_platform_probe(NULL, NULL, 1) == 0)
+ dma->chancnt--;
+#endif
+ for (i = 0; i < dma->chancnt; i++) {
+ ioat = devm_kzalloc(dev, sizeof(*ioat), GFP_KERNEL);
+ if (!ioat)
+ break;
+
+ ioat_init_channel(device, &ioat->base, i,
+ device->timer_fn,
+ device->cleanup_tasklet,
+ (unsigned long) ioat);
+ ioat->xfercap_log = xfercap_log;
+ spin_lock_init(&ioat->ring_lock);
+ if (device->reset_hw(&ioat->base)) {
+ i = 0;
+ break;
+ }
+ }
+ dma->chancnt = i;
+ return i;
+}
+
+static dma_cookie_t ioat2_tx_submit_unlock(struct dma_async_tx_descriptor *tx)
+{
+ struct dma_chan *c = tx->chan;
+ struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+ struct ioat_chan_common *chan = &ioat->base;
+ dma_cookie_t cookie = c->cookie;
+
+ cookie++;
+ if (cookie < 0)
+ cookie = 1;
+ tx->cookie = cookie;
+ c->cookie = cookie;
+ dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie);
+
+ if (!test_and_set_bit(IOAT_COMPLETION_PENDING, &chan->state))
+ mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+ ioat2_update_pending(ioat);
+ spin_unlock_bh(&ioat->ring_lock);
+
+ return cookie;
+}
+
+static struct ioat_ring_ent *ioat2_alloc_ring_ent(struct dma_chan *chan, gfp_t flags)
+{
+ struct ioat_dma_descriptor *hw;
+ struct ioat_ring_ent *desc;
+ struct ioatdma_device *dma;
+ dma_addr_t phys;
+
+ dma = to_ioatdma_device(chan->device);
+ hw = pci_pool_alloc(dma->dma_pool, flags, &phys);
+ if (!hw)
+ return NULL;
+ memset(hw, 0, sizeof(*hw));
+
+ desc = kmem_cache_alloc(ioat2_cache, flags);
+ if (!desc) {
+ pci_pool_free(dma->dma_pool, hw, phys);
+ return NULL;
+ }
+ memset(desc, 0, sizeof(*desc));
+
+ dma_async_tx_descriptor_init(&desc->txd, chan);
+ desc->txd.tx_submit = ioat2_tx_submit_unlock;
+ desc->hw = hw;
+ desc->txd.phys = phys;
+ return desc;
+}
+
+static void ioat2_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *chan)
+{
+ struct ioatdma_device *dma;
+
+ dma = to_ioatdma_device(chan->device);
+ pci_pool_free(dma->dma_pool, desc->hw, desc->txd.phys);
+ kmem_cache_free(ioat2_cache, desc);
+}
+
+static struct ioat_ring_ent **ioat2_alloc_ring(struct dma_chan *c, int order, gfp_t flags)
+{
+ struct ioat_ring_ent **ring;
+ int descs = 1 << order;
+ int i;
+
+ if (order > ioat_get_max_alloc_order())
+ return NULL;
+
+ /* allocate the array to hold the software ring */
+ ring = kcalloc(descs, sizeof(*ring), flags);
+ if (!ring)
+ return NULL;
+ for (i = 0; i < descs; i++) {
+ ring[i] = ioat2_alloc_ring_ent(c, flags);
+ if (!ring[i]) {
+ while (i--)
+ ioat2_free_ring_ent(ring[i], c);
+ kfree(ring);
+ return NULL;
+ }
+ set_desc_id(ring[i], i);
+ }
+
+ /* link descs */
+ for (i = 0; i < descs-1; i++) {
+ struct ioat_ring_ent *next = ring[i+1];
+ struct ioat_dma_descriptor *hw = ring[i]->hw;
+
+ hw->next = next->txd.phys;
+ }
+ ring[i]->hw->next = ring[0]->txd.phys;
+
+ return ring;
+}
+
+/* ioat2_alloc_chan_resources - allocate/initialize ioat2 descriptor ring
+ * @chan: channel to be initialized
+ */
+int ioat2_alloc_chan_resources(struct dma_chan *c)
+{
+ struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+ struct ioat_chan_common *chan = &ioat->base;
+ struct ioat_ring_ent **ring;
+ int order;
+
+ /* have we already been set up? */
+ if (ioat->ring)
+ return 1 << ioat->alloc_order;
+
+ /* Setup register to interrupt and write completion status on error */
+ writew(IOAT_CHANCTRL_RUN, chan->reg_base + IOAT_CHANCTRL_OFFSET);
+
+ /* allocate a completion writeback area */
+ /* doing 2 32bit writes to mmio since 1 64b write doesn't work */
+ chan->completion = pci_pool_alloc(chan->device->completion_pool,
+ GFP_KERNEL, &chan->completion_dma);
+ if (!chan->completion)
+ return -ENOMEM;
+
+ memset(chan->completion, 0, sizeof(*chan->completion));
+ writel(((u64) chan->completion_dma) & 0x00000000FFFFFFFF,
+ chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
+ writel(((u64) chan->completion_dma) >> 32,
+ chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);
+
+ order = ioat_get_alloc_order();
+ ring = ioat2_alloc_ring(c, order, GFP_KERNEL);
+ if (!ring)
+ return -ENOMEM;
+
+ spin_lock_bh(&ioat->ring_lock);
+ ioat->ring = ring;
+ ioat->head = 0;
+ ioat->issued = 0;
+ ioat->tail = 0;
+ ioat->pending = 0;
+ ioat->alloc_order = order;
+ spin_unlock_bh(&ioat->ring_lock);
+
+ tasklet_enable(&chan->cleanup_task);
+ ioat2_start_null_desc(ioat);
+
+ return 1 << ioat->alloc_order;
+}
+
+bool reshape_ring(struct ioat2_dma_chan *ioat, int order)
+{
+ /* reshape differs from normal ring allocation in that we want
+ * to allocate a new software ring while only
+ * extending/truncating the hardware ring
+ */
+ struct ioat_chan_common *chan = &ioat->base;
+ struct dma_chan *c = &chan->common;
+ const u16 curr_size = ioat2_ring_mask(ioat) + 1;
+ const u16 active = ioat2_ring_active(ioat);
+ const u16 new_size = 1 << order;
+ struct ioat_ring_ent **ring;
+ u16 i;
+
+ if (order > ioat_get_max_alloc_order())
+ return false;
+
+ /* double check that we have at least 1 free descriptor */
+ if (active == curr_size)
+ return false;
+
+ /* when shrinking, verify that we can hold the current active
+ * set in the new ring
+ */
+ if (active >= new_size)
+ return false;
+
+ /* allocate the array to hold the software ring */
+ ring = kcalloc(new_size, sizeof(*ring), GFP_NOWAIT);
+ if (!ring)
+ return false;
+
+ /* allocate/trim descriptors as needed */
+ if (new_size > curr_size) {
+ /* copy current descriptors to the new ring */
+ for (i = 0; i < curr_size; i++) {
+ u16 curr_idx = (ioat->tail+i) & (curr_size-1);
+ u16 new_idx = (ioat->tail+i) & (new_size-1);
+
+ ring[new_idx] = ioat->ring[curr_idx];
+ set_desc_id(ring[new_idx], new_idx);
+ }
+
+ /* add new descriptors to the ring */
+ for (i = curr_size; i < new_size; i++) {
+ u16 new_idx = (ioat->tail+i) & (new_size-1);
+
+ ring[new_idx] = ioat2_alloc_ring_ent(c, GFP_NOWAIT);
+ if (!ring[new_idx]) {
+ while (i--) {
+ u16 new_idx = (ioat->tail+i) & (new_size-1);
+
+ ioat2_free_ring_ent(ring[new_idx], c);
+ }
+ kfree(ring);
+ return false;
+ }
+ set_desc_id(ring[new_idx], new_idx);
+ }
+
+ /* hw link new descriptors */
+ for (i = curr_size-1; i < new_size; i++) {
+ u16 new_idx = (ioat->tail+i) & (new_size-1);
+ struct ioat_ring_ent *next = ring[(new_idx+1) & (new_size-1)];
+ struct ioat_dma_descriptor *hw = ring[new_idx]->hw;
+
+ hw->next = next->txd.phys;
+ }
+ } else {
+ struct ioat_dma_descriptor *hw;
+ struct ioat_ring_ent *next;
+
+ /* copy current descriptors to the new ring, dropping the
+ * removed descriptors
+ */
+ for (i = 0; i < new_size; i++) {
+ u16 curr_idx = (ioat->tail+i) & (curr_size-1);
+ u16 new_idx = (ioat->tail+i) & (new_size-1);
+
+ ring[new_idx] = ioat->ring[curr_idx];
+ set_desc_id(ring[new_idx], new_idx);
+ }
+
+ /* free deleted descriptors */
+ for (i = new_size; i < curr_size; i++) {
+ struct ioat_ring_ent *ent;
+
+ ent = ioat2_get_ring_ent(ioat, ioat->tail+i);
+ ioat2_free_ring_ent(ent, c);
+ }
+
+ /* fix up hardware ring */
+ hw = ring[(ioat->tail+new_size-1) & (new_size-1)]->hw;
+ next = ring[(ioat->tail+new_size) & (new_size-1)];
+ hw->next = next->txd.phys;
+ }
+
+ dev_dbg(to_dev(chan), "%s: allocated %d descriptors\n",
+ __func__, new_size);
+
+ kfree(ioat->ring);
+ ioat->ring = ring;
+ ioat->alloc_order = order;
+
+ return true;
+}
+
+/**
+ * ioat2_alloc_and_lock - common descriptor alloc boilerplate for ioat2,3 ops
+ * @idx: gets starting descriptor index on successful allocation
+ * @ioat: ioat2,3 channel (ring) to operate on
+ * @num_descs: allocation length
+ */
+int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_descs)
+{
+ struct ioat_chan_common *chan = &ioat->base;
+
+ spin_lock_bh(&ioat->ring_lock);
+ /* never allow the last descriptor to be consumed, we need at
+ * least one free at all times to allow for on-the-fly ring
+ * resizing.
+ */
+ while (unlikely(ioat2_ring_space(ioat) <= num_descs)) {
+ if (reshape_ring(ioat, ioat->alloc_order + 1) &&
+ ioat2_ring_space(ioat) > num_descs)
+ break;
+
+ if (printk_ratelimit())
+ dev_dbg(to_dev(chan),
+ "%s: ring full! num_descs: %d (%x:%x:%x)\n",
+ __func__, num_descs, ioat->head, ioat->tail,
+ ioat->issued);
+ spin_unlock_bh(&ioat->ring_lock);
+
+ /* progress reclaim in the allocation failure case we
+ * may be called under bh_disabled so we need to trigger
+ * the timer event directly
+ */
+ spin_lock_bh(&chan->cleanup_lock);
+ if (jiffies > chan->timer.expires &&
+ timer_pending(&chan->timer)) {
+ struct ioatdma_device *device = chan->device;
+
+ mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+ spin_unlock_bh(&chan->cleanup_lock);
+ device->timer_fn((unsigned long) ioat);
+ } else
+ spin_unlock_bh(&chan->cleanup_lock);
+ return -ENOMEM;
+ }
+
+ dev_dbg(to_dev(chan), "%s: num_descs: %d (%x:%x:%x)\n",
+ __func__, num_descs, ioat->head, ioat->tail, ioat->issued);
+
+ *idx = ioat2_desc_alloc(ioat, num_descs);
+ return 0; /* with ioat->ring_lock held */
+}
+
+struct dma_async_tx_descriptor *
+ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest,
+ dma_addr_t dma_src, size_t len, unsigned long flags)
+{
+ struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+ struct ioat_dma_descriptor *hw;
+ struct ioat_ring_ent *desc;
+ dma_addr_t dst = dma_dest;
+ dma_addr_t src = dma_src;
+ size_t total_len = len;
+ int num_descs;
+ u16 idx;
+ int i;
+
+ num_descs = ioat2_xferlen_to_descs(ioat, len);
+ if (likely(num_descs) &&
+ ioat2_alloc_and_lock(&idx, ioat, num_descs) == 0)
+ /* pass */;
+ else
+ return NULL;
+ i = 0;
+ do {
+ size_t copy = min_t(size_t, len, 1 << ioat->xfercap_log);
+
+ desc = ioat2_get_ring_ent(ioat, idx + i);
+ hw = desc->hw;
+
+ hw->size = copy;
+ hw->ctl = 0;
+ hw->src_addr = src;
+ hw->dst_addr = dst;
+
+ len -= copy;
+ dst += copy;
+ src += copy;
+ dump_desc_dbg(ioat, desc);
+ } while (++i < num_descs);
+
+ desc->txd.flags = flags;
+ desc->len = total_len;
+ hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+ hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
+ hw->ctl_f.compl_write = 1;
+ dump_desc_dbg(ioat, desc);
+ /* we leave the channel locked to ensure in order submission */
+
+ return &desc->txd;
+}
+
+/**
+ * ioat2_free_chan_resources - release all the descriptors
+ * @chan: the channel to be cleaned
+ */
+void ioat2_free_chan_resources(struct dma_chan *c)
+{
+ struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+ struct ioat_chan_common *chan = &ioat->base;
+ struct ioatdma_device *device = chan->device;
+ struct ioat_ring_ent *desc;
+ const u16 total_descs = 1 << ioat->alloc_order;
+ int descs;
+ int i;
+
+ /* Before freeing channel resources first check
+ * if they have been previously allocated for this channel.
+ */
+ if (!ioat->ring)
+ return;
+
+ tasklet_disable(&chan->cleanup_task);
+ del_timer_sync(&chan->timer);
+ device->cleanup_tasklet((unsigned long) ioat);
+ device->reset_hw(chan);
+
+ spin_lock_bh(&ioat->ring_lock);
+ descs = ioat2_ring_space(ioat);
+ dev_dbg(to_dev(chan), "freeing %d idle descriptors\n", descs);
+ for (i = 0; i < descs; i++) {
+ desc = ioat2_get_ring_ent(ioat, ioat->head + i);
+ ioat2_free_ring_ent(desc, c);
+ }
+
+ if (descs < total_descs)
+ dev_err(to_dev(chan), "Freeing %d in use descriptors!\n",
+ total_descs - descs);
+
+ for (i = 0; i < total_descs - descs; i++) {
+ desc = ioat2_get_ring_ent(ioat, ioat->tail + i);
+ dump_desc_dbg(ioat, desc);
+ ioat2_free_ring_ent(desc, c);
+ }
+
+ kfree(ioat->ring);
+ ioat->ring = NULL;
+ ioat->alloc_order = 0;
+ pci_pool_free(device->completion_pool, chan->completion,
+ chan->completion_dma);
+ spin_unlock_bh(&ioat->ring_lock);
+
+ chan->last_completion = 0;
+ chan->completion_dma = 0;
+ ioat->pending = 0;
+ ioat->dmacount = 0;
+}
+
+enum dma_status
+ioat2_is_complete(struct dma_chan *c, dma_cookie_t cookie,
+ dma_cookie_t *done, dma_cookie_t *used)
+{
+ struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+ struct ioatdma_device *device = ioat->base.device;
+
+ if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS)
+ return DMA_SUCCESS;
+
+ device->cleanup_tasklet((unsigned long) ioat);
+
+ return ioat_is_complete(c, cookie, done, used);
+}
+
+static ssize_t ring_size_show(struct dma_chan *c, char *page)
+{
+ struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+
+ return sprintf(page, "%d\n", (1 << ioat->alloc_order) & ~1);
+}
+static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size);
+
+static ssize_t ring_active_show(struct dma_chan *c, char *page)
+{
+ struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+
+ /* ...taken outside the lock, no need to be precise */
+ return sprintf(page, "%d\n", ioat2_ring_active(ioat));
+}
+static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active);
+
+static struct attribute *ioat2_attrs[] = {
+ &ring_size_attr.attr,
+ &ring_active_attr.attr,
+ &ioat_cap_attr.attr,
+ &ioat_version_attr.attr,
+ NULL,
+};
+
+struct kobj_type ioat2_ktype = {
+ .sysfs_ops = &ioat_sysfs_ops,
+ .default_attrs = ioat2_attrs,
+};
+
+int __devinit ioat2_dma_probe(struct ioatdma_device *device, int dca)
+{
+ struct pci_dev *pdev = device->pdev;
+ struct dma_device *dma;
+ struct dma_chan *c;
+ struct ioat_chan_common *chan;
+ int err;
+
+ device->enumerate_channels = ioat2_enumerate_channels;
+ device->reset_hw = ioat2_reset_hw;
+ device->cleanup_tasklet = ioat2_cleanup_tasklet;
+ device->timer_fn = ioat2_timer_event;
+ device->self_test = ioat_dma_self_test;
+ dma = &device->common;
+ dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock;
+ dma->device_issue_pending = ioat2_issue_pending;
+ dma->device_alloc_chan_resources = ioat2_alloc_chan_resources;
+ dma->device_free_chan_resources = ioat2_free_chan_resources;
+ dma->device_is_tx_complete = ioat2_is_complete;
+
+ err = ioat_probe(device);
+ if (err)
+ return err;
+ ioat_set_tcp_copy_break(2048);
+
+ list_for_each_entry(c, &dma->channels, device_node) {
+ chan = to_chan_common(c);
+ writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE | IOAT_DMA_DCA_ANY_CPU,
+ chan->reg_base + IOAT_DCACTRL_OFFSET);
+ }
+
+ err = ioat_register(device);
+ if (err)
+ return err;
+
+ ioat_kobject_add(device, &ioat2_ktype);
+
+ if (dca)
+ device->dca = ioat2_dca_init(pdev, device->reg_base);
+
+ return err;
+}
diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h
new file mode 100644
index 00000000000..3afad8da43c
--- /dev/null
+++ b/drivers/dma/ioat/dma_v2.h
@@ -0,0 +1,192 @@
+/*
+ * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+#ifndef IOATDMA_V2_H
+#define IOATDMA_V2_H
+
+#include <linux/dmaengine.h>
+#include "dma.h"
+#include "hw.h"
+
+
+extern int ioat_pending_level;
+extern int ioat_ring_alloc_order;
+
+/*
+ * workaround for IOAT ver.3.0 null descriptor issue
+ * (channel returns error when size is 0)
+ */
+#define NULL_DESC_BUFFER_SIZE 1
+
+#define IOAT_MAX_ORDER 16
+#define ioat_get_alloc_order() \
+ (min(ioat_ring_alloc_order, IOAT_MAX_ORDER))
+#define ioat_get_max_alloc_order() \
+ (min(ioat_ring_max_alloc_order, IOAT_MAX_ORDER))
+
+/* struct ioat2_dma_chan - ioat v2 / v3 channel attributes
+ * @base: common ioat channel parameters
+ * @xfercap_log; log2 of channel max transfer length (for fast division)
+ * @head: allocated index
+ * @issued: hardware notification point
+ * @tail: cleanup index
+ * @pending: lock free indicator for issued != head
+ * @dmacount: identical to 'head' except for occasionally resetting to zero
+ * @alloc_order: log2 of the number of allocated descriptors
+ * @ring: software ring buffer implementation of hardware ring
+ * @ring_lock: protects ring attributes
+ */
+struct ioat2_dma_chan {
+ struct ioat_chan_common base;
+ size_t xfercap_log;
+ u16 head;
+ u16 issued;
+ u16 tail;
+ u16 dmacount;
+ u16 alloc_order;
+ int pending;
+ struct ioat_ring_ent **ring;
+ spinlock_t ring_lock;
+};
+
+static inline struct ioat2_dma_chan *to_ioat2_chan(struct dma_chan *c)
+{
+ struct ioat_chan_common *chan = to_chan_common(c);
+
+ return container_of(chan, struct ioat2_dma_chan, base);
+}
+
+static inline u16 ioat2_ring_mask(struct ioat2_dma_chan *ioat)
+{
+ return (1 << ioat->alloc_order) - 1;
+}
+
+/* count of descriptors in flight with the engine */
+static inline u16 ioat2_ring_active(struct ioat2_dma_chan *ioat)
+{
+ return (ioat->head - ioat->tail) & ioat2_ring_mask(ioat);
+}
+
+/* count of descriptors pending submission to hardware */
+static inline u16 ioat2_ring_pending(struct ioat2_dma_chan *ioat)
+{
+ return (ioat->head - ioat->issued) & ioat2_ring_mask(ioat);
+}
+
+static inline u16 ioat2_ring_space(struct ioat2_dma_chan *ioat)
+{
+ u16 num_descs = ioat2_ring_mask(ioat) + 1;
+ u16 active = ioat2_ring_active(ioat);
+
+ BUG_ON(active > num_descs);
+
+ return num_descs - active;
+}
+
+/* assumes caller already checked space */
+static inline u16 ioat2_desc_alloc(struct ioat2_dma_chan *ioat, u16 len)
+{
+ ioat->head += len;
+ return ioat->head - len;
+}
+
+static inline u16 ioat2_xferlen_to_descs(struct ioat2_dma_chan *ioat, size_t len)
+{
+ u16 num_descs = len >> ioat->xfercap_log;
+
+ num_descs += !!(len & ((1 << ioat->xfercap_log) - 1));
+ return num_descs;
+}
+
+/**
+ * struct ioat_ring_ent - wrapper around hardware descriptor
+ * @hw: hardware DMA descriptor (for memcpy)
+ * @fill: hardware fill descriptor
+ * @xor: hardware xor descriptor
+ * @xor_ex: hardware xor extension descriptor
+ * @pq: hardware pq descriptor
+ * @pq_ex: hardware pq extension descriptor
+ * @pqu: hardware pq update descriptor
+ * @raw: hardware raw (un-typed) descriptor
+ * @txd: the generic software descriptor for all engines
+ * @len: total transaction length for unmap
+ * @result: asynchronous result of validate operations
+ * @id: identifier for debug
+ */
+
+struct ioat_ring_ent {
+ union {
+ struct ioat_dma_descriptor *hw;
+ struct ioat_fill_descriptor *fill;
+ struct ioat_xor_descriptor *xor;
+ struct ioat_xor_ext_descriptor *xor_ex;
+ struct ioat_pq_descriptor *pq;
+ struct ioat_pq_ext_descriptor *pq_ex;
+ struct ioat_pq_update_descriptor *pqu;
+ struct ioat_raw_descriptor *raw;
+ };
+ size_t len;
+ struct dma_async_tx_descriptor txd;
+ enum sum_check_flags *result;
+ #ifdef DEBUG
+ int id;
+ #endif
+};
+
+static inline struct ioat_ring_ent *
+ioat2_get_ring_ent(struct ioat2_dma_chan *ioat, u16 idx)
+{
+ return ioat->ring[idx & ioat2_ring_mask(ioat)];
+}
+
+static inline void ioat2_set_chainaddr(struct ioat2_dma_chan *ioat, u64 addr)
+{
+ struct ioat_chan_common *chan = &ioat->base;
+
+ writel(addr & 0x00000000FFFFFFFF,
+ chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW);
+ writel(addr >> 32,
+ chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH);
+}
+
+int __devinit ioat2_dma_probe(struct ioatdma_device *dev, int dca);
+int __devinit ioat3_dma_probe(struct ioatdma_device *dev, int dca);
+struct dca_provider * __devinit ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase);
+struct dca_provider * __devinit ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase);
+int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_descs);
+int ioat2_enumerate_channels(struct ioatdma_device *device);
+struct dma_async_tx_descriptor *
+ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest,
+ dma_addr_t dma_src, size_t len, unsigned long flags);
+void ioat2_issue_pending(struct dma_chan *chan);
+int ioat2_alloc_chan_resources(struct dma_chan *c);
+void ioat2_free_chan_resources(struct dma_chan *c);
+enum dma_status ioat2_is_complete(struct dma_chan *c, dma_cookie_t cookie,
+ dma_cookie_t *done, dma_cookie_t *used);
+void __ioat2_restart_chan(struct ioat2_dma_chan *ioat);
+bool reshape_ring(struct ioat2_dma_chan *ioat, int order);
+void __ioat2_issue_pending(struct ioat2_dma_chan *ioat);
+void ioat2_cleanup_tasklet(unsigned long data);
+void ioat2_timer_event(unsigned long data);
+int ioat2_quiesce(struct ioat_chan_common *chan, unsigned long tmo);
+int ioat2_reset_sync(struct ioat_chan_common *chan, unsigned long tmo);
+extern struct kobj_type ioat2_ktype;
+extern struct kmem_cache *ioat2_cache;
+#endif /* IOATDMA_V2_H */
diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c
new file mode 100644
index 00000000000..9908c9e94b2
--- /dev/null
+++ b/drivers/dma/ioat/dma_v3.c
@@ -0,0 +1,1281 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2004-2009 Intel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Support routines for v3+ hardware
+ */
+
+#include <linux/pci.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include "registers.h"
+#include "hw.h"
+#include "dma.h"
+#include "dma_v2.h"
+
+/* ioat hardware assumes at least two sources for raid operations */
+#define src_cnt_to_sw(x) ((x) + 2)
+#define src_cnt_to_hw(x) ((x) - 2)
+
+/* provide a lookup table for setting the source address in the base or
+ * extended descriptor of an xor or pq descriptor
+ */
+static const u8 xor_idx_to_desc __read_mostly = 0xd0;
+static const u8 xor_idx_to_field[] __read_mostly = { 1, 4, 5, 6, 7, 0, 1, 2 };
+static const u8 pq_idx_to_desc __read_mostly = 0xf8;
+static const u8 pq_idx_to_field[] __read_mostly = { 1, 4, 5, 0, 1, 2, 4, 5 };
+
+static dma_addr_t xor_get_src(struct ioat_raw_descriptor *descs[2], int idx)
+{
+ struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1];
+
+ return raw->field[xor_idx_to_field[idx]];
+}
+
+static void xor_set_src(struct ioat_raw_descriptor *descs[2],
+ dma_addr_t addr, u32 offset, int idx)
+{
+ struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1];
+
+ raw->field[xor_idx_to_field[idx]] = addr + offset;
+}
+
+static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx)
+{
+ struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];
+
+ return raw->field[pq_idx_to_field[idx]];
+}
+
+static void pq_set_src(struct ioat_raw_descriptor *descs[2],
+ dma_addr_t addr, u32 offset, u8 coef, int idx)
+{
+ struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *) descs[0];
+ struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];
+
+ raw->field[pq_idx_to_field[idx]] = addr + offset;
+ pq->coef[idx] = coef;
+}
+
+static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat,
+ struct ioat_ring_ent *desc, int idx)
+{
+ struct ioat_chan_common *chan = &ioat->base;
+ struct pci_dev *pdev = chan->device->pdev;
+ size_t len = desc->len;
+ size_t offset = len - desc->hw->size;
+ struct dma_async_tx_descriptor *tx = &desc->txd;
+ enum dma_ctrl_flags flags = tx->flags;
+
+ switch (desc->hw->ctl_f.op) {
+ case IOAT_OP_COPY:
+ if (!desc->hw->ctl_f.null) /* skip 'interrupt' ops */
+ ioat_dma_unmap(chan, flags, len, desc->hw);
+ break;
+ case IOAT_OP_FILL: {
+ struct ioat_fill_descriptor *hw = desc->fill;
+
+ if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
+ ioat_unmap(pdev, hw->dst_addr - offset, len,
+ PCI_DMA_FROMDEVICE, flags, 1);
+ break;
+ }
+ case IOAT_OP_XOR_VAL:
+ case IOAT_OP_XOR: {
+ struct ioat_xor_descriptor *xor = desc->xor;
+ struct ioat_ring_ent *ext;
+ struct ioat_xor_ext_descriptor *xor_ex = NULL;
+ int src_cnt = src_cnt_to_sw(xor->ctl_f.src_cnt);
+ struct ioat_raw_descriptor *descs[2];
+ int i;
+
+ if (src_cnt > 5) {
+ ext = ioat2_get_ring_ent(ioat, idx + 1);
+ xor_ex = ext->xor_ex;
+ }
+
+ if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+ descs[0] = (struct ioat_raw_descriptor *) xor;
+ descs[1] = (struct ioat_raw_descriptor *) xor_ex;
+ for (i = 0; i < src_cnt; i++) {
+ dma_addr_t src = xor_get_src(descs, i);
+
+ ioat_unmap(pdev, src - offset, len,
+ PCI_DMA_TODEVICE, flags, 0);
+ }
+
+ /* dest is a source in xor validate operations */
+ if (xor->ctl_f.op == IOAT_OP_XOR_VAL) {
+ ioat_unmap(pdev, xor->dst_addr - offset, len,
+ PCI_DMA_TODEVICE, flags, 1);
+ break;
+ }
+ }
+
+ if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
+ ioat_unmap(pdev, xor->dst_addr - offset, len,
+ PCI_DMA_FROMDEVICE, flags, 1);
+ break;
+ }
+ case IOAT_OP_PQ_VAL:
+ case IOAT_OP_PQ: {
+ struct ioat_pq_descriptor *pq = desc->pq;
+ struct ioat_ring_ent *ext;
+ struct ioat_pq_ext_descriptor *pq_ex = NULL;
+ int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
+ struct ioat_raw_descriptor *descs[2];
+ int i;
+
+ if (src_cnt > 3) {
+ ext = ioat2_get_ring_ent(ioat, idx + 1);
+ pq_ex = ext->pq_ex;
+ }
+
+ /* in the 'continue' case don't unmap the dests as sources */
+ if (dmaf_p_disabled_continue(flags))
+ src_cnt--;
+ else if (dmaf_continue(flags))
+ src_cnt -= 3;
+
+ if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+ descs[0] = (struct ioat_raw_descriptor *) pq;
+ descs[1] = (struct ioat_raw_descriptor *) pq_ex;
+ for (i = 0; i < src_cnt; i++) {
+ dma_addr_t src = pq_get_src(descs, i);
+
+ ioat_unmap(pdev, src - offset, len,
+ PCI_DMA_TODEVICE, flags, 0);
+ }
+
+ /* the dests are sources in pq validate operations */
+ if (pq->ctl_f.op == IOAT_OP_XOR_VAL) {
+ if (!(flags & DMA_PREP_PQ_DISABLE_P))
+ ioat_unmap(pdev, pq->p_addr - offset,
+ len, PCI_DMA_TODEVICE, flags, 0);
+ if (!(flags & DMA_PREP_PQ_DISABLE_Q))
+ ioat_unmap(pdev, pq->q_addr - offset,
+ len, PCI_DMA_TODEVICE, flags, 0);
+ break;
+ }
+ }
+
+ if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+ if (!(flags & DMA_PREP_PQ_DISABLE_P))
+ ioat_unmap(pdev, pq->p_addr - offset, len,
+ PCI_DMA_BIDIRECTIONAL, flags, 1);
+ if (!(flags & DMA_PREP_PQ_DISABLE_Q))
+ ioat_unmap(pdev, pq->q_addr - offset, len,
+ PCI_DMA_BIDIRECTIONAL, flags, 1);
+ }
+ break;
+ }
+ default:
+ dev_err(&pdev->dev, "%s: unknown op type: %#x\n",
+ __func__, desc->hw->ctl_f.op);
+ }
+}
+
+static bool desc_has_ext(struct ioat_ring_ent *desc)
+{
+ struct ioat_dma_descriptor *hw = desc->hw;
+
+ if (hw->ctl_f.op == IOAT_OP_XOR ||
+ hw->ctl_f.op == IOAT_OP_XOR_VAL) {
+ struct ioat_xor_descriptor *xor = desc->xor;
+
+ if (src_cnt_to_sw(xor->ctl_f.src_cnt) > 5)
+ return true;
+ } else if (hw->ctl_f.op == IOAT_OP_PQ ||
+ hw->ctl_f.op == IOAT_OP_PQ_VAL) {
+ struct ioat_pq_descriptor *pq = desc->pq;
+
+ if (src_cnt_to_sw(pq->ctl_f.src_cnt) > 3)
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * __cleanup - reclaim used descriptors
+ * @ioat: channel (ring) to clean
+ *
+ * The difference from the dma_v2.c __cleanup() is that this routine
+ * handles extended descriptors and dma-unmapping raid operations.
+ */
+static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
+{
+ struct ioat_chan_common *chan = &ioat->base;
+ struct ioat_ring_ent *desc;
+ bool seen_current = false;
+ u16 active;
+ int i;
+
+ dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n",
+ __func__, ioat->head, ioat->tail, ioat->issued);
+
+ active = ioat2_ring_active(ioat);
+ for (i = 0; i < active && !seen_current; i++) {
+ struct dma_async_tx_descriptor *tx;
+
+ prefetch(ioat2_get_ring_ent(ioat, ioat->tail + i + 1));
+ desc = ioat2_get_ring_ent(ioat, ioat->tail + i);
+ dump_desc_dbg(ioat, desc);
+ tx = &desc->txd;
+ if (tx->cookie) {
+ chan->completed_cookie = tx->cookie;
+ ioat3_dma_unmap(ioat, desc, ioat->tail + i);
+ tx->cookie = 0;
+ if (tx->callback) {
+ tx->callback(tx->callback_param);
+ tx->callback = NULL;
+ }
+ }
+
+ if (tx->phys == phys_complete)
+ seen_current = true;
+
+ /* skip extended descriptors */
+ if (desc_has_ext(desc)) {
+ BUG_ON(i + 1 >= active);
+ i++;
+ }
+ }
+ ioat->tail += i;
+ BUG_ON(!seen_current); /* no active descs have written a completion? */
+ chan->last_completion = phys_complete;
+ if (ioat->head == ioat->tail) {
+ dev_dbg(to_dev(chan), "%s: cancel completion timeout\n",
+ __func__);
+ clear_bit(IOAT_COMPLETION_PENDING, &chan->state);
+ mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
+ }
+}
+
+static void ioat3_cleanup(struct ioat2_dma_chan *ioat)
+{
+ struct ioat_chan_common *chan = &ioat->base;
+ unsigned long phys_complete;
+
+ prefetch(chan->completion);
+
+ if (!spin_trylock_bh(&chan->cleanup_lock))
+ return;
+
+ if (!ioat_cleanup_preamble(chan, &phys_complete)) {
+ spin_unlock_bh(&chan->cleanup_lock);
+ return;
+ }
+
+ if (!spin_trylock_bh(&ioat->ring_lock)) {
+ spin_unlock_bh(&chan->cleanup_lock);
+ return;
+ }
+
+ __cleanup(ioat, phys_complete);
+
+ spin_unlock_bh(&ioat->ring_lock);
+ spin_unlock_bh(&chan->cleanup_lock);
+}
+
+static void ioat3_cleanup_tasklet(unsigned long data)
+{
+ struct ioat2_dma_chan *ioat = (void *) data;
+
+ ioat3_cleanup(ioat);
+ writew(IOAT_CHANCTRL_RUN | IOAT3_CHANCTRL_COMPL_DCA_EN,
+ ioat->base.reg_base + IOAT_CHANCTRL_OFFSET);
+}
+
+static void ioat3_restart_channel(struct ioat2_dma_chan *ioat)
+{
+ struct ioat_chan_common *chan = &ioat->base;
+ unsigned long phys_complete;
+ u32 status;
+
+ status = ioat_chansts(chan);
+ if (is_ioat_active(status) || is_ioat_idle(status))
+ ioat_suspend(chan);
+ while (is_ioat_active(status) || is_ioat_idle(status)) {
+ status = ioat_chansts(chan);
+ cpu_relax();
+ }
+
+ if (ioat_cleanup_preamble(chan, &phys_complete))
+ __cleanup(ioat, phys_complete);
+
+ __ioat2_restart_chan(ioat);
+}
+
+static void ioat3_timer_event(unsigned long data)
+{
+ struct ioat2_dma_chan *ioat = (void *) data;
+ struct ioat_chan_common *chan = &ioat->base;
+
+ spin_lock_bh(&chan->cleanup_lock);
+ if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) {
+ unsigned long phys_complete;
+ u64 status;
+
+ spin_lock_bh(&ioat->ring_lock);
+ status = ioat_chansts(chan);
+
+ /* when halted due to errors check for channel
+ * programming errors before advancing the completion state
+ */
+ if (is_ioat_halted(status)) {
+ u32 chanerr;
+
+ chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
+ dev_err(to_dev(chan), "%s: Channel halted (%x)\n",
+ __func__, chanerr);
+ BUG_ON(is_ioat_bug(chanerr));
+ }
+
+ /* if we haven't made progress and we have already
+ * acknowledged a pending completion once, then be more
+ * forceful with a restart
+ */
+ if (ioat_cleanup_preamble(chan, &phys_complete))
+ __cleanup(ioat, phys_complete);
+ else if (test_bit(IOAT_COMPLETION_ACK, &chan->state))
+ ioat3_restart_channel(ioat);
+ else {
+ set_bit(IOAT_COMPLETION_ACK, &chan->state);
+ mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+ }
+ spin_unlock_bh(&ioat->ring_lock);
+ } else {
+ u16 active;
+
+ /* if the ring is idle, empty, and oversized try to step
+ * down the size
+ */
+ spin_lock_bh(&ioat->ring_lock);
+ active = ioat2_ring_active(ioat);
+ if (active == 0 && ioat->alloc_order > ioat_get_alloc_order())
+ reshape_ring(ioat, ioat->alloc_order-1);
+ spin_unlock_bh(&ioat->ring_lock);
+
+ /* keep shrinking until we get back to our minimum
+ * default size
+ */
+ if (ioat->alloc_order > ioat_get_alloc_order())
+ mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
+ }
+ spin_unlock_bh(&chan->cleanup_lock);
+}
+
+static enum dma_status
+ioat3_is_complete(struct dma_chan *c, dma_cookie_t cookie,
+ dma_cookie_t *done, dma_cookie_t *used)
+{
+ struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+
+ if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS)
+ return DMA_SUCCESS;
+
+ ioat3_cleanup(ioat);
+
+ return ioat_is_complete(c, cookie, done, used);
+}
+
+static struct dma_async_tx_descriptor *
+ioat3_prep_memset_lock(struct dma_chan *c, dma_addr_t dest, int value,
+ size_t len, unsigned long flags)
+{
+ struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+ struct ioat_ring_ent *desc;
+ size_t total_len = len;
+ struct ioat_fill_descriptor *fill;
+ int num_descs;
+ u64 src_data = (0x0101010101010101ULL) * (value & 0xff);
+ u16 idx;
+ int i;
+
+ num_descs = ioat2_xferlen_to_descs(ioat, len);
+ if (likely(num_descs) &&
+ ioat2_alloc_and_lock(&idx, ioat, num_descs) == 0)
+ /* pass */;
+ else
+ return NULL;
+ i = 0;
+ do {
+ size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
+
+ desc = ioat2_get_ring_ent(ioat, idx + i);
+ fill = desc->fill;
+
+ fill->size = xfer_size;
+ fill->src_data = src_data;
+ fill->dst_addr = dest;
+ fill->ctl = 0;
+ fill->ctl_f.op = IOAT_OP_FILL;
+
+ len -= xfer_size;
+ dest += xfer_size;
+ dump_desc_dbg(ioat, desc);
+ } while (++i < num_descs);
+
+ desc->txd.flags = flags;
+ desc->len = total_len;
+ fill->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+ fill->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
+ fill->ctl_f.compl_write = 1;
+ dump_desc_dbg(ioat, desc);
+
+ /* we leave the channel locked to ensure in order submission */
+ return &desc->txd;
+}
+
+static struct dma_async_tx_descriptor *
+__ioat3_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result,
+ dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt,
+ size_t len, unsigned long flags)
+{
+ struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+ struct ioat_ring_ent *compl_desc;
+ struct ioat_ring_ent *desc;
+ struct ioat_ring_ent *ext;
+ size_t total_len = len;
+ struct ioat_xor_descriptor *xor;
+ struct ioat_xor_ext_descriptor *xor_ex = NULL;
+ struct ioat_dma_descriptor *hw;
+ u32 offset = 0;
+ int num_descs;
+ int with_ext;
+ int i;
+ u16 idx;
+ u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR;
+
+ BUG_ON(src_cnt < 2);
+
+ num_descs = ioat2_xferlen_to_descs(ioat, len);
+ /* we need 2x the number of descriptors to cover greater than 5
+ * sources
+ */
+ if (src_cnt > 5) {
+ with_ext = 1;
+ num_descs *= 2;
+ } else
+ with_ext = 0;
+
+ /* completion writes from the raid engine may pass completion
+ * writes from the legacy engine, so we need one extra null
+ * (legacy) descriptor to ensure all completion writes arrive in
+ * order.
+ */
+ if (likely(num_descs) &&
+ ioat2_alloc_and_lock(&idx, ioat, num_descs+1) == 0)
+ /* pass */;
+ else
+ return NULL;
+ i = 0;
+ do {
+ struct ioat_raw_descriptor *descs[2];
+ size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
+ int s;
+
+ desc = ioat2_get_ring_ent(ioat, idx + i);
+ xor = desc->xor;
+
+ /* save a branch by unconditionally retrieving the
+ * extended descriptor xor_set_src() knows to not write
+ * to it in the single descriptor case
+ */
+ ext = ioat2_get_ring_ent(ioat, idx + i + 1);
+ xor_ex = ext->xor_ex;
+
+ descs[0] = (struct ioat_raw_descriptor *) xor;
+ descs[1] = (struct ioat_raw_descriptor *) xor_ex;
+ for (s = 0; s < src_cnt; s++)
+ xor_set_src(descs, src[s], offset, s);
+ xor->size = xfer_size;
+ xor->dst_addr = dest + offset;
+ xor->ctl = 0;
+ xor->ctl_f.op = op;
+ xor->ctl_f.src_cnt = src_cnt_to_hw(src_cnt);
+
+ len -= xfer_size;
+ offset += xfer_size;
+ dump_desc_dbg(ioat, desc);
+ } while ((i += 1 + with_ext) < num_descs);
+
+ /* last xor descriptor carries the unmap parameters and fence bit */
+ desc->txd.flags = flags;
+ desc->len = total_len;
+ if (result)
+ desc->result = result;
+ xor->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
+
+ /* completion descriptor carries interrupt bit */
+ compl_desc = ioat2_get_ring_ent(ioat, idx + i);
+ compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
+ hw = compl_desc->hw;
+ hw->ctl = 0;
+ hw->ctl_f.null = 1;
+ hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+ hw->ctl_f.compl_write = 1;
+ hw->size = NULL_DESC_BUFFER_SIZE;
+ dump_desc_dbg(ioat, compl_desc);
+
+ /* we leave the channel locked to ensure in order submission */
+ return &compl_desc->txd;
+}
+
+static struct dma_async_tx_descriptor *
+ioat3_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
+ unsigned int src_cnt, size_t len, unsigned long flags)
+{
+ return __ioat3_prep_xor_lock(chan, NULL, dest, src, src_cnt, len, flags);
+}
+
+struct dma_async_tx_descriptor *
+ioat3_prep_xor_val(struct dma_chan *chan, dma_addr_t *src,
+ unsigned int src_cnt, size_t len,
+ enum sum_check_flags *result, unsigned long flags)
+{
+ /* the cleanup routine only sets bits on validate failure, it
+ * does not clear bits on validate success... so clear it here
+ */
+ *result = 0;
+
+ return __ioat3_prep_xor_lock(chan, result, src[0], &src[1],
+ src_cnt - 1, len, flags);
+}
+
+static void
+dump_pq_desc_dbg(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc, struct ioat_ring_ent *ext)
+{
+ struct device *dev = to_dev(&ioat->base);
+ struct ioat_pq_descriptor *pq = desc->pq;
+ struct ioat_pq_ext_descriptor *pq_ex = ext ? ext->pq_ex : NULL;
+ struct ioat_raw_descriptor *descs[] = { (void *) pq, (void *) pq_ex };
+ int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
+ int i;
+
+ dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x"
+ " sz: %#x ctl: %#x (op: %d int: %d compl: %d pq: '%s%s' src_cnt: %d)\n",
+ desc_id(desc), (unsigned long long) desc->txd.phys,
+ (unsigned long long) (pq_ex ? pq_ex->next : pq->next),
+ desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op, pq->ctl_f.int_en,
+ pq->ctl_f.compl_write,
+ pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q",
+ pq->ctl_f.src_cnt);
+ for (i = 0; i < src_cnt; i++)
+ dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i,
+ (unsigned long long) pq_get_src(descs, i), pq->coef[i]);
+ dev_dbg(dev, "\tP: %#llx\n", pq->p_addr);
+ dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr);
+}
+
+static struct dma_async_tx_descriptor *
+__ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result,
+ const dma_addr_t *dst, const dma_addr_t *src,
+ unsigned int src_cnt, const unsigned char *scf,
+ size_t len, unsigned long flags)
+{
+ struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+ struct ioat_chan_common *chan = &ioat->base;
+ struct ioat_ring_ent *compl_desc;
+ struct ioat_ring_ent *desc;
+ struct ioat_ring_ent *ext;
+ size_t total_len = len;
+ struct ioat_pq_descriptor *pq;
+ struct ioat_pq_ext_descriptor *pq_ex = NULL;
+ struct ioat_dma_descriptor *hw;
+ u32 offset = 0;
+ int num_descs;
+ int with_ext;
+ int i, s;
+ u16 idx;
+ u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ;
+
+ dev_dbg(to_dev(chan), "%s\n", __func__);
+ /* the engine requires at least two sources (we provide
+ * at least 1 implied source in the DMA_PREP_CONTINUE case)
+ */
+ BUG_ON(src_cnt + dmaf_continue(flags) < 2);
+
+ num_descs = ioat2_xferlen_to_descs(ioat, len);
+ /* we need 2x the number of descriptors to cover greater than 3
+ * sources (we need 1 extra source in the q-only continuation
+ * case and 3 extra sources in the p+q continuation case.
+ */
+ if (src_cnt + dmaf_p_disabled_continue(flags) > 3 ||
+ (dmaf_continue(flags) && !dmaf_p_disabled_continue(flags))) {
+ with_ext = 1;
+ num_descs *= 2;
+ } else
+ with_ext = 0;
+
+ /* completion writes from the raid engine may pass completion
+ * writes from the legacy engine, so we need one extra null
+ * (legacy) descriptor to ensure all completion writes arrive in
+ * order.
+ */
+ if (likely(num_descs) &&
+ ioat2_alloc_and_lock(&idx, ioat, num_descs+1) == 0)
+ /* pass */;
+ else
+ return NULL;
+ i = 0;
+ do {
+ struct ioat_raw_descriptor *descs[2];
+ size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
+
+ desc = ioat2_get_ring_ent(ioat, idx + i);
+ pq = desc->pq;
+
+ /* save a branch by unconditionally retrieving the
+ * extended descriptor pq_set_src() knows to not write
+ * to it in the single descriptor case
+ */
+ ext = ioat2_get_ring_ent(ioat, idx + i + with_ext);
+ pq_ex = ext->pq_ex;
+
+ descs[0] = (struct ioat_raw_descriptor *) pq;
+ descs[1] = (struct ioat_raw_descriptor *) pq_ex;
+
+ for (s = 0; s < src_cnt; s++)
+ pq_set_src(descs, src[s], offset, scf[s], s);
+
+ /* see the comment for dma_maxpq in include/linux/dmaengine.h */
+ if (dmaf_p_disabled_continue(flags))
+ pq_set_src(descs, dst[1], offset, 1, s++);
+ else if (dmaf_continue(flags)) {
+ pq_set_src(descs, dst[0], offset, 0, s++);
+ pq_set_src(descs, dst[1], offset, 1, s++);
+ pq_set_src(descs, dst[1], offset, 0, s++);
+ }
+ pq->size = xfer_size;
+ pq->p_addr = dst[0] + offset;
+ pq->q_addr = dst[1] + offset;
+ pq->ctl = 0;
+ pq->ctl_f.op = op;
+ pq->ctl_f.src_cnt = src_cnt_to_hw(s);
+ pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P);
+ pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q);
+
+ len -= xfer_size;
+ offset += xfer_size;
+ } while ((i += 1 + with_ext) < num_descs);
+
+ /* last pq descriptor carries the unmap parameters and fence bit */
+ desc->txd.flags = flags;
+ desc->len = total_len;
+ if (result)
+ desc->result = result;
+ pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
+ dump_pq_desc_dbg(ioat, desc, ext);
+
+ /* completion descriptor carries interrupt bit */
+ compl_desc = ioat2_get_ring_ent(ioat, idx + i);
+ compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
+ hw = compl_desc->hw;
+ hw->ctl = 0;
+ hw->ctl_f.null = 1;
+ hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+ hw->ctl_f.compl_write = 1;
+ hw->size = NULL_DESC_BUFFER_SIZE;
+ dump_desc_dbg(ioat, compl_desc);
+
+ /* we leave the channel locked to ensure in order submission */
+ return &compl_desc->txd;
+}
+
+static struct dma_async_tx_descriptor *
+ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
+ unsigned int src_cnt, const unsigned char *scf, size_t len,
+ unsigned long flags)
+{
+ /* specify valid address for disabled result */
+ if (flags & DMA_PREP_PQ_DISABLE_P)
+ dst[0] = dst[1];
+ if (flags & DMA_PREP_PQ_DISABLE_Q)
+ dst[1] = dst[0];
+
+ /* handle the single source multiply case from the raid6
+ * recovery path
+ */
+ if ((flags & DMA_PREP_PQ_DISABLE_P) && src_cnt == 1) {
+ dma_addr_t single_source[2];
+ unsigned char single_source_coef[2];
+
+ BUG_ON(flags & DMA_PREP_PQ_DISABLE_Q);
+ single_source[0] = src[0];
+ single_source[1] = src[0];
+ single_source_coef[0] = scf[0];
+ single_source_coef[1] = 0;
+
+ return __ioat3_prep_pq_lock(chan, NULL, dst, single_source, 2,
+ single_source_coef, len, flags);
+ } else
+ return __ioat3_prep_pq_lock(chan, NULL, dst, src, src_cnt, scf,
+ len, flags);
+}
+
+struct dma_async_tx_descriptor *
+ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
+ unsigned int src_cnt, const unsigned char *scf, size_t len,
+ enum sum_check_flags *pqres, unsigned long flags)
+{
+ /* specify valid address for disabled result */
+ if (flags & DMA_PREP_PQ_DISABLE_P)
+ pq[0] = pq[1];
+ if (flags & DMA_PREP_PQ_DISABLE_Q)
+ pq[1] = pq[0];
+
+ /* the cleanup routine only sets bits on validate failure, it
+ * does not clear bits on validate success... so clear it here
+ */
+ *pqres = 0;
+
+ return __ioat3_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len,
+ flags);
+}
+
+static struct dma_async_tx_descriptor *
+ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src,
+ unsigned int src_cnt, size_t len, unsigned long flags)
+{
+ unsigned char scf[src_cnt];
+ dma_addr_t pq[2];
+
+ memset(scf, 0, src_cnt);
+ pq[0] = dst;
+ flags |= DMA_PREP_PQ_DISABLE_Q;
+ pq[1] = dst; /* specify valid address for disabled result */
+
+ return __ioat3_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len,
+ flags);
+}
+
+struct dma_async_tx_descriptor *
+ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src,
+ unsigned int src_cnt, size_t len,
+ enum sum_check_flags *result, unsigned long flags)
+{
+ unsigned char scf[src_cnt];
+ dma_addr_t pq[2];
+
+ /* the cleanup routine only sets bits on validate failure, it
+ * does not clear bits on validate success... so clear it here
+ */
+ *result = 0;
+
+ memset(scf, 0, src_cnt);
+ pq[0] = src[0];
+ flags |= DMA_PREP_PQ_DISABLE_Q;
+ pq[1] = pq[0]; /* specify valid address for disabled result */
+
+ return __ioat3_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1, scf,
+ len, flags);
+}
+
+static struct dma_async_tx_descriptor *
+ioat3_prep_interrupt_lock(struct dma_chan *c, unsigned long flags)
+{
+ struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+ struct ioat_ring_ent *desc;
+ struct ioat_dma_descriptor *hw;
+ u16 idx;
+
+ if (ioat2_alloc_and_lock(&idx, ioat, 1) == 0)
+ desc = ioat2_get_ring_ent(ioat, idx);
+ else
+ return NULL;
+
+ hw = desc->hw;
+ hw->ctl = 0;
+ hw->ctl_f.null = 1;
+ hw->ctl_f.int_en = 1;
+ hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
+ hw->ctl_f.compl_write = 1;
+ hw->size = NULL_DESC_BUFFER_SIZE;
+ hw->src_addr = 0;
+ hw->dst_addr = 0;
+
+ desc->txd.flags = flags;
+ desc->len = 1;
+
+ dump_desc_dbg(ioat, desc);
+
+ /* we leave the channel locked to ensure in order submission */
+ return &desc->txd;
+}
+
+static void __devinit ioat3_dma_test_callback(void *dma_async_param)
+{
+ struct completion *cmp = dma_async_param;
+
+ complete(cmp);
+}
+
+#define IOAT_NUM_SRC_TEST 6 /* must be <= 8 */
+static int __devinit ioat_xor_val_self_test(struct ioatdma_device *device)
+{
+ int i, src_idx;
+ struct page *dest;
+ struct page *xor_srcs[IOAT_NUM_SRC_TEST];
+ struct page *xor_val_srcs[IOAT_NUM_SRC_TEST + 1];
+ dma_addr_t dma_srcs[IOAT_NUM_SRC_TEST + 1];
+ dma_addr_t dma_addr, dest_dma;
+ struct dma_async_tx_descriptor *tx;
+ struct dma_chan *dma_chan;
+ dma_cookie_t cookie;
+ u8 cmp_byte = 0;
+ u32 cmp_word;
+ u32 xor_val_result;
+ int err = 0;
+ struct completion cmp;
+ unsigned long tmo;
+ struct device *dev = &device->pdev->dev;
+ struct dma_device *dma = &device->common;
+
+ dev_dbg(dev, "%s\n", __func__);
+
+ if (!dma_has_cap(DMA_XOR, dma->cap_mask))
+ return 0;
+
+ for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) {
+ xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
+ if (!xor_srcs[src_idx]) {
+ while (src_idx--)
+ __free_page(xor_srcs[src_idx]);
+ return -ENOMEM;
+ }
+ }
+
+ dest = alloc_page(GFP_KERNEL);
+ if (!dest) {
+ while (src_idx--)
+ __free_page(xor_srcs[src_idx]);
+ return -ENOMEM;
+ }
+
+ /* Fill in src buffers */
+ for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) {
+ u8 *ptr = page_address(xor_srcs[src_idx]);
+ for (i = 0; i < PAGE_SIZE; i++)
+ ptr[i] = (1 << src_idx);
+ }
+
+ for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++)
+ cmp_byte ^= (u8) (1 << src_idx);
+
+ cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
+ (cmp_byte << 8) | cmp_byte;
+
+ memset(page_address(dest), 0, PAGE_SIZE);
+
+ dma_chan = container_of(dma->channels.next, struct dma_chan,
+ device_node);
+ if (dma->device_alloc_chan_resources(dma_chan) < 1) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ /* test xor */
+ dest_dma = dma_map_page(dev, dest, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+ for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
+ dma_srcs[i] = dma_map_page(dev, xor_srcs[i], 0, PAGE_SIZE,
+ DMA_TO_DEVICE);
+ tx = dma->device_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
+ IOAT_NUM_SRC_TEST, PAGE_SIZE,
+ DMA_PREP_INTERRUPT);
+
+ if (!tx) {
+ dev_err(dev, "Self-test xor prep failed\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ async_tx_ack(tx);
+ init_completion(&cmp);
+ tx->callback = ioat3_dma_test_callback;
+ tx->callback_param = &cmp;
+ cookie = tx->tx_submit(tx);
+ if (cookie < 0) {
+ dev_err(dev, "Self-test xor setup failed\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+ dma->device_issue_pending(dma_chan);
+
+ tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
+
+ if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
+ dev_err(dev, "Self-test xor timed out\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ dma_sync_single_for_cpu(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
+ for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
+ u32 *ptr = page_address(dest);
+ if (ptr[i] != cmp_word) {
+ dev_err(dev, "Self-test xor failed compare\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+ }
+ dma_sync_single_for_device(dev, dest_dma, PAGE_SIZE, DMA_TO_DEVICE);
+
+ /* skip validate if the capability is not present */
+ if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask))
+ goto free_resources;
+
+ /* validate the sources with the destintation page */
+ for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
+ xor_val_srcs[i] = xor_srcs[i];
+ xor_val_srcs[i] = dest;
+
+ xor_val_result = 1;
+
+ for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
+ dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
+ DMA_TO_DEVICE);
+ tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
+ IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
+ &xor_val_result, DMA_PREP_INTERRUPT);
+ if (!tx) {
+ dev_err(dev, "Self-test zero prep failed\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ async_tx_ack(tx);
+ init_completion(&cmp);
+ tx->callback = ioat3_dma_test_callback;
+ tx->callback_param = &cmp;
+ cookie = tx->tx_submit(tx);
+ if (cookie < 0) {
+ dev_err(dev, "Self-test zero setup failed\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+ dma->device_issue_pending(dma_chan);
+
+ tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
+
+ if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
+ dev_err(dev, "Self-test validate timed out\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ if (xor_val_result != 0) {
+ dev_err(dev, "Self-test validate failed compare\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ /* skip memset if the capability is not present */
+ if (!dma_has_cap(DMA_MEMSET, dma_chan->device->cap_mask))
+ goto free_resources;
+
+ /* test memset */
+ dma_addr = dma_map_page(dev, dest, 0,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ tx = dma->device_prep_dma_memset(dma_chan, dma_addr, 0, PAGE_SIZE,
+ DMA_PREP_INTERRUPT);
+ if (!tx) {
+ dev_err(dev, "Self-test memset prep failed\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ async_tx_ack(tx);
+ init_completion(&cmp);
+ tx->callback = ioat3_dma_test_callback;
+ tx->callback_param = &cmp;
+ cookie = tx->tx_submit(tx);
+ if (cookie < 0) {
+ dev_err(dev, "Self-test memset setup failed\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+ dma->device_issue_pending(dma_chan);
+
+ tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
+
+ if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
+ dev_err(dev, "Self-test memset timed out\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ for (i = 0; i < PAGE_SIZE/sizeof(u32); i++) {
+ u32 *ptr = page_address(dest);
+ if (ptr[i]) {
+ dev_err(dev, "Self-test memset failed compare\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+ }
+
+ /* test for non-zero parity sum */
+ xor_val_result = 0;
+ for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
+ dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
+ DMA_TO_DEVICE);
+ tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
+ IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
+ &xor_val_result, DMA_PREP_INTERRUPT);
+ if (!tx) {
+ dev_err(dev, "Self-test 2nd zero prep failed\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ async_tx_ack(tx);
+ init_completion(&cmp);
+ tx->callback = ioat3_dma_test_callback;
+ tx->callback_param = &cmp;
+ cookie = tx->tx_submit(tx);
+ if (cookie < 0) {
+ dev_err(dev, "Self-test 2nd zero setup failed\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+ dma->device_issue_pending(dma_chan);
+
+ tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
+
+ if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
+ dev_err(dev, "Self-test 2nd validate timed out\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ if (xor_val_result != SUM_CHECK_P_RESULT) {
+ dev_err(dev, "Self-test validate failed compare\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+free_resources:
+ dma->device_free_chan_resources(dma_chan);
+out:
+ src_idx = IOAT_NUM_SRC_TEST;
+ while (src_idx--)
+ __free_page(xor_srcs[src_idx]);
+ __free_page(dest);
+ return err;
+}
+
+static int __devinit ioat3_dma_self_test(struct ioatdma_device *device)
+{
+ int rc = ioat_dma_self_test(device);
+
+ if (rc)
+ return rc;
+
+ rc = ioat_xor_val_self_test(device);
+ if (rc)
+ return rc;
+
+ return 0;
+}
+
+static int ioat3_reset_hw(struct ioat_chan_common *chan)
+{
+ /* throw away whatever the channel was doing and get it
+ * initialized, with ioat3 specific workarounds
+ */
+ struct ioatdma_device *device = chan->device;
+ struct pci_dev *pdev = device->pdev;
+ u32 chanerr;
+ u16 dev_id;
+ int err;
+
+ ioat2_quiesce(chan, msecs_to_jiffies(100));
+
+ chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
+ writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET);
+
+ /* -= IOAT ver.3 workarounds =- */
+ /* Write CHANERRMSK_INT with 3E07h to mask out the errors
+ * that can cause stability issues for IOAT ver.3, and clear any
+ * pending errors
+ */
+ pci_write_config_dword(pdev, IOAT_PCI_CHANERRMASK_INT_OFFSET, 0x3e07);
+ err = pci_read_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, &chanerr);
+ if (err) {
+ dev_err(&pdev->dev, "channel error register unreachable\n");
+ return err;
+ }
+ pci_write_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, chanerr);
+
+ /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit
+ * (workaround for spurious config parity error after restart)
+ */
+ pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id);
+ if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0)
+ pci_write_config_dword(pdev, IOAT_PCI_DMAUNCERRSTS_OFFSET, 0x10);
+
+ return ioat2_reset_sync(chan, msecs_to_jiffies(200));
+}
+
+int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca)
+{
+ struct pci_dev *pdev = device->pdev;
+ int dca_en = system_has_dca_enabled(pdev);
+ struct dma_device *dma;
+ struct dma_chan *c;
+ struct ioat_chan_common *chan;
+ bool is_raid_device = false;
+ int err;
+ u32 cap;
+
+ device->enumerate_channels = ioat2_enumerate_channels;
+ device->reset_hw = ioat3_reset_hw;
+ device->self_test = ioat3_dma_self_test;
+ dma = &device->common;
+ dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock;
+ dma->device_issue_pending = ioat2_issue_pending;
+ dma->device_alloc_chan_resources = ioat2_alloc_chan_resources;
+ dma->device_free_chan_resources = ioat2_free_chan_resources;
+
+ dma_cap_set(DMA_INTERRUPT, dma->cap_mask);
+ dma->device_prep_dma_interrupt = ioat3_prep_interrupt_lock;
+
+ cap = readl(device->reg_base + IOAT_DMA_CAP_OFFSET);
+
+ /* dca is incompatible with raid operations */
+ if (dca_en && (cap & (IOAT_CAP_XOR|IOAT_CAP_PQ)))
+ cap &= ~(IOAT_CAP_XOR|IOAT_CAP_PQ);
+
+ if (cap & IOAT_CAP_XOR) {
+ is_raid_device = true;
+ dma->max_xor = 8;
+ dma->xor_align = 2;
+
+ dma_cap_set(DMA_XOR, dma->cap_mask);
+ dma->device_prep_dma_xor = ioat3_prep_xor;
+
+ dma_cap_set(DMA_XOR_VAL, dma->cap_mask);
+ dma->device_prep_dma_xor_val = ioat3_prep_xor_val;
+ }
+ if (cap & IOAT_CAP_PQ) {
+ is_raid_device = true;
+ dma_set_maxpq(dma, 8, 0);
+ dma->pq_align = 2;
+
+ dma_cap_set(DMA_PQ, dma->cap_mask);
+ dma->device_prep_dma_pq = ioat3_prep_pq;
+
+ dma_cap_set(DMA_PQ_VAL, dma->cap_mask);
+ dma->device_prep_dma_pq_val = ioat3_prep_pq_val;
+
+ if (!(cap & IOAT_CAP_XOR)) {
+ dma->max_xor = 8;
+ dma->xor_align = 2;
+
+ dma_cap_set(DMA_XOR, dma->cap_mask);
+ dma->device_prep_dma_xor = ioat3_prep_pqxor;
+
+ dma_cap_set(DMA_XOR_VAL, dma->cap_mask);
+ dma->device_prep_dma_xor_val = ioat3_prep_pqxor_val;
+ }
+ }
+ if (is_raid_device && (cap & IOAT_CAP_FILL_BLOCK)) {
+ dma_cap_set(DMA_MEMSET, dma->cap_mask);
+ dma->device_prep_dma_memset = ioat3_prep_memset_lock;
+ }
+
+
+ if (is_raid_device) {
+ dma->device_is_tx_complete = ioat3_is_complete;
+ device->cleanup_tasklet = ioat3_cleanup_tasklet;
+ device->timer_fn = ioat3_timer_event;
+ } else {
+ dma->device_is_tx_complete = ioat2_is_complete;
+ device->cleanup_tasklet = ioat2_cleanup_tasklet;
+ device->timer_fn = ioat2_timer_event;
+ }
+
+ #ifdef CONFIG_ASYNC_TX_DISABLE_PQ_VAL_DMA
+ dma_cap_clear(DMA_PQ_VAL, dma->cap_mask);
+ dma->device_prep_dma_pq_val = NULL;
+ #endif
+
+ #ifdef CONFIG_ASYNC_TX_DISABLE_XOR_VAL_DMA
+ dma_cap_clear(DMA_XOR_VAL, dma->cap_mask);
+ dma->device_prep_dma_xor_val = NULL;
+ #endif
+
+ err = ioat_probe(device);
+ if (err)
+ return err;
+ ioat_set_tcp_copy_break(262144);
+
+ list_for_each_entry(c, &dma->channels, device_node) {
+ chan = to_chan_common(c);
+ writel(IOAT_DMA_DCA_ANY_CPU,
+ chan->reg_base + IOAT_DCACTRL_OFFSET);
+ }
+
+ err = ioat_register(device);
+ if (err)
+ return err;
+
+ ioat_kobject_add(device, &ioat2_ktype);
+
+ if (dca)
+ device->dca = ioat3_dca_init(pdev, device->reg_base);
+
+ return 0;
+}
diff --git a/drivers/dma/ioat/hw.h b/drivers/dma/ioat/hw.h
new file mode 100644
index 00000000000..60e675455b6
--- /dev/null
+++ b/drivers/dma/ioat/hw.h
@@ -0,0 +1,217 @@
+/*
+ * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+#ifndef _IOAT_HW_H_
+#define _IOAT_HW_H_
+
+/* PCI Configuration Space Values */
+#define IOAT_PCI_VID 0x8086
+#define IOAT_MMIO_BAR 0
+
+/* CB device ID's */
+#define IOAT_PCI_DID_5000 0x1A38
+#define IOAT_PCI_DID_CNB 0x360B
+#define IOAT_PCI_DID_SCNB 0x65FF
+#define IOAT_PCI_DID_SNB 0x402F
+
+#define IOAT_PCI_RID 0x00
+#define IOAT_PCI_SVID 0x8086
+#define IOAT_PCI_SID 0x8086
+#define IOAT_VER_1_2 0x12 /* Version 1.2 */
+#define IOAT_VER_2_0 0x20 /* Version 2.0 */
+#define IOAT_VER_3_0 0x30 /* Version 3.0 */
+#define IOAT_VER_3_2 0x32 /* Version 3.2 */
+
+int system_has_dca_enabled(struct pci_dev *pdev);
+
+struct ioat_dma_descriptor {
+ uint32_t size;
+ union {
+ uint32_t ctl;
+ struct {
+ unsigned int int_en:1;
+ unsigned int src_snoop_dis:1;
+ unsigned int dest_snoop_dis:1;
+ unsigned int compl_write:1;
+ unsigned int fence:1;
+ unsigned int null:1;
+ unsigned int src_brk:1;
+ unsigned int dest_brk:1;
+ unsigned int bundle:1;
+ unsigned int dest_dca:1;
+ unsigned int hint:1;
+ unsigned int rsvd2:13;
+ #define IOAT_OP_COPY 0x00
+ unsigned int op:8;
+ } ctl_f;
+ };
+ uint64_t src_addr;
+ uint64_t dst_addr;
+ uint64_t next;
+ uint64_t rsv1;
+ uint64_t rsv2;
+ /* store some driver data in an unused portion of the descriptor */
+ union {
+ uint64_t user1;
+ uint64_t tx_cnt;
+ };
+ uint64_t user2;
+};
+
+struct ioat_fill_descriptor {
+ uint32_t size;
+ union {
+ uint32_t ctl;
+ struct {
+ unsigned int int_en:1;
+ unsigned int rsvd:1;
+ unsigned int dest_snoop_dis:1;
+ unsigned int compl_write:1;
+ unsigned int fence:1;
+ unsigned int rsvd2:2;
+ unsigned int dest_brk:1;
+ unsigned int bundle:1;
+ unsigned int rsvd4:15;
+ #define IOAT_OP_FILL 0x01
+ unsigned int op:8;
+ } ctl_f;
+ };
+ uint64_t src_data;
+ uint64_t dst_addr;
+ uint64_t next;
+ uint64_t rsv1;
+ uint64_t next_dst_addr;
+ uint64_t user1;
+ uint64_t user2;
+};
+
+struct ioat_xor_descriptor {
+ uint32_t size;
+ union {
+ uint32_t ctl;
+ struct {
+ unsigned int int_en:1;
+ unsigned int src_snoop_dis:1;
+ unsigned int dest_snoop_dis:1;
+ unsigned int compl_write:1;
+ unsigned int fence:1;
+ unsigned int src_cnt:3;
+ unsigned int bundle:1;
+ unsigned int dest_dca:1;
+ unsigned int hint:1;
+ unsigned int rsvd:13;
+ #define IOAT_OP_XOR 0x87
+ #define IOAT_OP_XOR_VAL 0x88
+ unsigned int op:8;
+ } ctl_f;
+ };
+ uint64_t src_addr;
+ uint64_t dst_addr;
+ uint64_t next;
+ uint64_t src_addr2;
+ uint64_t src_addr3;
+ uint64_t src_addr4;
+ uint64_t src_addr5;
+};
+
+struct ioat_xor_ext_descriptor {
+ uint64_t src_addr6;
+ uint64_t src_addr7;
+ uint64_t src_addr8;
+ uint64_t next;
+ uint64_t rsvd[4];
+};
+
+struct ioat_pq_descriptor {
+ uint32_t size;
+ union {
+ uint32_t ctl;
+ struct {
+ unsigned int int_en:1;
+ unsigned int src_snoop_dis:1;
+ unsigned int dest_snoop_dis:1;
+ unsigned int compl_write:1;
+ unsigned int fence:1;
+ unsigned int src_cnt:3;
+ unsigned int bundle:1;
+ unsigned int dest_dca:1;
+ unsigned int hint:1;
+ unsigned int p_disable:1;
+ unsigned int q_disable:1;
+ unsigned int rsvd:11;
+ #define IOAT_OP_PQ 0x89
+ #define IOAT_OP_PQ_VAL 0x8a
+ unsigned int op:8;
+ } ctl_f;
+ };
+ uint64_t src_addr;
+ uint64_t p_addr;
+ uint64_t next;
+ uint64_t src_addr2;
+ uint64_t src_addr3;
+ uint8_t coef[8];
+ uint64_t q_addr;
+};
+
+struct ioat_pq_ext_descriptor {
+ uint64_t src_addr4;
+ uint64_t src_addr5;
+ uint64_t src_addr6;
+ uint64_t next;
+ uint64_t src_addr7;
+ uint64_t src_addr8;
+ uint64_t rsvd[2];
+};
+
+struct ioat_pq_update_descriptor {
+ uint32_t size;
+ union {
+ uint32_t ctl;
+ struct {
+ unsigned int int_en:1;
+ unsigned int src_snoop_dis:1;
+ unsigned int dest_snoop_dis:1;
+ unsigned int compl_write:1;
+ unsigned int fence:1;
+ unsigned int src_cnt:3;
+ unsigned int bundle:1;
+ unsigned int dest_dca:1;
+ unsigned int hint:1;
+ unsigned int p_disable:1;
+ unsigned int q_disable:1;
+ unsigned int rsvd:3;
+ unsigned int coef:8;
+ #define IOAT_OP_PQ_UP 0x8b
+ unsigned int op:8;
+ } ctl_f;
+ };
+ uint64_t src_addr;
+ uint64_t p_addr;
+ uint64_t next;
+ uint64_t src_addr2;
+ uint64_t p_src;
+ uint64_t q_src;
+ uint64_t q_addr;
+};
+
+struct ioat_raw_descriptor {
+ uint64_t field[8];
+};
+#endif
diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c
new file mode 100644
index 00000000000..d545fae30f3
--- /dev/null
+++ b/drivers/dma/ioat/pci.c
@@ -0,0 +1,210 @@
+/*
+ * Intel I/OAT DMA Linux driver
+ * Copyright(c) 2007 - 2009 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+/*
+ * This driver supports an Intel I/OAT DMA engine, which does asynchronous
+ * copy operations.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/dca.h>
+#include "dma.h"
+#include "dma_v2.h"
+#include "registers.h"
+#include "hw.h"
+
+MODULE_VERSION(IOAT_DMA_VERSION);
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Intel Corporation");
+
+static struct pci_device_id ioat_pci_tbl[] = {
+ /* I/OAT v1 platforms */
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SCNB) },
+ { PCI_VDEVICE(UNISYS, PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) },
+
+ /* I/OAT v2 platforms */
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) },
+
+ /* I/OAT v3 platforms */
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) },
+
+ /* I/OAT v3.2 platforms */
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF0) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF1) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF2) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF3) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF4) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF5) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF6) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF7) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF8) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF9) },
+
+ { 0, }
+};
+MODULE_DEVICE_TABLE(pci, ioat_pci_tbl);
+
+static int __devinit ioat_pci_probe(struct pci_dev *pdev,
+ const struct pci_device_id *id);
+static void __devexit ioat_remove(struct pci_dev *pdev);
+
+static int ioat_dca_enabled = 1;
+module_param(ioat_dca_enabled, int, 0644);
+MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)");
+
+struct kmem_cache *ioat2_cache;
+
+#define DRV_NAME "ioatdma"
+
+static struct pci_driver ioat_pci_driver = {
+ .name = DRV_NAME,
+ .id_table = ioat_pci_tbl,
+ .probe = ioat_pci_probe,
+ .remove = __devexit_p(ioat_remove),
+};
+
+static struct ioatdma_device *
+alloc_ioatdma(struct pci_dev *pdev, void __iomem *iobase)
+{
+ struct device *dev = &pdev->dev;
+ struct ioatdma_device *d = devm_kzalloc(dev, sizeof(*d), GFP_KERNEL);
+
+ if (!d)
+ return NULL;
+ d->pdev = pdev;
+ d->reg_base = iobase;
+ return d;
+}
+
+static int __devinit ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+ void __iomem * const *iomap;
+ struct device *dev = &pdev->dev;
+ struct ioatdma_device *device;
+ int err;
+
+ err = pcim_enable_device(pdev);
+ if (err)
+ return err;
+
+ err = pcim_iomap_regions(pdev, 1 << IOAT_MMIO_BAR, DRV_NAME);
+ if (err)
+ return err;
+ iomap = pcim_iomap_table(pdev);
+ if (!iomap)
+ return -ENOMEM;
+
+ err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+ if (err)
+ err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+ if (err)
+ return err;
+
+ err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+ if (err)
+ err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+ if (err)
+ return err;
+
+ device = devm_kzalloc(dev, sizeof(*device), GFP_KERNEL);
+ if (!device)
+ return -ENOMEM;
+
+ pci_set_master(pdev);
+
+ device = alloc_ioatdma(pdev, iomap[IOAT_MMIO_BAR]);
+ if (!device)
+ return -ENOMEM;
+ pci_set_drvdata(pdev, device);
+
+ device->version = readb(device->reg_base + IOAT_VER_OFFSET);
+ if (device->version == IOAT_VER_1_2)
+ err = ioat1_dma_probe(device, ioat_dca_enabled);
+ else if (device->version == IOAT_VER_2_0)
+ err = ioat2_dma_probe(device, ioat_dca_enabled);
+ else if (device->version >= IOAT_VER_3_0)
+ err = ioat3_dma_probe(device, ioat_dca_enabled);
+ else
+ return -ENODEV;
+
+ if (err) {
+ dev_err(dev, "Intel(R) I/OAT DMA Engine init failed\n");
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
+static void __devexit ioat_remove(struct pci_dev *pdev)
+{
+ struct ioatdma_device *device = pci_get_drvdata(pdev);
+
+ if (!device)
+ return;
+
+ dev_err(&pdev->dev, "Removing dma and dca services\n");
+ if (device->dca) {
+ unregister_dca_provider(device->dca, &pdev->dev);
+ free_dca_provider(device->dca);
+ device->dca = NULL;
+ }
+ ioat_dma_remove(device);
+}
+
+static int __init ioat_init_module(void)
+{
+ int err;
+
+ pr_info("%s: Intel(R) QuickData Technology Driver %s\n",
+ DRV_NAME, IOAT_DMA_VERSION);
+
+ ioat2_cache = kmem_cache_create("ioat2", sizeof(struct ioat_ring_ent),
+ 0, SLAB_HWCACHE_ALIGN, NULL);
+ if (!ioat2_cache)
+ return -ENOMEM;
+
+ err = pci_register_driver(&ioat_pci_driver);
+ if (err)
+ kmem_cache_destroy(ioat2_cache);
+
+ return err;
+}
+module_init(ioat_init_module);
+
+static void __exit ioat_exit_module(void)
+{
+ pci_unregister_driver(&ioat_pci_driver);
+ kmem_cache_destroy(ioat2_cache);
+}
+module_exit(ioat_exit_module);
diff --git a/drivers/dma/ioat/registers.h b/drivers/dma/ioat/registers.h
new file mode 100644
index 00000000000..e8ae63baf58
--- /dev/null
+++ b/drivers/dma/ioat/registers.h
@@ -0,0 +1,249 @@
+/*
+ * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+#ifndef _IOAT_REGISTERS_H_
+#define _IOAT_REGISTERS_H_
+
+#define IOAT_PCI_DMACTRL_OFFSET 0x48
+#define IOAT_PCI_DMACTRL_DMA_EN 0x00000001
+#define IOAT_PCI_DMACTRL_MSI_EN 0x00000002
+
+#define IOAT_PCI_DEVICE_ID_OFFSET 0x02
+#define IOAT_PCI_DMAUNCERRSTS_OFFSET 0x148
+#define IOAT_PCI_CHANERR_INT_OFFSET 0x180
+#define IOAT_PCI_CHANERRMASK_INT_OFFSET 0x184
+
+/* MMIO Device Registers */
+#define IOAT_CHANCNT_OFFSET 0x00 /* 8-bit */
+
+#define IOAT_XFERCAP_OFFSET 0x01 /* 8-bit */
+#define IOAT_XFERCAP_4KB 12
+#define IOAT_XFERCAP_8KB 13
+#define IOAT_XFERCAP_16KB 14
+#define IOAT_XFERCAP_32KB 15
+#define IOAT_XFERCAP_32GB 0
+
+#define IOAT_GENCTRL_OFFSET 0x02 /* 8-bit */
+#define IOAT_GENCTRL_DEBUG_EN 0x01
+
+#define IOAT_INTRCTRL_OFFSET 0x03 /* 8-bit */
+#define IOAT_INTRCTRL_MASTER_INT_EN 0x01 /* Master Interrupt Enable */
+#define IOAT_INTRCTRL_INT_STATUS 0x02 /* ATTNSTATUS -or- Channel Int */
+#define IOAT_INTRCTRL_INT 0x04 /* INT_STATUS -and- MASTER_INT_EN */
+#define IOAT_INTRCTRL_MSIX_VECTOR_CONTROL 0x08 /* Enable all MSI-X vectors */
+
+#define IOAT_ATTNSTATUS_OFFSET 0x04 /* Each bit is a channel */
+
+#define IOAT_VER_OFFSET 0x08 /* 8-bit */
+#define IOAT_VER_MAJOR_MASK 0xF0
+#define IOAT_VER_MINOR_MASK 0x0F
+#define GET_IOAT_VER_MAJOR(x) (((x) & IOAT_VER_MAJOR_MASK) >> 4)
+#define GET_IOAT_VER_MINOR(x) ((x) & IOAT_VER_MINOR_MASK)
+
+#define IOAT_PERPORTOFFSET_OFFSET 0x0A /* 16-bit */
+
+#define IOAT_INTRDELAY_OFFSET 0x0C /* 16-bit */
+#define IOAT_INTRDELAY_INT_DELAY_MASK 0x3FFF /* Interrupt Delay Time */
+#define IOAT_INTRDELAY_COALESE_SUPPORT 0x8000 /* Interrupt Coalescing Supported */
+
+#define IOAT_DEVICE_STATUS_OFFSET 0x0E /* 16-bit */
+#define IOAT_DEVICE_STATUS_DEGRADED_MODE 0x0001
+#define IOAT_DEVICE_MMIO_RESTRICTED 0x0002
+#define IOAT_DEVICE_MEMORY_BYPASS 0x0004
+#define IOAT_DEVICE_ADDRESS_REMAPPING 0x0008
+
+#define IOAT_DMA_CAP_OFFSET 0x10 /* 32-bit */
+#define IOAT_CAP_PAGE_BREAK 0x00000001
+#define IOAT_CAP_CRC 0x00000002
+#define IOAT_CAP_SKIP_MARKER 0x00000004
+#define IOAT_CAP_DCA 0x00000010
+#define IOAT_CAP_CRC_MOVE 0x00000020
+#define IOAT_CAP_FILL_BLOCK 0x00000040
+#define IOAT_CAP_APIC 0x00000080
+#define IOAT_CAP_XOR 0x00000100
+#define IOAT_CAP_PQ 0x00000200
+
+#define IOAT_CHANNEL_MMIO_SIZE 0x80 /* Each Channel MMIO space is this size */
+
+/* DMA Channel Registers */
+#define IOAT_CHANCTRL_OFFSET 0x00 /* 16-bit Channel Control Register */
+#define IOAT_CHANCTRL_CHANNEL_PRIORITY_MASK 0xF000
+#define IOAT3_CHANCTRL_COMPL_DCA_EN 0x0200
+#define IOAT_CHANCTRL_CHANNEL_IN_USE 0x0100
+#define IOAT_CHANCTRL_DESCRIPTOR_ADDR_SNOOP_CONTROL 0x0020
+#define IOAT_CHANCTRL_ERR_INT_EN 0x0010
+#define IOAT_CHANCTRL_ANY_ERR_ABORT_EN 0x0008
+#define IOAT_CHANCTRL_ERR_COMPLETION_EN 0x0004
+#define IOAT_CHANCTRL_INT_REARM 0x0001
+#define IOAT_CHANCTRL_RUN (IOAT_CHANCTRL_INT_REARM |\
+ IOAT_CHANCTRL_ANY_ERR_ABORT_EN)
+
+#define IOAT_DMA_COMP_OFFSET 0x02 /* 16-bit DMA channel compatibility */
+#define IOAT_DMA_COMP_V1 0x0001 /* Compatibility with DMA version 1 */
+#define IOAT_DMA_COMP_V2 0x0002 /* Compatibility with DMA version 2 */
+
+
+#define IOAT1_CHANSTS_OFFSET 0x04 /* 64-bit Channel Status Register */
+#define IOAT2_CHANSTS_OFFSET 0x08 /* 64-bit Channel Status Register */
+#define IOAT_CHANSTS_OFFSET(ver) ((ver) < IOAT_VER_2_0 \
+ ? IOAT1_CHANSTS_OFFSET : IOAT2_CHANSTS_OFFSET)
+#define IOAT1_CHANSTS_OFFSET_LOW 0x04
+#define IOAT2_CHANSTS_OFFSET_LOW 0x08
+#define IOAT_CHANSTS_OFFSET_LOW(ver) ((ver) < IOAT_VER_2_0 \
+ ? IOAT1_CHANSTS_OFFSET_LOW : IOAT2_CHANSTS_OFFSET_LOW)
+#define IOAT1_CHANSTS_OFFSET_HIGH 0x08
+#define IOAT2_CHANSTS_OFFSET_HIGH 0x0C
+#define IOAT_CHANSTS_OFFSET_HIGH(ver) ((ver) < IOAT_VER_2_0 \
+ ? IOAT1_CHANSTS_OFFSET_HIGH : IOAT2_CHANSTS_OFFSET_HIGH)
+#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR (~0x3fULL)
+#define IOAT_CHANSTS_SOFT_ERR 0x10ULL
+#define IOAT_CHANSTS_UNAFFILIATED_ERR 0x8ULL
+#define IOAT_CHANSTS_STATUS 0x7ULL
+#define IOAT_CHANSTS_ACTIVE 0x0
+#define IOAT_CHANSTS_DONE 0x1
+#define IOAT_CHANSTS_SUSPENDED 0x2
+#define IOAT_CHANSTS_HALTED 0x3
+
+
+
+#define IOAT_CHAN_DMACOUNT_OFFSET 0x06 /* 16-bit DMA Count register */
+
+#define IOAT_DCACTRL_OFFSET 0x30 /* 32 bit Direct Cache Access Control Register */
+#define IOAT_DCACTRL_CMPL_WRITE_ENABLE 0x10000
+#define IOAT_DCACTRL_TARGET_CPU_MASK 0xFFFF /* APIC ID */
+
+/* CB DCA Memory Space Registers */
+#define IOAT_DCAOFFSET_OFFSET 0x14
+/* CB_BAR + IOAT_DCAOFFSET value */
+#define IOAT_DCA_VER_OFFSET 0x00
+#define IOAT_DCA_VER_MAJOR_MASK 0xF0
+#define IOAT_DCA_VER_MINOR_MASK 0x0F
+
+#define IOAT_DCA_COMP_OFFSET 0x02
+#define IOAT_DCA_COMP_V1 0x1
+
+#define IOAT_FSB_CAPABILITY_OFFSET 0x04
+#define IOAT_FSB_CAPABILITY_PREFETCH 0x1
+
+#define IOAT_PCI_CAPABILITY_OFFSET 0x06
+#define IOAT_PCI_CAPABILITY_MEMWR 0x1
+
+#define IOAT_FSB_CAP_ENABLE_OFFSET 0x08
+#define IOAT_FSB_CAP_ENABLE_PREFETCH 0x1
+
+#define IOAT_PCI_CAP_ENABLE_OFFSET 0x0A
+#define IOAT_PCI_CAP_ENABLE_MEMWR 0x1
+
+#define IOAT_APICID_TAG_MAP_OFFSET 0x0C
+#define IOAT_APICID_TAG_MAP_TAG0 0x0000000F
+#define IOAT_APICID_TAG_MAP_TAG0_SHIFT 0
+#define IOAT_APICID_TAG_MAP_TAG1 0x000000F0
+#define IOAT_APICID_TAG_MAP_TAG1_SHIFT 4
+#define IOAT_APICID_TAG_MAP_TAG2 0x00000F00
+#define IOAT_APICID_TAG_MAP_TAG2_SHIFT 8
+#define IOAT_APICID_TAG_MAP_TAG3 0x0000F000
+#define IOAT_APICID_TAG_MAP_TAG3_SHIFT 12
+#define IOAT_APICID_TAG_MAP_TAG4 0x000F0000
+#define IOAT_APICID_TAG_MAP_TAG4_SHIFT 16
+#define IOAT_APICID_TAG_CB2_VALID 0x8080808080
+
+#define IOAT_DCA_GREQID_OFFSET 0x10
+#define IOAT_DCA_GREQID_SIZE 0x04
+#define IOAT_DCA_GREQID_MASK 0xFFFF
+#define IOAT_DCA_GREQID_IGNOREFUN 0x10000000
+#define IOAT_DCA_GREQID_VALID 0x20000000
+#define IOAT_DCA_GREQID_LASTID 0x80000000
+
+#define IOAT3_CSI_CAPABILITY_OFFSET 0x08
+#define IOAT3_CSI_CAPABILITY_PREFETCH 0x1
+
+#define IOAT3_PCI_CAPABILITY_OFFSET 0x0A
+#define IOAT3_PCI_CAPABILITY_MEMWR 0x1
+
+#define IOAT3_CSI_CONTROL_OFFSET 0x0C
+#define IOAT3_CSI_CONTROL_PREFETCH 0x1
+
+#define IOAT3_PCI_CONTROL_OFFSET 0x0E
+#define IOAT3_PCI_CONTROL_MEMWR 0x1
+
+#define IOAT3_APICID_TAG_MAP_OFFSET 0x10
+#define IOAT3_APICID_TAG_MAP_OFFSET_LOW 0x10
+#define IOAT3_APICID_TAG_MAP_OFFSET_HIGH 0x14
+
+#define IOAT3_DCA_GREQID_OFFSET 0x02
+
+#define IOAT1_CHAINADDR_OFFSET 0x0C /* 64-bit Descriptor Chain Address Register */
+#define IOAT2_CHAINADDR_OFFSET 0x10 /* 64-bit Descriptor Chain Address Register */
+#define IOAT_CHAINADDR_OFFSET(ver) ((ver) < IOAT_VER_2_0 \
+ ? IOAT1_CHAINADDR_OFFSET : IOAT2_CHAINADDR_OFFSET)
+#define IOAT1_CHAINADDR_OFFSET_LOW 0x0C
+#define IOAT2_CHAINADDR_OFFSET_LOW 0x10
+#define IOAT_CHAINADDR_OFFSET_LOW(ver) ((ver) < IOAT_VER_2_0 \
+ ? IOAT1_CHAINADDR_OFFSET_LOW : IOAT2_CHAINADDR_OFFSET_LOW)
+#define IOAT1_CHAINADDR_OFFSET_HIGH 0x10
+#define IOAT2_CHAINADDR_OFFSET_HIGH 0x14
+#define IOAT_CHAINADDR_OFFSET_HIGH(ver) ((ver) < IOAT_VER_2_0 \
+ ? IOAT1_CHAINADDR_OFFSET_HIGH : IOAT2_CHAINADDR_OFFSET_HIGH)
+
+#define IOAT1_CHANCMD_OFFSET 0x14 /* 8-bit DMA Channel Command Register */
+#define IOAT2_CHANCMD_OFFSET 0x04 /* 8-bit DMA Channel Command Register */
+#define IOAT_CHANCMD_OFFSET(ver) ((ver) < IOAT_VER_2_0 \
+ ? IOAT1_CHANCMD_OFFSET : IOAT2_CHANCMD_OFFSET)
+#define IOAT_CHANCMD_RESET 0x20
+#define IOAT_CHANCMD_RESUME 0x10
+#define IOAT_CHANCMD_ABORT 0x08
+#define IOAT_CHANCMD_SUSPEND 0x04
+#define IOAT_CHANCMD_APPEND 0x02
+#define IOAT_CHANCMD_START 0x01
+
+#define IOAT_CHANCMP_OFFSET 0x18 /* 64-bit Channel Completion Address Register */
+#define IOAT_CHANCMP_OFFSET_LOW 0x18
+#define IOAT_CHANCMP_OFFSET_HIGH 0x1C
+
+#define IOAT_CDAR_OFFSET 0x20 /* 64-bit Current Descriptor Address Register */
+#define IOAT_CDAR_OFFSET_LOW 0x20
+#define IOAT_CDAR_OFFSET_HIGH 0x24
+
+#define IOAT_CHANERR_OFFSET 0x28 /* 32-bit Channel Error Register */
+#define IOAT_CHANERR_SRC_ADDR_ERR 0x0001
+#define IOAT_CHANERR_DEST_ADDR_ERR 0x0002
+#define IOAT_CHANERR_NEXT_ADDR_ERR 0x0004
+#define IOAT_CHANERR_NEXT_DESC_ALIGN_ERR 0x0008
+#define IOAT_CHANERR_CHAIN_ADDR_VALUE_ERR 0x0010
+#define IOAT_CHANERR_CHANCMD_ERR 0x0020
+#define IOAT_CHANERR_CHIPSET_UNCORRECTABLE_DATA_INTEGRITY_ERR 0x0040
+#define IOAT_CHANERR_DMA_UNCORRECTABLE_DATA_INTEGRITY_ERR 0x0080
+#define IOAT_CHANERR_READ_DATA_ERR 0x0100
+#define IOAT_CHANERR_WRITE_DATA_ERR 0x0200
+#define IOAT_CHANERR_CONTROL_ERR 0x0400
+#define IOAT_CHANERR_LENGTH_ERR 0x0800
+#define IOAT_CHANERR_COMPLETION_ADDR_ERR 0x1000
+#define IOAT_CHANERR_INT_CONFIGURATION_ERR 0x2000
+#define IOAT_CHANERR_SOFT_ERR 0x4000
+#define IOAT_CHANERR_UNAFFILIATED_ERR 0x8000
+#define IOAT_CHANERR_XOR_P_OR_CRC_ERR 0x10000
+#define IOAT_CHANERR_XOR_Q_ERR 0x20000
+#define IOAT_CHANERR_DESCRIPTOR_COUNT_ERR 0x40000
+
+#define IOAT_CHANERR_HANDLE_MASK (IOAT_CHANERR_XOR_P_OR_CRC_ERR | IOAT_CHANERR_XOR_Q_ERR)
+
+#define IOAT_CHANERR_MASK_OFFSET 0x2C /* 32-bit Channel Error Register */
+
+#endif /* _IOAT_REGISTERS_H_ */
diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c
new file mode 100644
index 00000000000..ca6e6a0cb79
--- /dev/null
+++ b/drivers/dma/iop-adma.c
@@ -0,0 +1,1772 @@
+/*
+ * offload engine driver for the Intel Xscale series of i/o processors
+ * Copyright © 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+
+/*
+ * This driver supports the asynchrounous DMA copy and RAID engines available
+ * on the Intel Xscale(R) family of I/O Processors (IOP 32x, 33x, 134x)
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/memory.h>
+#include <linux/ioport.h>
+#include <linux/raid/pq.h>
+
+#include <mach/adma.h>
+
+#define to_iop_adma_chan(chan) container_of(chan, struct iop_adma_chan, common)
+#define to_iop_adma_device(dev) \
+ container_of(dev, struct iop_adma_device, common)
+#define tx_to_iop_adma_slot(tx) \
+ container_of(tx, struct iop_adma_desc_slot, async_tx)
+
+/**
+ * iop_adma_free_slots - flags descriptor slots for reuse
+ * @slot: Slot to free
+ * Caller must hold &iop_chan->lock while calling this function
+ */
+static void iop_adma_free_slots(struct iop_adma_desc_slot *slot)
+{
+ int stride = slot->slots_per_op;
+
+ while (stride--) {
+ slot->slots_per_op = 0;
+ slot = list_entry(slot->slot_node.next,
+ struct iop_adma_desc_slot,
+ slot_node);
+ }
+}
+
+static void
+iop_desc_unmap(struct iop_adma_chan *iop_chan, struct iop_adma_desc_slot *desc)
+{
+ struct dma_async_tx_descriptor *tx = &desc->async_tx;
+ struct iop_adma_desc_slot *unmap = desc->group_head;
+ struct device *dev = &iop_chan->device->pdev->dev;
+ u32 len = unmap->unmap_len;
+ enum dma_ctrl_flags flags = tx->flags;
+ u32 src_cnt;
+ dma_addr_t addr;
+ dma_addr_t dest;
+
+ src_cnt = unmap->unmap_src_cnt;
+ dest = iop_desc_get_dest_addr(unmap, iop_chan);
+ if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+ enum dma_data_direction dir;
+
+ if (src_cnt > 1) /* is xor? */
+ dir = DMA_BIDIRECTIONAL;
+ else
+ dir = DMA_FROM_DEVICE;
+
+ dma_unmap_page(dev, dest, len, dir);
+ }
+
+ if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+ while (src_cnt--) {
+ addr = iop_desc_get_src_addr(unmap, iop_chan, src_cnt);
+ if (addr == dest)
+ continue;
+ dma_unmap_page(dev, addr, len, DMA_TO_DEVICE);
+ }
+ }
+ desc->group_head = NULL;
+}
+
+static void
+iop_desc_unmap_pq(struct iop_adma_chan *iop_chan, struct iop_adma_desc_slot *desc)
+{
+ struct dma_async_tx_descriptor *tx = &desc->async_tx;
+ struct iop_adma_desc_slot *unmap = desc->group_head;
+ struct device *dev = &iop_chan->device->pdev->dev;
+ u32 len = unmap->unmap_len;
+ enum dma_ctrl_flags flags = tx->flags;
+ u32 src_cnt = unmap->unmap_src_cnt;
+ dma_addr_t pdest = iop_desc_get_dest_addr(unmap, iop_chan);
+ dma_addr_t qdest = iop_desc_get_qdest_addr(unmap, iop_chan);
+ int i;
+
+ if (tx->flags & DMA_PREP_CONTINUE)
+ src_cnt -= 3;
+
+ if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP) && !desc->pq_check_result) {
+ dma_unmap_page(dev, pdest, len, DMA_BIDIRECTIONAL);
+ dma_unmap_page(dev, qdest, len, DMA_BIDIRECTIONAL);
+ }
+
+ if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+ dma_addr_t addr;
+
+ for (i = 0; i < src_cnt; i++) {
+ addr = iop_desc_get_src_addr(unmap, iop_chan, i);
+ dma_unmap_page(dev, addr, len, DMA_TO_DEVICE);
+ }
+ if (desc->pq_check_result) {
+ dma_unmap_page(dev, pdest, len, DMA_TO_DEVICE);
+ dma_unmap_page(dev, qdest, len, DMA_TO_DEVICE);
+ }
+ }
+
+ desc->group_head = NULL;
+}
+
+
+static dma_cookie_t
+iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc,
+ struct iop_adma_chan *iop_chan, dma_cookie_t cookie)
+{
+ struct dma_async_tx_descriptor *tx = &desc->async_tx;
+
+ BUG_ON(tx->cookie < 0);
+ if (tx->cookie > 0) {
+ cookie = tx->cookie;
+ tx->cookie = 0;
+
+ /* call the callback (must not sleep or submit new
+ * operations to this channel)
+ */
+ if (tx->callback)
+ tx->callback(tx->callback_param);
+
+ /* unmap dma addresses
+ * (unmap_single vs unmap_page?)
+ */
+ if (desc->group_head && desc->unmap_len) {
+ if (iop_desc_is_pq(desc))
+ iop_desc_unmap_pq(iop_chan, desc);
+ else
+ iop_desc_unmap(iop_chan, desc);
+ }
+ }
+
+ /* run dependent operations */
+ dma_run_dependencies(tx);
+
+ return cookie;
+}
+
+static int
+iop_adma_clean_slot(struct iop_adma_desc_slot *desc,
+ struct iop_adma_chan *iop_chan)
+{
+ /* the client is allowed to attach dependent operations
+ * until 'ack' is set
+ */
+ if (!async_tx_test_ack(&desc->async_tx))
+ return 0;
+
+ /* leave the last descriptor in the chain
+ * so we can append to it
+ */
+ if (desc->chain_node.next == &iop_chan->chain)
+ return 1;
+
+ dev_dbg(iop_chan->device->common.dev,
+ "\tfree slot: %d slots_per_op: %d\n",
+ desc->idx, desc->slots_per_op);
+
+ list_del(&desc->chain_node);
+ iop_adma_free_slots(desc);
+
+ return 0;
+}
+
+static void __iop_adma_slot_cleanup(struct iop_adma_chan *iop_chan)
+{
+ struct iop_adma_desc_slot *iter, *_iter, *grp_start = NULL;
+ dma_cookie_t cookie = 0;
+ u32 current_desc = iop_chan_get_current_descriptor(iop_chan);
+ int busy = iop_chan_is_busy(iop_chan);
+ int seen_current = 0, slot_cnt = 0, slots_per_op = 0;
+
+ dev_dbg(iop_chan->device->common.dev, "%s\n", __func__);
+ /* free completed slots from the chain starting with
+ * the oldest descriptor
+ */
+ list_for_each_entry_safe(iter, _iter, &iop_chan->chain,
+ chain_node) {
+ pr_debug("\tcookie: %d slot: %d busy: %d "
+ "this_desc: %#x next_desc: %#x ack: %d\n",
+ iter->async_tx.cookie, iter->idx, busy,
+ iter->async_tx.phys, iop_desc_get_next_desc(iter),
+ async_tx_test_ack(&iter->async_tx));
+ prefetch(_iter);
+ prefetch(&_iter->async_tx);
+
+ /* do not advance past the current descriptor loaded into the
+ * hardware channel, subsequent descriptors are either in
+ * process or have not been submitted
+ */
+ if (seen_current)
+ break;
+
+ /* stop the search if we reach the current descriptor and the
+ * channel is busy, or if it appears that the current descriptor
+ * needs to be re-read (i.e. has been appended to)
+ */
+ if (iter->async_tx.phys == current_desc) {
+ BUG_ON(seen_current++);
+ if (busy || iop_desc_get_next_desc(iter))
+ break;
+ }
+
+ /* detect the start of a group transaction */
+ if (!slot_cnt && !slots_per_op) {
+ slot_cnt = iter->slot_cnt;
+ slots_per_op = iter->slots_per_op;
+ if (slot_cnt <= slots_per_op) {
+ slot_cnt = 0;
+ slots_per_op = 0;
+ }
+ }
+
+ if (slot_cnt) {
+ pr_debug("\tgroup++\n");
+ if (!grp_start)
+ grp_start = iter;
+ slot_cnt -= slots_per_op;
+ }
+
+ /* all the members of a group are complete */
+ if (slots_per_op != 0 && slot_cnt == 0) {
+ struct iop_adma_desc_slot *grp_iter, *_grp_iter;
+ int end_of_chain = 0;
+ pr_debug("\tgroup end\n");
+
+ /* collect the total results */
+ if (grp_start->xor_check_result) {
+ u32 zero_sum_result = 0;
+ slot_cnt = grp_start->slot_cnt;
+ grp_iter = grp_start;
+
+ list_for_each_entry_from(grp_iter,
+ &iop_chan->chain, chain_node) {
+ zero_sum_result |=
+ iop_desc_get_zero_result(grp_iter);
+ pr_debug("\titer%d result: %d\n",
+ grp_iter->idx, zero_sum_result);
+ slot_cnt -= slots_per_op;
+ if (slot_cnt == 0)
+ break;
+ }
+ pr_debug("\tgrp_start->xor_check_result: %p\n",
+ grp_start->xor_check_result);
+ *grp_start->xor_check_result = zero_sum_result;
+ }
+
+ /* clean up the group */
+ slot_cnt = grp_start->slot_cnt;
+ grp_iter = grp_start;
+ list_for_each_entry_safe_from(grp_iter, _grp_iter,
+ &iop_chan->chain, chain_node) {
+ cookie = iop_adma_run_tx_complete_actions(
+ grp_iter, iop_chan, cookie);
+
+ slot_cnt -= slots_per_op;
+ end_of_chain = iop_adma_clean_slot(grp_iter,
+ iop_chan);
+
+ if (slot_cnt == 0 || end_of_chain)
+ break;
+ }
+
+ /* the group should be complete at this point */
+ BUG_ON(slot_cnt);
+
+ slots_per_op = 0;
+ grp_start = NULL;
+ if (end_of_chain)
+ break;
+ else
+ continue;
+ } else if (slots_per_op) /* wait for group completion */
+ continue;
+
+ /* write back zero sum results (single descriptor case) */
+ if (iter->xor_check_result && iter->async_tx.cookie)
+ *iter->xor_check_result =
+ iop_desc_get_zero_result(iter);
+
+ cookie = iop_adma_run_tx_complete_actions(
+ iter, iop_chan, cookie);
+
+ if (iop_adma_clean_slot(iter, iop_chan))
+ break;
+ }
+
+ if (cookie > 0) {
+ iop_chan->completed_cookie = cookie;
+ pr_debug("\tcompleted cookie %d\n", cookie);
+ }
+}
+
+static void
+iop_adma_slot_cleanup(struct iop_adma_chan *iop_chan)
+{
+ spin_lock_bh(&iop_chan->lock);
+ __iop_adma_slot_cleanup(iop_chan);
+ spin_unlock_bh(&iop_chan->lock);
+}
+
+static void iop_adma_tasklet(unsigned long data)
+{
+ struct iop_adma_chan *iop_chan = (struct iop_adma_chan *) data;
+
+ /* lockdep will flag depedency submissions as potentially
+ * recursive locking, this is not the case as a dependency
+ * submission will never recurse a channels submit routine.
+ * There are checks in async_tx.c to prevent this.
+ */
+ spin_lock_nested(&iop_chan->lock, SINGLE_DEPTH_NESTING);
+ __iop_adma_slot_cleanup(iop_chan);
+ spin_unlock(&iop_chan->lock);
+}
+
+static struct iop_adma_desc_slot *
+iop_adma_alloc_slots(struct iop_adma_chan *iop_chan, int num_slots,
+ int slots_per_op)
+{
+ struct iop_adma_desc_slot *iter, *_iter, *alloc_start = NULL;
+ LIST_HEAD(chain);
+ int slots_found, retry = 0;
+
+ /* start search from the last allocated descrtiptor
+ * if a contiguous allocation can not be found start searching
+ * from the beginning of the list
+ */
+retry:
+ slots_found = 0;
+ if (retry == 0)
+ iter = iop_chan->last_used;
+ else
+ iter = list_entry(&iop_chan->all_slots,
+ struct iop_adma_desc_slot,
+ slot_node);
+
+ list_for_each_entry_safe_continue(
+ iter, _iter, &iop_chan->all_slots, slot_node) {
+ prefetch(_iter);
+ prefetch(&_iter->async_tx);
+ if (iter->slots_per_op) {
+ /* give up after finding the first busy slot
+ * on the second pass through the list
+ */
+ if (retry)
+ break;
+
+ slots_found = 0;
+ continue;
+ }
+
+ /* start the allocation if the slot is correctly aligned */
+ if (!slots_found++) {
+ if (iop_desc_is_aligned(iter, slots_per_op))
+ alloc_start = iter;
+ else {
+ slots_found = 0;
+ continue;
+ }
+ }
+
+ if (slots_found == num_slots) {
+ struct iop_adma_desc_slot *alloc_tail = NULL;
+ struct iop_adma_desc_slot *last_used = NULL;
+ iter = alloc_start;
+ while (num_slots) {
+ int i;
+ dev_dbg(iop_chan->device->common.dev,
+ "allocated slot: %d "
+ "(desc %p phys: %#x) slots_per_op %d\n",
+ iter->idx, iter->hw_desc,
+ iter->async_tx.phys, slots_per_op);
+
+ /* pre-ack all but the last descriptor */
+ if (num_slots != slots_per_op)
+ async_tx_ack(&iter->async_tx);
+
+ list_add_tail(&iter->chain_node, &chain);
+ alloc_tail = iter;
+ iter->async_tx.cookie = 0;
+ iter->slot_cnt = num_slots;
+ iter->xor_check_result = NULL;
+ for (i = 0; i < slots_per_op; i++) {
+ iter->slots_per_op = slots_per_op - i;
+ last_used = iter;
+ iter = list_entry(iter->slot_node.next,
+ struct iop_adma_desc_slot,
+ slot_node);
+ }
+ num_slots -= slots_per_op;
+ }
+ alloc_tail->group_head = alloc_start;
+ alloc_tail->async_tx.cookie = -EBUSY;
+ list_splice(&chain, &alloc_tail->tx_list);
+ iop_chan->last_used = last_used;
+ iop_desc_clear_next_desc(alloc_start);
+ iop_desc_clear_next_desc(alloc_tail);
+ return alloc_tail;
+ }
+ }
+ if (!retry++)
+ goto retry;
+
+ /* perform direct reclaim if the allocation fails */
+ __iop_adma_slot_cleanup(iop_chan);
+
+ return NULL;
+}
+
+static dma_cookie_t
+iop_desc_assign_cookie(struct iop_adma_chan *iop_chan,
+ struct iop_adma_desc_slot *desc)
+{
+ dma_cookie_t cookie = iop_chan->common.cookie;
+ cookie++;
+ if (cookie < 0)
+ cookie = 1;
+ iop_chan->common.cookie = desc->async_tx.cookie = cookie;
+ return cookie;
+}
+
+static void iop_adma_check_threshold(struct iop_adma_chan *iop_chan)
+{
+ dev_dbg(iop_chan->device->common.dev, "pending: %d\n",
+ iop_chan->pending);
+
+ if (iop_chan->pending >= IOP_ADMA_THRESHOLD) {
+ iop_chan->pending = 0;
+ iop_chan_append(iop_chan);
+ }
+}
+
+static dma_cookie_t
+iop_adma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx);
+ struct iop_adma_chan *iop_chan = to_iop_adma_chan(tx->chan);
+ struct iop_adma_desc_slot *grp_start, *old_chain_tail;
+ int slot_cnt;
+ int slots_per_op;
+ dma_cookie_t cookie;
+ dma_addr_t next_dma;
+
+ grp_start = sw_desc->group_head;
+ slot_cnt = grp_start->slot_cnt;
+ slots_per_op = grp_start->slots_per_op;
+
+ spin_lock_bh(&iop_chan->lock);
+ cookie = iop_desc_assign_cookie(iop_chan, sw_desc);
+
+ old_chain_tail = list_entry(iop_chan->chain.prev,
+ struct iop_adma_desc_slot, chain_node);
+ list_splice_init(&sw_desc->tx_list,
+ &old_chain_tail->chain_node);
+
+ /* fix up the hardware chain */
+ next_dma = grp_start->async_tx.phys;
+ iop_desc_set_next_desc(old_chain_tail, next_dma);
+ BUG_ON(iop_desc_get_next_desc(old_chain_tail) != next_dma); /* flush */
+
+ /* check for pre-chained descriptors */
+ iop_paranoia(iop_desc_get_next_desc(sw_desc));
+
+ /* increment the pending count by the number of slots
+ * memcpy operations have a 1:1 (slot:operation) relation
+ * other operations are heavier and will pop the threshold
+ * more often.
+ */
+ iop_chan->pending += slot_cnt;
+ iop_adma_check_threshold(iop_chan);
+ spin_unlock_bh(&iop_chan->lock);
+
+ dev_dbg(iop_chan->device->common.dev, "%s cookie: %d slot: %d\n",
+ __func__, sw_desc->async_tx.cookie, sw_desc->idx);
+
+ return cookie;
+}
+
+static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan);
+static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan);
+
+/**
+ * iop_adma_alloc_chan_resources - returns the number of allocated descriptors
+ * @chan - allocate descriptor resources for this channel
+ * @client - current client requesting the channel be ready for requests
+ *
+ * Note: We keep the slots for 1 operation on iop_chan->chain at all times. To
+ * avoid deadlock, via async_xor, num_descs_in_pool must at a minimum be
+ * greater than 2x the number slots needed to satisfy a device->max_xor
+ * request.
+ * */
+static int iop_adma_alloc_chan_resources(struct dma_chan *chan)
+{
+ char *hw_desc;
+ int idx;
+ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+ struct iop_adma_desc_slot *slot = NULL;
+ int init = iop_chan->slots_allocated ? 0 : 1;
+ struct iop_adma_platform_data *plat_data =
+ iop_chan->device->pdev->dev.platform_data;
+ int num_descs_in_pool = plat_data->pool_size/IOP_ADMA_SLOT_SIZE;
+
+ /* Allocate descriptor slots */
+ do {
+ idx = iop_chan->slots_allocated;
+ if (idx == num_descs_in_pool)
+ break;
+
+ slot = kzalloc(sizeof(*slot), GFP_KERNEL);
+ if (!slot) {
+ printk(KERN_INFO "IOP ADMA Channel only initialized"
+ " %d descriptor slots", idx);
+ break;
+ }
+ hw_desc = (char *) iop_chan->device->dma_desc_pool_virt;
+ slot->hw_desc = (void *) &hw_desc[idx * IOP_ADMA_SLOT_SIZE];
+
+ dma_async_tx_descriptor_init(&slot->async_tx, chan);
+ slot->async_tx.tx_submit = iop_adma_tx_submit;
+ INIT_LIST_HEAD(&slot->tx_list);
+ INIT_LIST_HEAD(&slot->chain_node);
+ INIT_LIST_HEAD(&slot->slot_node);
+ hw_desc = (char *) iop_chan->device->dma_desc_pool;
+ slot->async_tx.phys =
+ (dma_addr_t) &hw_desc[idx * IOP_ADMA_SLOT_SIZE];
+ slot->idx = idx;
+
+ spin_lock_bh(&iop_chan->lock);
+ iop_chan->slots_allocated++;
+ list_add_tail(&slot->slot_node, &iop_chan->all_slots);
+ spin_unlock_bh(&iop_chan->lock);
+ } while (iop_chan->slots_allocated < num_descs_in_pool);
+
+ if (idx && !iop_chan->last_used)
+ iop_chan->last_used = list_entry(iop_chan->all_slots.next,
+ struct iop_adma_desc_slot,
+ slot_node);
+
+ dev_dbg(iop_chan->device->common.dev,
+ "allocated %d descriptor slots last_used: %p\n",
+ iop_chan->slots_allocated, iop_chan->last_used);
+
+ /* initialize the channel and the chain with a null operation */
+ if (init) {
+ if (dma_has_cap(DMA_MEMCPY,
+ iop_chan->device->common.cap_mask))
+ iop_chan_start_null_memcpy(iop_chan);
+ else if (dma_has_cap(DMA_XOR,
+ iop_chan->device->common.cap_mask))
+ iop_chan_start_null_xor(iop_chan);
+ else
+ BUG();
+ }
+
+ return (idx > 0) ? idx : -ENOMEM;
+}
+
+static struct dma_async_tx_descriptor *
+iop_adma_prep_dma_interrupt(struct dma_chan *chan, unsigned long flags)
+{
+ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+ struct iop_adma_desc_slot *sw_desc, *grp_start;
+ int slot_cnt, slots_per_op;
+
+ dev_dbg(iop_chan->device->common.dev, "%s\n", __func__);
+
+ spin_lock_bh(&iop_chan->lock);
+ slot_cnt = iop_chan_interrupt_slot_count(&slots_per_op, iop_chan);
+ sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+ if (sw_desc) {
+ grp_start = sw_desc->group_head;
+ iop_desc_init_interrupt(grp_start, iop_chan);
+ grp_start->unmap_len = 0;
+ sw_desc->async_tx.flags = flags;
+ }
+ spin_unlock_bh(&iop_chan->lock);
+
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+iop_adma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dma_dest,
+ dma_addr_t dma_src, size_t len, unsigned long flags)
+{
+ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+ struct iop_adma_desc_slot *sw_desc, *grp_start;
+ int slot_cnt, slots_per_op;
+
+ if (unlikely(!len))
+ return NULL;
+ BUG_ON(unlikely(len > IOP_ADMA_MAX_BYTE_COUNT));
+
+ dev_dbg(iop_chan->device->common.dev, "%s len: %u\n",
+ __func__, len);
+
+ spin_lock_bh(&iop_chan->lock);
+ slot_cnt = iop_chan_memcpy_slot_count(len, &slots_per_op);
+ sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+ if (sw_desc) {
+ grp_start = sw_desc->group_head;
+ iop_desc_init_memcpy(grp_start, flags);
+ iop_desc_set_byte_count(grp_start, iop_chan, len);
+ iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest);
+ iop_desc_set_memcpy_src_addr(grp_start, dma_src);
+ sw_desc->unmap_src_cnt = 1;
+ sw_desc->unmap_len = len;
+ sw_desc->async_tx.flags = flags;
+ }
+ spin_unlock_bh(&iop_chan->lock);
+
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+iop_adma_prep_dma_memset(struct dma_chan *chan, dma_addr_t dma_dest,
+ int value, size_t len, unsigned long flags)
+{
+ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+ struct iop_adma_desc_slot *sw_desc, *grp_start;
+ int slot_cnt, slots_per_op;
+
+ if (unlikely(!len))
+ return NULL;
+ BUG_ON(unlikely(len > IOP_ADMA_MAX_BYTE_COUNT));
+
+ dev_dbg(iop_chan->device->common.dev, "%s len: %u\n",
+ __func__, len);
+
+ spin_lock_bh(&iop_chan->lock);
+ slot_cnt = iop_chan_memset_slot_count(len, &slots_per_op);
+ sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+ if (sw_desc) {
+ grp_start = sw_desc->group_head;
+ iop_desc_init_memset(grp_start, flags);
+ iop_desc_set_byte_count(grp_start, iop_chan, len);
+ iop_desc_set_block_fill_val(grp_start, value);
+ iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest);
+ sw_desc->unmap_src_cnt = 1;
+ sw_desc->unmap_len = len;
+ sw_desc->async_tx.flags = flags;
+ }
+ spin_unlock_bh(&iop_chan->lock);
+
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+iop_adma_prep_dma_xor(struct dma_chan *chan, dma_addr_t dma_dest,
+ dma_addr_t *dma_src, unsigned int src_cnt, size_t len,
+ unsigned long flags)
+{
+ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+ struct iop_adma_desc_slot *sw_desc, *grp_start;
+ int slot_cnt, slots_per_op;
+
+ if (unlikely(!len))
+ return NULL;
+ BUG_ON(unlikely(len > IOP_ADMA_XOR_MAX_BYTE_COUNT));
+
+ dev_dbg(iop_chan->device->common.dev,
+ "%s src_cnt: %d len: %u flags: %lx\n",
+ __func__, src_cnt, len, flags);
+
+ spin_lock_bh(&iop_chan->lock);
+ slot_cnt = iop_chan_xor_slot_count(len, src_cnt, &slots_per_op);
+ sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+ if (sw_desc) {
+ grp_start = sw_desc->group_head;
+ iop_desc_init_xor(grp_start, src_cnt, flags);
+ iop_desc_set_byte_count(grp_start, iop_chan, len);
+ iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest);
+ sw_desc->unmap_src_cnt = src_cnt;
+ sw_desc->unmap_len = len;
+ sw_desc->async_tx.flags = flags;
+ while (src_cnt--)
+ iop_desc_set_xor_src_addr(grp_start, src_cnt,
+ dma_src[src_cnt]);
+ }
+ spin_unlock_bh(&iop_chan->lock);
+
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+iop_adma_prep_dma_xor_val(struct dma_chan *chan, dma_addr_t *dma_src,
+ unsigned int src_cnt, size_t len, u32 *result,
+ unsigned long flags)
+{
+ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+ struct iop_adma_desc_slot *sw_desc, *grp_start;
+ int slot_cnt, slots_per_op;
+
+ if (unlikely(!len))
+ return NULL;
+
+ dev_dbg(iop_chan->device->common.dev, "%s src_cnt: %d len: %u\n",
+ __func__, src_cnt, len);
+
+ spin_lock_bh(&iop_chan->lock);
+ slot_cnt = iop_chan_zero_sum_slot_count(len, src_cnt, &slots_per_op);
+ sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+ if (sw_desc) {
+ grp_start = sw_desc->group_head;
+ iop_desc_init_zero_sum(grp_start, src_cnt, flags);
+ iop_desc_set_zero_sum_byte_count(grp_start, len);
+ grp_start->xor_check_result = result;
+ pr_debug("\t%s: grp_start->xor_check_result: %p\n",
+ __func__, grp_start->xor_check_result);
+ sw_desc->unmap_src_cnt = src_cnt;
+ sw_desc->unmap_len = len;
+ sw_desc->async_tx.flags = flags;
+ while (src_cnt--)
+ iop_desc_set_zero_sum_src_addr(grp_start, src_cnt,
+ dma_src[src_cnt]);
+ }
+ spin_unlock_bh(&iop_chan->lock);
+
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+iop_adma_prep_dma_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
+ unsigned int src_cnt, const unsigned char *scf, size_t len,
+ unsigned long flags)
+{
+ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+ struct iop_adma_desc_slot *sw_desc, *g;
+ int slot_cnt, slots_per_op;
+ int continue_srcs;
+
+ if (unlikely(!len))
+ return NULL;
+ BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT);
+
+ dev_dbg(iop_chan->device->common.dev,
+ "%s src_cnt: %d len: %u flags: %lx\n",
+ __func__, src_cnt, len, flags);
+
+ if (dmaf_p_disabled_continue(flags))
+ continue_srcs = 1+src_cnt;
+ else if (dmaf_continue(flags))
+ continue_srcs = 3+src_cnt;
+ else
+ continue_srcs = 0+src_cnt;
+
+ spin_lock_bh(&iop_chan->lock);
+ slot_cnt = iop_chan_pq_slot_count(len, continue_srcs, &slots_per_op);
+ sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+ if (sw_desc) {
+ int i;
+
+ g = sw_desc->group_head;
+ iop_desc_set_byte_count(g, iop_chan, len);
+
+ /* even if P is disabled its destination address (bits
+ * [3:0]) must match Q. It is ok if P points to an
+ * invalid address, it won't be written.
+ */
+ if (flags & DMA_PREP_PQ_DISABLE_P)
+ dst[0] = dst[1] & 0x7;
+
+ iop_desc_set_pq_addr(g, dst);
+ sw_desc->unmap_src_cnt = src_cnt;
+ sw_desc->unmap_len = len;
+ sw_desc->async_tx.flags = flags;
+ for (i = 0; i < src_cnt; i++)
+ iop_desc_set_pq_src_addr(g, i, src[i], scf[i]);
+
+ /* if we are continuing a previous operation factor in
+ * the old p and q values, see the comment for dma_maxpq
+ * in include/linux/dmaengine.h
+ */
+ if (dmaf_p_disabled_continue(flags))
+ iop_desc_set_pq_src_addr(g, i++, dst[1], 1);
+ else if (dmaf_continue(flags)) {
+ iop_desc_set_pq_src_addr(g, i++, dst[0], 0);
+ iop_desc_set_pq_src_addr(g, i++, dst[1], 1);
+ iop_desc_set_pq_src_addr(g, i++, dst[1], 0);
+ }
+ iop_desc_init_pq(g, i, flags);
+ }
+ spin_unlock_bh(&iop_chan->lock);
+
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+iop_adma_prep_dma_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
+ unsigned int src_cnt, const unsigned char *scf,
+ size_t len, enum sum_check_flags *pqres,
+ unsigned long flags)
+{
+ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+ struct iop_adma_desc_slot *sw_desc, *g;
+ int slot_cnt, slots_per_op;
+
+ if (unlikely(!len))
+ return NULL;
+ BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT);
+
+ dev_dbg(iop_chan->device->common.dev, "%s src_cnt: %d len: %u\n",
+ __func__, src_cnt, len);
+
+ spin_lock_bh(&iop_chan->lock);
+ slot_cnt = iop_chan_pq_zero_sum_slot_count(len, src_cnt + 2, &slots_per_op);
+ sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+ if (sw_desc) {
+ /* for validate operations p and q are tagged onto the
+ * end of the source list
+ */
+ int pq_idx = src_cnt;
+
+ g = sw_desc->group_head;
+ iop_desc_init_pq_zero_sum(g, src_cnt+2, flags);
+ iop_desc_set_pq_zero_sum_byte_count(g, len);
+ g->pq_check_result = pqres;
+ pr_debug("\t%s: g->pq_check_result: %p\n",
+ __func__, g->pq_check_result);
+ sw_desc->unmap_src_cnt = src_cnt+2;
+ sw_desc->unmap_len = len;
+ sw_desc->async_tx.flags = flags;
+ while (src_cnt--)
+ iop_desc_set_pq_zero_sum_src_addr(g, src_cnt,
+ src[src_cnt],
+ scf[src_cnt]);
+ iop_desc_set_pq_zero_sum_addr(g, pq_idx, src);
+ }
+ spin_unlock_bh(&iop_chan->lock);
+
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static void iop_adma_free_chan_resources(struct dma_chan *chan)
+{
+ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+ struct iop_adma_desc_slot *iter, *_iter;
+ int in_use_descs = 0;
+
+ iop_adma_slot_cleanup(iop_chan);
+
+ spin_lock_bh(&iop_chan->lock);
+ list_for_each_entry_safe(iter, _iter, &iop_chan->chain,
+ chain_node) {
+ in_use_descs++;
+ list_del(&iter->chain_node);
+ }
+ list_for_each_entry_safe_reverse(
+ iter, _iter, &iop_chan->all_slots, slot_node) {
+ list_del(&iter->slot_node);
+ kfree(iter);
+ iop_chan->slots_allocated--;
+ }
+ iop_chan->last_used = NULL;
+
+ dev_dbg(iop_chan->device->common.dev, "%s slots_allocated %d\n",
+ __func__, iop_chan->slots_allocated);
+ spin_unlock_bh(&iop_chan->lock);
+
+ /* one is ok since we left it on there on purpose */
+ if (in_use_descs > 1)
+ printk(KERN_ERR "IOP: Freeing %d in use descriptors!\n",
+ in_use_descs - 1);
+}
+
+/**
+ * iop_adma_is_complete - poll the status of an ADMA transaction
+ * @chan: ADMA channel handle
+ * @cookie: ADMA transaction identifier
+ */
+static enum dma_status iop_adma_is_complete(struct dma_chan *chan,
+ dma_cookie_t cookie,
+ dma_cookie_t *done,
+ dma_cookie_t *used)
+{
+ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+ dma_cookie_t last_used;
+ dma_cookie_t last_complete;
+ enum dma_status ret;
+
+ last_used = chan->cookie;
+ last_complete = iop_chan->completed_cookie;
+
+ if (done)
+ *done = last_complete;
+ if (used)
+ *used = last_used;
+
+ ret = dma_async_is_complete(cookie, last_complete, last_used);
+ if (ret == DMA_SUCCESS)
+ return ret;
+
+ iop_adma_slot_cleanup(iop_chan);
+
+ last_used = chan->cookie;
+ last_complete = iop_chan->completed_cookie;
+
+ if (done)
+ *done = last_complete;
+ if (used)
+ *used = last_used;
+
+ return dma_async_is_complete(cookie, last_complete, last_used);
+}
+
+static irqreturn_t iop_adma_eot_handler(int irq, void *data)
+{
+ struct iop_adma_chan *chan = data;
+
+ dev_dbg(chan->device->common.dev, "%s\n", __func__);
+
+ tasklet_schedule(&chan->irq_tasklet);
+
+ iop_adma_device_clear_eot_status(chan);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t iop_adma_eoc_handler(int irq, void *data)
+{
+ struct iop_adma_chan *chan = data;
+
+ dev_dbg(chan->device->common.dev, "%s\n", __func__);
+
+ tasklet_schedule(&chan->irq_tasklet);
+
+ iop_adma_device_clear_eoc_status(chan);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t iop_adma_err_handler(int irq, void *data)
+{
+ struct iop_adma_chan *chan = data;
+ unsigned long status = iop_chan_get_status(chan);
+
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "error ( %s%s%s%s%s%s%s)\n",
+ iop_is_err_int_parity(status, chan) ? "int_parity " : "",
+ iop_is_err_mcu_abort(status, chan) ? "mcu_abort " : "",
+ iop_is_err_int_tabort(status, chan) ? "int_tabort " : "",
+ iop_is_err_int_mabort(status, chan) ? "int_mabort " : "",
+ iop_is_err_pci_tabort(status, chan) ? "pci_tabort " : "",
+ iop_is_err_pci_mabort(status, chan) ? "pci_mabort " : "",
+ iop_is_err_split_tx(status, chan) ? "split_tx " : "");
+
+ iop_adma_device_clear_err_status(chan);
+
+ BUG();
+
+ return IRQ_HANDLED;
+}
+
+static void iop_adma_issue_pending(struct dma_chan *chan)
+{
+ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+
+ if (iop_chan->pending) {
+ iop_chan->pending = 0;
+ iop_chan_append(iop_chan);
+ }
+}
+
+/*
+ * Perform a transaction to verify the HW works.
+ */
+#define IOP_ADMA_TEST_SIZE 2000
+
+static int __devinit iop_adma_memcpy_self_test(struct iop_adma_device *device)
+{
+ int i;
+ void *src, *dest;
+ dma_addr_t src_dma, dest_dma;
+ struct dma_chan *dma_chan;
+ dma_cookie_t cookie;
+ struct dma_async_tx_descriptor *tx;
+ int err = 0;
+ struct iop_adma_chan *iop_chan;
+
+ dev_dbg(device->common.dev, "%s\n", __func__);
+
+ src = kmalloc(IOP_ADMA_TEST_SIZE, GFP_KERNEL);
+ if (!src)
+ return -ENOMEM;
+ dest = kzalloc(IOP_ADMA_TEST_SIZE, GFP_KERNEL);
+ if (!dest) {
+ kfree(src);
+ return -ENOMEM;
+ }
+
+ /* Fill in src buffer */
+ for (i = 0; i < IOP_ADMA_TEST_SIZE; i++)
+ ((u8 *) src)[i] = (u8)i;
+
+ /* Start copy, using first DMA channel */
+ dma_chan = container_of(device->common.channels.next,
+ struct dma_chan,
+ device_node);
+ if (iop_adma_alloc_chan_resources(dma_chan) < 1) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ dest_dma = dma_map_single(dma_chan->device->dev, dest,
+ IOP_ADMA_TEST_SIZE, DMA_FROM_DEVICE);
+ src_dma = dma_map_single(dma_chan->device->dev, src,
+ IOP_ADMA_TEST_SIZE, DMA_TO_DEVICE);
+ tx = iop_adma_prep_dma_memcpy(dma_chan, dest_dma, src_dma,
+ IOP_ADMA_TEST_SIZE,
+ DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+
+ cookie = iop_adma_tx_submit(tx);
+ iop_adma_issue_pending(dma_chan);
+ msleep(1);
+
+ if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) !=
+ DMA_SUCCESS) {
+ dev_printk(KERN_ERR, dma_chan->device->dev,
+ "Self-test copy timed out, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ iop_chan = to_iop_adma_chan(dma_chan);
+ dma_sync_single_for_cpu(&iop_chan->device->pdev->dev, dest_dma,
+ IOP_ADMA_TEST_SIZE, DMA_FROM_DEVICE);
+ if (memcmp(src, dest, IOP_ADMA_TEST_SIZE)) {
+ dev_printk(KERN_ERR, dma_chan->device->dev,
+ "Self-test copy failed compare, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+free_resources:
+ iop_adma_free_chan_resources(dma_chan);
+out:
+ kfree(src);
+ kfree(dest);
+ return err;
+}
+
+#define IOP_ADMA_NUM_SRC_TEST 4 /* must be <= 15 */
+static int __devinit
+iop_adma_xor_val_self_test(struct iop_adma_device *device)
+{
+ int i, src_idx;
+ struct page *dest;
+ struct page *xor_srcs[IOP_ADMA_NUM_SRC_TEST];
+ struct page *zero_sum_srcs[IOP_ADMA_NUM_SRC_TEST + 1];
+ dma_addr_t dma_srcs[IOP_ADMA_NUM_SRC_TEST + 1];
+ dma_addr_t dma_addr, dest_dma;
+ struct dma_async_tx_descriptor *tx;
+ struct dma_chan *dma_chan;
+ dma_cookie_t cookie;
+ u8 cmp_byte = 0;
+ u32 cmp_word;
+ u32 zero_sum_result;
+ int err = 0;
+ struct iop_adma_chan *iop_chan;
+
+ dev_dbg(device->common.dev, "%s\n", __func__);
+
+ for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++) {
+ xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
+ if (!xor_srcs[src_idx]) {
+ while (src_idx--)
+ __free_page(xor_srcs[src_idx]);
+ return -ENOMEM;
+ }
+ }
+
+ dest = alloc_page(GFP_KERNEL);
+ if (!dest) {
+ while (src_idx--)
+ __free_page(xor_srcs[src_idx]);
+ return -ENOMEM;
+ }
+
+ /* Fill in src buffers */
+ for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++) {
+ u8 *ptr = page_address(xor_srcs[src_idx]);
+ for (i = 0; i < PAGE_SIZE; i++)
+ ptr[i] = (1 << src_idx);
+ }
+
+ for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++)
+ cmp_byte ^= (u8) (1 << src_idx);
+
+ cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
+ (cmp_byte << 8) | cmp_byte;
+
+ memset(page_address(dest), 0, PAGE_SIZE);
+
+ dma_chan = container_of(device->common.channels.next,
+ struct dma_chan,
+ device_node);
+ if (iop_adma_alloc_chan_resources(dma_chan) < 1) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ /* test xor */
+ dest_dma = dma_map_page(dma_chan->device->dev, dest, 0,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++)
+ dma_srcs[i] = dma_map_page(dma_chan->device->dev, xor_srcs[i],
+ 0, PAGE_SIZE, DMA_TO_DEVICE);
+ tx = iop_adma_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
+ IOP_ADMA_NUM_SRC_TEST, PAGE_SIZE,
+ DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+
+ cookie = iop_adma_tx_submit(tx);
+ iop_adma_issue_pending(dma_chan);
+ msleep(8);
+
+ if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) !=
+ DMA_SUCCESS) {
+ dev_printk(KERN_ERR, dma_chan->device->dev,
+ "Self-test xor timed out, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ iop_chan = to_iop_adma_chan(dma_chan);
+ dma_sync_single_for_cpu(&iop_chan->device->pdev->dev, dest_dma,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
+ u32 *ptr = page_address(dest);
+ if (ptr[i] != cmp_word) {
+ dev_printk(KERN_ERR, dma_chan->device->dev,
+ "Self-test xor failed compare, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+ }
+ dma_sync_single_for_device(&iop_chan->device->pdev->dev, dest_dma,
+ PAGE_SIZE, DMA_TO_DEVICE);
+
+ /* skip zero sum if the capability is not present */
+ if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask))
+ goto free_resources;
+
+ /* zero sum the sources with the destintation page */
+ for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++)
+ zero_sum_srcs[i] = xor_srcs[i];
+ zero_sum_srcs[i] = dest;
+
+ zero_sum_result = 1;
+
+ for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 1; i++)
+ dma_srcs[i] = dma_map_page(dma_chan->device->dev,
+ zero_sum_srcs[i], 0, PAGE_SIZE,
+ DMA_TO_DEVICE);
+ tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs,
+ IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
+ &zero_sum_result,
+ DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+
+ cookie = iop_adma_tx_submit(tx);
+ iop_adma_issue_pending(dma_chan);
+ msleep(8);
+
+ if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
+ dev_printk(KERN_ERR, dma_chan->device->dev,
+ "Self-test zero sum timed out, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ if (zero_sum_result != 0) {
+ dev_printk(KERN_ERR, dma_chan->device->dev,
+ "Self-test zero sum failed compare, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ /* test memset */
+ dma_addr = dma_map_page(dma_chan->device->dev, dest, 0,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ tx = iop_adma_prep_dma_memset(dma_chan, dma_addr, 0, PAGE_SIZE,
+ DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+
+ cookie = iop_adma_tx_submit(tx);
+ iop_adma_issue_pending(dma_chan);
+ msleep(8);
+
+ if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
+ dev_printk(KERN_ERR, dma_chan->device->dev,
+ "Self-test memset timed out, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ for (i = 0; i < PAGE_SIZE/sizeof(u32); i++) {
+ u32 *ptr = page_address(dest);
+ if (ptr[i]) {
+ dev_printk(KERN_ERR, dma_chan->device->dev,
+ "Self-test memset failed compare, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+ }
+
+ /* test for non-zero parity sum */
+ zero_sum_result = 0;
+ for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 1; i++)
+ dma_srcs[i] = dma_map_page(dma_chan->device->dev,
+ zero_sum_srcs[i], 0, PAGE_SIZE,
+ DMA_TO_DEVICE);
+ tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs,
+ IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
+ &zero_sum_result,
+ DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+
+ cookie = iop_adma_tx_submit(tx);
+ iop_adma_issue_pending(dma_chan);
+ msleep(8);
+
+ if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
+ dev_printk(KERN_ERR, dma_chan->device->dev,
+ "Self-test non-zero sum timed out, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ if (zero_sum_result != 1) {
+ dev_printk(KERN_ERR, dma_chan->device->dev,
+ "Self-test non-zero sum failed compare, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+free_resources:
+ iop_adma_free_chan_resources(dma_chan);
+out:
+ src_idx = IOP_ADMA_NUM_SRC_TEST;
+ while (src_idx--)
+ __free_page(xor_srcs[src_idx]);
+ __free_page(dest);
+ return err;
+}
+
+#ifdef CONFIG_MD_RAID6_PQ
+static int __devinit
+iop_adma_pq_zero_sum_self_test(struct iop_adma_device *device)
+{
+ /* combined sources, software pq results, and extra hw pq results */
+ struct page *pq[IOP_ADMA_NUM_SRC_TEST+2+2];
+ /* ptr to the extra hw pq buffers defined above */
+ struct page **pq_hw = &pq[IOP_ADMA_NUM_SRC_TEST+2];
+ /* address conversion buffers (dma_map / page_address) */
+ void *pq_sw[IOP_ADMA_NUM_SRC_TEST+2];
+ dma_addr_t pq_src[IOP_ADMA_NUM_SRC_TEST];
+ dma_addr_t pq_dest[2];
+
+ int i;
+ struct dma_async_tx_descriptor *tx;
+ struct dma_chan *dma_chan;
+ dma_cookie_t cookie;
+ u32 zero_sum_result;
+ int err = 0;
+ struct device *dev;
+
+ dev_dbg(device->common.dev, "%s\n", __func__);
+
+ for (i = 0; i < ARRAY_SIZE(pq); i++) {
+ pq[i] = alloc_page(GFP_KERNEL);
+ if (!pq[i]) {
+ while (i--)
+ __free_page(pq[i]);
+ return -ENOMEM;
+ }
+ }
+
+ /* Fill in src buffers */
+ for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++) {
+ pq_sw[i] = page_address(pq[i]);
+ memset(pq_sw[i], 0x11111111 * (1<<i), PAGE_SIZE);
+ }
+ pq_sw[i] = page_address(pq[i]);
+ pq_sw[i+1] = page_address(pq[i+1]);
+
+ dma_chan = container_of(device->common.channels.next,
+ struct dma_chan,
+ device_node);
+ if (iop_adma_alloc_chan_resources(dma_chan) < 1) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ dev = dma_chan->device->dev;
+
+ /* initialize the dests */
+ memset(page_address(pq_hw[0]), 0 , PAGE_SIZE);
+ memset(page_address(pq_hw[1]), 0 , PAGE_SIZE);
+
+ /* test pq */
+ pq_dest[0] = dma_map_page(dev, pq_hw[0], 0, PAGE_SIZE, DMA_FROM_DEVICE);
+ pq_dest[1] = dma_map_page(dev, pq_hw[1], 0, PAGE_SIZE, DMA_FROM_DEVICE);
+ for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++)
+ pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE,
+ DMA_TO_DEVICE);
+
+ tx = iop_adma_prep_dma_pq(dma_chan, pq_dest, pq_src,
+ IOP_ADMA_NUM_SRC_TEST, (u8 *)raid6_gfexp,
+ PAGE_SIZE,
+ DMA_PREP_INTERRUPT |
+ DMA_CTRL_ACK);
+
+ cookie = iop_adma_tx_submit(tx);
+ iop_adma_issue_pending(dma_chan);
+ msleep(8);
+
+ if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) !=
+ DMA_SUCCESS) {
+ dev_err(dev, "Self-test pq timed out, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ raid6_call.gen_syndrome(IOP_ADMA_NUM_SRC_TEST+2, PAGE_SIZE, pq_sw);
+
+ if (memcmp(pq_sw[IOP_ADMA_NUM_SRC_TEST],
+ page_address(pq_hw[0]), PAGE_SIZE) != 0) {
+ dev_err(dev, "Self-test p failed compare, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+ if (memcmp(pq_sw[IOP_ADMA_NUM_SRC_TEST+1],
+ page_address(pq_hw[1]), PAGE_SIZE) != 0) {
+ dev_err(dev, "Self-test q failed compare, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ /* test correct zero sum using the software generated pq values */
+ for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 2; i++)
+ pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE,
+ DMA_TO_DEVICE);
+
+ zero_sum_result = ~0;
+ tx = iop_adma_prep_dma_pq_val(dma_chan, &pq_src[IOP_ADMA_NUM_SRC_TEST],
+ pq_src, IOP_ADMA_NUM_SRC_TEST,
+ raid6_gfexp, PAGE_SIZE, &zero_sum_result,
+ DMA_PREP_INTERRUPT|DMA_CTRL_ACK);
+
+ cookie = iop_adma_tx_submit(tx);
+ iop_adma_issue_pending(dma_chan);
+ msleep(8);
+
+ if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) !=
+ DMA_SUCCESS) {
+ dev_err(dev, "Self-test pq-zero-sum timed out, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ if (zero_sum_result != 0) {
+ dev_err(dev, "Self-test pq-zero-sum failed to validate: %x\n",
+ zero_sum_result);
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ /* test incorrect zero sum */
+ i = IOP_ADMA_NUM_SRC_TEST;
+ memset(pq_sw[i] + 100, 0, 100);
+ memset(pq_sw[i+1] + 200, 0, 200);
+ for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 2; i++)
+ pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE,
+ DMA_TO_DEVICE);
+
+ zero_sum_result = 0;
+ tx = iop_adma_prep_dma_pq_val(dma_chan, &pq_src[IOP_ADMA_NUM_SRC_TEST],
+ pq_src, IOP_ADMA_NUM_SRC_TEST,
+ raid6_gfexp, PAGE_SIZE, &zero_sum_result,
+ DMA_PREP_INTERRUPT|DMA_CTRL_ACK);
+
+ cookie = iop_adma_tx_submit(tx);
+ iop_adma_issue_pending(dma_chan);
+ msleep(8);
+
+ if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) !=
+ DMA_SUCCESS) {
+ dev_err(dev, "Self-test !pq-zero-sum timed out, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ if (zero_sum_result != (SUM_CHECK_P_RESULT | SUM_CHECK_Q_RESULT)) {
+ dev_err(dev, "Self-test !pq-zero-sum failed to validate: %x\n",
+ zero_sum_result);
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+free_resources:
+ iop_adma_free_chan_resources(dma_chan);
+out:
+ i = ARRAY_SIZE(pq);
+ while (i--)
+ __free_page(pq[i]);
+ return err;
+}
+#endif
+
+static int __devexit iop_adma_remove(struct platform_device *dev)
+{
+ struct iop_adma_device *device = platform_get_drvdata(dev);
+ struct dma_chan *chan, *_chan;
+ struct iop_adma_chan *iop_chan;
+ struct iop_adma_platform_data *plat_data = dev->dev.platform_data;
+
+ dma_async_device_unregister(&device->common);
+
+ dma_free_coherent(&dev->dev, plat_data->pool_size,
+ device->dma_desc_pool_virt, device->dma_desc_pool);
+
+ list_for_each_entry_safe(chan, _chan, &device->common.channels,
+ device_node) {
+ iop_chan = to_iop_adma_chan(chan);
+ list_del(&chan->device_node);
+ kfree(iop_chan);
+ }
+ kfree(device);
+
+ return 0;
+}
+
+static int __devinit iop_adma_probe(struct platform_device *pdev)
+{
+ struct resource *res;
+ int ret = 0, i;
+ struct iop_adma_device *adev;
+ struct iop_adma_chan *iop_chan;
+ struct dma_device *dma_dev;
+ struct iop_adma_platform_data *plat_data = pdev->dev.platform_data;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res)
+ return -ENODEV;
+
+ if (!devm_request_mem_region(&pdev->dev, res->start,
+ resource_size(res), pdev->name))
+ return -EBUSY;
+
+ adev = kzalloc(sizeof(*adev), GFP_KERNEL);
+ if (!adev)
+ return -ENOMEM;
+ dma_dev = &adev->common;
+
+ /* allocate coherent memory for hardware descriptors
+ * note: writecombine gives slightly better performance, but
+ * requires that we explicitly flush the writes
+ */
+ if ((adev->dma_desc_pool_virt = dma_alloc_writecombine(&pdev->dev,
+ plat_data->pool_size,
+ &adev->dma_desc_pool,
+ GFP_KERNEL)) == NULL) {
+ ret = -ENOMEM;
+ goto err_free_adev;
+ }
+
+ dev_dbg(&pdev->dev, "%s: allocted descriptor pool virt %p phys %p\n",
+ __func__, adev->dma_desc_pool_virt,
+ (void *) adev->dma_desc_pool);
+
+ adev->id = plat_data->hw_id;
+
+ /* discover transaction capabilites from the platform data */
+ dma_dev->cap_mask = plat_data->cap_mask;
+
+ adev->pdev = pdev;
+ platform_set_drvdata(pdev, adev);
+
+ INIT_LIST_HEAD(&dma_dev->channels);
+
+ /* set base routines */
+ dma_dev->device_alloc_chan_resources = iop_adma_alloc_chan_resources;
+ dma_dev->device_free_chan_resources = iop_adma_free_chan_resources;
+ dma_dev->device_is_tx_complete = iop_adma_is_complete;
+ dma_dev->device_issue_pending = iop_adma_issue_pending;
+ dma_dev->dev = &pdev->dev;
+
+ /* set prep routines based on capability */
+ if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask))
+ dma_dev->device_prep_dma_memcpy = iop_adma_prep_dma_memcpy;
+ if (dma_has_cap(DMA_MEMSET, dma_dev->cap_mask))
+ dma_dev->device_prep_dma_memset = iop_adma_prep_dma_memset;
+ if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
+ dma_dev->max_xor = iop_adma_get_max_xor();
+ dma_dev->device_prep_dma_xor = iop_adma_prep_dma_xor;
+ }
+ if (dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask))
+ dma_dev->device_prep_dma_xor_val =
+ iop_adma_prep_dma_xor_val;
+ if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) {
+ dma_set_maxpq(dma_dev, iop_adma_get_max_pq(), 0);
+ dma_dev->device_prep_dma_pq = iop_adma_prep_dma_pq;
+ }
+ if (dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask))
+ dma_dev->device_prep_dma_pq_val =
+ iop_adma_prep_dma_pq_val;
+ if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask))
+ dma_dev->device_prep_dma_interrupt =
+ iop_adma_prep_dma_interrupt;
+
+ iop_chan = kzalloc(sizeof(*iop_chan), GFP_KERNEL);
+ if (!iop_chan) {
+ ret = -ENOMEM;
+ goto err_free_dma;
+ }
+ iop_chan->device = adev;
+
+ iop_chan->mmr_base = devm_ioremap(&pdev->dev, res->start,
+ resource_size(res));
+ if (!iop_chan->mmr_base) {
+ ret = -ENOMEM;
+ goto err_free_iop_chan;
+ }
+ tasklet_init(&iop_chan->irq_tasklet, iop_adma_tasklet, (unsigned long)
+ iop_chan);
+
+ /* clear errors before enabling interrupts */
+ iop_adma_device_clear_err_status(iop_chan);
+
+ for (i = 0; i < 3; i++) {
+ irq_handler_t handler[] = { iop_adma_eot_handler,
+ iop_adma_eoc_handler,
+ iop_adma_err_handler };
+ int irq = platform_get_irq(pdev, i);
+ if (irq < 0) {
+ ret = -ENXIO;
+ goto err_free_iop_chan;
+ } else {
+ ret = devm_request_irq(&pdev->dev, irq,
+ handler[i], 0, pdev->name, iop_chan);
+ if (ret)
+ goto err_free_iop_chan;
+ }
+ }
+
+ spin_lock_init(&iop_chan->lock);
+ INIT_LIST_HEAD(&iop_chan->chain);
+ INIT_LIST_HEAD(&iop_chan->all_slots);
+ iop_chan->common.device = dma_dev;
+ list_add_tail(&iop_chan->common.device_node, &dma_dev->channels);
+
+ if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
+ ret = iop_adma_memcpy_self_test(adev);
+ dev_dbg(&pdev->dev, "memcpy self test returned %d\n", ret);
+ if (ret)
+ goto err_free_iop_chan;
+ }
+
+ if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) ||
+ dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) {
+ ret = iop_adma_xor_val_self_test(adev);
+ dev_dbg(&pdev->dev, "xor self test returned %d\n", ret);
+ if (ret)
+ goto err_free_iop_chan;
+ }
+
+ if (dma_has_cap(DMA_PQ, dma_dev->cap_mask) &&
+ dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask)) {
+ #ifdef CONFIG_MD_RAID6_PQ
+ ret = iop_adma_pq_zero_sum_self_test(adev);
+ dev_dbg(&pdev->dev, "pq self test returned %d\n", ret);
+ #else
+ /* can not test raid6, so do not publish capability */
+ dma_cap_clear(DMA_PQ, dma_dev->cap_mask);
+ dma_cap_clear(DMA_PQ_VAL, dma_dev->cap_mask);
+ ret = 0;
+ #endif
+ if (ret)
+ goto err_free_iop_chan;
+ }
+
+ dev_printk(KERN_INFO, &pdev->dev, "Intel(R) IOP: "
+ "( %s%s%s%s%s%s%s)\n",
+ dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "pq " : "",
+ dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask) ? "pq_val " : "",
+ dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "",
+ dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask) ? "xor_val " : "",
+ dma_has_cap(DMA_MEMSET, dma_dev->cap_mask) ? "fill " : "",
+ dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "",
+ dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : "");
+
+ dma_async_device_register(dma_dev);
+ goto out;
+
+ err_free_iop_chan:
+ kfree(iop_chan);
+ err_free_dma:
+ dma_free_coherent(&adev->pdev->dev, plat_data->pool_size,
+ adev->dma_desc_pool_virt, adev->dma_desc_pool);
+ err_free_adev:
+ kfree(adev);
+ out:
+ return ret;
+}
+
+static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan)
+{
+ struct iop_adma_desc_slot *sw_desc, *grp_start;
+ dma_cookie_t cookie;
+ int slot_cnt, slots_per_op;
+
+ dev_dbg(iop_chan->device->common.dev, "%s\n", __func__);
+
+ spin_lock_bh(&iop_chan->lock);
+ slot_cnt = iop_chan_memcpy_slot_count(0, &slots_per_op);
+ sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+ if (sw_desc) {
+ grp_start = sw_desc->group_head;
+
+ list_splice_init(&sw_desc->tx_list, &iop_chan->chain);
+ async_tx_ack(&sw_desc->async_tx);
+ iop_desc_init_memcpy(grp_start, 0);
+ iop_desc_set_byte_count(grp_start, iop_chan, 0);
+ iop_desc_set_dest_addr(grp_start, iop_chan, 0);
+ iop_desc_set_memcpy_src_addr(grp_start, 0);
+
+ cookie = iop_chan->common.cookie;
+ cookie++;
+ if (cookie <= 1)
+ cookie = 2;
+
+ /* initialize the completed cookie to be less than
+ * the most recently used cookie
+ */
+ iop_chan->completed_cookie = cookie - 1;
+ iop_chan->common.cookie = sw_desc->async_tx.cookie = cookie;
+
+ /* channel should not be busy */
+ BUG_ON(iop_chan_is_busy(iop_chan));
+
+ /* clear any prior error-status bits */
+ iop_adma_device_clear_err_status(iop_chan);
+
+ /* disable operation */
+ iop_chan_disable(iop_chan);
+
+ /* set the descriptor address */
+ iop_chan_set_next_descriptor(iop_chan, sw_desc->async_tx.phys);
+
+ /* 1/ don't add pre-chained descriptors
+ * 2/ dummy read to flush next_desc write
+ */
+ BUG_ON(iop_desc_get_next_desc(sw_desc));
+
+ /* run the descriptor */
+ iop_chan_enable(iop_chan);
+ } else
+ dev_printk(KERN_ERR, iop_chan->device->common.dev,
+ "failed to allocate null descriptor\n");
+ spin_unlock_bh(&iop_chan->lock);
+}
+
+static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan)
+{
+ struct iop_adma_desc_slot *sw_desc, *grp_start;
+ dma_cookie_t cookie;
+ int slot_cnt, slots_per_op;
+
+ dev_dbg(iop_chan->device->common.dev, "%s\n", __func__);
+
+ spin_lock_bh(&iop_chan->lock);
+ slot_cnt = iop_chan_xor_slot_count(0, 2, &slots_per_op);
+ sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+ if (sw_desc) {
+ grp_start = sw_desc->group_head;
+ list_splice_init(&sw_desc->tx_list, &iop_chan->chain);
+ async_tx_ack(&sw_desc->async_tx);
+ iop_desc_init_null_xor(grp_start, 2, 0);
+ iop_desc_set_byte_count(grp_start, iop_chan, 0);
+ iop_desc_set_dest_addr(grp_start, iop_chan, 0);
+ iop_desc_set_xor_src_addr(grp_start, 0, 0);
+ iop_desc_set_xor_src_addr(grp_start, 1, 0);
+
+ cookie = iop_chan->common.cookie;
+ cookie++;
+ if (cookie <= 1)
+ cookie = 2;
+
+ /* initialize the completed cookie to be less than
+ * the most recently used cookie
+ */
+ iop_chan->completed_cookie = cookie - 1;
+ iop_chan->common.cookie = sw_desc->async_tx.cookie = cookie;
+
+ /* channel should not be busy */
+ BUG_ON(iop_chan_is_busy(iop_chan));
+
+ /* clear any prior error-status bits */
+ iop_adma_device_clear_err_status(iop_chan);
+
+ /* disable operation */
+ iop_chan_disable(iop_chan);
+
+ /* set the descriptor address */
+ iop_chan_set_next_descriptor(iop_chan, sw_desc->async_tx.phys);
+
+ /* 1/ don't add pre-chained descriptors
+ * 2/ dummy read to flush next_desc write
+ */
+ BUG_ON(iop_desc_get_next_desc(sw_desc));
+
+ /* run the descriptor */
+ iop_chan_enable(iop_chan);
+ } else
+ dev_printk(KERN_ERR, iop_chan->device->common.dev,
+ "failed to allocate null descriptor\n");
+ spin_unlock_bh(&iop_chan->lock);
+}
+
+MODULE_ALIAS("platform:iop-adma");
+
+static struct platform_driver iop_adma_driver = {
+ .probe = iop_adma_probe,
+ .remove = __devexit_p(iop_adma_remove),
+ .driver = {
+ .owner = THIS_MODULE,
+ .name = "iop-adma",
+ },
+};
+
+static int __init iop_adma_init (void)
+{
+ return platform_driver_register(&iop_adma_driver);
+}
+
+static void __exit iop_adma_exit (void)
+{
+ platform_driver_unregister(&iop_adma_driver);
+ return;
+}
+module_exit(iop_adma_exit);
+module_init(iop_adma_init);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("IOP ADMA Engine Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/iovlock.c b/drivers/dma/iovlock.c
new file mode 100644
index 00000000000..c0a272c7368
--- /dev/null
+++ b/drivers/dma/iovlock.c
@@ -0,0 +1,279 @@
+/*
+ * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved.
+ * Portions based on net/core/datagram.c and copyrighted by their authors.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+
+/*
+ * This code allows the net stack to make use of a DMA engine for
+ * skb to iovec copies.
+ */
+
+#include <linux/dmaengine.h>
+#include <linux/pagemap.h>
+#include <net/tcp.h> /* for memcpy_toiovec */
+#include <asm/io.h>
+#include <asm/uaccess.h>
+
+static int num_pages_spanned(struct iovec *iov)
+{
+ return
+ ((PAGE_ALIGN((unsigned long)iov->iov_base + iov->iov_len) -
+ ((unsigned long)iov->iov_base & PAGE_MASK)) >> PAGE_SHIFT);
+}
+
+/*
+ * Pin down all the iovec pages needed for len bytes.
+ * Return a struct dma_pinned_list to keep track of pages pinned down.
+ *
+ * We are allocating a single chunk of memory, and then carving it up into
+ * 3 sections, the latter 2 whose size depends on the number of iovecs and the
+ * total number of pages, respectively.
+ */
+struct dma_pinned_list *dma_pin_iovec_pages(struct iovec *iov, size_t len)
+{
+ struct dma_pinned_list *local_list;
+ struct page **pages;
+ int i;
+ int ret;
+ int nr_iovecs = 0;
+ int iovec_len_used = 0;
+ int iovec_pages_used = 0;
+
+ /* don't pin down non-user-based iovecs */
+ if (segment_eq(get_fs(), KERNEL_DS))
+ return NULL;
+
+ /* determine how many iovecs/pages there are, up front */
+ do {
+ iovec_len_used += iov[nr_iovecs].iov_len;
+ iovec_pages_used += num_pages_spanned(&iov[nr_iovecs]);
+ nr_iovecs++;
+ } while (iovec_len_used < len);
+
+ /* single kmalloc for pinned list, page_list[], and the page arrays */
+ local_list = kmalloc(sizeof(*local_list)
+ + (nr_iovecs * sizeof (struct dma_page_list))
+ + (iovec_pages_used * sizeof (struct page*)), GFP_KERNEL);
+ if (!local_list)
+ goto out;
+
+ /* list of pages starts right after the page list array */
+ pages = (struct page **) &local_list->page_list[nr_iovecs];
+
+ local_list->nr_iovecs = 0;
+
+ for (i = 0; i < nr_iovecs; i++) {
+ struct dma_page_list *page_list = &local_list->page_list[i];
+
+ len -= iov[i].iov_len;
+
+ if (!access_ok(VERIFY_WRITE, iov[i].iov_base, iov[i].iov_len))
+ goto unpin;
+
+ page_list->nr_pages = num_pages_spanned(&iov[i]);
+ page_list->base_address = iov[i].iov_base;
+
+ page_list->pages = pages;
+ pages += page_list->nr_pages;
+
+ /* pin pages down */
+ down_read(&current->mm->mmap_sem);
+ ret = get_user_pages(
+ current,
+ current->mm,
+ (unsigned long) iov[i].iov_base,
+ page_list->nr_pages,
+ 1, /* write */
+ 0, /* force */
+ page_list->pages,
+ NULL);
+ up_read(&current->mm->mmap_sem);
+
+ if (ret != page_list->nr_pages)
+ goto unpin;
+
+ local_list->nr_iovecs = i + 1;
+ }
+
+ return local_list;
+
+unpin:
+ dma_unpin_iovec_pages(local_list);
+out:
+ return NULL;
+}
+
+void dma_unpin_iovec_pages(struct dma_pinned_list *pinned_list)
+{
+ int i, j;
+
+ if (!pinned_list)
+ return;
+
+ for (i = 0; i < pinned_list->nr_iovecs; i++) {
+ struct dma_page_list *page_list = &pinned_list->page_list[i];
+ for (j = 0; j < page_list->nr_pages; j++) {
+ set_page_dirty_lock(page_list->pages[j]);
+ page_cache_release(page_list->pages[j]);
+ }
+ }
+
+ kfree(pinned_list);
+}
+
+
+/*
+ * We have already pinned down the pages we will be using in the iovecs.
+ * Each entry in iov array has corresponding entry in pinned_list->page_list.
+ * Using array indexing to keep iov[] and page_list[] in sync.
+ * Initial elements in iov array's iov->iov_len will be 0 if already copied into
+ * by another call.
+ * iov array length remaining guaranteed to be bigger than len.
+ */
+dma_cookie_t dma_memcpy_to_iovec(struct dma_chan *chan, struct iovec *iov,
+ struct dma_pinned_list *pinned_list, unsigned char *kdata, size_t len)
+{
+ int iov_byte_offset;
+ int copy;
+ dma_cookie_t dma_cookie = 0;
+ int iovec_idx;
+ int page_idx;
+
+ if (!chan)
+ return memcpy_toiovec(iov, kdata, len);
+
+ iovec_idx = 0;
+ while (iovec_idx < pinned_list->nr_iovecs) {
+ struct dma_page_list *page_list;
+
+ /* skip already used-up iovecs */
+ while (!iov[iovec_idx].iov_len)
+ iovec_idx++;
+
+ page_list = &pinned_list->page_list[iovec_idx];
+
+ iov_byte_offset = ((unsigned long)iov[iovec_idx].iov_base & ~PAGE_MASK);
+ page_idx = (((unsigned long)iov[iovec_idx].iov_base & PAGE_MASK)
+ - ((unsigned long)page_list->base_address & PAGE_MASK)) >> PAGE_SHIFT;
+
+ /* break up copies to not cross page boundary */
+ while (iov[iovec_idx].iov_len) {
+ copy = min_t(int, PAGE_SIZE - iov_byte_offset, len);
+ copy = min_t(int, copy, iov[iovec_idx].iov_len);
+
+ dma_cookie = dma_async_memcpy_buf_to_pg(chan,
+ page_list->pages[page_idx],
+ iov_byte_offset,
+ kdata,
+ copy);
+ /* poll for a descriptor slot */
+ if (unlikely(dma_cookie < 0)) {
+ dma_async_issue_pending(chan);
+ continue;
+ }
+
+ len -= copy;
+ iov[iovec_idx].iov_len -= copy;
+ iov[iovec_idx].iov_base += copy;
+
+ if (!len)
+ return dma_cookie;
+
+ kdata += copy;
+ iov_byte_offset = 0;
+ page_idx++;
+ }
+ iovec_idx++;
+ }
+
+ /* really bad if we ever run out of iovecs */
+ BUG();
+ return -EFAULT;
+}
+
+dma_cookie_t dma_memcpy_pg_to_iovec(struct dma_chan *chan, struct iovec *iov,
+ struct dma_pinned_list *pinned_list, struct page *page,
+ unsigned int offset, size_t len)
+{
+ int iov_byte_offset;
+ int copy;
+ dma_cookie_t dma_cookie = 0;
+ int iovec_idx;
+ int page_idx;
+ int err;
+
+ /* this needs as-yet-unimplemented buf-to-buff, so punt. */
+ /* TODO: use dma for this */
+ if (!chan || !pinned_list) {
+ u8 *vaddr = kmap(page);
+ err = memcpy_toiovec(iov, vaddr + offset, len);
+ kunmap(page);
+ return err;
+ }
+
+ iovec_idx = 0;
+ while (iovec_idx < pinned_list->nr_iovecs) {
+ struct dma_page_list *page_list;
+
+ /* skip already used-up iovecs */
+ while (!iov[iovec_idx].iov_len)
+ iovec_idx++;
+
+ page_list = &pinned_list->page_list[iovec_idx];
+
+ iov_byte_offset = ((unsigned long)iov[iovec_idx].iov_base & ~PAGE_MASK);
+ page_idx = (((unsigned long)iov[iovec_idx].iov_base & PAGE_MASK)
+ - ((unsigned long)page_list->base_address & PAGE_MASK)) >> PAGE_SHIFT;
+
+ /* break up copies to not cross page boundary */
+ while (iov[iovec_idx].iov_len) {
+ copy = min_t(int, PAGE_SIZE - iov_byte_offset, len);
+ copy = min_t(int, copy, iov[iovec_idx].iov_len);
+
+ dma_cookie = dma_async_memcpy_pg_to_pg(chan,
+ page_list->pages[page_idx],
+ iov_byte_offset,
+ page,
+ offset,
+ copy);
+ /* poll for a descriptor slot */
+ if (unlikely(dma_cookie < 0)) {
+ dma_async_issue_pending(chan);
+ continue;
+ }
+
+ len -= copy;
+ iov[iovec_idx].iov_len -= copy;
+ iov[iovec_idx].iov_base += copy;
+
+ if (!len)
+ return dma_cookie;
+
+ offset += copy;
+ iov_byte_offset = 0;
+ page_idx++;
+ }
+ iovec_idx++;
+ }
+
+ /* really bad if we ever run out of iovecs */
+ BUG();
+ return -EFAULT;
+}
diff --git a/drivers/dma/ipu/Makefile b/drivers/dma/ipu/Makefile
new file mode 100644
index 00000000000..6704cf48326
--- /dev/null
+++ b/drivers/dma/ipu/Makefile
@@ -0,0 +1 @@
+obj-y += ipu_irq.o ipu_idmac.o
diff --git a/drivers/dma/ipu/ipu_idmac.c b/drivers/dma/ipu/ipu_idmac.c
new file mode 100644
index 00000000000..e80bae1673f
--- /dev/null
+++ b/drivers/dma/ipu/ipu_idmac.c
@@ -0,0 +1,1867 @@
+/*
+ * Copyright (C) 2008
+ * Guennadi Liakhovetski, DENX Software Engineering, <lg@denx.de>
+ *
+ * Copyright (C) 2005-2007 Freescale Semiconductor, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/err.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/list.h>
+#include <linux/clk.h>
+#include <linux/vmalloc.h>
+#include <linux/string.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+
+#include <mach/ipu.h>
+
+#include "ipu_intern.h"
+
+#define FS_VF_IN_VALID 0x00000002
+#define FS_ENC_IN_VALID 0x00000001
+
+static int ipu_disable_channel(struct idmac *idmac, struct idmac_channel *ichan,
+ bool wait_for_stop);
+
+/*
+ * There can be only one, we could allocate it dynamically, but then we'd have
+ * to add an extra parameter to some functions, and use something as ugly as
+ * struct ipu *ipu = to_ipu(to_idmac(ichan->dma_chan.device));
+ * in the ISR
+ */
+static struct ipu ipu_data;
+
+#define to_ipu(id) container_of(id, struct ipu, idmac)
+
+static u32 __idmac_read_icreg(struct ipu *ipu, unsigned long reg)
+{
+ return __raw_readl(ipu->reg_ic + reg);
+}
+
+#define idmac_read_icreg(ipu, reg) __idmac_read_icreg(ipu, reg - IC_CONF)
+
+static void __idmac_write_icreg(struct ipu *ipu, u32 value, unsigned long reg)
+{
+ __raw_writel(value, ipu->reg_ic + reg);
+}
+
+#define idmac_write_icreg(ipu, v, reg) __idmac_write_icreg(ipu, v, reg - IC_CONF)
+
+static u32 idmac_read_ipureg(struct ipu *ipu, unsigned long reg)
+{
+ return __raw_readl(ipu->reg_ipu + reg);
+}
+
+static void idmac_write_ipureg(struct ipu *ipu, u32 value, unsigned long reg)
+{
+ __raw_writel(value, ipu->reg_ipu + reg);
+}
+
+/*****************************************************************************
+ * IPU / IC common functions
+ */
+static void dump_idmac_reg(struct ipu *ipu)
+{
+ dev_dbg(ipu->dev, "IDMAC_CONF 0x%x, IC_CONF 0x%x, IDMAC_CHA_EN 0x%x, "
+ "IDMAC_CHA_PRI 0x%x, IDMAC_CHA_BUSY 0x%x\n",
+ idmac_read_icreg(ipu, IDMAC_CONF),
+ idmac_read_icreg(ipu, IC_CONF),
+ idmac_read_icreg(ipu, IDMAC_CHA_EN),
+ idmac_read_icreg(ipu, IDMAC_CHA_PRI),
+ idmac_read_icreg(ipu, IDMAC_CHA_BUSY));
+ dev_dbg(ipu->dev, "BUF0_RDY 0x%x, BUF1_RDY 0x%x, CUR_BUF 0x%x, "
+ "DB_MODE 0x%x, TASKS_STAT 0x%x\n",
+ idmac_read_ipureg(ipu, IPU_CHA_BUF0_RDY),
+ idmac_read_ipureg(ipu, IPU_CHA_BUF1_RDY),
+ idmac_read_ipureg(ipu, IPU_CHA_CUR_BUF),
+ idmac_read_ipureg(ipu, IPU_CHA_DB_MODE_SEL),
+ idmac_read_ipureg(ipu, IPU_TASKS_STAT));
+}
+
+static uint32_t bytes_per_pixel(enum pixel_fmt fmt)
+{
+ switch (fmt) {
+ case IPU_PIX_FMT_GENERIC: /* generic data */
+ case IPU_PIX_FMT_RGB332:
+ case IPU_PIX_FMT_YUV420P:
+ case IPU_PIX_FMT_YUV422P:
+ default:
+ return 1;
+ case IPU_PIX_FMT_RGB565:
+ case IPU_PIX_FMT_YUYV:
+ case IPU_PIX_FMT_UYVY:
+ return 2;
+ case IPU_PIX_FMT_BGR24:
+ case IPU_PIX_FMT_RGB24:
+ return 3;
+ case IPU_PIX_FMT_GENERIC_32: /* generic data */
+ case IPU_PIX_FMT_BGR32:
+ case IPU_PIX_FMT_RGB32:
+ case IPU_PIX_FMT_ABGR32:
+ return 4;
+ }
+}
+
+/* Enable direct write to memory by the Camera Sensor Interface */
+static void ipu_ic_enable_task(struct ipu *ipu, enum ipu_channel channel)
+{
+ uint32_t ic_conf, mask;
+
+ switch (channel) {
+ case IDMAC_IC_0:
+ mask = IC_CONF_PRPENC_EN;
+ break;
+ case IDMAC_IC_7:
+ mask = IC_CONF_RWS_EN | IC_CONF_PRPENC_EN;
+ break;
+ default:
+ return;
+ }
+ ic_conf = idmac_read_icreg(ipu, IC_CONF) | mask;
+ idmac_write_icreg(ipu, ic_conf, IC_CONF);
+}
+
+/* Called under spin_lock_irqsave(&ipu_data.lock) */
+static void ipu_ic_disable_task(struct ipu *ipu, enum ipu_channel channel)
+{
+ uint32_t ic_conf, mask;
+
+ switch (channel) {
+ case IDMAC_IC_0:
+ mask = IC_CONF_PRPENC_EN;
+ break;
+ case IDMAC_IC_7:
+ mask = IC_CONF_RWS_EN | IC_CONF_PRPENC_EN;
+ break;
+ default:
+ return;
+ }
+ ic_conf = idmac_read_icreg(ipu, IC_CONF) & ~mask;
+ idmac_write_icreg(ipu, ic_conf, IC_CONF);
+}
+
+static uint32_t ipu_channel_status(struct ipu *ipu, enum ipu_channel channel)
+{
+ uint32_t stat = TASK_STAT_IDLE;
+ uint32_t task_stat_reg = idmac_read_ipureg(ipu, IPU_TASKS_STAT);
+
+ switch (channel) {
+ case IDMAC_IC_7:
+ stat = (task_stat_reg & TSTAT_CSI2MEM_MASK) >>
+ TSTAT_CSI2MEM_OFFSET;
+ break;
+ case IDMAC_IC_0:
+ case IDMAC_SDC_0:
+ case IDMAC_SDC_1:
+ default:
+ break;
+ }
+ return stat;
+}
+
+struct chan_param_mem_planar {
+ /* Word 0 */
+ u32 xv:10;
+ u32 yv:10;
+ u32 xb:12;
+
+ u32 yb:12;
+ u32 res1:2;
+ u32 nsb:1;
+ u32 lnpb:6;
+ u32 ubo_l:11;
+
+ u32 ubo_h:15;
+ u32 vbo_l:17;
+
+ u32 vbo_h:9;
+ u32 res2:3;
+ u32 fw:12;
+ u32 fh_l:8;
+
+ u32 fh_h:4;
+ u32 res3:28;
+
+ /* Word 1 */
+ u32 eba0;
+
+ u32 eba1;
+
+ u32 bpp:3;
+ u32 sl:14;
+ u32 pfs:3;
+ u32 bam:3;
+ u32 res4:2;
+ u32 npb:6;
+ u32 res5:1;
+
+ u32 sat:2;
+ u32 res6:30;
+} __attribute__ ((packed));
+
+struct chan_param_mem_interleaved {
+ /* Word 0 */
+ u32 xv:10;
+ u32 yv:10;
+ u32 xb:12;
+
+ u32 yb:12;
+ u32 sce:1;
+ u32 res1:1;
+ u32 nsb:1;
+ u32 lnpb:6;
+ u32 sx:10;
+ u32 sy_l:1;
+
+ u32 sy_h:9;
+ u32 ns:10;
+ u32 sm:10;
+ u32 sdx_l:3;
+
+ u32 sdx_h:2;
+ u32 sdy:5;
+ u32 sdrx:1;
+ u32 sdry:1;
+ u32 sdr1:1;
+ u32 res2:2;
+ u32 fw:12;
+ u32 fh_l:8;
+
+ u32 fh_h:4;
+ u32 res3:28;
+
+ /* Word 1 */
+ u32 eba0;
+
+ u32 eba1;
+
+ u32 bpp:3;
+ u32 sl:14;
+ u32 pfs:3;
+ u32 bam:3;
+ u32 res4:2;
+ u32 npb:6;
+ u32 res5:1;
+
+ u32 sat:2;
+ u32 scc:1;
+ u32 ofs0:5;
+ u32 ofs1:5;
+ u32 ofs2:5;
+ u32 ofs3:5;
+ u32 wid0:3;
+ u32 wid1:3;
+ u32 wid2:3;
+
+ u32 wid3:3;
+ u32 dec_sel:1;
+ u32 res6:28;
+} __attribute__ ((packed));
+
+union chan_param_mem {
+ struct chan_param_mem_planar pp;
+ struct chan_param_mem_interleaved ip;
+};
+
+static void ipu_ch_param_set_plane_offset(union chan_param_mem *params,
+ u32 u_offset, u32 v_offset)
+{
+ params->pp.ubo_l = u_offset & 0x7ff;
+ params->pp.ubo_h = u_offset >> 11;
+ params->pp.vbo_l = v_offset & 0x1ffff;
+ params->pp.vbo_h = v_offset >> 17;
+}
+
+static void ipu_ch_param_set_size(union chan_param_mem *params,
+ uint32_t pixel_fmt, uint16_t width,
+ uint16_t height, uint16_t stride)
+{
+ u32 u_offset;
+ u32 v_offset;
+
+ params->pp.fw = width - 1;
+ params->pp.fh_l = height - 1;
+ params->pp.fh_h = (height - 1) >> 8;
+ params->pp.sl = stride - 1;
+
+ switch (pixel_fmt) {
+ case IPU_PIX_FMT_GENERIC:
+ /*Represents 8-bit Generic data */
+ params->pp.bpp = 3;
+ params->pp.pfs = 7;
+ params->pp.npb = 31;
+ params->pp.sat = 2; /* SAT = use 32-bit access */
+ break;
+ case IPU_PIX_FMT_GENERIC_32:
+ /*Represents 32-bit Generic data */
+ params->pp.bpp = 0;
+ params->pp.pfs = 7;
+ params->pp.npb = 7;
+ params->pp.sat = 2; /* SAT = use 32-bit access */
+ break;
+ case IPU_PIX_FMT_RGB565:
+ params->ip.bpp = 2;
+ params->ip.pfs = 4;
+ params->ip.npb = 7;
+ params->ip.sat = 2; /* SAT = 32-bit access */
+ params->ip.ofs0 = 0; /* Red bit offset */
+ params->ip.ofs1 = 5; /* Green bit offset */
+ params->ip.ofs2 = 11; /* Blue bit offset */
+ params->ip.ofs3 = 16; /* Alpha bit offset */
+ params->ip.wid0 = 4; /* Red bit width - 1 */
+ params->ip.wid1 = 5; /* Green bit width - 1 */
+ params->ip.wid2 = 4; /* Blue bit width - 1 */
+ break;
+ case IPU_PIX_FMT_BGR24:
+ params->ip.bpp = 1; /* 24 BPP & RGB PFS */
+ params->ip.pfs = 4;
+ params->ip.npb = 7;
+ params->ip.sat = 2; /* SAT = 32-bit access */
+ params->ip.ofs0 = 0; /* Red bit offset */
+ params->ip.ofs1 = 8; /* Green bit offset */
+ params->ip.ofs2 = 16; /* Blue bit offset */
+ params->ip.ofs3 = 24; /* Alpha bit offset */
+ params->ip.wid0 = 7; /* Red bit width - 1 */
+ params->ip.wid1 = 7; /* Green bit width - 1 */
+ params->ip.wid2 = 7; /* Blue bit width - 1 */
+ break;
+ case IPU_PIX_FMT_RGB24:
+ params->ip.bpp = 1; /* 24 BPP & RGB PFS */
+ params->ip.pfs = 4;
+ params->ip.npb = 7;
+ params->ip.sat = 2; /* SAT = 32-bit access */
+ params->ip.ofs0 = 16; /* Red bit offset */
+ params->ip.ofs1 = 8; /* Green bit offset */
+ params->ip.ofs2 = 0; /* Blue bit offset */
+ params->ip.ofs3 = 24; /* Alpha bit offset */
+ params->ip.wid0 = 7; /* Red bit width - 1 */
+ params->ip.wid1 = 7; /* Green bit width - 1 */
+ params->ip.wid2 = 7; /* Blue bit width - 1 */
+ break;
+ case IPU_PIX_FMT_BGRA32:
+ case IPU_PIX_FMT_BGR32:
+ params->ip.bpp = 0;
+ params->ip.pfs = 4;
+ params->ip.npb = 7;
+ params->ip.sat = 2; /* SAT = 32-bit access */
+ params->ip.ofs0 = 8; /* Red bit offset */
+ params->ip.ofs1 = 16; /* Green bit offset */
+ params->ip.ofs2 = 24; /* Blue bit offset */
+ params->ip.ofs3 = 0; /* Alpha bit offset */
+ params->ip.wid0 = 7; /* Red bit width - 1 */
+ params->ip.wid1 = 7; /* Green bit width - 1 */
+ params->ip.wid2 = 7; /* Blue bit width - 1 */
+ params->ip.wid3 = 7; /* Alpha bit width - 1 */
+ break;
+ case IPU_PIX_FMT_RGBA32:
+ case IPU_PIX_FMT_RGB32:
+ params->ip.bpp = 0;
+ params->ip.pfs = 4;
+ params->ip.npb = 7;
+ params->ip.sat = 2; /* SAT = 32-bit access */
+ params->ip.ofs0 = 24; /* Red bit offset */
+ params->ip.ofs1 = 16; /* Green bit offset */
+ params->ip.ofs2 = 8; /* Blue bit offset */
+ params->ip.ofs3 = 0; /* Alpha bit offset */
+ params->ip.wid0 = 7; /* Red bit width - 1 */
+ params->ip.wid1 = 7; /* Green bit width - 1 */
+ params->ip.wid2 = 7; /* Blue bit width - 1 */
+ params->ip.wid3 = 7; /* Alpha bit width - 1 */
+ break;
+ case IPU_PIX_FMT_ABGR32:
+ params->ip.bpp = 0;
+ params->ip.pfs = 4;
+ params->ip.npb = 7;
+ params->ip.sat = 2; /* SAT = 32-bit access */
+ params->ip.ofs0 = 8; /* Red bit offset */
+ params->ip.ofs1 = 16; /* Green bit offset */
+ params->ip.ofs2 = 24; /* Blue bit offset */
+ params->ip.ofs3 = 0; /* Alpha bit offset */
+ params->ip.wid0 = 7; /* Red bit width - 1 */
+ params->ip.wid1 = 7; /* Green bit width - 1 */
+ params->ip.wid2 = 7; /* Blue bit width - 1 */
+ params->ip.wid3 = 7; /* Alpha bit width - 1 */
+ break;
+ case IPU_PIX_FMT_UYVY:
+ params->ip.bpp = 2;
+ params->ip.pfs = 6;
+ params->ip.npb = 7;
+ params->ip.sat = 2; /* SAT = 32-bit access */
+ break;
+ case IPU_PIX_FMT_YUV420P2:
+ case IPU_PIX_FMT_YUV420P:
+ params->ip.bpp = 3;
+ params->ip.pfs = 3;
+ params->ip.npb = 7;
+ params->ip.sat = 2; /* SAT = 32-bit access */
+ u_offset = stride * height;
+ v_offset = u_offset + u_offset / 4;
+ ipu_ch_param_set_plane_offset(params, u_offset, v_offset);
+ break;
+ case IPU_PIX_FMT_YVU422P:
+ params->ip.bpp = 3;
+ params->ip.pfs = 2;
+ params->ip.npb = 7;
+ params->ip.sat = 2; /* SAT = 32-bit access */
+ v_offset = stride * height;
+ u_offset = v_offset + v_offset / 2;
+ ipu_ch_param_set_plane_offset(params, u_offset, v_offset);
+ break;
+ case IPU_PIX_FMT_YUV422P:
+ params->ip.bpp = 3;
+ params->ip.pfs = 2;
+ params->ip.npb = 7;
+ params->ip.sat = 2; /* SAT = 32-bit access */
+ u_offset = stride * height;
+ v_offset = u_offset + u_offset / 2;
+ ipu_ch_param_set_plane_offset(params, u_offset, v_offset);
+ break;
+ default:
+ dev_err(ipu_data.dev,
+ "mx3 ipu: unimplemented pixel format %d\n", pixel_fmt);
+ break;
+ }
+
+ params->pp.nsb = 1;
+}
+
+static void ipu_ch_param_set_burst_size(union chan_param_mem *params,
+ uint16_t burst_pixels)
+{
+ params->pp.npb = burst_pixels - 1;
+}
+
+static void ipu_ch_param_set_buffer(union chan_param_mem *params,
+ dma_addr_t buf0, dma_addr_t buf1)
+{
+ params->pp.eba0 = buf0;
+ params->pp.eba1 = buf1;
+}
+
+static void ipu_ch_param_set_rotation(union chan_param_mem *params,
+ enum ipu_rotate_mode rotate)
+{
+ params->pp.bam = rotate;
+}
+
+static void ipu_write_param_mem(uint32_t addr, uint32_t *data,
+ uint32_t num_words)
+{
+ for (; num_words > 0; num_words--) {
+ dev_dbg(ipu_data.dev,
+ "write param mem - addr = 0x%08X, data = 0x%08X\n",
+ addr, *data);
+ idmac_write_ipureg(&ipu_data, addr, IPU_IMA_ADDR);
+ idmac_write_ipureg(&ipu_data, *data++, IPU_IMA_DATA);
+ addr++;
+ if ((addr & 0x7) == 5) {
+ addr &= ~0x7; /* set to word 0 */
+ addr += 8; /* increment to next row */
+ }
+ }
+}
+
+static int calc_resize_coeffs(uint32_t in_size, uint32_t out_size,
+ uint32_t *resize_coeff,
+ uint32_t *downsize_coeff)
+{
+ uint32_t temp_size;
+ uint32_t temp_downsize;
+
+ *resize_coeff = 1 << 13;
+ *downsize_coeff = 1 << 13;
+
+ /* Cannot downsize more than 8:1 */
+ if (out_size << 3 < in_size)
+ return -EINVAL;
+
+ /* compute downsizing coefficient */
+ temp_downsize = 0;
+ temp_size = in_size;
+ while (temp_size >= out_size * 2 && temp_downsize < 2) {
+ temp_size >>= 1;
+ temp_downsize++;
+ }
+ *downsize_coeff = temp_downsize;
+
+ /*
+ * compute resizing coefficient using the following formula:
+ * resize_coeff = M*(SI -1)/(SO - 1)
+ * where M = 2^13, SI - input size, SO - output size
+ */
+ *resize_coeff = (8192L * (temp_size - 1)) / (out_size - 1);
+ if (*resize_coeff >= 16384L) {
+ dev_err(ipu_data.dev, "Warning! Overflow on resize coeff.\n");
+ *resize_coeff = 0x3FFF;
+ }
+
+ dev_dbg(ipu_data.dev, "resizing from %u -> %u pixels, "
+ "downsize=%u, resize=%u.%lu (reg=%u)\n", in_size, out_size,
+ *downsize_coeff, *resize_coeff >= 8192L ? 1 : 0,
+ ((*resize_coeff & 0x1FFF) * 10000L) / 8192L, *resize_coeff);
+
+ return 0;
+}
+
+static enum ipu_color_space format_to_colorspace(enum pixel_fmt fmt)
+{
+ switch (fmt) {
+ case IPU_PIX_FMT_RGB565:
+ case IPU_PIX_FMT_BGR24:
+ case IPU_PIX_FMT_RGB24:
+ case IPU_PIX_FMT_BGR32:
+ case IPU_PIX_FMT_RGB32:
+ return IPU_COLORSPACE_RGB;
+ default:
+ return IPU_COLORSPACE_YCBCR;
+ }
+}
+
+static int ipu_ic_init_prpenc(struct ipu *ipu,
+ union ipu_channel_param *params, bool src_is_csi)
+{
+ uint32_t reg, ic_conf;
+ uint32_t downsize_coeff, resize_coeff;
+ enum ipu_color_space in_fmt, out_fmt;
+
+ /* Setup vertical resizing */
+ calc_resize_coeffs(params->video.in_height,
+ params->video.out_height,
+ &resize_coeff, &downsize_coeff);
+ reg = (downsize_coeff << 30) | (resize_coeff << 16);
+
+ /* Setup horizontal resizing */
+ calc_resize_coeffs(params->video.in_width,
+ params->video.out_width,
+ &resize_coeff, &downsize_coeff);
+ reg |= (downsize_coeff << 14) | resize_coeff;
+
+ /* Setup color space conversion */
+ in_fmt = format_to_colorspace(params->video.in_pixel_fmt);
+ out_fmt = format_to_colorspace(params->video.out_pixel_fmt);
+
+ /*
+ * Colourspace conversion unsupported yet - see _init_csc() in
+ * Freescale sources
+ */
+ if (in_fmt != out_fmt) {
+ dev_err(ipu->dev, "Colourspace conversion unsupported!\n");
+ return -EOPNOTSUPP;
+ }
+
+ idmac_write_icreg(ipu, reg, IC_PRP_ENC_RSC);
+
+ ic_conf = idmac_read_icreg(ipu, IC_CONF);
+
+ if (src_is_csi)
+ ic_conf &= ~IC_CONF_RWS_EN;
+ else
+ ic_conf |= IC_CONF_RWS_EN;
+
+ idmac_write_icreg(ipu, ic_conf, IC_CONF);
+
+ return 0;
+}
+
+static uint32_t dma_param_addr(uint32_t dma_ch)
+{
+ /* Channel Parameter Memory */
+ return 0x10000 | (dma_ch << 4);
+}
+
+static void ipu_channel_set_priority(struct ipu *ipu, enum ipu_channel channel,
+ bool prio)
+{
+ u32 reg = idmac_read_icreg(ipu, IDMAC_CHA_PRI);
+
+ if (prio)
+ reg |= 1UL << channel;
+ else
+ reg &= ~(1UL << channel);
+
+ idmac_write_icreg(ipu, reg, IDMAC_CHA_PRI);
+
+ dump_idmac_reg(ipu);
+}
+
+static uint32_t ipu_channel_conf_mask(enum ipu_channel channel)
+{
+ uint32_t mask;
+
+ switch (channel) {
+ case IDMAC_IC_0:
+ case IDMAC_IC_7:
+ mask = IPU_CONF_CSI_EN | IPU_CONF_IC_EN;
+ break;
+ case IDMAC_SDC_0:
+ case IDMAC_SDC_1:
+ mask = IPU_CONF_SDC_EN | IPU_CONF_DI_EN;
+ break;
+ default:
+ mask = 0;
+ break;
+ }
+
+ return mask;
+}
+
+/**
+ * ipu_enable_channel() - enable an IPU channel.
+ * @idmac: IPU DMAC context.
+ * @ichan: IDMAC channel.
+ * @return: 0 on success or negative error code on failure.
+ */
+static int ipu_enable_channel(struct idmac *idmac, struct idmac_channel *ichan)
+{
+ struct ipu *ipu = to_ipu(idmac);
+ enum ipu_channel channel = ichan->dma_chan.chan_id;
+ uint32_t reg;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ipu->lock, flags);
+
+ /* Reset to buffer 0 */
+ idmac_write_ipureg(ipu, 1UL << channel, IPU_CHA_CUR_BUF);
+ ichan->active_buffer = 0;
+ ichan->status = IPU_CHANNEL_ENABLED;
+
+ switch (channel) {
+ case IDMAC_SDC_0:
+ case IDMAC_SDC_1:
+ case IDMAC_IC_7:
+ ipu_channel_set_priority(ipu, channel, true);
+ default:
+ break;
+ }
+
+ reg = idmac_read_icreg(ipu, IDMAC_CHA_EN);
+
+ idmac_write_icreg(ipu, reg | (1UL << channel), IDMAC_CHA_EN);
+
+ ipu_ic_enable_task(ipu, channel);
+
+ spin_unlock_irqrestore(&ipu->lock, flags);
+ return 0;
+}
+
+/**
+ * ipu_init_channel_buffer() - initialize a buffer for logical IPU channel.
+ * @ichan: IDMAC channel.
+ * @pixel_fmt: pixel format of buffer. Pixel format is a FOURCC ASCII code.
+ * @width: width of buffer in pixels.
+ * @height: height of buffer in pixels.
+ * @stride: stride length of buffer in pixels.
+ * @rot_mode: rotation mode of buffer. A rotation setting other than
+ * IPU_ROTATE_VERT_FLIP should only be used for input buffers of
+ * rotation channels.
+ * @phyaddr_0: buffer 0 physical address.
+ * @phyaddr_1: buffer 1 physical address. Setting this to a value other than
+ * NULL enables double buffering mode.
+ * @return: 0 on success or negative error code on failure.
+ */
+static int ipu_init_channel_buffer(struct idmac_channel *ichan,
+ enum pixel_fmt pixel_fmt,
+ uint16_t width, uint16_t height,
+ uint32_t stride,
+ enum ipu_rotate_mode rot_mode,
+ dma_addr_t phyaddr_0, dma_addr_t phyaddr_1)
+{
+ enum ipu_channel channel = ichan->dma_chan.chan_id;
+ struct idmac *idmac = to_idmac(ichan->dma_chan.device);
+ struct ipu *ipu = to_ipu(idmac);
+ union chan_param_mem params = {};
+ unsigned long flags;
+ uint32_t reg;
+ uint32_t stride_bytes;
+
+ stride_bytes = stride * bytes_per_pixel(pixel_fmt);
+
+ if (stride_bytes % 4) {
+ dev_err(ipu->dev,
+ "Stride length must be 32-bit aligned, stride = %d, bytes = %d\n",
+ stride, stride_bytes);
+ return -EINVAL;
+ }
+
+ /* IC channel's stride must be a multiple of 8 pixels */
+ if ((channel <= IDMAC_IC_13) && (stride % 8)) {
+ dev_err(ipu->dev, "Stride must be 8 pixel multiple\n");
+ return -EINVAL;
+ }
+
+ /* Build parameter memory data for DMA channel */
+ ipu_ch_param_set_size(&params, pixel_fmt, width, height, stride_bytes);
+ ipu_ch_param_set_buffer(&params, phyaddr_0, phyaddr_1);
+ ipu_ch_param_set_rotation(&params, rot_mode);
+ /* Some channels (rotation) have restriction on burst length */
+ switch (channel) {
+ case IDMAC_IC_7: /* Hangs with burst 8, 16, other values
+ invalid - Table 44-30 */
+/*
+ ipu_ch_param_set_burst_size(&params, 8);
+ */
+ break;
+ case IDMAC_SDC_0:
+ case IDMAC_SDC_1:
+ /* In original code only IPU_PIX_FMT_RGB565 was setting burst */
+ ipu_ch_param_set_burst_size(&params, 16);
+ break;
+ case IDMAC_IC_0:
+ default:
+ break;
+ }
+
+ spin_lock_irqsave(&ipu->lock, flags);
+
+ ipu_write_param_mem(dma_param_addr(channel), (uint32_t *)&params, 10);
+
+ reg = idmac_read_ipureg(ipu, IPU_CHA_DB_MODE_SEL);
+
+ if (phyaddr_1)
+ reg |= 1UL << channel;
+ else
+ reg &= ~(1UL << channel);
+
+ idmac_write_ipureg(ipu, reg, IPU_CHA_DB_MODE_SEL);
+
+ ichan->status = IPU_CHANNEL_READY;
+
+ spin_unlock_irqrestore(&ipu->lock, flags);
+
+ return 0;
+}
+
+/**
+ * ipu_select_buffer() - mark a channel's buffer as ready.
+ * @channel: channel ID.
+ * @buffer_n: buffer number to mark ready.
+ */
+static void ipu_select_buffer(enum ipu_channel channel, int buffer_n)
+{
+ /* No locking - this is a write-one-to-set register, cleared by IPU */
+ if (buffer_n == 0)
+ /* Mark buffer 0 as ready. */
+ idmac_write_ipureg(&ipu_data, 1UL << channel, IPU_CHA_BUF0_RDY);
+ else
+ /* Mark buffer 1 as ready. */
+ idmac_write_ipureg(&ipu_data, 1UL << channel, IPU_CHA_BUF1_RDY);
+}
+
+/**
+ * ipu_update_channel_buffer() - update physical address of a channel buffer.
+ * @ichan: IDMAC channel.
+ * @buffer_n: buffer number to update.
+ * 0 or 1 are the only valid values.
+ * @phyaddr: buffer physical address.
+ */
+/* Called under spin_lock(_irqsave)(&ichan->lock) */
+static void ipu_update_channel_buffer(struct idmac_channel *ichan,
+ int buffer_n, dma_addr_t phyaddr)
+{
+ enum ipu_channel channel = ichan->dma_chan.chan_id;
+ uint32_t reg;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ipu_data.lock, flags);
+
+ if (buffer_n == 0) {
+ reg = idmac_read_ipureg(&ipu_data, IPU_CHA_BUF0_RDY);
+ if (reg & (1UL << channel)) {
+ ipu_ic_disable_task(&ipu_data, channel);
+ ichan->status = IPU_CHANNEL_READY;
+ }
+
+ /* 44.3.3.1.9 - Row Number 1 (WORD1, offset 0) */
+ idmac_write_ipureg(&ipu_data, dma_param_addr(channel) +
+ 0x0008UL, IPU_IMA_ADDR);
+ idmac_write_ipureg(&ipu_data, phyaddr, IPU_IMA_DATA);
+ } else {
+ reg = idmac_read_ipureg(&ipu_data, IPU_CHA_BUF1_RDY);
+ if (reg & (1UL << channel)) {
+ ipu_ic_disable_task(&ipu_data, channel);
+ ichan->status = IPU_CHANNEL_READY;
+ }
+
+ /* Check if double-buffering is already enabled */
+ reg = idmac_read_ipureg(&ipu_data, IPU_CHA_DB_MODE_SEL);
+
+ if (!(reg & (1UL << channel)))
+ idmac_write_ipureg(&ipu_data, reg | (1UL << channel),
+ IPU_CHA_DB_MODE_SEL);
+
+ /* 44.3.3.1.9 - Row Number 1 (WORD1, offset 1) */
+ idmac_write_ipureg(&ipu_data, dma_param_addr(channel) +
+ 0x0009UL, IPU_IMA_ADDR);
+ idmac_write_ipureg(&ipu_data, phyaddr, IPU_IMA_DATA);
+ }
+
+ spin_unlock_irqrestore(&ipu_data.lock, flags);
+}
+
+/* Called under spin_lock_irqsave(&ichan->lock) */
+static int ipu_submit_buffer(struct idmac_channel *ichan,
+ struct idmac_tx_desc *desc, struct scatterlist *sg, int buf_idx)
+{
+ unsigned int chan_id = ichan->dma_chan.chan_id;
+ struct device *dev = &ichan->dma_chan.dev->device;
+
+ if (async_tx_test_ack(&desc->txd))
+ return -EINTR;
+
+ /*
+ * On first invocation this shouldn't be necessary, the call to
+ * ipu_init_channel_buffer() above will set addresses for us, so we
+ * could make it conditional on status >= IPU_CHANNEL_ENABLED, but
+ * doing it again shouldn't hurt either.
+ */
+ ipu_update_channel_buffer(ichan, buf_idx, sg_dma_address(sg));
+
+ ipu_select_buffer(chan_id, buf_idx);
+ dev_dbg(dev, "Updated sg %p on channel 0x%x buffer %d\n",
+ sg, chan_id, buf_idx);
+
+ return 0;
+}
+
+/* Called under spin_lock_irqsave(&ichan->lock) */
+static int ipu_submit_channel_buffers(struct idmac_channel *ichan,
+ struct idmac_tx_desc *desc)
+{
+ struct scatterlist *sg;
+ int i, ret = 0;
+
+ for (i = 0, sg = desc->sg; i < 2 && sg; i++) {
+ if (!ichan->sg[i]) {
+ ichan->sg[i] = sg;
+
+ ret = ipu_submit_buffer(ichan, desc, sg, i);
+ if (ret < 0)
+ return ret;
+
+ sg = sg_next(sg);
+ }
+ }
+
+ return ret;
+}
+
+static dma_cookie_t idmac_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct idmac_tx_desc *desc = to_tx_desc(tx);
+ struct idmac_channel *ichan = to_idmac_chan(tx->chan);
+ struct idmac *idmac = to_idmac(tx->chan->device);
+ struct ipu *ipu = to_ipu(idmac);
+ struct device *dev = &ichan->dma_chan.dev->device;
+ dma_cookie_t cookie;
+ unsigned long flags;
+ int ret;
+
+ /* Sanity check */
+ if (!list_empty(&desc->list)) {
+ /* The descriptor doesn't belong to client */
+ dev_err(dev, "Descriptor %p not prepared!\n", tx);
+ return -EBUSY;
+ }
+
+ mutex_lock(&ichan->chan_mutex);
+
+ async_tx_clear_ack(tx);
+
+ if (ichan->status < IPU_CHANNEL_READY) {
+ struct idmac_video_param *video = &ichan->params.video;
+ /*
+ * Initial buffer assignment - the first two sg-entries from
+ * the descriptor will end up in the IDMAC buffers
+ */
+ dma_addr_t dma_1 = sg_is_last(desc->sg) ? 0 :
+ sg_dma_address(&desc->sg[1]);
+
+ WARN_ON(ichan->sg[0] || ichan->sg[1]);
+
+ cookie = ipu_init_channel_buffer(ichan,
+ video->out_pixel_fmt,
+ video->out_width,
+ video->out_height,
+ video->out_stride,
+ IPU_ROTATE_NONE,
+ sg_dma_address(&desc->sg[0]),
+ dma_1);
+ if (cookie < 0)
+ goto out;
+ }
+
+ dev_dbg(dev, "Submitting sg %p\n", &desc->sg[0]);
+
+ cookie = ichan->dma_chan.cookie;
+
+ if (++cookie < 0)
+ cookie = 1;
+
+ /* from dmaengine.h: "last cookie value returned to client" */
+ ichan->dma_chan.cookie = cookie;
+ tx->cookie = cookie;
+
+ /* ipu->lock can be taken under ichan->lock, but not v.v. */
+ spin_lock_irqsave(&ichan->lock, flags);
+
+ list_add_tail(&desc->list, &ichan->queue);
+ /* submit_buffers() atomically verifies and fills empty sg slots */
+ ret = ipu_submit_channel_buffers(ichan, desc);
+
+ spin_unlock_irqrestore(&ichan->lock, flags);
+
+ if (ret < 0) {
+ cookie = ret;
+ goto dequeue;
+ }
+
+ if (ichan->status < IPU_CHANNEL_ENABLED) {
+ ret = ipu_enable_channel(idmac, ichan);
+ if (ret < 0) {
+ cookie = ret;
+ goto dequeue;
+ }
+ }
+
+ dump_idmac_reg(ipu);
+
+dequeue:
+ if (cookie < 0) {
+ spin_lock_irqsave(&ichan->lock, flags);
+ list_del_init(&desc->list);
+ spin_unlock_irqrestore(&ichan->lock, flags);
+ tx->cookie = cookie;
+ ichan->dma_chan.cookie = cookie;
+ }
+
+out:
+ mutex_unlock(&ichan->chan_mutex);
+
+ return cookie;
+}
+
+/* Called with ichan->chan_mutex held */
+static int idmac_desc_alloc(struct idmac_channel *ichan, int n)
+{
+ struct idmac_tx_desc *desc = vmalloc(n * sizeof(struct idmac_tx_desc));
+ struct idmac *idmac = to_idmac(ichan->dma_chan.device);
+
+ if (!desc)
+ return -ENOMEM;
+
+ /* No interrupts, just disable the tasklet for a moment */
+ tasklet_disable(&to_ipu(idmac)->tasklet);
+
+ ichan->n_tx_desc = n;
+ ichan->desc = desc;
+ INIT_LIST_HEAD(&ichan->queue);
+ INIT_LIST_HEAD(&ichan->free_list);
+
+ while (n--) {
+ struct dma_async_tx_descriptor *txd = &desc->txd;
+
+ memset(txd, 0, sizeof(*txd));
+ dma_async_tx_descriptor_init(txd, &ichan->dma_chan);
+ txd->tx_submit = idmac_tx_submit;
+
+ list_add(&desc->list, &ichan->free_list);
+
+ desc++;
+ }
+
+ tasklet_enable(&to_ipu(idmac)->tasklet);
+
+ return 0;
+}
+
+/**
+ * ipu_init_channel() - initialize an IPU channel.
+ * @idmac: IPU DMAC context.
+ * @ichan: pointer to the channel object.
+ * @return 0 on success or negative error code on failure.
+ */
+static int ipu_init_channel(struct idmac *idmac, struct idmac_channel *ichan)
+{
+ union ipu_channel_param *params = &ichan->params;
+ uint32_t ipu_conf;
+ enum ipu_channel channel = ichan->dma_chan.chan_id;
+ unsigned long flags;
+ uint32_t reg;
+ struct ipu *ipu = to_ipu(idmac);
+ int ret = 0, n_desc = 0;
+
+ dev_dbg(ipu->dev, "init channel = %d\n", channel);
+
+ if (channel != IDMAC_SDC_0 && channel != IDMAC_SDC_1 &&
+ channel != IDMAC_IC_7)
+ return -EINVAL;
+
+ spin_lock_irqsave(&ipu->lock, flags);
+
+ switch (channel) {
+ case IDMAC_IC_7:
+ n_desc = 16;
+ reg = idmac_read_icreg(ipu, IC_CONF);
+ idmac_write_icreg(ipu, reg & ~IC_CONF_CSI_MEM_WR_EN, IC_CONF);
+ break;
+ case IDMAC_IC_0:
+ n_desc = 16;
+ reg = idmac_read_ipureg(ipu, IPU_FS_PROC_FLOW);
+ idmac_write_ipureg(ipu, reg & ~FS_ENC_IN_VALID, IPU_FS_PROC_FLOW);
+ ret = ipu_ic_init_prpenc(ipu, params, true);
+ break;
+ case IDMAC_SDC_0:
+ case IDMAC_SDC_1:
+ n_desc = 4;
+ default:
+ break;
+ }
+
+ ipu->channel_init_mask |= 1L << channel;
+
+ /* Enable IPU sub module */
+ ipu_conf = idmac_read_ipureg(ipu, IPU_CONF) |
+ ipu_channel_conf_mask(channel);
+ idmac_write_ipureg(ipu, ipu_conf, IPU_CONF);
+
+ spin_unlock_irqrestore(&ipu->lock, flags);
+
+ if (n_desc && !ichan->desc)
+ ret = idmac_desc_alloc(ichan, n_desc);
+
+ dump_idmac_reg(ipu);
+
+ return ret;
+}
+
+/**
+ * ipu_uninit_channel() - uninitialize an IPU channel.
+ * @idmac: IPU DMAC context.
+ * @ichan: pointer to the channel object.
+ */
+static void ipu_uninit_channel(struct idmac *idmac, struct idmac_channel *ichan)
+{
+ enum ipu_channel channel = ichan->dma_chan.chan_id;
+ unsigned long flags;
+ uint32_t reg;
+ unsigned long chan_mask = 1UL << channel;
+ uint32_t ipu_conf;
+ struct ipu *ipu = to_ipu(idmac);
+
+ spin_lock_irqsave(&ipu->lock, flags);
+
+ if (!(ipu->channel_init_mask & chan_mask)) {
+ dev_err(ipu->dev, "Channel already uninitialized %d\n",
+ channel);
+ spin_unlock_irqrestore(&ipu->lock, flags);
+ return;
+ }
+
+ /* Reset the double buffer */
+ reg = idmac_read_ipureg(ipu, IPU_CHA_DB_MODE_SEL);
+ idmac_write_ipureg(ipu, reg & ~chan_mask, IPU_CHA_DB_MODE_SEL);
+
+ ichan->sec_chan_en = false;
+
+ switch (channel) {
+ case IDMAC_IC_7:
+ reg = idmac_read_icreg(ipu, IC_CONF);
+ idmac_write_icreg(ipu, reg & ~(IC_CONF_RWS_EN | IC_CONF_PRPENC_EN),
+ IC_CONF);
+ break;
+ case IDMAC_IC_0:
+ reg = idmac_read_icreg(ipu, IC_CONF);
+ idmac_write_icreg(ipu, reg & ~(IC_CONF_PRPENC_EN | IC_CONF_PRPENC_CSC1),
+ IC_CONF);
+ break;
+ case IDMAC_SDC_0:
+ case IDMAC_SDC_1:
+ default:
+ break;
+ }
+
+ ipu->channel_init_mask &= ~(1L << channel);
+
+ ipu_conf = idmac_read_ipureg(ipu, IPU_CONF) &
+ ~ipu_channel_conf_mask(channel);
+ idmac_write_ipureg(ipu, ipu_conf, IPU_CONF);
+
+ spin_unlock_irqrestore(&ipu->lock, flags);
+
+ ichan->n_tx_desc = 0;
+ vfree(ichan->desc);
+ ichan->desc = NULL;
+}
+
+/**
+ * ipu_disable_channel() - disable an IPU channel.
+ * @idmac: IPU DMAC context.
+ * @ichan: channel object pointer.
+ * @wait_for_stop: flag to set whether to wait for channel end of frame or
+ * return immediately.
+ * @return: 0 on success or negative error code on failure.
+ */
+static int ipu_disable_channel(struct idmac *idmac, struct idmac_channel *ichan,
+ bool wait_for_stop)
+{
+ enum ipu_channel channel = ichan->dma_chan.chan_id;
+ struct ipu *ipu = to_ipu(idmac);
+ uint32_t reg;
+ unsigned long flags;
+ unsigned long chan_mask = 1UL << channel;
+ unsigned int timeout;
+
+ if (wait_for_stop && channel != IDMAC_SDC_1 && channel != IDMAC_SDC_0) {
+ timeout = 40;
+ /* This waiting always fails. Related to spurious irq problem */
+ while ((idmac_read_icreg(ipu, IDMAC_CHA_BUSY) & chan_mask) ||
+ (ipu_channel_status(ipu, channel) == TASK_STAT_ACTIVE)) {
+ timeout--;
+ msleep(10);
+
+ if (!timeout) {
+ dev_dbg(ipu->dev,
+ "Warning: timeout waiting for channel %u to "
+ "stop: buf0_rdy = 0x%08X, buf1_rdy = 0x%08X, "
+ "busy = 0x%08X, tstat = 0x%08X\n", channel,
+ idmac_read_ipureg(ipu, IPU_CHA_BUF0_RDY),
+ idmac_read_ipureg(ipu, IPU_CHA_BUF1_RDY),
+ idmac_read_icreg(ipu, IDMAC_CHA_BUSY),
+ idmac_read_ipureg(ipu, IPU_TASKS_STAT));
+ break;
+ }
+ }
+ dev_dbg(ipu->dev, "timeout = %d * 10ms\n", 40 - timeout);
+ }
+ /* SDC BG and FG must be disabled before DMA is disabled */
+ if (wait_for_stop && (channel == IDMAC_SDC_0 ||
+ channel == IDMAC_SDC_1)) {
+ for (timeout = 5;
+ timeout && !ipu_irq_status(ichan->eof_irq); timeout--)
+ msleep(5);
+ }
+
+ spin_lock_irqsave(&ipu->lock, flags);
+
+ /* Disable IC task */
+ ipu_ic_disable_task(ipu, channel);
+
+ /* Disable DMA channel(s) */
+ reg = idmac_read_icreg(ipu, IDMAC_CHA_EN);
+ idmac_write_icreg(ipu, reg & ~chan_mask, IDMAC_CHA_EN);
+
+ /*
+ * Problem (observed with channel DMAIC_7): after enabling the channel
+ * and initialising buffers, there comes an interrupt with current still
+ * pointing at buffer 0, whereas it should use buffer 0 first and only
+ * generate an interrupt when it is done, then current should already
+ * point to buffer 1. This spurious interrupt also comes on channel
+ * DMASDC_0. With DMAIC_7 normally, is we just leave the ISR after the
+ * first interrupt, there comes the second with current correctly
+ * pointing to buffer 1 this time. But sometimes this second interrupt
+ * doesn't come and the channel hangs. Clearing BUFx_RDY when disabling
+ * the channel seems to prevent the channel from hanging, but it doesn't
+ * prevent the spurious interrupt. This might also be unsafe. Think
+ * about the IDMAC controller trying to switch to a buffer, when we
+ * clear the ready bit, and re-enable it a moment later.
+ */
+ reg = idmac_read_ipureg(ipu, IPU_CHA_BUF0_RDY);
+ idmac_write_ipureg(ipu, 0, IPU_CHA_BUF0_RDY);
+ idmac_write_ipureg(ipu, reg & ~(1UL << channel), IPU_CHA_BUF0_RDY);
+
+ reg = idmac_read_ipureg(ipu, IPU_CHA_BUF1_RDY);
+ idmac_write_ipureg(ipu, 0, IPU_CHA_BUF1_RDY);
+ idmac_write_ipureg(ipu, reg & ~(1UL << channel), IPU_CHA_BUF1_RDY);
+
+ spin_unlock_irqrestore(&ipu->lock, flags);
+
+ return 0;
+}
+
+static struct scatterlist *idmac_sg_next(struct idmac_channel *ichan,
+ struct idmac_tx_desc **desc, struct scatterlist *sg)
+{
+ struct scatterlist *sgnew = sg ? sg_next(sg) : NULL;
+
+ if (sgnew)
+ /* next sg-element in this list */
+ return sgnew;
+
+ if ((*desc)->list.next == &ichan->queue)
+ /* No more descriptors on the queue */
+ return NULL;
+
+ /* Fetch next descriptor */
+ *desc = list_entry((*desc)->list.next, struct idmac_tx_desc, list);
+ return (*desc)->sg;
+}
+
+/*
+ * We have several possibilities here:
+ * current BUF next BUF
+ *
+ * not last sg next not last sg
+ * not last sg next last sg
+ * last sg first sg from next descriptor
+ * last sg NULL
+ *
+ * Besides, the descriptor queue might be empty or not. We process all these
+ * cases carefully.
+ */
+static irqreturn_t idmac_interrupt(int irq, void *dev_id)
+{
+ struct idmac_channel *ichan = dev_id;
+ struct device *dev = &ichan->dma_chan.dev->device;
+ unsigned int chan_id = ichan->dma_chan.chan_id;
+ struct scatterlist **sg, *sgnext, *sgnew = NULL;
+ /* Next transfer descriptor */
+ struct idmac_tx_desc *desc, *descnew;
+ dma_async_tx_callback callback;
+ void *callback_param;
+ bool done = false;
+ u32 ready0, ready1, curbuf, err;
+ unsigned long flags;
+
+ /* IDMAC has cleared the respective BUFx_RDY bit, we manage the buffer */
+
+ dev_dbg(dev, "IDMAC irq %d, buf %d\n", irq, ichan->active_buffer);
+
+ spin_lock_irqsave(&ipu_data.lock, flags);
+
+ ready0 = idmac_read_ipureg(&ipu_data, IPU_CHA_BUF0_RDY);
+ ready1 = idmac_read_ipureg(&ipu_data, IPU_CHA_BUF1_RDY);
+ curbuf = idmac_read_ipureg(&ipu_data, IPU_CHA_CUR_BUF);
+ err = idmac_read_ipureg(&ipu_data, IPU_INT_STAT_4);
+
+ if (err & (1 << chan_id)) {
+ idmac_write_ipureg(&ipu_data, 1 << chan_id, IPU_INT_STAT_4);
+ spin_unlock_irqrestore(&ipu_data.lock, flags);
+ /*
+ * Doing this
+ * ichan->sg[0] = ichan->sg[1] = NULL;
+ * you can force channel re-enable on the next tx_submit(), but
+ * this is dirty - think about descriptors with multiple
+ * sg elements.
+ */
+ dev_warn(dev, "NFB4EOF on channel %d, ready %x, %x, cur %x\n",
+ chan_id, ready0, ready1, curbuf);
+ return IRQ_HANDLED;
+ }
+ spin_unlock_irqrestore(&ipu_data.lock, flags);
+
+ /* Other interrupts do not interfere with this channel */
+ spin_lock(&ichan->lock);
+ if (unlikely(chan_id != IDMAC_SDC_0 && chan_id != IDMAC_SDC_1 &&
+ ((curbuf >> chan_id) & 1) == ichan->active_buffer &&
+ !list_is_last(ichan->queue.next, &ichan->queue))) {
+ int i = 100;
+
+ /* This doesn't help. See comment in ipu_disable_channel() */
+ while (--i) {
+ curbuf = idmac_read_ipureg(&ipu_data, IPU_CHA_CUR_BUF);
+ if (((curbuf >> chan_id) & 1) != ichan->active_buffer)
+ break;
+ cpu_relax();
+ }
+
+ if (!i) {
+ spin_unlock(&ichan->lock);
+ dev_dbg(dev,
+ "IRQ on active buffer on channel %x, active "
+ "%d, ready %x, %x, current %x!\n", chan_id,
+ ichan->active_buffer, ready0, ready1, curbuf);
+ return IRQ_NONE;
+ } else
+ dev_dbg(dev,
+ "Buffer deactivated on channel %x, active "
+ "%d, ready %x, %x, current %x, rest %d!\n", chan_id,
+ ichan->active_buffer, ready0, ready1, curbuf, i);
+ }
+
+ if (unlikely((ichan->active_buffer && (ready1 >> chan_id) & 1) ||
+ (!ichan->active_buffer && (ready0 >> chan_id) & 1)
+ )) {
+ spin_unlock(&ichan->lock);
+ dev_dbg(dev,
+ "IRQ with active buffer still ready on channel %x, "
+ "active %d, ready %x, %x!\n", chan_id,
+ ichan->active_buffer, ready0, ready1);
+ return IRQ_NONE;
+ }
+
+ if (unlikely(list_empty(&ichan->queue))) {
+ ichan->sg[ichan->active_buffer] = NULL;
+ spin_unlock(&ichan->lock);
+ dev_err(dev,
+ "IRQ without queued buffers on channel %x, active %d, "
+ "ready %x, %x!\n", chan_id,
+ ichan->active_buffer, ready0, ready1);
+ return IRQ_NONE;
+ }
+
+ /*
+ * active_buffer is a software flag, it shows which buffer we are
+ * currently expecting back from the hardware, IDMAC should be
+ * processing the other buffer already
+ */
+ sg = &ichan->sg[ichan->active_buffer];
+ sgnext = ichan->sg[!ichan->active_buffer];
+
+ if (!*sg) {
+ spin_unlock(&ichan->lock);
+ return IRQ_HANDLED;
+ }
+
+ desc = list_entry(ichan->queue.next, struct idmac_tx_desc, list);
+ descnew = desc;
+
+ dev_dbg(dev, "IDMAC irq %d, dma 0x%08x, next dma 0x%08x, current %d, curbuf 0x%08x\n",
+ irq, sg_dma_address(*sg), sgnext ? sg_dma_address(sgnext) : 0, ichan->active_buffer, curbuf);
+
+ /* Find the descriptor of sgnext */
+ sgnew = idmac_sg_next(ichan, &descnew, *sg);
+ if (sgnext != sgnew)
+ dev_err(dev, "Submitted buffer %p, next buffer %p\n", sgnext, sgnew);
+
+ /*
+ * if sgnext == NULL sg must be the last element in a scatterlist and
+ * queue must be empty
+ */
+ if (unlikely(!sgnext)) {
+ if (!WARN_ON(sg_next(*sg)))
+ dev_dbg(dev, "Underrun on channel %x\n", chan_id);
+ ichan->sg[!ichan->active_buffer] = sgnew;
+
+ if (unlikely(sgnew)) {
+ ipu_submit_buffer(ichan, descnew, sgnew, !ichan->active_buffer);
+ } else {
+ spin_lock_irqsave(&ipu_data.lock, flags);
+ ipu_ic_disable_task(&ipu_data, chan_id);
+ spin_unlock_irqrestore(&ipu_data.lock, flags);
+ ichan->status = IPU_CHANNEL_READY;
+ /* Continue to check for complete descriptor */
+ }
+ }
+
+ /* Calculate and submit the next sg element */
+ sgnew = idmac_sg_next(ichan, &descnew, sgnew);
+
+ if (unlikely(!sg_next(*sg)) || !sgnext) {
+ /*
+ * Last element in scatterlist done, remove from the queue,
+ * _init for debugging
+ */
+ list_del_init(&desc->list);
+ done = true;
+ }
+
+ *sg = sgnew;
+
+ if (likely(sgnew) &&
+ ipu_submit_buffer(ichan, descnew, sgnew, ichan->active_buffer) < 0) {
+ callback = descnew->txd.callback;
+ callback_param = descnew->txd.callback_param;
+ spin_unlock(&ichan->lock);
+ if (callback)
+ callback(callback_param);
+ spin_lock(&ichan->lock);
+ }
+
+ /* Flip the active buffer - even if update above failed */
+ ichan->active_buffer = !ichan->active_buffer;
+ if (done)
+ ichan->completed = desc->txd.cookie;
+
+ callback = desc->txd.callback;
+ callback_param = desc->txd.callback_param;
+
+ spin_unlock(&ichan->lock);
+
+ if (done && (desc->txd.flags & DMA_PREP_INTERRUPT) && callback)
+ callback(callback_param);
+
+ return IRQ_HANDLED;
+}
+
+static void ipu_gc_tasklet(unsigned long arg)
+{
+ struct ipu *ipu = (struct ipu *)arg;
+ int i;
+
+ for (i = 0; i < IPU_CHANNELS_NUM; i++) {
+ struct idmac_channel *ichan = ipu->channel + i;
+ struct idmac_tx_desc *desc;
+ unsigned long flags;
+ struct scatterlist *sg;
+ int j, k;
+
+ for (j = 0; j < ichan->n_tx_desc; j++) {
+ desc = ichan->desc + j;
+ spin_lock_irqsave(&ichan->lock, flags);
+ if (async_tx_test_ack(&desc->txd)) {
+ list_move(&desc->list, &ichan->free_list);
+ for_each_sg(desc->sg, sg, desc->sg_len, k) {
+ if (ichan->sg[0] == sg)
+ ichan->sg[0] = NULL;
+ else if (ichan->sg[1] == sg)
+ ichan->sg[1] = NULL;
+ }
+ async_tx_clear_ack(&desc->txd);
+ }
+ spin_unlock_irqrestore(&ichan->lock, flags);
+ }
+ }
+}
+
+/* Allocate and initialise a transfer descriptor. */
+static struct dma_async_tx_descriptor *idmac_prep_slave_sg(struct dma_chan *chan,
+ struct scatterlist *sgl, unsigned int sg_len,
+ enum dma_data_direction direction, unsigned long tx_flags)
+{
+ struct idmac_channel *ichan = to_idmac_chan(chan);
+ struct idmac_tx_desc *desc = NULL;
+ struct dma_async_tx_descriptor *txd = NULL;
+ unsigned long flags;
+
+ /* We only can handle these three channels so far */
+ if (chan->chan_id != IDMAC_SDC_0 && chan->chan_id != IDMAC_SDC_1 &&
+ chan->chan_id != IDMAC_IC_7)
+ return NULL;
+
+ if (direction != DMA_FROM_DEVICE && direction != DMA_TO_DEVICE) {
+ dev_err(chan->device->dev, "Invalid DMA direction %d!\n", direction);
+ return NULL;
+ }
+
+ mutex_lock(&ichan->chan_mutex);
+
+ spin_lock_irqsave(&ichan->lock, flags);
+ if (!list_empty(&ichan->free_list)) {
+ desc = list_entry(ichan->free_list.next,
+ struct idmac_tx_desc, list);
+
+ list_del_init(&desc->list);
+
+ desc->sg_len = sg_len;
+ desc->sg = sgl;
+ txd = &desc->txd;
+ txd->flags = tx_flags;
+ }
+ spin_unlock_irqrestore(&ichan->lock, flags);
+
+ mutex_unlock(&ichan->chan_mutex);
+
+ tasklet_schedule(&to_ipu(to_idmac(chan->device))->tasklet);
+
+ return txd;
+}
+
+/* Re-select the current buffer and re-activate the channel */
+static void idmac_issue_pending(struct dma_chan *chan)
+{
+ struct idmac_channel *ichan = to_idmac_chan(chan);
+ struct idmac *idmac = to_idmac(chan->device);
+ struct ipu *ipu = to_ipu(idmac);
+ unsigned long flags;
+
+ /* This is not always needed, but doesn't hurt either */
+ spin_lock_irqsave(&ipu->lock, flags);
+ ipu_select_buffer(chan->chan_id, ichan->active_buffer);
+ spin_unlock_irqrestore(&ipu->lock, flags);
+
+ /*
+ * Might need to perform some parts of initialisation from
+ * ipu_enable_channel(), but not all, we do not want to reset to buffer
+ * 0, don't need to set priority again either, but re-enabling the task
+ * and the channel might be a good idea.
+ */
+}
+
+static void __idmac_terminate_all(struct dma_chan *chan)
+{
+ struct idmac_channel *ichan = to_idmac_chan(chan);
+ struct idmac *idmac = to_idmac(chan->device);
+ unsigned long flags;
+ int i;
+
+ ipu_disable_channel(idmac, ichan,
+ ichan->status >= IPU_CHANNEL_ENABLED);
+
+ tasklet_disable(&to_ipu(idmac)->tasklet);
+
+ /* ichan->queue is modified in ISR, have to spinlock */
+ spin_lock_irqsave(&ichan->lock, flags);
+ list_splice_init(&ichan->queue, &ichan->free_list);
+
+ if (ichan->desc)
+ for (i = 0; i < ichan->n_tx_desc; i++) {
+ struct idmac_tx_desc *desc = ichan->desc + i;
+ if (list_empty(&desc->list))
+ /* Descriptor was prepared, but not submitted */
+ list_add(&desc->list, &ichan->free_list);
+
+ async_tx_clear_ack(&desc->txd);
+ }
+
+ ichan->sg[0] = NULL;
+ ichan->sg[1] = NULL;
+ spin_unlock_irqrestore(&ichan->lock, flags);
+
+ tasklet_enable(&to_ipu(idmac)->tasklet);
+
+ ichan->status = IPU_CHANNEL_INITIALIZED;
+}
+
+static void idmac_terminate_all(struct dma_chan *chan)
+{
+ struct idmac_channel *ichan = to_idmac_chan(chan);
+
+ mutex_lock(&ichan->chan_mutex);
+
+ __idmac_terminate_all(chan);
+
+ mutex_unlock(&ichan->chan_mutex);
+}
+
+#ifdef DEBUG
+static irqreturn_t ic_sof_irq(int irq, void *dev_id)
+{
+ struct idmac_channel *ichan = dev_id;
+ printk(KERN_DEBUG "Got SOF IRQ %d on Channel %d\n",
+ irq, ichan->dma_chan.chan_id);
+ disable_irq_nosync(irq);
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t ic_eof_irq(int irq, void *dev_id)
+{
+ struct idmac_channel *ichan = dev_id;
+ printk(KERN_DEBUG "Got EOF IRQ %d on Channel %d\n",
+ irq, ichan->dma_chan.chan_id);
+ disable_irq_nosync(irq);
+ return IRQ_HANDLED;
+}
+
+static int ic_sof = -EINVAL, ic_eof = -EINVAL;
+#endif
+
+static int idmac_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct idmac_channel *ichan = to_idmac_chan(chan);
+ struct idmac *idmac = to_idmac(chan->device);
+ int ret;
+
+ /* dmaengine.c now guarantees to only offer free channels */
+ BUG_ON(chan->client_count > 1);
+ WARN_ON(ichan->status != IPU_CHANNEL_FREE);
+
+ chan->cookie = 1;
+ ichan->completed = -ENXIO;
+
+ ret = ipu_irq_map(chan->chan_id);
+ if (ret < 0)
+ goto eimap;
+
+ ichan->eof_irq = ret;
+
+ /*
+ * Important to first disable the channel, because maybe someone
+ * used it before us, e.g., the bootloader
+ */
+ ipu_disable_channel(idmac, ichan, true);
+
+ ret = ipu_init_channel(idmac, ichan);
+ if (ret < 0)
+ goto eichan;
+
+ ret = request_irq(ichan->eof_irq, idmac_interrupt, 0,
+ ichan->eof_name, ichan);
+ if (ret < 0)
+ goto erirq;
+
+#ifdef DEBUG
+ if (chan->chan_id == IDMAC_IC_7) {
+ ic_sof = ipu_irq_map(69);
+ if (ic_sof > 0)
+ request_irq(ic_sof, ic_sof_irq, 0, "IC SOF", ichan);
+ ic_eof = ipu_irq_map(70);
+ if (ic_eof > 0)
+ request_irq(ic_eof, ic_eof_irq, 0, "IC EOF", ichan);
+ }
+#endif
+
+ ichan->status = IPU_CHANNEL_INITIALIZED;
+
+ dev_dbg(&chan->dev->device, "Found channel 0x%x, irq %d\n",
+ chan->chan_id, ichan->eof_irq);
+
+ return ret;
+
+erirq:
+ ipu_uninit_channel(idmac, ichan);
+eichan:
+ ipu_irq_unmap(chan->chan_id);
+eimap:
+ return ret;
+}
+
+static void idmac_free_chan_resources(struct dma_chan *chan)
+{
+ struct idmac_channel *ichan = to_idmac_chan(chan);
+ struct idmac *idmac = to_idmac(chan->device);
+
+ mutex_lock(&ichan->chan_mutex);
+
+ __idmac_terminate_all(chan);
+
+ if (ichan->status > IPU_CHANNEL_FREE) {
+#ifdef DEBUG
+ if (chan->chan_id == IDMAC_IC_7) {
+ if (ic_sof > 0) {
+ free_irq(ic_sof, ichan);
+ ipu_irq_unmap(69);
+ ic_sof = -EINVAL;
+ }
+ if (ic_eof > 0) {
+ free_irq(ic_eof, ichan);
+ ipu_irq_unmap(70);
+ ic_eof = -EINVAL;
+ }
+ }
+#endif
+ free_irq(ichan->eof_irq, ichan);
+ ipu_irq_unmap(chan->chan_id);
+ }
+
+ ichan->status = IPU_CHANNEL_FREE;
+
+ ipu_uninit_channel(idmac, ichan);
+
+ mutex_unlock(&ichan->chan_mutex);
+
+ tasklet_schedule(&to_ipu(idmac)->tasklet);
+}
+
+static enum dma_status idmac_is_tx_complete(struct dma_chan *chan,
+ dma_cookie_t cookie, dma_cookie_t *done, dma_cookie_t *used)
+{
+ struct idmac_channel *ichan = to_idmac_chan(chan);
+
+ if (done)
+ *done = ichan->completed;
+ if (used)
+ *used = chan->cookie;
+ if (cookie != chan->cookie)
+ return DMA_ERROR;
+ return DMA_SUCCESS;
+}
+
+static int __init ipu_idmac_init(struct ipu *ipu)
+{
+ struct idmac *idmac = &ipu->idmac;
+ struct dma_device *dma = &idmac->dma;
+ int i;
+
+ dma_cap_set(DMA_SLAVE, dma->cap_mask);
+ dma_cap_set(DMA_PRIVATE, dma->cap_mask);
+
+ /* Compulsory common fields */
+ dma->dev = ipu->dev;
+ dma->device_alloc_chan_resources = idmac_alloc_chan_resources;
+ dma->device_free_chan_resources = idmac_free_chan_resources;
+ dma->device_is_tx_complete = idmac_is_tx_complete;
+ dma->device_issue_pending = idmac_issue_pending;
+
+ /* Compulsory for DMA_SLAVE fields */
+ dma->device_prep_slave_sg = idmac_prep_slave_sg;
+ dma->device_terminate_all = idmac_terminate_all;
+
+ INIT_LIST_HEAD(&dma->channels);
+ for (i = 0; i < IPU_CHANNELS_NUM; i++) {
+ struct idmac_channel *ichan = ipu->channel + i;
+ struct dma_chan *dma_chan = &ichan->dma_chan;
+
+ spin_lock_init(&ichan->lock);
+ mutex_init(&ichan->chan_mutex);
+
+ ichan->status = IPU_CHANNEL_FREE;
+ ichan->sec_chan_en = false;
+ ichan->completed = -ENXIO;
+ snprintf(ichan->eof_name, sizeof(ichan->eof_name), "IDMAC EOF %d", i);
+
+ dma_chan->device = &idmac->dma;
+ dma_chan->cookie = 1;
+ dma_chan->chan_id = i;
+ list_add_tail(&dma_chan->device_node, &dma->channels);
+ }
+
+ idmac_write_icreg(ipu, 0x00000070, IDMAC_CONF);
+
+ return dma_async_device_register(&idmac->dma);
+}
+
+static void __exit ipu_idmac_exit(struct ipu *ipu)
+{
+ int i;
+ struct idmac *idmac = &ipu->idmac;
+
+ for (i = 0; i < IPU_CHANNELS_NUM; i++) {
+ struct idmac_channel *ichan = ipu->channel + i;
+
+ idmac_terminate_all(&ichan->dma_chan);
+ idmac_prep_slave_sg(&ichan->dma_chan, NULL, 0, DMA_NONE, 0);
+ }
+
+ dma_async_device_unregister(&idmac->dma);
+}
+
+/*****************************************************************************
+ * IPU common probe / remove
+ */
+
+static int __init ipu_probe(struct platform_device *pdev)
+{
+ struct ipu_platform_data *pdata = pdev->dev.platform_data;
+ struct resource *mem_ipu, *mem_ic;
+ int ret;
+
+ spin_lock_init(&ipu_data.lock);
+
+ mem_ipu = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ mem_ic = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+ if (!pdata || !mem_ipu || !mem_ic)
+ return -EINVAL;
+
+ ipu_data.dev = &pdev->dev;
+
+ platform_set_drvdata(pdev, &ipu_data);
+
+ ret = platform_get_irq(pdev, 0);
+ if (ret < 0)
+ goto err_noirq;
+
+ ipu_data.irq_fn = ret;
+ ret = platform_get_irq(pdev, 1);
+ if (ret < 0)
+ goto err_noirq;
+
+ ipu_data.irq_err = ret;
+ ipu_data.irq_base = pdata->irq_base;
+
+ dev_dbg(&pdev->dev, "fn irq %u, err irq %u, irq-base %u\n",
+ ipu_data.irq_fn, ipu_data.irq_err, ipu_data.irq_base);
+
+ /* Remap IPU common registers */
+ ipu_data.reg_ipu = ioremap(mem_ipu->start,
+ mem_ipu->end - mem_ipu->start + 1);
+ if (!ipu_data.reg_ipu) {
+ ret = -ENOMEM;
+ goto err_ioremap_ipu;
+ }
+
+ /* Remap Image Converter and Image DMA Controller registers */
+ ipu_data.reg_ic = ioremap(mem_ic->start,
+ mem_ic->end - mem_ic->start + 1);
+ if (!ipu_data.reg_ic) {
+ ret = -ENOMEM;
+ goto err_ioremap_ic;
+ }
+
+ /* Get IPU clock */
+ ipu_data.ipu_clk = clk_get(&pdev->dev, NULL);
+ if (IS_ERR(ipu_data.ipu_clk)) {
+ ret = PTR_ERR(ipu_data.ipu_clk);
+ goto err_clk_get;
+ }
+
+ /* Make sure IPU HSP clock is running */
+ clk_enable(ipu_data.ipu_clk);
+
+ /* Disable all interrupts */
+ idmac_write_ipureg(&ipu_data, 0, IPU_INT_CTRL_1);
+ idmac_write_ipureg(&ipu_data, 0, IPU_INT_CTRL_2);
+ idmac_write_ipureg(&ipu_data, 0, IPU_INT_CTRL_3);
+ idmac_write_ipureg(&ipu_data, 0, IPU_INT_CTRL_4);
+ idmac_write_ipureg(&ipu_data, 0, IPU_INT_CTRL_5);
+
+ dev_dbg(&pdev->dev, "%s @ 0x%08lx, fn irq %u, err irq %u\n", pdev->name,
+ (unsigned long)mem_ipu->start, ipu_data.irq_fn, ipu_data.irq_err);
+
+ ret = ipu_irq_attach_irq(&ipu_data, pdev);
+ if (ret < 0)
+ goto err_attach_irq;
+
+ /* Initialize DMA engine */
+ ret = ipu_idmac_init(&ipu_data);
+ if (ret < 0)
+ goto err_idmac_init;
+
+ tasklet_init(&ipu_data.tasklet, ipu_gc_tasklet, (unsigned long)&ipu_data);
+
+ ipu_data.dev = &pdev->dev;
+
+ dev_dbg(ipu_data.dev, "IPU initialized\n");
+
+ return 0;
+
+err_idmac_init:
+err_attach_irq:
+ ipu_irq_detach_irq(&ipu_data, pdev);
+ clk_disable(ipu_data.ipu_clk);
+ clk_put(ipu_data.ipu_clk);
+err_clk_get:
+ iounmap(ipu_data.reg_ic);
+err_ioremap_ic:
+ iounmap(ipu_data.reg_ipu);
+err_ioremap_ipu:
+err_noirq:
+ dev_err(&pdev->dev, "Failed to probe IPU: %d\n", ret);
+ return ret;
+}
+
+static int __exit ipu_remove(struct platform_device *pdev)
+{
+ struct ipu *ipu = platform_get_drvdata(pdev);
+
+ ipu_idmac_exit(ipu);
+ ipu_irq_detach_irq(ipu, pdev);
+ clk_disable(ipu->ipu_clk);
+ clk_put(ipu->ipu_clk);
+ iounmap(ipu->reg_ic);
+ iounmap(ipu->reg_ipu);
+ tasklet_kill(&ipu->tasklet);
+ platform_set_drvdata(pdev, NULL);
+
+ return 0;
+}
+
+/*
+ * We need two MEM resources - with IPU-common and Image Converter registers,
+ * including PF_CONF and IDMAC_* registers, and two IRQs - function and error
+ */
+static struct platform_driver ipu_platform_driver = {
+ .driver = {
+ .name = "ipu-core",
+ .owner = THIS_MODULE,
+ },
+ .remove = __exit_p(ipu_remove),
+};
+
+static int __init ipu_init(void)
+{
+ return platform_driver_probe(&ipu_platform_driver, ipu_probe);
+}
+subsys_initcall(ipu_init);
+
+MODULE_DESCRIPTION("IPU core driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Guennadi Liakhovetski <lg@denx.de>");
+MODULE_ALIAS("platform:ipu-core");
diff --git a/drivers/dma/ipu/ipu_intern.h b/drivers/dma/ipu/ipu_intern.h
new file mode 100644
index 00000000000..545cf11a94a
--- /dev/null
+++ b/drivers/dma/ipu/ipu_intern.h
@@ -0,0 +1,176 @@
+/*
+ * Copyright (C) 2008
+ * Guennadi Liakhovetski, DENX Software Engineering, <lg@denx.de>
+ *
+ * Copyright (C) 2005-2007 Freescale Semiconductor, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _IPU_INTERN_H_
+#define _IPU_INTERN_H_
+
+#include <linux/dmaengine.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+
+/* IPU Common registers */
+#define IPU_CONF 0x00
+#define IPU_CHA_BUF0_RDY 0x04
+#define IPU_CHA_BUF1_RDY 0x08
+#define IPU_CHA_DB_MODE_SEL 0x0C
+#define IPU_CHA_CUR_BUF 0x10
+#define IPU_FS_PROC_FLOW 0x14
+#define IPU_FS_DISP_FLOW 0x18
+#define IPU_TASKS_STAT 0x1C
+#define IPU_IMA_ADDR 0x20
+#define IPU_IMA_DATA 0x24
+#define IPU_INT_CTRL_1 0x28
+#define IPU_INT_CTRL_2 0x2C
+#define IPU_INT_CTRL_3 0x30
+#define IPU_INT_CTRL_4 0x34
+#define IPU_INT_CTRL_5 0x38
+#define IPU_INT_STAT_1 0x3C
+#define IPU_INT_STAT_2 0x40
+#define IPU_INT_STAT_3 0x44
+#define IPU_INT_STAT_4 0x48
+#define IPU_INT_STAT_5 0x4C
+#define IPU_BRK_CTRL_1 0x50
+#define IPU_BRK_CTRL_2 0x54
+#define IPU_BRK_STAT 0x58
+#define IPU_DIAGB_CTRL 0x5C
+
+/* IPU_CONF Register bits */
+#define IPU_CONF_CSI_EN 0x00000001
+#define IPU_CONF_IC_EN 0x00000002
+#define IPU_CONF_ROT_EN 0x00000004
+#define IPU_CONF_PF_EN 0x00000008
+#define IPU_CONF_SDC_EN 0x00000010
+#define IPU_CONF_ADC_EN 0x00000020
+#define IPU_CONF_DI_EN 0x00000040
+#define IPU_CONF_DU_EN 0x00000080
+#define IPU_CONF_PXL_ENDIAN 0x00000100
+
+/* Image Converter Registers */
+#define IC_CONF 0x88
+#define IC_PRP_ENC_RSC 0x8C
+#define IC_PRP_VF_RSC 0x90
+#define IC_PP_RSC 0x94
+#define IC_CMBP_1 0x98
+#define IC_CMBP_2 0x9C
+#define PF_CONF 0xA0
+#define IDMAC_CONF 0xA4
+#define IDMAC_CHA_EN 0xA8
+#define IDMAC_CHA_PRI 0xAC
+#define IDMAC_CHA_BUSY 0xB0
+
+/* Image Converter Register bits */
+#define IC_CONF_PRPENC_EN 0x00000001
+#define IC_CONF_PRPENC_CSC1 0x00000002
+#define IC_CONF_PRPENC_ROT_EN 0x00000004
+#define IC_CONF_PRPVF_EN 0x00000100
+#define IC_CONF_PRPVF_CSC1 0x00000200
+#define IC_CONF_PRPVF_CSC2 0x00000400
+#define IC_CONF_PRPVF_CMB 0x00000800
+#define IC_CONF_PRPVF_ROT_EN 0x00001000
+#define IC_CONF_PP_EN 0x00010000
+#define IC_CONF_PP_CSC1 0x00020000
+#define IC_CONF_PP_CSC2 0x00040000
+#define IC_CONF_PP_CMB 0x00080000
+#define IC_CONF_PP_ROT_EN 0x00100000
+#define IC_CONF_IC_GLB_LOC_A 0x10000000
+#define IC_CONF_KEY_COLOR_EN 0x20000000
+#define IC_CONF_RWS_EN 0x40000000
+#define IC_CONF_CSI_MEM_WR_EN 0x80000000
+
+#define IDMA_CHAN_INVALID 0x000000FF
+#define IDMA_IC_0 0x00000001
+#define IDMA_IC_1 0x00000002
+#define IDMA_IC_2 0x00000004
+#define IDMA_IC_3 0x00000008
+#define IDMA_IC_4 0x00000010
+#define IDMA_IC_5 0x00000020
+#define IDMA_IC_6 0x00000040
+#define IDMA_IC_7 0x00000080
+#define IDMA_IC_8 0x00000100
+#define IDMA_IC_9 0x00000200
+#define IDMA_IC_10 0x00000400
+#define IDMA_IC_11 0x00000800
+#define IDMA_IC_12 0x00001000
+#define IDMA_IC_13 0x00002000
+#define IDMA_SDC_BG 0x00004000
+#define IDMA_SDC_FG 0x00008000
+#define IDMA_SDC_MASK 0x00010000
+#define IDMA_SDC_PARTIAL 0x00020000
+#define IDMA_ADC_SYS1_WR 0x00040000
+#define IDMA_ADC_SYS2_WR 0x00080000
+#define IDMA_ADC_SYS1_CMD 0x00100000
+#define IDMA_ADC_SYS2_CMD 0x00200000
+#define IDMA_ADC_SYS1_RD 0x00400000
+#define IDMA_ADC_SYS2_RD 0x00800000
+#define IDMA_PF_QP 0x01000000
+#define IDMA_PF_BSP 0x02000000
+#define IDMA_PF_Y_IN 0x04000000
+#define IDMA_PF_U_IN 0x08000000
+#define IDMA_PF_V_IN 0x10000000
+#define IDMA_PF_Y_OUT 0x20000000
+#define IDMA_PF_U_OUT 0x40000000
+#define IDMA_PF_V_OUT 0x80000000
+
+#define TSTAT_PF_H264_PAUSE 0x00000001
+#define TSTAT_CSI2MEM_MASK 0x0000000C
+#define TSTAT_CSI2MEM_OFFSET 2
+#define TSTAT_VF_MASK 0x00000600
+#define TSTAT_VF_OFFSET 9
+#define TSTAT_VF_ROT_MASK 0x000C0000
+#define TSTAT_VF_ROT_OFFSET 18
+#define TSTAT_ENC_MASK 0x00000180
+#define TSTAT_ENC_OFFSET 7
+#define TSTAT_ENC_ROT_MASK 0x00030000
+#define TSTAT_ENC_ROT_OFFSET 16
+#define TSTAT_PP_MASK 0x00001800
+#define TSTAT_PP_OFFSET 11
+#define TSTAT_PP_ROT_MASK 0x00300000
+#define TSTAT_PP_ROT_OFFSET 20
+#define TSTAT_PF_MASK 0x00C00000
+#define TSTAT_PF_OFFSET 22
+#define TSTAT_ADCSYS1_MASK 0x03000000
+#define TSTAT_ADCSYS1_OFFSET 24
+#define TSTAT_ADCSYS2_MASK 0x0C000000
+#define TSTAT_ADCSYS2_OFFSET 26
+
+#define TASK_STAT_IDLE 0
+#define TASK_STAT_ACTIVE 1
+#define TASK_STAT_WAIT4READY 2
+
+struct idmac {
+ struct dma_device dma;
+};
+
+struct ipu {
+ void __iomem *reg_ipu;
+ void __iomem *reg_ic;
+ unsigned int irq_fn; /* IPU Function IRQ to the CPU */
+ unsigned int irq_err; /* IPU Error IRQ to the CPU */
+ unsigned int irq_base; /* Beginning of the IPU IRQ range */
+ unsigned long channel_init_mask;
+ spinlock_t lock;
+ struct clk *ipu_clk;
+ struct device *dev;
+ struct idmac idmac;
+ struct idmac_channel channel[IPU_CHANNELS_NUM];
+ struct tasklet_struct tasklet;
+};
+
+#define to_idmac(d) container_of(d, struct idmac, dma)
+
+extern int ipu_irq_attach_irq(struct ipu *ipu, struct platform_device *dev);
+extern void ipu_irq_detach_irq(struct ipu *ipu, struct platform_device *dev);
+
+extern bool ipu_irq_status(uint32_t irq);
+extern int ipu_irq_map(unsigned int source);
+extern int ipu_irq_unmap(unsigned int source);
+
+#endif
diff --git a/drivers/dma/ipu/ipu_irq.c b/drivers/dma/ipu/ipu_irq.c
new file mode 100644
index 00000000000..dd8ebc75b66
--- /dev/null
+++ b/drivers/dma/ipu/ipu_irq.c
@@ -0,0 +1,413 @@
+/*
+ * Copyright (C) 2008
+ * Guennadi Liakhovetski, DENX Software Engineering, <lg@denx.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/err.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/clk.h>
+#include <linux/irq.h>
+#include <linux/io.h>
+
+#include <mach/ipu.h>
+
+#include "ipu_intern.h"
+
+/*
+ * Register read / write - shall be inlined by the compiler
+ */
+static u32 ipu_read_reg(struct ipu *ipu, unsigned long reg)
+{
+ return __raw_readl(ipu->reg_ipu + reg);
+}
+
+static void ipu_write_reg(struct ipu *ipu, u32 value, unsigned long reg)
+{
+ __raw_writel(value, ipu->reg_ipu + reg);
+}
+
+
+/*
+ * IPU IRQ chip driver
+ */
+
+#define IPU_IRQ_NR_FN_BANKS 3
+#define IPU_IRQ_NR_ERR_BANKS 2
+#define IPU_IRQ_NR_BANKS (IPU_IRQ_NR_FN_BANKS + IPU_IRQ_NR_ERR_BANKS)
+
+struct ipu_irq_bank {
+ unsigned int control;
+ unsigned int status;
+ spinlock_t lock;
+ struct ipu *ipu;
+};
+
+static struct ipu_irq_bank irq_bank[IPU_IRQ_NR_BANKS] = {
+ /* 3 groups of functional interrupts */
+ {
+ .control = IPU_INT_CTRL_1,
+ .status = IPU_INT_STAT_1,
+ }, {
+ .control = IPU_INT_CTRL_2,
+ .status = IPU_INT_STAT_2,
+ }, {
+ .control = IPU_INT_CTRL_3,
+ .status = IPU_INT_STAT_3,
+ },
+ /* 2 groups of error interrupts */
+ {
+ .control = IPU_INT_CTRL_4,
+ .status = IPU_INT_STAT_4,
+ }, {
+ .control = IPU_INT_CTRL_5,
+ .status = IPU_INT_STAT_5,
+ },
+};
+
+struct ipu_irq_map {
+ unsigned int irq;
+ int source;
+ struct ipu_irq_bank *bank;
+ struct ipu *ipu;
+};
+
+static struct ipu_irq_map irq_map[CONFIG_MX3_IPU_IRQS];
+/* Protects allocations from the above array of maps */
+static DEFINE_MUTEX(map_lock);
+/* Protects register accesses and individual mappings */
+static DEFINE_SPINLOCK(bank_lock);
+
+static struct ipu_irq_map *src2map(unsigned int src)
+{
+ int i;
+
+ for (i = 0; i < CONFIG_MX3_IPU_IRQS; i++)
+ if (irq_map[i].source == src)
+ return irq_map + i;
+
+ return NULL;
+}
+
+static void ipu_irq_unmask(unsigned int irq)
+{
+ struct ipu_irq_map *map = get_irq_chip_data(irq);
+ struct ipu_irq_bank *bank;
+ uint32_t reg;
+ unsigned long lock_flags;
+
+ spin_lock_irqsave(&bank_lock, lock_flags);
+
+ bank = map->bank;
+ if (!bank) {
+ spin_unlock_irqrestore(&bank_lock, lock_flags);
+ pr_err("IPU: %s(%u) - unmapped!\n", __func__, irq);
+ return;
+ }
+
+ reg = ipu_read_reg(bank->ipu, bank->control);
+ reg |= (1UL << (map->source & 31));
+ ipu_write_reg(bank->ipu, reg, bank->control);
+
+ spin_unlock_irqrestore(&bank_lock, lock_flags);
+}
+
+static void ipu_irq_mask(unsigned int irq)
+{
+ struct ipu_irq_map *map = get_irq_chip_data(irq);
+ struct ipu_irq_bank *bank;
+ uint32_t reg;
+ unsigned long lock_flags;
+
+ spin_lock_irqsave(&bank_lock, lock_flags);
+
+ bank = map->bank;
+ if (!bank) {
+ spin_unlock_irqrestore(&bank_lock, lock_flags);
+ pr_err("IPU: %s(%u) - unmapped!\n", __func__, irq);
+ return;
+ }
+
+ reg = ipu_read_reg(bank->ipu, bank->control);
+ reg &= ~(1UL << (map->source & 31));
+ ipu_write_reg(bank->ipu, reg, bank->control);
+
+ spin_unlock_irqrestore(&bank_lock, lock_flags);
+}
+
+static void ipu_irq_ack(unsigned int irq)
+{
+ struct ipu_irq_map *map = get_irq_chip_data(irq);
+ struct ipu_irq_bank *bank;
+ unsigned long lock_flags;
+
+ spin_lock_irqsave(&bank_lock, lock_flags);
+
+ bank = map->bank;
+ if (!bank) {
+ spin_unlock_irqrestore(&bank_lock, lock_flags);
+ pr_err("IPU: %s(%u) - unmapped!\n", __func__, irq);
+ return;
+ }
+
+ ipu_write_reg(bank->ipu, 1UL << (map->source & 31), bank->status);
+ spin_unlock_irqrestore(&bank_lock, lock_flags);
+}
+
+/**
+ * ipu_irq_status() - returns the current interrupt status of the specified IRQ.
+ * @irq: interrupt line to get status for.
+ * @return: true if the interrupt is pending/asserted or false if the
+ * interrupt is not pending.
+ */
+bool ipu_irq_status(unsigned int irq)
+{
+ struct ipu_irq_map *map = get_irq_chip_data(irq);
+ struct ipu_irq_bank *bank;
+ unsigned long lock_flags;
+ bool ret;
+
+ spin_lock_irqsave(&bank_lock, lock_flags);
+ bank = map->bank;
+ ret = bank && ipu_read_reg(bank->ipu, bank->status) &
+ (1UL << (map->source & 31));
+ spin_unlock_irqrestore(&bank_lock, lock_flags);
+
+ return ret;
+}
+
+/**
+ * ipu_irq_map() - map an IPU interrupt source to an IRQ number
+ * @source: interrupt source bit position (see below)
+ * @return: mapped IRQ number or negative error code
+ *
+ * The source parameter has to be explained further. On i.MX31 IPU has 137 IRQ
+ * sources, they are broken down in 5 32-bit registers, like 32, 32, 24, 32, 17.
+ * However, the source argument of this function is not the sequence number of
+ * the possible IRQ, but rather its bit position. So, first interrupt in fourth
+ * register has source number 96, and not 88. This makes calculations easier,
+ * and also provides forward compatibility with any future IPU implementations
+ * with any interrupt bit assignments.
+ */
+int ipu_irq_map(unsigned int source)
+{
+ int i, ret = -ENOMEM;
+ struct ipu_irq_map *map;
+
+ might_sleep();
+
+ mutex_lock(&map_lock);
+ map = src2map(source);
+ if (map) {
+ pr_err("IPU: Source %u already mapped to IRQ %u\n", source, map->irq);
+ ret = -EBUSY;
+ goto out;
+ }
+
+ for (i = 0; i < CONFIG_MX3_IPU_IRQS; i++) {
+ if (irq_map[i].source < 0) {
+ unsigned long lock_flags;
+
+ spin_lock_irqsave(&bank_lock, lock_flags);
+ irq_map[i].source = source;
+ irq_map[i].bank = irq_bank + source / 32;
+ spin_unlock_irqrestore(&bank_lock, lock_flags);
+
+ ret = irq_map[i].irq;
+ pr_debug("IPU: mapped source %u to IRQ %u\n",
+ source, ret);
+ break;
+ }
+ }
+out:
+ mutex_unlock(&map_lock);
+
+ if (ret < 0)
+ pr_err("IPU: couldn't map source %u: %d\n", source, ret);
+
+ return ret;
+}
+
+/**
+ * ipu_irq_map() - map an IPU interrupt source to an IRQ number
+ * @source: interrupt source bit position (see ipu_irq_map())
+ * @return: 0 or negative error code
+ */
+int ipu_irq_unmap(unsigned int source)
+{
+ int i, ret = -EINVAL;
+
+ might_sleep();
+
+ mutex_lock(&map_lock);
+ for (i = 0; i < CONFIG_MX3_IPU_IRQS; i++) {
+ if (irq_map[i].source == source) {
+ unsigned long lock_flags;
+
+ pr_debug("IPU: unmapped source %u from IRQ %u\n",
+ source, irq_map[i].irq);
+
+ spin_lock_irqsave(&bank_lock, lock_flags);
+ irq_map[i].source = -EINVAL;
+ irq_map[i].bank = NULL;
+ spin_unlock_irqrestore(&bank_lock, lock_flags);
+
+ ret = 0;
+ break;
+ }
+ }
+ mutex_unlock(&map_lock);
+
+ return ret;
+}
+
+/* Chained IRQ handler for IPU error interrupt */
+static void ipu_irq_err(unsigned int irq, struct irq_desc *desc)
+{
+ struct ipu *ipu = get_irq_data(irq);
+ u32 status;
+ int i, line;
+
+ for (i = IPU_IRQ_NR_FN_BANKS; i < IPU_IRQ_NR_BANKS; i++) {
+ struct ipu_irq_bank *bank = irq_bank + i;
+
+ spin_lock(&bank_lock);
+ status = ipu_read_reg(ipu, bank->status);
+ /*
+ * Don't think we have to clear all interrupts here, they will
+ * be acked by ->handle_irq() (handle_level_irq). However, we
+ * might want to clear unhandled interrupts after the loop...
+ */
+ status &= ipu_read_reg(ipu, bank->control);
+ spin_unlock(&bank_lock);
+ while ((line = ffs(status))) {
+ struct ipu_irq_map *map;
+
+ line--;
+ status &= ~(1UL << line);
+
+ spin_lock(&bank_lock);
+ map = src2map(32 * i + line);
+ if (map)
+ irq = map->irq;
+ spin_unlock(&bank_lock);
+
+ if (!map) {
+ pr_err("IPU: Interrupt on unmapped source %u bank %d\n",
+ line, i);
+ continue;
+ }
+ generic_handle_irq(irq);
+ }
+ }
+}
+
+/* Chained IRQ handler for IPU function interrupt */
+static void ipu_irq_fn(unsigned int irq, struct irq_desc *desc)
+{
+ struct ipu *ipu = get_irq_data(irq);
+ u32 status;
+ int i, line;
+
+ for (i = 0; i < IPU_IRQ_NR_FN_BANKS; i++) {
+ struct ipu_irq_bank *bank = irq_bank + i;
+
+ spin_lock(&bank_lock);
+ status = ipu_read_reg(ipu, bank->status);
+ /* Not clearing all interrupts, see above */
+ status &= ipu_read_reg(ipu, bank->control);
+ spin_unlock(&bank_lock);
+ while ((line = ffs(status))) {
+ struct ipu_irq_map *map;
+
+ line--;
+ status &= ~(1UL << line);
+
+ spin_lock(&bank_lock);
+ map = src2map(32 * i + line);
+ if (map)
+ irq = map->irq;
+ spin_unlock(&bank_lock);
+
+ if (!map) {
+ pr_err("IPU: Interrupt on unmapped source %u bank %d\n",
+ line, i);
+ continue;
+ }
+ generic_handle_irq(irq);
+ }
+ }
+}
+
+static struct irq_chip ipu_irq_chip = {
+ .name = "ipu_irq",
+ .ack = ipu_irq_ack,
+ .mask = ipu_irq_mask,
+ .unmask = ipu_irq_unmask,
+};
+
+/* Install the IRQ handler */
+int __init ipu_irq_attach_irq(struct ipu *ipu, struct platform_device *dev)
+{
+ struct ipu_platform_data *pdata = dev->dev.platform_data;
+ unsigned int irq, irq_base, i;
+
+ irq_base = pdata->irq_base;
+
+ for (i = 0; i < IPU_IRQ_NR_BANKS; i++)
+ irq_bank[i].ipu = ipu;
+
+ for (i = 0; i < CONFIG_MX3_IPU_IRQS; i++) {
+ int ret;
+
+ irq = irq_base + i;
+ ret = set_irq_chip(irq, &ipu_irq_chip);
+ if (ret < 0)
+ return ret;
+ ret = set_irq_chip_data(irq, irq_map + i);
+ if (ret < 0)
+ return ret;
+ irq_map[i].ipu = ipu;
+ irq_map[i].irq = irq;
+ irq_map[i].source = -EINVAL;
+ set_irq_handler(irq, handle_level_irq);
+#ifdef CONFIG_ARM
+ set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
+#endif
+ }
+
+ set_irq_data(ipu->irq_fn, ipu);
+ set_irq_chained_handler(ipu->irq_fn, ipu_irq_fn);
+
+ set_irq_data(ipu->irq_err, ipu);
+ set_irq_chained_handler(ipu->irq_err, ipu_irq_err);
+
+ return 0;
+}
+
+void ipu_irq_detach_irq(struct ipu *ipu, struct platform_device *dev)
+{
+ struct ipu_platform_data *pdata = dev->dev.platform_data;
+ unsigned int irq, irq_base;
+
+ irq_base = pdata->irq_base;
+
+ set_irq_chained_handler(ipu->irq_fn, NULL);
+ set_irq_data(ipu->irq_fn, NULL);
+
+ set_irq_chained_handler(ipu->irq_err, NULL);
+ set_irq_data(ipu->irq_err, NULL);
+
+ for (irq = irq_base; irq < irq_base + CONFIG_MX3_IPU_IRQS; irq++) {
+#ifdef CONFIG_ARM
+ set_irq_flags(irq, 0);
+#endif
+ set_irq_chip(irq, NULL);
+ set_irq_chip_data(irq, NULL);
+ }
+}
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
new file mode 100644
index 00000000000..466ab10c1ff
--- /dev/null
+++ b/drivers/dma/mv_xor.c
@@ -0,0 +1,1381 @@
+/*
+ * offload engine driver for the Marvell XOR engine
+ * Copyright (C) 2007, 2008, Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/memory.h>
+#include <plat/mv_xor.h>
+#include "mv_xor.h"
+
+static void mv_xor_issue_pending(struct dma_chan *chan);
+
+#define to_mv_xor_chan(chan) \
+ container_of(chan, struct mv_xor_chan, common)
+
+#define to_mv_xor_device(dev) \
+ container_of(dev, struct mv_xor_device, common)
+
+#define to_mv_xor_slot(tx) \
+ container_of(tx, struct mv_xor_desc_slot, async_tx)
+
+static void mv_desc_init(struct mv_xor_desc_slot *desc, unsigned long flags)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+
+ hw_desc->status = (1 << 31);
+ hw_desc->phy_next_desc = 0;
+ hw_desc->desc_command = (1 << 31);
+}
+
+static u32 mv_desc_get_dest_addr(struct mv_xor_desc_slot *desc)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+ return hw_desc->phy_dest_addr;
+}
+
+static u32 mv_desc_get_src_addr(struct mv_xor_desc_slot *desc,
+ int src_idx)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+ return hw_desc->phy_src_addr[src_idx];
+}
+
+
+static void mv_desc_set_byte_count(struct mv_xor_desc_slot *desc,
+ u32 byte_count)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+ hw_desc->byte_count = byte_count;
+}
+
+static void mv_desc_set_next_desc(struct mv_xor_desc_slot *desc,
+ u32 next_desc_addr)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+ BUG_ON(hw_desc->phy_next_desc);
+ hw_desc->phy_next_desc = next_desc_addr;
+}
+
+static void mv_desc_clear_next_desc(struct mv_xor_desc_slot *desc)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+ hw_desc->phy_next_desc = 0;
+}
+
+static void mv_desc_set_block_fill_val(struct mv_xor_desc_slot *desc, u32 val)
+{
+ desc->value = val;
+}
+
+static void mv_desc_set_dest_addr(struct mv_xor_desc_slot *desc,
+ dma_addr_t addr)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+ hw_desc->phy_dest_addr = addr;
+}
+
+static int mv_chan_memset_slot_count(size_t len)
+{
+ return 1;
+}
+
+#define mv_chan_memcpy_slot_count(c) mv_chan_memset_slot_count(c)
+
+static void mv_desc_set_src_addr(struct mv_xor_desc_slot *desc,
+ int index, dma_addr_t addr)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+ hw_desc->phy_src_addr[index] = addr;
+ if (desc->type == DMA_XOR)
+ hw_desc->desc_command |= (1 << index);
+}
+
+static u32 mv_chan_get_current_desc(struct mv_xor_chan *chan)
+{
+ return __raw_readl(XOR_CURR_DESC(chan));
+}
+
+static void mv_chan_set_next_descriptor(struct mv_xor_chan *chan,
+ u32 next_desc_addr)
+{
+ __raw_writel(next_desc_addr, XOR_NEXT_DESC(chan));
+}
+
+static void mv_chan_set_dest_pointer(struct mv_xor_chan *chan, u32 desc_addr)
+{
+ __raw_writel(desc_addr, XOR_DEST_POINTER(chan));
+}
+
+static void mv_chan_set_block_size(struct mv_xor_chan *chan, u32 block_size)
+{
+ __raw_writel(block_size, XOR_BLOCK_SIZE(chan));
+}
+
+static void mv_chan_set_value(struct mv_xor_chan *chan, u32 value)
+{
+ __raw_writel(value, XOR_INIT_VALUE_LOW(chan));
+ __raw_writel(value, XOR_INIT_VALUE_HIGH(chan));
+}
+
+static void mv_chan_unmask_interrupts(struct mv_xor_chan *chan)
+{
+ u32 val = __raw_readl(XOR_INTR_MASK(chan));
+ val |= XOR_INTR_MASK_VALUE << (chan->idx * 16);
+ __raw_writel(val, XOR_INTR_MASK(chan));
+}
+
+static u32 mv_chan_get_intr_cause(struct mv_xor_chan *chan)
+{
+ u32 intr_cause = __raw_readl(XOR_INTR_CAUSE(chan));
+ intr_cause = (intr_cause >> (chan->idx * 16)) & 0xFFFF;
+ return intr_cause;
+}
+
+static int mv_is_err_intr(u32 intr_cause)
+{
+ if (intr_cause & ((1<<4)|(1<<5)|(1<<6)|(1<<7)|(1<<8)|(1<<9)))
+ return 1;
+
+ return 0;
+}
+
+static void mv_xor_device_clear_eoc_cause(struct mv_xor_chan *chan)
+{
+ u32 val = (1 << (1 + (chan->idx * 16)));
+ dev_dbg(chan->device->common.dev, "%s, val 0x%08x\n", __func__, val);
+ __raw_writel(val, XOR_INTR_CAUSE(chan));
+}
+
+static void mv_xor_device_clear_err_status(struct mv_xor_chan *chan)
+{
+ u32 val = 0xFFFF0000 >> (chan->idx * 16);
+ __raw_writel(val, XOR_INTR_CAUSE(chan));
+}
+
+static int mv_can_chain(struct mv_xor_desc_slot *desc)
+{
+ struct mv_xor_desc_slot *chain_old_tail = list_entry(
+ desc->chain_node.prev, struct mv_xor_desc_slot, chain_node);
+
+ if (chain_old_tail->type != desc->type)
+ return 0;
+ if (desc->type == DMA_MEMSET)
+ return 0;
+
+ return 1;
+}
+
+static void mv_set_mode(struct mv_xor_chan *chan,
+ enum dma_transaction_type type)
+{
+ u32 op_mode;
+ u32 config = __raw_readl(XOR_CONFIG(chan));
+
+ switch (type) {
+ case DMA_XOR:
+ op_mode = XOR_OPERATION_MODE_XOR;
+ break;
+ case DMA_MEMCPY:
+ op_mode = XOR_OPERATION_MODE_MEMCPY;
+ break;
+ case DMA_MEMSET:
+ op_mode = XOR_OPERATION_MODE_MEMSET;
+ break;
+ default:
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "error: unsupported operation %d.\n",
+ type);
+ BUG();
+ return;
+ }
+
+ config &= ~0x7;
+ config |= op_mode;
+ __raw_writel(config, XOR_CONFIG(chan));
+ chan->current_type = type;
+}
+
+static void mv_chan_activate(struct mv_xor_chan *chan)
+{
+ u32 activation;
+
+ dev_dbg(chan->device->common.dev, " activate chan.\n");
+ activation = __raw_readl(XOR_ACTIVATION(chan));
+ activation |= 0x1;
+ __raw_writel(activation, XOR_ACTIVATION(chan));
+}
+
+static char mv_chan_is_busy(struct mv_xor_chan *chan)
+{
+ u32 state = __raw_readl(XOR_ACTIVATION(chan));
+
+ state = (state >> 4) & 0x3;
+
+ return (state == 1) ? 1 : 0;
+}
+
+static int mv_chan_xor_slot_count(size_t len, int src_cnt)
+{
+ return 1;
+}
+
+/**
+ * mv_xor_free_slots - flags descriptor slots for reuse
+ * @slot: Slot to free
+ * Caller must hold &mv_chan->lock while calling this function
+ */
+static void mv_xor_free_slots(struct mv_xor_chan *mv_chan,
+ struct mv_xor_desc_slot *slot)
+{
+ dev_dbg(mv_chan->device->common.dev, "%s %d slot %p\n",
+ __func__, __LINE__, slot);
+
+ slot->slots_per_op = 0;
+
+}
+
+/*
+ * mv_xor_start_new_chain - program the engine to operate on new chain headed by
+ * sw_desc
+ * Caller must hold &mv_chan->lock while calling this function
+ */
+static void mv_xor_start_new_chain(struct mv_xor_chan *mv_chan,
+ struct mv_xor_desc_slot *sw_desc)
+{
+ dev_dbg(mv_chan->device->common.dev, "%s %d: sw_desc %p\n",
+ __func__, __LINE__, sw_desc);
+ if (sw_desc->type != mv_chan->current_type)
+ mv_set_mode(mv_chan, sw_desc->type);
+
+ if (sw_desc->type == DMA_MEMSET) {
+ /* for memset requests we need to program the engine, no
+ * descriptors used.
+ */
+ struct mv_xor_desc *hw_desc = sw_desc->hw_desc;
+ mv_chan_set_dest_pointer(mv_chan, hw_desc->phy_dest_addr);
+ mv_chan_set_block_size(mv_chan, sw_desc->unmap_len);
+ mv_chan_set_value(mv_chan, sw_desc->value);
+ } else {
+ /* set the hardware chain */
+ mv_chan_set_next_descriptor(mv_chan, sw_desc->async_tx.phys);
+ }
+ mv_chan->pending += sw_desc->slot_cnt;
+ mv_xor_issue_pending(&mv_chan->common);
+}
+
+static dma_cookie_t
+mv_xor_run_tx_complete_actions(struct mv_xor_desc_slot *desc,
+ struct mv_xor_chan *mv_chan, dma_cookie_t cookie)
+{
+ BUG_ON(desc->async_tx.cookie < 0);
+
+ if (desc->async_tx.cookie > 0) {
+ cookie = desc->async_tx.cookie;
+
+ /* call the callback (must not sleep or submit new
+ * operations to this channel)
+ */
+ if (desc->async_tx.callback)
+ desc->async_tx.callback(
+ desc->async_tx.callback_param);
+
+ /* unmap dma addresses
+ * (unmap_single vs unmap_page?)
+ */
+ if (desc->group_head && desc->unmap_len) {
+ struct mv_xor_desc_slot *unmap = desc->group_head;
+ struct device *dev =
+ &mv_chan->device->pdev->dev;
+ u32 len = unmap->unmap_len;
+ enum dma_ctrl_flags flags = desc->async_tx.flags;
+ u32 src_cnt;
+ dma_addr_t addr;
+ dma_addr_t dest;
+
+ src_cnt = unmap->unmap_src_cnt;
+ dest = mv_desc_get_dest_addr(unmap);
+ if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+ enum dma_data_direction dir;
+
+ if (src_cnt > 1) /* is xor ? */
+ dir = DMA_BIDIRECTIONAL;
+ else
+ dir = DMA_FROM_DEVICE;
+ dma_unmap_page(dev, dest, len, dir);
+ }
+
+ if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+ while (src_cnt--) {
+ addr = mv_desc_get_src_addr(unmap,
+ src_cnt);
+ if (addr == dest)
+ continue;
+ dma_unmap_page(dev, addr, len,
+ DMA_TO_DEVICE);
+ }
+ }
+ desc->group_head = NULL;
+ }
+ }
+
+ /* run dependent operations */
+ dma_run_dependencies(&desc->async_tx);
+
+ return cookie;
+}
+
+static int
+mv_xor_clean_completed_slots(struct mv_xor_chan *mv_chan)
+{
+ struct mv_xor_desc_slot *iter, *_iter;
+
+ dev_dbg(mv_chan->device->common.dev, "%s %d\n", __func__, __LINE__);
+ list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots,
+ completed_node) {
+
+ if (async_tx_test_ack(&iter->async_tx)) {
+ list_del(&iter->completed_node);
+ mv_xor_free_slots(mv_chan, iter);
+ }
+ }
+ return 0;
+}
+
+static int
+mv_xor_clean_slot(struct mv_xor_desc_slot *desc,
+ struct mv_xor_chan *mv_chan)
+{
+ dev_dbg(mv_chan->device->common.dev, "%s %d: desc %p flags %d\n",
+ __func__, __LINE__, desc, desc->async_tx.flags);
+ list_del(&desc->chain_node);
+ /* the client is allowed to attach dependent operations
+ * until 'ack' is set
+ */
+ if (!async_tx_test_ack(&desc->async_tx)) {
+ /* move this slot to the completed_slots */
+ list_add_tail(&desc->completed_node, &mv_chan->completed_slots);
+ return 0;
+ }
+
+ mv_xor_free_slots(mv_chan, desc);
+ return 0;
+}
+
+static void __mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan)
+{
+ struct mv_xor_desc_slot *iter, *_iter;
+ dma_cookie_t cookie = 0;
+ int busy = mv_chan_is_busy(mv_chan);
+ u32 current_desc = mv_chan_get_current_desc(mv_chan);
+ int seen_current = 0;
+
+ dev_dbg(mv_chan->device->common.dev, "%s %d\n", __func__, __LINE__);
+ dev_dbg(mv_chan->device->common.dev, "current_desc %x\n", current_desc);
+ mv_xor_clean_completed_slots(mv_chan);
+
+ /* free completed slots from the chain starting with
+ * the oldest descriptor
+ */
+
+ list_for_each_entry_safe(iter, _iter, &mv_chan->chain,
+ chain_node) {
+ prefetch(_iter);
+ prefetch(&_iter->async_tx);
+
+ /* do not advance past the current descriptor loaded into the
+ * hardware channel, subsequent descriptors are either in
+ * process or have not been submitted
+ */
+ if (seen_current)
+ break;
+
+ /* stop the search if we reach the current descriptor and the
+ * channel is busy
+ */
+ if (iter->async_tx.phys == current_desc) {
+ seen_current = 1;
+ if (busy)
+ break;
+ }
+
+ cookie = mv_xor_run_tx_complete_actions(iter, mv_chan, cookie);
+
+ if (mv_xor_clean_slot(iter, mv_chan))
+ break;
+ }
+
+ if ((busy == 0) && !list_empty(&mv_chan->chain)) {
+ struct mv_xor_desc_slot *chain_head;
+ chain_head = list_entry(mv_chan->chain.next,
+ struct mv_xor_desc_slot,
+ chain_node);
+
+ mv_xor_start_new_chain(mv_chan, chain_head);
+ }
+
+ if (cookie > 0)
+ mv_chan->completed_cookie = cookie;
+}
+
+static void
+mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan)
+{
+ spin_lock_bh(&mv_chan->lock);
+ __mv_xor_slot_cleanup(mv_chan);
+ spin_unlock_bh(&mv_chan->lock);
+}
+
+static void mv_xor_tasklet(unsigned long data)
+{
+ struct mv_xor_chan *chan = (struct mv_xor_chan *) data;
+ __mv_xor_slot_cleanup(chan);
+}
+
+static struct mv_xor_desc_slot *
+mv_xor_alloc_slots(struct mv_xor_chan *mv_chan, int num_slots,
+ int slots_per_op)
+{
+ struct mv_xor_desc_slot *iter, *_iter, *alloc_start = NULL;
+ LIST_HEAD(chain);
+ int slots_found, retry = 0;
+
+ /* start search from the last allocated descrtiptor
+ * if a contiguous allocation can not be found start searching
+ * from the beginning of the list
+ */
+retry:
+ slots_found = 0;
+ if (retry == 0)
+ iter = mv_chan->last_used;
+ else
+ iter = list_entry(&mv_chan->all_slots,
+ struct mv_xor_desc_slot,
+ slot_node);
+
+ list_for_each_entry_safe_continue(
+ iter, _iter, &mv_chan->all_slots, slot_node) {
+ prefetch(_iter);
+ prefetch(&_iter->async_tx);
+ if (iter->slots_per_op) {
+ /* give up after finding the first busy slot
+ * on the second pass through the list
+ */
+ if (retry)
+ break;
+
+ slots_found = 0;
+ continue;
+ }
+
+ /* start the allocation if the slot is correctly aligned */
+ if (!slots_found++)
+ alloc_start = iter;
+
+ if (slots_found == num_slots) {
+ struct mv_xor_desc_slot *alloc_tail = NULL;
+ struct mv_xor_desc_slot *last_used = NULL;
+ iter = alloc_start;
+ while (num_slots) {
+ int i;
+
+ /* pre-ack all but the last descriptor */
+ async_tx_ack(&iter->async_tx);
+
+ list_add_tail(&iter->chain_node, &chain);
+ alloc_tail = iter;
+ iter->async_tx.cookie = 0;
+ iter->slot_cnt = num_slots;
+ iter->xor_check_result = NULL;
+ for (i = 0; i < slots_per_op; i++) {
+ iter->slots_per_op = slots_per_op - i;
+ last_used = iter;
+ iter = list_entry(iter->slot_node.next,
+ struct mv_xor_desc_slot,
+ slot_node);
+ }
+ num_slots -= slots_per_op;
+ }
+ alloc_tail->group_head = alloc_start;
+ alloc_tail->async_tx.cookie = -EBUSY;
+ list_splice(&chain, &alloc_tail->tx_list);
+ mv_chan->last_used = last_used;
+ mv_desc_clear_next_desc(alloc_start);
+ mv_desc_clear_next_desc(alloc_tail);
+ return alloc_tail;
+ }
+ }
+ if (!retry++)
+ goto retry;
+
+ /* try to free some slots if the allocation fails */
+ tasklet_schedule(&mv_chan->irq_tasklet);
+
+ return NULL;
+}
+
+static dma_cookie_t
+mv_desc_assign_cookie(struct mv_xor_chan *mv_chan,
+ struct mv_xor_desc_slot *desc)
+{
+ dma_cookie_t cookie = mv_chan->common.cookie;
+
+ if (++cookie < 0)
+ cookie = 1;
+ mv_chan->common.cookie = desc->async_tx.cookie = cookie;
+ return cookie;
+}
+
+/************************ DMA engine API functions ****************************/
+static dma_cookie_t
+mv_xor_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct mv_xor_desc_slot *sw_desc = to_mv_xor_slot(tx);
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(tx->chan);
+ struct mv_xor_desc_slot *grp_start, *old_chain_tail;
+ dma_cookie_t cookie;
+ int new_hw_chain = 1;
+
+ dev_dbg(mv_chan->device->common.dev,
+ "%s sw_desc %p: async_tx %p\n",
+ __func__, sw_desc, &sw_desc->async_tx);
+
+ grp_start = sw_desc->group_head;
+
+ spin_lock_bh(&mv_chan->lock);
+ cookie = mv_desc_assign_cookie(mv_chan, sw_desc);
+
+ if (list_empty(&mv_chan->chain))
+ list_splice_init(&sw_desc->tx_list, &mv_chan->chain);
+ else {
+ new_hw_chain = 0;
+
+ old_chain_tail = list_entry(mv_chan->chain.prev,
+ struct mv_xor_desc_slot,
+ chain_node);
+ list_splice_init(&grp_start->tx_list,
+ &old_chain_tail->chain_node);
+
+ if (!mv_can_chain(grp_start))
+ goto submit_done;
+
+ dev_dbg(mv_chan->device->common.dev, "Append to last desc %x\n",
+ old_chain_tail->async_tx.phys);
+
+ /* fix up the hardware chain */
+ mv_desc_set_next_desc(old_chain_tail, grp_start->async_tx.phys);
+
+ /* if the channel is not busy */
+ if (!mv_chan_is_busy(mv_chan)) {
+ u32 current_desc = mv_chan_get_current_desc(mv_chan);
+ /*
+ * and the curren desc is the end of the chain before
+ * the append, then we need to start the channel
+ */
+ if (current_desc == old_chain_tail->async_tx.phys)
+ new_hw_chain = 1;
+ }
+ }
+
+ if (new_hw_chain)
+ mv_xor_start_new_chain(mv_chan, grp_start);
+
+submit_done:
+ spin_unlock_bh(&mv_chan->lock);
+
+ return cookie;
+}
+
+/* returns the number of allocated descriptors */
+static int mv_xor_alloc_chan_resources(struct dma_chan *chan)
+{
+ char *hw_desc;
+ int idx;
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+ struct mv_xor_desc_slot *slot = NULL;
+ struct mv_xor_platform_data *plat_data =
+ mv_chan->device->pdev->dev.platform_data;
+ int num_descs_in_pool = plat_data->pool_size/MV_XOR_SLOT_SIZE;
+
+ /* Allocate descriptor slots */
+ idx = mv_chan->slots_allocated;
+ while (idx < num_descs_in_pool) {
+ slot = kzalloc(sizeof(*slot), GFP_KERNEL);
+ if (!slot) {
+ printk(KERN_INFO "MV XOR Channel only initialized"
+ " %d descriptor slots", idx);
+ break;
+ }
+ hw_desc = (char *) mv_chan->device->dma_desc_pool_virt;
+ slot->hw_desc = (void *) &hw_desc[idx * MV_XOR_SLOT_SIZE];
+
+ dma_async_tx_descriptor_init(&slot->async_tx, chan);
+ slot->async_tx.tx_submit = mv_xor_tx_submit;
+ INIT_LIST_HEAD(&slot->chain_node);
+ INIT_LIST_HEAD(&slot->slot_node);
+ INIT_LIST_HEAD(&slot->tx_list);
+ hw_desc = (char *) mv_chan->device->dma_desc_pool;
+ slot->async_tx.phys =
+ (dma_addr_t) &hw_desc[idx * MV_XOR_SLOT_SIZE];
+ slot->idx = idx++;
+
+ spin_lock_bh(&mv_chan->lock);
+ mv_chan->slots_allocated = idx;
+ list_add_tail(&slot->slot_node, &mv_chan->all_slots);
+ spin_unlock_bh(&mv_chan->lock);
+ }
+
+ if (mv_chan->slots_allocated && !mv_chan->last_used)
+ mv_chan->last_used = list_entry(mv_chan->all_slots.next,
+ struct mv_xor_desc_slot,
+ slot_node);
+
+ dev_dbg(mv_chan->device->common.dev,
+ "allocated %d descriptor slots last_used: %p\n",
+ mv_chan->slots_allocated, mv_chan->last_used);
+
+ return mv_chan->slots_allocated ? : -ENOMEM;
+}
+
+static struct dma_async_tx_descriptor *
+mv_xor_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+ size_t len, unsigned long flags)
+{
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+ struct mv_xor_desc_slot *sw_desc, *grp_start;
+ int slot_cnt;
+
+ dev_dbg(mv_chan->device->common.dev,
+ "%s dest: %x src %x len: %u flags: %ld\n",
+ __func__, dest, src, len, flags);
+ if (unlikely(len < MV_XOR_MIN_BYTE_COUNT))
+ return NULL;
+
+ BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT));
+
+ spin_lock_bh(&mv_chan->lock);
+ slot_cnt = mv_chan_memcpy_slot_count(len);
+ sw_desc = mv_xor_alloc_slots(mv_chan, slot_cnt, 1);
+ if (sw_desc) {
+ sw_desc->type = DMA_MEMCPY;
+ sw_desc->async_tx.flags = flags;
+ grp_start = sw_desc->group_head;
+ mv_desc_init(grp_start, flags);
+ mv_desc_set_byte_count(grp_start, len);
+ mv_desc_set_dest_addr(sw_desc->group_head, dest);
+ mv_desc_set_src_addr(grp_start, 0, src);
+ sw_desc->unmap_src_cnt = 1;
+ sw_desc->unmap_len = len;
+ }
+ spin_unlock_bh(&mv_chan->lock);
+
+ dev_dbg(mv_chan->device->common.dev,
+ "%s sw_desc %p async_tx %p\n",
+ __func__, sw_desc, sw_desc ? &sw_desc->async_tx : 0);
+
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+mv_xor_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value,
+ size_t len, unsigned long flags)
+{
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+ struct mv_xor_desc_slot *sw_desc, *grp_start;
+ int slot_cnt;
+
+ dev_dbg(mv_chan->device->common.dev,
+ "%s dest: %x len: %u flags: %ld\n",
+ __func__, dest, len, flags);
+ if (unlikely(len < MV_XOR_MIN_BYTE_COUNT))
+ return NULL;
+
+ BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT));
+
+ spin_lock_bh(&mv_chan->lock);
+ slot_cnt = mv_chan_memset_slot_count(len);
+ sw_desc = mv_xor_alloc_slots(mv_chan, slot_cnt, 1);
+ if (sw_desc) {
+ sw_desc->type = DMA_MEMSET;
+ sw_desc->async_tx.flags = flags;
+ grp_start = sw_desc->group_head;
+ mv_desc_init(grp_start, flags);
+ mv_desc_set_byte_count(grp_start, len);
+ mv_desc_set_dest_addr(sw_desc->group_head, dest);
+ mv_desc_set_block_fill_val(grp_start, value);
+ sw_desc->unmap_src_cnt = 1;
+ sw_desc->unmap_len = len;
+ }
+ spin_unlock_bh(&mv_chan->lock);
+ dev_dbg(mv_chan->device->common.dev,
+ "%s sw_desc %p async_tx %p \n",
+ __func__, sw_desc, &sw_desc->async_tx);
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+mv_xor_prep_dma_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
+ unsigned int src_cnt, size_t len, unsigned long flags)
+{
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+ struct mv_xor_desc_slot *sw_desc, *grp_start;
+ int slot_cnt;
+
+ if (unlikely(len < MV_XOR_MIN_BYTE_COUNT))
+ return NULL;
+
+ BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT));
+
+ dev_dbg(mv_chan->device->common.dev,
+ "%s src_cnt: %d len: dest %x %u flags: %ld\n",
+ __func__, src_cnt, len, dest, flags);
+
+ spin_lock_bh(&mv_chan->lock);
+ slot_cnt = mv_chan_xor_slot_count(len, src_cnt);
+ sw_desc = mv_xor_alloc_slots(mv_chan, slot_cnt, 1);
+ if (sw_desc) {
+ sw_desc->type = DMA_XOR;
+ sw_desc->async_tx.flags = flags;
+ grp_start = sw_desc->group_head;
+ mv_desc_init(grp_start, flags);
+ /* the byte count field is the same as in memcpy desc*/
+ mv_desc_set_byte_count(grp_start, len);
+ mv_desc_set_dest_addr(sw_desc->group_head, dest);
+ sw_desc->unmap_src_cnt = src_cnt;
+ sw_desc->unmap_len = len;
+ while (src_cnt--)
+ mv_desc_set_src_addr(grp_start, src_cnt, src[src_cnt]);
+ }
+ spin_unlock_bh(&mv_chan->lock);
+ dev_dbg(mv_chan->device->common.dev,
+ "%s sw_desc %p async_tx %p \n",
+ __func__, sw_desc, &sw_desc->async_tx);
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static void mv_xor_free_chan_resources(struct dma_chan *chan)
+{
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+ struct mv_xor_desc_slot *iter, *_iter;
+ int in_use_descs = 0;
+
+ mv_xor_slot_cleanup(mv_chan);
+
+ spin_lock_bh(&mv_chan->lock);
+ list_for_each_entry_safe(iter, _iter, &mv_chan->chain,
+ chain_node) {
+ in_use_descs++;
+ list_del(&iter->chain_node);
+ }
+ list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots,
+ completed_node) {
+ in_use_descs++;
+ list_del(&iter->completed_node);
+ }
+ list_for_each_entry_safe_reverse(
+ iter, _iter, &mv_chan->all_slots, slot_node) {
+ list_del(&iter->slot_node);
+ kfree(iter);
+ mv_chan->slots_allocated--;
+ }
+ mv_chan->last_used = NULL;
+
+ dev_dbg(mv_chan->device->common.dev, "%s slots_allocated %d\n",
+ __func__, mv_chan->slots_allocated);
+ spin_unlock_bh(&mv_chan->lock);
+
+ if (in_use_descs)
+ dev_err(mv_chan->device->common.dev,
+ "freeing %d in use descriptors!\n", in_use_descs);
+}
+
+/**
+ * mv_xor_is_complete - poll the status of an XOR transaction
+ * @chan: XOR channel handle
+ * @cookie: XOR transaction identifier
+ */
+static enum dma_status mv_xor_is_complete(struct dma_chan *chan,
+ dma_cookie_t cookie,
+ dma_cookie_t *done,
+ dma_cookie_t *used)
+{
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+ dma_cookie_t last_used;
+ dma_cookie_t last_complete;
+ enum dma_status ret;
+
+ last_used = chan->cookie;
+ last_complete = mv_chan->completed_cookie;
+ mv_chan->is_complete_cookie = cookie;
+ if (done)
+ *done = last_complete;
+ if (used)
+ *used = last_used;
+
+ ret = dma_async_is_complete(cookie, last_complete, last_used);
+ if (ret == DMA_SUCCESS) {
+ mv_xor_clean_completed_slots(mv_chan);
+ return ret;
+ }
+ mv_xor_slot_cleanup(mv_chan);
+
+ last_used = chan->cookie;
+ last_complete = mv_chan->completed_cookie;
+
+ if (done)
+ *done = last_complete;
+ if (used)
+ *used = last_used;
+
+ return dma_async_is_complete(cookie, last_complete, last_used);
+}
+
+static void mv_dump_xor_regs(struct mv_xor_chan *chan)
+{
+ u32 val;
+
+ val = __raw_readl(XOR_CONFIG(chan));
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "config 0x%08x.\n", val);
+
+ val = __raw_readl(XOR_ACTIVATION(chan));
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "activation 0x%08x.\n", val);
+
+ val = __raw_readl(XOR_INTR_CAUSE(chan));
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "intr cause 0x%08x.\n", val);
+
+ val = __raw_readl(XOR_INTR_MASK(chan));
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "intr mask 0x%08x.\n", val);
+
+ val = __raw_readl(XOR_ERROR_CAUSE(chan));
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "error cause 0x%08x.\n", val);
+
+ val = __raw_readl(XOR_ERROR_ADDR(chan));
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "error addr 0x%08x.\n", val);
+}
+
+static void mv_xor_err_interrupt_handler(struct mv_xor_chan *chan,
+ u32 intr_cause)
+{
+ if (intr_cause & (1 << 4)) {
+ dev_dbg(chan->device->common.dev,
+ "ignore this error\n");
+ return;
+ }
+
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "error on chan %d. intr cause 0x%08x.\n",
+ chan->idx, intr_cause);
+
+ mv_dump_xor_regs(chan);
+ BUG();
+}
+
+static irqreturn_t mv_xor_interrupt_handler(int irq, void *data)
+{
+ struct mv_xor_chan *chan = data;
+ u32 intr_cause = mv_chan_get_intr_cause(chan);
+
+ dev_dbg(chan->device->common.dev, "intr cause %x\n", intr_cause);
+
+ if (mv_is_err_intr(intr_cause))
+ mv_xor_err_interrupt_handler(chan, intr_cause);
+
+ tasklet_schedule(&chan->irq_tasklet);
+
+ mv_xor_device_clear_eoc_cause(chan);
+
+ return IRQ_HANDLED;
+}
+
+static void mv_xor_issue_pending(struct dma_chan *chan)
+{
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+
+ if (mv_chan->pending >= MV_XOR_THRESHOLD) {
+ mv_chan->pending = 0;
+ mv_chan_activate(mv_chan);
+ }
+}
+
+/*
+ * Perform a transaction to verify the HW works.
+ */
+#define MV_XOR_TEST_SIZE 2000
+
+static int __devinit mv_xor_memcpy_self_test(struct mv_xor_device *device)
+{
+ int i;
+ void *src, *dest;
+ dma_addr_t src_dma, dest_dma;
+ struct dma_chan *dma_chan;
+ dma_cookie_t cookie;
+ struct dma_async_tx_descriptor *tx;
+ int err = 0;
+ struct mv_xor_chan *mv_chan;
+
+ src = kmalloc(sizeof(u8) * MV_XOR_TEST_SIZE, GFP_KERNEL);
+ if (!src)
+ return -ENOMEM;
+
+ dest = kzalloc(sizeof(u8) * MV_XOR_TEST_SIZE, GFP_KERNEL);
+ if (!dest) {
+ kfree(src);
+ return -ENOMEM;
+ }
+
+ /* Fill in src buffer */
+ for (i = 0; i < MV_XOR_TEST_SIZE; i++)
+ ((u8 *) src)[i] = (u8)i;
+
+ /* Start copy, using first DMA channel */
+ dma_chan = container_of(device->common.channels.next,
+ struct dma_chan,
+ device_node);
+ if (mv_xor_alloc_chan_resources(dma_chan) < 1) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ dest_dma = dma_map_single(dma_chan->device->dev, dest,
+ MV_XOR_TEST_SIZE, DMA_FROM_DEVICE);
+
+ src_dma = dma_map_single(dma_chan->device->dev, src,
+ MV_XOR_TEST_SIZE, DMA_TO_DEVICE);
+
+ tx = mv_xor_prep_dma_memcpy(dma_chan, dest_dma, src_dma,
+ MV_XOR_TEST_SIZE, 0);
+ cookie = mv_xor_tx_submit(tx);
+ mv_xor_issue_pending(dma_chan);
+ async_tx_ack(tx);
+ msleep(1);
+
+ if (mv_xor_is_complete(dma_chan, cookie, NULL, NULL) !=
+ DMA_SUCCESS) {
+ dev_printk(KERN_ERR, dma_chan->device->dev,
+ "Self-test copy timed out, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ mv_chan = to_mv_xor_chan(dma_chan);
+ dma_sync_single_for_cpu(&mv_chan->device->pdev->dev, dest_dma,
+ MV_XOR_TEST_SIZE, DMA_FROM_DEVICE);
+ if (memcmp(src, dest, MV_XOR_TEST_SIZE)) {
+ dev_printk(KERN_ERR, dma_chan->device->dev,
+ "Self-test copy failed compare, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+free_resources:
+ mv_xor_free_chan_resources(dma_chan);
+out:
+ kfree(src);
+ kfree(dest);
+ return err;
+}
+
+#define MV_XOR_NUM_SRC_TEST 4 /* must be <= 15 */
+static int __devinit
+mv_xor_xor_self_test(struct mv_xor_device *device)
+{
+ int i, src_idx;
+ struct page *dest;
+ struct page *xor_srcs[MV_XOR_NUM_SRC_TEST];
+ dma_addr_t dma_srcs[MV_XOR_NUM_SRC_TEST];
+ dma_addr_t dest_dma;
+ struct dma_async_tx_descriptor *tx;
+ struct dma_chan *dma_chan;
+ dma_cookie_t cookie;
+ u8 cmp_byte = 0;
+ u32 cmp_word;
+ int err = 0;
+ struct mv_xor_chan *mv_chan;
+
+ for (src_idx = 0; src_idx < MV_XOR_NUM_SRC_TEST; src_idx++) {
+ xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
+ if (!xor_srcs[src_idx]) {
+ while (src_idx--)
+ __free_page(xor_srcs[src_idx]);
+ return -ENOMEM;
+ }
+ }
+
+ dest = alloc_page(GFP_KERNEL);
+ if (!dest) {
+ while (src_idx--)
+ __free_page(xor_srcs[src_idx]);
+ return -ENOMEM;
+ }
+
+ /* Fill in src buffers */
+ for (src_idx = 0; src_idx < MV_XOR_NUM_SRC_TEST; src_idx++) {
+ u8 *ptr = page_address(xor_srcs[src_idx]);
+ for (i = 0; i < PAGE_SIZE; i++)
+ ptr[i] = (1 << src_idx);
+ }
+
+ for (src_idx = 0; src_idx < MV_XOR_NUM_SRC_TEST; src_idx++)
+ cmp_byte ^= (u8) (1 << src_idx);
+
+ cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
+ (cmp_byte << 8) | cmp_byte;
+
+ memset(page_address(dest), 0, PAGE_SIZE);
+
+ dma_chan = container_of(device->common.channels.next,
+ struct dma_chan,
+ device_node);
+ if (mv_xor_alloc_chan_resources(dma_chan) < 1) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ /* test xor */
+ dest_dma = dma_map_page(dma_chan->device->dev, dest, 0, PAGE_SIZE,
+ DMA_FROM_DEVICE);
+
+ for (i = 0; i < MV_XOR_NUM_SRC_TEST; i++)
+ dma_srcs[i] = dma_map_page(dma_chan->device->dev, xor_srcs[i],
+ 0, PAGE_SIZE, DMA_TO_DEVICE);
+
+ tx = mv_xor_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
+ MV_XOR_NUM_SRC_TEST, PAGE_SIZE, 0);
+
+ cookie = mv_xor_tx_submit(tx);
+ mv_xor_issue_pending(dma_chan);
+ async_tx_ack(tx);
+ msleep(8);
+
+ if (mv_xor_is_complete(dma_chan, cookie, NULL, NULL) !=
+ DMA_SUCCESS) {
+ dev_printk(KERN_ERR, dma_chan->device->dev,
+ "Self-test xor timed out, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ mv_chan = to_mv_xor_chan(dma_chan);
+ dma_sync_single_for_cpu(&mv_chan->device->pdev->dev, dest_dma,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
+ u32 *ptr = page_address(dest);
+ if (ptr[i] != cmp_word) {
+ dev_printk(KERN_ERR, dma_chan->device->dev,
+ "Self-test xor failed compare, disabling."
+ " index %d, data %x, expected %x\n", i,
+ ptr[i], cmp_word);
+ err = -ENODEV;
+ goto free_resources;
+ }
+ }
+
+free_resources:
+ mv_xor_free_chan_resources(dma_chan);
+out:
+ src_idx = MV_XOR_NUM_SRC_TEST;
+ while (src_idx--)
+ __free_page(xor_srcs[src_idx]);
+ __free_page(dest);
+ return err;
+}
+
+static int __devexit mv_xor_remove(struct platform_device *dev)
+{
+ struct mv_xor_device *device = platform_get_drvdata(dev);
+ struct dma_chan *chan, *_chan;
+ struct mv_xor_chan *mv_chan;
+ struct mv_xor_platform_data *plat_data = dev->dev.platform_data;
+
+ dma_async_device_unregister(&device->common);
+
+ dma_free_coherent(&dev->dev, plat_data->pool_size,
+ device->dma_desc_pool_virt, device->dma_desc_pool);
+
+ list_for_each_entry_safe(chan, _chan, &device->common.channels,
+ device_node) {
+ mv_chan = to_mv_xor_chan(chan);
+ list_del(&chan->device_node);
+ }
+
+ return 0;
+}
+
+static int __devinit mv_xor_probe(struct platform_device *pdev)
+{
+ int ret = 0;
+ int irq;
+ struct mv_xor_device *adev;
+ struct mv_xor_chan *mv_chan;
+ struct dma_device *dma_dev;
+ struct mv_xor_platform_data *plat_data = pdev->dev.platform_data;
+
+
+ adev = devm_kzalloc(&pdev->dev, sizeof(*adev), GFP_KERNEL);
+ if (!adev)
+ return -ENOMEM;
+
+ dma_dev = &adev->common;
+
+ /* allocate coherent memory for hardware descriptors
+ * note: writecombine gives slightly better performance, but
+ * requires that we explicitly flush the writes
+ */
+ adev->dma_desc_pool_virt = dma_alloc_writecombine(&pdev->dev,
+ plat_data->pool_size,
+ &adev->dma_desc_pool,
+ GFP_KERNEL);
+ if (!adev->dma_desc_pool_virt)
+ return -ENOMEM;
+
+ adev->id = plat_data->hw_id;
+
+ /* discover transaction capabilites from the platform data */
+ dma_dev->cap_mask = plat_data->cap_mask;
+ adev->pdev = pdev;
+ platform_set_drvdata(pdev, adev);
+
+ adev->shared = platform_get_drvdata(plat_data->shared);
+
+ INIT_LIST_HEAD(&dma_dev->channels);
+
+ /* set base routines */
+ dma_dev->device_alloc_chan_resources = mv_xor_alloc_chan_resources;
+ dma_dev->device_free_chan_resources = mv_xor_free_chan_resources;
+ dma_dev->device_is_tx_complete = mv_xor_is_complete;
+ dma_dev->device_issue_pending = mv_xor_issue_pending;
+ dma_dev->dev = &pdev->dev;
+
+ /* set prep routines based on capability */
+ if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask))
+ dma_dev->device_prep_dma_memcpy = mv_xor_prep_dma_memcpy;
+ if (dma_has_cap(DMA_MEMSET, dma_dev->cap_mask))
+ dma_dev->device_prep_dma_memset = mv_xor_prep_dma_memset;
+ if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
+ dma_dev->max_xor = 8;
+ dma_dev->device_prep_dma_xor = mv_xor_prep_dma_xor;
+ }
+
+ mv_chan = devm_kzalloc(&pdev->dev, sizeof(*mv_chan), GFP_KERNEL);
+ if (!mv_chan) {
+ ret = -ENOMEM;
+ goto err_free_dma;
+ }
+ mv_chan->device = adev;
+ mv_chan->idx = plat_data->hw_id;
+ mv_chan->mmr_base = adev->shared->xor_base;
+
+ if (!mv_chan->mmr_base) {
+ ret = -ENOMEM;
+ goto err_free_dma;
+ }
+ tasklet_init(&mv_chan->irq_tasklet, mv_xor_tasklet, (unsigned long)
+ mv_chan);
+
+ /* clear errors before enabling interrupts */
+ mv_xor_device_clear_err_status(mv_chan);
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0) {
+ ret = irq;
+ goto err_free_dma;
+ }
+ ret = devm_request_irq(&pdev->dev, irq,
+ mv_xor_interrupt_handler,
+ 0, dev_name(&pdev->dev), mv_chan);
+ if (ret)
+ goto err_free_dma;
+
+ mv_chan_unmask_interrupts(mv_chan);
+
+ mv_set_mode(mv_chan, DMA_MEMCPY);
+
+ spin_lock_init(&mv_chan->lock);
+ INIT_LIST_HEAD(&mv_chan->chain);
+ INIT_LIST_HEAD(&mv_chan->completed_slots);
+ INIT_LIST_HEAD(&mv_chan->all_slots);
+ mv_chan->common.device = dma_dev;
+
+ list_add_tail(&mv_chan->common.device_node, &dma_dev->channels);
+
+ if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
+ ret = mv_xor_memcpy_self_test(adev);
+ dev_dbg(&pdev->dev, "memcpy self test returned %d\n", ret);
+ if (ret)
+ goto err_free_dma;
+ }
+
+ if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
+ ret = mv_xor_xor_self_test(adev);
+ dev_dbg(&pdev->dev, "xor self test returned %d\n", ret);
+ if (ret)
+ goto err_free_dma;
+ }
+
+ dev_printk(KERN_INFO, &pdev->dev, "Marvell XOR: "
+ "( %s%s%s%s)\n",
+ dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "",
+ dma_has_cap(DMA_MEMSET, dma_dev->cap_mask) ? "fill " : "",
+ dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "",
+ dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : "");
+
+ dma_async_device_register(dma_dev);
+ goto out;
+
+ err_free_dma:
+ dma_free_coherent(&adev->pdev->dev, plat_data->pool_size,
+ adev->dma_desc_pool_virt, adev->dma_desc_pool);
+ out:
+ return ret;
+}
+
+static void
+mv_xor_conf_mbus_windows(struct mv_xor_shared_private *msp,
+ struct mbus_dram_target_info *dram)
+{
+ void __iomem *base = msp->xor_base;
+ u32 win_enable = 0;
+ int i;
+
+ for (i = 0; i < 8; i++) {
+ writel(0, base + WINDOW_BASE(i));
+ writel(0, base + WINDOW_SIZE(i));
+ if (i < 4)
+ writel(0, base + WINDOW_REMAP_HIGH(i));
+ }
+
+ for (i = 0; i < dram->num_cs; i++) {
+ struct mbus_dram_window *cs = dram->cs + i;
+
+ writel((cs->base & 0xffff0000) |
+ (cs->mbus_attr << 8) |
+ dram->mbus_dram_target_id, base + WINDOW_BASE(i));
+ writel((cs->size - 1) & 0xffff0000, base + WINDOW_SIZE(i));
+
+ win_enable |= (1 << i);
+ win_enable |= 3 << (16 + (2 * i));
+ }
+
+ writel(win_enable, base + WINDOW_BAR_ENABLE(0));
+ writel(win_enable, base + WINDOW_BAR_ENABLE(1));
+}
+
+static struct platform_driver mv_xor_driver = {
+ .probe = mv_xor_probe,
+ .remove = __devexit_p(mv_xor_remove),
+ .driver = {
+ .owner = THIS_MODULE,
+ .name = MV_XOR_NAME,
+ },
+};
+
+static int mv_xor_shared_probe(struct platform_device *pdev)
+{
+ struct mv_xor_platform_shared_data *msd = pdev->dev.platform_data;
+ struct mv_xor_shared_private *msp;
+ struct resource *res;
+
+ dev_printk(KERN_NOTICE, &pdev->dev, "Marvell shared XOR driver\n");
+
+ msp = devm_kzalloc(&pdev->dev, sizeof(*msp), GFP_KERNEL);
+ if (!msp)
+ return -ENOMEM;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res)
+ return -ENODEV;
+
+ msp->xor_base = devm_ioremap(&pdev->dev, res->start,
+ res->end - res->start + 1);
+ if (!msp->xor_base)
+ return -EBUSY;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+ if (!res)
+ return -ENODEV;
+
+ msp->xor_high_base = devm_ioremap(&pdev->dev, res->start,
+ res->end - res->start + 1);
+ if (!msp->xor_high_base)
+ return -EBUSY;
+
+ platform_set_drvdata(pdev, msp);
+
+ /*
+ * (Re-)program MBUS remapping windows if we are asked to.
+ */
+ if (msd != NULL && msd->dram != NULL)
+ mv_xor_conf_mbus_windows(msp, msd->dram);
+
+ return 0;
+}
+
+static int mv_xor_shared_remove(struct platform_device *pdev)
+{
+ return 0;
+}
+
+static struct platform_driver mv_xor_shared_driver = {
+ .probe = mv_xor_shared_probe,
+ .remove = mv_xor_shared_remove,
+ .driver = {
+ .owner = THIS_MODULE,
+ .name = MV_XOR_SHARED_NAME,
+ },
+};
+
+
+static int __init mv_xor_init(void)
+{
+ int rc;
+
+ rc = platform_driver_register(&mv_xor_shared_driver);
+ if (!rc) {
+ rc = platform_driver_register(&mv_xor_driver);
+ if (rc)
+ platform_driver_unregister(&mv_xor_shared_driver);
+ }
+ return rc;
+}
+module_init(mv_xor_init);
+
+/* it's currently unsafe to unload this module */
+#if 0
+static void __exit mv_xor_exit(void)
+{
+ platform_driver_unregister(&mv_xor_driver);
+ platform_driver_unregister(&mv_xor_shared_driver);
+ return;
+}
+
+module_exit(mv_xor_exit);
+#endif
+
+MODULE_AUTHOR("Saeed Bishara <saeed@marvell.com>");
+MODULE_DESCRIPTION("DMA engine driver for Marvell's XOR engine");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/mv_xor.h b/drivers/dma/mv_xor.h
new file mode 100644
index 00000000000..977b592e976
--- /dev/null
+++ b/drivers/dma/mv_xor.h
@@ -0,0 +1,183 @@
+/*
+ * Copyright (C) 2007, 2008, Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef MV_XOR_H
+#define MV_XOR_H
+
+#include <linux/types.h>
+#include <linux/io.h>
+#include <linux/dmaengine.h>
+#include <linux/interrupt.h>
+
+#define USE_TIMER
+#define MV_XOR_SLOT_SIZE 64
+#define MV_XOR_THRESHOLD 1
+
+#define XOR_OPERATION_MODE_XOR 0
+#define XOR_OPERATION_MODE_MEMCPY 2
+#define XOR_OPERATION_MODE_MEMSET 4
+
+#define XOR_CURR_DESC(chan) (chan->mmr_base + 0x210 + (chan->idx * 4))
+#define XOR_NEXT_DESC(chan) (chan->mmr_base + 0x200 + (chan->idx * 4))
+#define XOR_BYTE_COUNT(chan) (chan->mmr_base + 0x220 + (chan->idx * 4))
+#define XOR_DEST_POINTER(chan) (chan->mmr_base + 0x2B0 + (chan->idx * 4))
+#define XOR_BLOCK_SIZE(chan) (chan->mmr_base + 0x2C0 + (chan->idx * 4))
+#define XOR_INIT_VALUE_LOW(chan) (chan->mmr_base + 0x2E0)
+#define XOR_INIT_VALUE_HIGH(chan) (chan->mmr_base + 0x2E4)
+
+#define XOR_CONFIG(chan) (chan->mmr_base + 0x10 + (chan->idx * 4))
+#define XOR_ACTIVATION(chan) (chan->mmr_base + 0x20 + (chan->idx * 4))
+#define XOR_INTR_CAUSE(chan) (chan->mmr_base + 0x30)
+#define XOR_INTR_MASK(chan) (chan->mmr_base + 0x40)
+#define XOR_ERROR_CAUSE(chan) (chan->mmr_base + 0x50)
+#define XOR_ERROR_ADDR(chan) (chan->mmr_base + 0x60)
+#define XOR_INTR_MASK_VALUE 0x3F5
+
+#define WINDOW_BASE(w) (0x250 + ((w) << 2))
+#define WINDOW_SIZE(w) (0x270 + ((w) << 2))
+#define WINDOW_REMAP_HIGH(w) (0x290 + ((w) << 2))
+#define WINDOW_BAR_ENABLE(chan) (0x240 + ((chan) << 2))
+
+struct mv_xor_shared_private {
+ void __iomem *xor_base;
+ void __iomem *xor_high_base;
+};
+
+
+/**
+ * struct mv_xor_device - internal representation of a XOR device
+ * @pdev: Platform device
+ * @id: HW XOR Device selector
+ * @dma_desc_pool: base of DMA descriptor region (DMA address)
+ * @dma_desc_pool_virt: base of DMA descriptor region (CPU address)
+ * @common: embedded struct dma_device
+ */
+struct mv_xor_device {
+ struct platform_device *pdev;
+ int id;
+ dma_addr_t dma_desc_pool;
+ void *dma_desc_pool_virt;
+ struct dma_device common;
+ struct mv_xor_shared_private *shared;
+};
+
+/**
+ * struct mv_xor_chan - internal representation of a XOR channel
+ * @pending: allows batching of hardware operations
+ * @completed_cookie: identifier for the most recently completed operation
+ * @lock: serializes enqueue/dequeue operations to the descriptors pool
+ * @mmr_base: memory mapped register base
+ * @idx: the index of the xor channel
+ * @chain: device chain view of the descriptors
+ * @completed_slots: slots completed by HW but still need to be acked
+ * @device: parent device
+ * @common: common dmaengine channel object members
+ * @last_used: place holder for allocation to continue from where it left off
+ * @all_slots: complete domain of slots usable by the channel
+ * @slots_allocated: records the actual size of the descriptor slot pool
+ * @irq_tasklet: bottom half where mv_xor_slot_cleanup runs
+ */
+struct mv_xor_chan {
+ int pending;
+ dma_cookie_t completed_cookie;
+ spinlock_t lock; /* protects the descriptor slot pool */
+ void __iomem *mmr_base;
+ unsigned int idx;
+ enum dma_transaction_type current_type;
+ struct list_head chain;
+ struct list_head completed_slots;
+ struct mv_xor_device *device;
+ struct dma_chan common;
+ struct mv_xor_desc_slot *last_used;
+ struct list_head all_slots;
+ int slots_allocated;
+ struct tasklet_struct irq_tasklet;
+#ifdef USE_TIMER
+ unsigned long cleanup_time;
+ u32 current_on_last_cleanup;
+ dma_cookie_t is_complete_cookie;
+#endif
+};
+
+/**
+ * struct mv_xor_desc_slot - software descriptor
+ * @slot_node: node on the mv_xor_chan.all_slots list
+ * @chain_node: node on the mv_xor_chan.chain list
+ * @completed_node: node on the mv_xor_chan.completed_slots list
+ * @hw_desc: virtual address of the hardware descriptor chain
+ * @phys: hardware address of the hardware descriptor chain
+ * @group_head: first operation in a transaction
+ * @slot_cnt: total slots used in an transaction (group of operations)
+ * @slots_per_op: number of slots per operation
+ * @idx: pool index
+ * @unmap_src_cnt: number of xor sources
+ * @unmap_len: transaction bytecount
+ * @tx_list: list of slots that make up a multi-descriptor transaction
+ * @async_tx: support for the async_tx api
+ * @xor_check_result: result of zero sum
+ * @crc32_result: result crc calculation
+ */
+struct mv_xor_desc_slot {
+ struct list_head slot_node;
+ struct list_head chain_node;
+ struct list_head completed_node;
+ enum dma_transaction_type type;
+ void *hw_desc;
+ struct mv_xor_desc_slot *group_head;
+ u16 slot_cnt;
+ u16 slots_per_op;
+ u16 idx;
+ u16 unmap_src_cnt;
+ u32 value;
+ size_t unmap_len;
+ struct list_head tx_list;
+ struct dma_async_tx_descriptor async_tx;
+ union {
+ u32 *xor_check_result;
+ u32 *crc32_result;
+ };
+#ifdef USE_TIMER
+ unsigned long arrival_time;
+ struct timer_list timeout;
+#endif
+};
+
+/* This structure describes XOR descriptor size 64bytes */
+struct mv_xor_desc {
+ u32 status; /* descriptor execution status */
+ u32 crc32_result; /* result of CRC-32 calculation */
+ u32 desc_command; /* type of operation to be carried out */
+ u32 phy_next_desc; /* next descriptor address pointer */
+ u32 byte_count; /* size of src/dst blocks in bytes */
+ u32 phy_dest_addr; /* destination block address */
+ u32 phy_src_addr[8]; /* source block addresses */
+ u32 reserved0;
+ u32 reserved1;
+};
+
+#define to_mv_sw_desc(addr_hw_desc) \
+ container_of(addr_hw_desc, struct mv_xor_desc_slot, hw_desc)
+
+#define mv_hw_desc_slot_idx(hw_desc, idx) \
+ ((void *)(((unsigned long)hw_desc) + ((idx) << 5)))
+
+#define MV_XOR_MIN_BYTE_COUNT (128)
+#define XOR_MAX_BYTE_COUNT ((16 * 1024 * 1024) - 1)
+#define MV_XOR_MAX_BYTE_COUNT XOR_MAX_BYTE_COUNT
+
+
+#endif
diff --git a/drivers/dma/ppc4xx/Makefile b/drivers/dma/ppc4xx/Makefile
new file mode 100644
index 00000000000..b3d259b3e52
--- /dev/null
+++ b/drivers/dma/ppc4xx/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_AMCC_PPC440SPE_ADMA) += adma.o
diff --git a/drivers/dma/ppc4xx/adma.c b/drivers/dma/ppc4xx/adma.c
new file mode 100644
index 00000000000..0a3478e910f
--- /dev/null
+++ b/drivers/dma/ppc4xx/adma.c
@@ -0,0 +1,5027 @@
+/*
+ * Copyright (C) 2006-2009 DENX Software Engineering.
+ *
+ * Author: Yuri Tikhonov <yur@emcraft.com>
+ *
+ * Further porting to arch/powerpc by
+ * Anatolij Gustschin <agust@denx.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+
+/*
+ * This driver supports the asynchrounous DMA copy and RAID engines available
+ * on the AMCC PPC440SPe Processors.
+ * Based on the Intel Xscale(R) family of I/O Processors (IOP 32x, 33x, 134x)
+ * ADMA driver written by D.Williams.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/async_tx.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/uaccess.h>
+#include <linux/proc_fs.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <asm/dcr.h>
+#include <asm/dcr-regs.h>
+#include "adma.h"
+
+enum ppc_adma_init_code {
+ PPC_ADMA_INIT_OK = 0,
+ PPC_ADMA_INIT_MEMRES,
+ PPC_ADMA_INIT_MEMREG,
+ PPC_ADMA_INIT_ALLOC,
+ PPC_ADMA_INIT_COHERENT,
+ PPC_ADMA_INIT_CHANNEL,
+ PPC_ADMA_INIT_IRQ1,
+ PPC_ADMA_INIT_IRQ2,
+ PPC_ADMA_INIT_REGISTER
+};
+
+static char *ppc_adma_errors[] = {
+ [PPC_ADMA_INIT_OK] = "ok",
+ [PPC_ADMA_INIT_MEMRES] = "failed to get memory resource",
+ [PPC_ADMA_INIT_MEMREG] = "failed to request memory region",
+ [PPC_ADMA_INIT_ALLOC] = "failed to allocate memory for adev "
+ "structure",
+ [PPC_ADMA_INIT_COHERENT] = "failed to allocate coherent memory for "
+ "hardware descriptors",
+ [PPC_ADMA_INIT_CHANNEL] = "failed to allocate memory for channel",
+ [PPC_ADMA_INIT_IRQ1] = "failed to request first irq",
+ [PPC_ADMA_INIT_IRQ2] = "failed to request second irq",
+ [PPC_ADMA_INIT_REGISTER] = "failed to register dma async device",
+};
+
+static enum ppc_adma_init_code
+ppc440spe_adma_devices[PPC440SPE_ADMA_ENGINES_NUM];
+
+struct ppc_dma_chan_ref {
+ struct dma_chan *chan;
+ struct list_head node;
+};
+
+/* The list of channels exported by ppc440spe ADMA */
+struct list_head
+ppc440spe_adma_chan_list = LIST_HEAD_INIT(ppc440spe_adma_chan_list);
+
+/* This flag is set when want to refetch the xor chain in the interrupt
+ * handler
+ */
+static u32 do_xor_refetch;
+
+/* Pointer to DMA0, DMA1 CP/CS FIFO */
+static void *ppc440spe_dma_fifo_buf;
+
+/* Pointers to last submitted to DMA0, DMA1 CDBs */
+static struct ppc440spe_adma_desc_slot *chan_last_sub[3];
+static struct ppc440spe_adma_desc_slot *chan_first_cdb[3];
+
+/* Pointer to last linked and submitted xor CB */
+static struct ppc440spe_adma_desc_slot *xor_last_linked;
+static struct ppc440spe_adma_desc_slot *xor_last_submit;
+
+/* This array is used in data-check operations for storing a pattern */
+static char ppc440spe_qword[16];
+
+static atomic_t ppc440spe_adma_err_irq_ref;
+static dcr_host_t ppc440spe_mq_dcr_host;
+static unsigned int ppc440spe_mq_dcr_len;
+
+/* Since RXOR operations use the common register (MQ0_CF2H) for setting-up
+ * the block size in transactions, then we do not allow to activate more than
+ * only one RXOR transactions simultaneously. So use this var to store
+ * the information about is RXOR currently active (PPC440SPE_RXOR_RUN bit is
+ * set) or not (PPC440SPE_RXOR_RUN is clear).
+ */
+static unsigned long ppc440spe_rxor_state;
+
+/* These are used in enable & check routines
+ */
+static u32 ppc440spe_r6_enabled;
+static struct ppc440spe_adma_chan *ppc440spe_r6_tchan;
+static struct completion ppc440spe_r6_test_comp;
+
+static int ppc440spe_adma_dma2rxor_prep_src(
+ struct ppc440spe_adma_desc_slot *desc,
+ struct ppc440spe_rxor *cursor, int index,
+ int src_cnt, u32 addr);
+static void ppc440spe_adma_dma2rxor_set_src(
+ struct ppc440spe_adma_desc_slot *desc,
+ int index, dma_addr_t addr);
+static void ppc440spe_adma_dma2rxor_set_mult(
+ struct ppc440spe_adma_desc_slot *desc,
+ int index, u8 mult);
+
+#ifdef ADMA_LL_DEBUG
+#define ADMA_LL_DBG(x) ({ if (1) x; 0; })
+#else
+#define ADMA_LL_DBG(x) ({ if (0) x; 0; })
+#endif
+
+static void print_cb(struct ppc440spe_adma_chan *chan, void *block)
+{
+ struct dma_cdb *cdb;
+ struct xor_cb *cb;
+ int i;
+
+ switch (chan->device->id) {
+ case 0:
+ case 1:
+ cdb = block;
+
+ pr_debug("CDB at %p [%d]:\n"
+ "\t attr 0x%02x opc 0x%02x cnt 0x%08x\n"
+ "\t sg1u 0x%08x sg1l 0x%08x\n"
+ "\t sg2u 0x%08x sg2l 0x%08x\n"
+ "\t sg3u 0x%08x sg3l 0x%08x\n",
+ cdb, chan->device->id,
+ cdb->attr, cdb->opc, le32_to_cpu(cdb->cnt),
+ le32_to_cpu(cdb->sg1u), le32_to_cpu(cdb->sg1l),
+ le32_to_cpu(cdb->sg2u), le32_to_cpu(cdb->sg2l),
+ le32_to_cpu(cdb->sg3u), le32_to_cpu(cdb->sg3l)
+ );
+ break;
+ case 2:
+ cb = block;
+
+ pr_debug("CB at %p [%d]:\n"
+ "\t cbc 0x%08x cbbc 0x%08x cbs 0x%08x\n"
+ "\t cbtah 0x%08x cbtal 0x%08x\n"
+ "\t cblah 0x%08x cblal 0x%08x\n",
+ cb, chan->device->id,
+ cb->cbc, cb->cbbc, cb->cbs,
+ cb->cbtah, cb->cbtal,
+ cb->cblah, cb->cblal);
+ for (i = 0; i < 16; i++) {
+ if (i && !cb->ops[i].h && !cb->ops[i].l)
+ continue;
+ pr_debug("\t ops[%2d]: h 0x%08x l 0x%08x\n",
+ i, cb->ops[i].h, cb->ops[i].l);
+ }
+ break;
+ }
+}
+
+static void print_cb_list(struct ppc440spe_adma_chan *chan,
+ struct ppc440spe_adma_desc_slot *iter)
+{
+ for (; iter; iter = iter->hw_next)
+ print_cb(chan, iter->hw_desc);
+}
+
+static void prep_dma_xor_dbg(int id, dma_addr_t dst, dma_addr_t *src,
+ unsigned int src_cnt)
+{
+ int i;
+
+ pr_debug("\n%s(%d):\nsrc: ", __func__, id);
+ for (i = 0; i < src_cnt; i++)
+ pr_debug("\t0x%016llx ", src[i]);
+ pr_debug("dst:\n\t0x%016llx\n", dst);
+}
+
+static void prep_dma_pq_dbg(int id, dma_addr_t *dst, dma_addr_t *src,
+ unsigned int src_cnt)
+{
+ int i;
+
+ pr_debug("\n%s(%d):\nsrc: ", __func__, id);
+ for (i = 0; i < src_cnt; i++)
+ pr_debug("\t0x%016llx ", src[i]);
+ pr_debug("dst: ");
+ for (i = 0; i < 2; i++)
+ pr_debug("\t0x%016llx ", dst[i]);
+}
+
+static void prep_dma_pqzero_sum_dbg(int id, dma_addr_t *src,
+ unsigned int src_cnt,
+ const unsigned char *scf)
+{
+ int i;
+
+ pr_debug("\n%s(%d):\nsrc(coef): ", __func__, id);
+ if (scf) {
+ for (i = 0; i < src_cnt; i++)
+ pr_debug("\t0x%016llx(0x%02x) ", src[i], scf[i]);
+ } else {
+ for (i = 0; i < src_cnt; i++)
+ pr_debug("\t0x%016llx(no) ", src[i]);
+ }
+
+ pr_debug("dst: ");
+ for (i = 0; i < 2; i++)
+ pr_debug("\t0x%016llx ", src[src_cnt + i]);
+}
+
+/******************************************************************************
+ * Command (Descriptor) Blocks low-level routines
+ ******************************************************************************/
+/**
+ * ppc440spe_desc_init_interrupt - initialize the descriptor for INTERRUPT
+ * pseudo operation
+ */
+static void ppc440spe_desc_init_interrupt(struct ppc440spe_adma_desc_slot *desc,
+ struct ppc440spe_adma_chan *chan)
+{
+ struct xor_cb *p;
+
+ switch (chan->device->id) {
+ case PPC440SPE_XOR_ID:
+ p = desc->hw_desc;
+ memset(desc->hw_desc, 0, sizeof(struct xor_cb));
+ /* NOP with Command Block Complete Enable */
+ p->cbc = XOR_CBCR_CBCE_BIT;
+ break;
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ memset(desc->hw_desc, 0, sizeof(struct dma_cdb));
+ /* NOP with interrupt */
+ set_bit(PPC440SPE_DESC_INT, &desc->flags);
+ break;
+ default:
+ printk(KERN_ERR "Unsupported id %d in %s\n", chan->device->id,
+ __func__);
+ break;
+ }
+}
+
+/**
+ * ppc440spe_desc_init_null_xor - initialize the descriptor for NULL XOR
+ * pseudo operation
+ */
+static void ppc440spe_desc_init_null_xor(struct ppc440spe_adma_desc_slot *desc)
+{
+ memset(desc->hw_desc, 0, sizeof(struct xor_cb));
+ desc->hw_next = NULL;
+ desc->src_cnt = 0;
+ desc->dst_cnt = 1;
+}
+
+/**
+ * ppc440spe_desc_init_xor - initialize the descriptor for XOR operation
+ */
+static void ppc440spe_desc_init_xor(struct ppc440spe_adma_desc_slot *desc,
+ int src_cnt, unsigned long flags)
+{
+ struct xor_cb *hw_desc = desc->hw_desc;
+
+ memset(desc->hw_desc, 0, sizeof(struct xor_cb));
+ desc->hw_next = NULL;
+ desc->src_cnt = src_cnt;
+ desc->dst_cnt = 1;
+
+ hw_desc->cbc = XOR_CBCR_TGT_BIT | src_cnt;
+ if (flags & DMA_PREP_INTERRUPT)
+ /* Enable interrupt on completion */
+ hw_desc->cbc |= XOR_CBCR_CBCE_BIT;
+}
+
+/**
+ * ppc440spe_desc_init_dma2pq - initialize the descriptor for PQ
+ * operation in DMA2 controller
+ */
+static void ppc440spe_desc_init_dma2pq(struct ppc440spe_adma_desc_slot *desc,
+ int dst_cnt, int src_cnt, unsigned long flags)
+{
+ struct xor_cb *hw_desc = desc->hw_desc;
+
+ memset(desc->hw_desc, 0, sizeof(struct xor_cb));
+ desc->hw_next = NULL;
+ desc->src_cnt = src_cnt;
+ desc->dst_cnt = dst_cnt;
+ memset(desc->reverse_flags, 0, sizeof(desc->reverse_flags));
+ desc->descs_per_op = 0;
+
+ hw_desc->cbc = XOR_CBCR_TGT_BIT;
+ if (flags & DMA_PREP_INTERRUPT)
+ /* Enable interrupt on completion */
+ hw_desc->cbc |= XOR_CBCR_CBCE_BIT;
+}
+
+#define DMA_CTRL_FLAGS_LAST DMA_PREP_FENCE
+#define DMA_PREP_ZERO_P (DMA_CTRL_FLAGS_LAST << 1)
+#define DMA_PREP_ZERO_Q (DMA_PREP_ZERO_P << 1)
+
+/**
+ * ppc440spe_desc_init_dma01pq - initialize the descriptors for PQ operation
+ * with DMA0/1
+ */
+static void ppc440spe_desc_init_dma01pq(struct ppc440spe_adma_desc_slot *desc,
+ int dst_cnt, int src_cnt, unsigned long flags,
+ unsigned long op)
+{
+ struct dma_cdb *hw_desc;
+ struct ppc440spe_adma_desc_slot *iter;
+ u8 dopc;
+
+ /* Common initialization of a PQ descriptors chain */
+ set_bits(op, &desc->flags);
+ desc->src_cnt = src_cnt;
+ desc->dst_cnt = dst_cnt;
+
+ /* WXOR MULTICAST if both P and Q are being computed
+ * MV_SG1_SG2 if Q only
+ */
+ dopc = (desc->dst_cnt == DMA_DEST_MAX_NUM) ?
+ DMA_CDB_OPC_MULTICAST : DMA_CDB_OPC_MV_SG1_SG2;
+
+ list_for_each_entry(iter, &desc->group_list, chain_node) {
+ hw_desc = iter->hw_desc;
+ memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+
+ if (likely(!list_is_last(&iter->chain_node,
+ &desc->group_list))) {
+ /* set 'next' pointer */
+ iter->hw_next = list_entry(iter->chain_node.next,
+ struct ppc440spe_adma_desc_slot, chain_node);
+ clear_bit(PPC440SPE_DESC_INT, &iter->flags);
+ } else {
+ /* this is the last descriptor.
+ * this slot will be pasted from ADMA level
+ * each time it wants to configure parameters
+ * of the transaction (src, dst, ...)
+ */
+ iter->hw_next = NULL;
+ if (flags & DMA_PREP_INTERRUPT)
+ set_bit(PPC440SPE_DESC_INT, &iter->flags);
+ else
+ clear_bit(PPC440SPE_DESC_INT, &iter->flags);
+ }
+ }
+
+ /* Set OPS depending on WXOR/RXOR type of operation */
+ if (!test_bit(PPC440SPE_DESC_RXOR, &desc->flags)) {
+ /* This is a WXOR only chain:
+ * - first descriptors are for zeroing destinations
+ * if PPC440SPE_ZERO_P/Q set;
+ * - descriptors remained are for GF-XOR operations.
+ */
+ iter = list_first_entry(&desc->group_list,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+
+ if (test_bit(PPC440SPE_ZERO_P, &desc->flags)) {
+ hw_desc = iter->hw_desc;
+ hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+ iter = list_first_entry(&iter->chain_node,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+ }
+
+ if (test_bit(PPC440SPE_ZERO_Q, &desc->flags)) {
+ hw_desc = iter->hw_desc;
+ hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+ iter = list_first_entry(&iter->chain_node,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+ }
+
+ list_for_each_entry_from(iter, &desc->group_list, chain_node) {
+ hw_desc = iter->hw_desc;
+ hw_desc->opc = dopc;
+ }
+ } else {
+ /* This is either RXOR-only or mixed RXOR/WXOR */
+
+ /* The first 1 or 2 slots in chain are always RXOR,
+ * if need to calculate P & Q, then there are two
+ * RXOR slots; if only P or only Q, then there is one
+ */
+ iter = list_first_entry(&desc->group_list,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+ hw_desc = iter->hw_desc;
+ hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+
+ if (desc->dst_cnt == DMA_DEST_MAX_NUM) {
+ iter = list_first_entry(&iter->chain_node,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+ hw_desc = iter->hw_desc;
+ hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+ }
+
+ /* The remaining descs (if any) are WXORs */
+ if (test_bit(PPC440SPE_DESC_WXOR, &desc->flags)) {
+ iter = list_first_entry(&iter->chain_node,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+ list_for_each_entry_from(iter, &desc->group_list,
+ chain_node) {
+ hw_desc = iter->hw_desc;
+ hw_desc->opc = dopc;
+ }
+ }
+ }
+}
+
+/**
+ * ppc440spe_desc_init_dma01pqzero_sum - initialize the descriptor
+ * for PQ_ZERO_SUM operation
+ */
+static void ppc440spe_desc_init_dma01pqzero_sum(
+ struct ppc440spe_adma_desc_slot *desc,
+ int dst_cnt, int src_cnt)
+{
+ struct dma_cdb *hw_desc;
+ struct ppc440spe_adma_desc_slot *iter;
+ int i = 0;
+ u8 dopc = (dst_cnt == 2) ? DMA_CDB_OPC_MULTICAST :
+ DMA_CDB_OPC_MV_SG1_SG2;
+ /*
+ * Initialize starting from 2nd or 3rd descriptor dependent
+ * on dst_cnt. First one or two slots are for cloning P
+ * and/or Q to chan->pdest and/or chan->qdest as we have
+ * to preserve original P/Q.
+ */
+ iter = list_first_entry(&desc->group_list,
+ struct ppc440spe_adma_desc_slot, chain_node);
+ iter = list_entry(iter->chain_node.next,
+ struct ppc440spe_adma_desc_slot, chain_node);
+
+ if (dst_cnt > 1) {
+ iter = list_entry(iter->chain_node.next,
+ struct ppc440spe_adma_desc_slot, chain_node);
+ }
+ /* initialize each source descriptor in chain */
+ list_for_each_entry_from(iter, &desc->group_list, chain_node) {
+ hw_desc = iter->hw_desc;
+ memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+ iter->src_cnt = 0;
+ iter->dst_cnt = 0;
+
+ /* This is a ZERO_SUM operation:
+ * - <src_cnt> descriptors starting from 2nd or 3rd
+ * descriptor are for GF-XOR operations;
+ * - remaining <dst_cnt> descriptors are for checking the result
+ */
+ if (i++ < src_cnt)
+ /* MV_SG1_SG2 if only Q is being verified
+ * MULTICAST if both P and Q are being verified
+ */
+ hw_desc->opc = dopc;
+ else
+ /* DMA_CDB_OPC_DCHECK128 operation */
+ hw_desc->opc = DMA_CDB_OPC_DCHECK128;
+
+ if (likely(!list_is_last(&iter->chain_node,
+ &desc->group_list))) {
+ /* set 'next' pointer */
+ iter->hw_next = list_entry(iter->chain_node.next,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+ } else {
+ /* this is the last descriptor.
+ * this slot will be pasted from ADMA level
+ * each time it wants to configure parameters
+ * of the transaction (src, dst, ...)
+ */
+ iter->hw_next = NULL;
+ /* always enable interrupt generation since we get
+ * the status of pqzero from the handler
+ */
+ set_bit(PPC440SPE_DESC_INT, &iter->flags);
+ }
+ }
+ desc->src_cnt = src_cnt;
+ desc->dst_cnt = dst_cnt;
+}
+
+/**
+ * ppc440spe_desc_init_memcpy - initialize the descriptor for MEMCPY operation
+ */
+static void ppc440spe_desc_init_memcpy(struct ppc440spe_adma_desc_slot *desc,
+ unsigned long flags)
+{
+ struct dma_cdb *hw_desc = desc->hw_desc;
+
+ memset(desc->hw_desc, 0, sizeof(struct dma_cdb));
+ desc->hw_next = NULL;
+ desc->src_cnt = 1;
+ desc->dst_cnt = 1;
+
+ if (flags & DMA_PREP_INTERRUPT)
+ set_bit(PPC440SPE_DESC_INT, &desc->flags);
+ else
+ clear_bit(PPC440SPE_DESC_INT, &desc->flags);
+
+ hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+}
+
+/**
+ * ppc440spe_desc_init_memset - initialize the descriptor for MEMSET operation
+ */
+static void ppc440spe_desc_init_memset(struct ppc440spe_adma_desc_slot *desc,
+ int value, unsigned long flags)
+{
+ struct dma_cdb *hw_desc = desc->hw_desc;
+
+ memset(desc->hw_desc, 0, sizeof(struct dma_cdb));
+ desc->hw_next = NULL;
+ desc->src_cnt = 1;
+ desc->dst_cnt = 1;
+
+ if (flags & DMA_PREP_INTERRUPT)
+ set_bit(PPC440SPE_DESC_INT, &desc->flags);
+ else
+ clear_bit(PPC440SPE_DESC_INT, &desc->flags);
+
+ hw_desc->sg1u = hw_desc->sg1l = cpu_to_le32((u32)value);
+ hw_desc->sg3u = hw_desc->sg3l = cpu_to_le32((u32)value);
+ hw_desc->opc = DMA_CDB_OPC_DFILL128;
+}
+
+/**
+ * ppc440spe_desc_set_src_addr - set source address into the descriptor
+ */
+static void ppc440spe_desc_set_src_addr(struct ppc440spe_adma_desc_slot *desc,
+ struct ppc440spe_adma_chan *chan,
+ int src_idx, dma_addr_t addrh,
+ dma_addr_t addrl)
+{
+ struct dma_cdb *dma_hw_desc;
+ struct xor_cb *xor_hw_desc;
+ phys_addr_t addr64, tmplow, tmphi;
+
+ switch (chan->device->id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ if (!addrh) {
+ addr64 = addrl;
+ tmphi = (addr64 >> 32);
+ tmplow = (addr64 & 0xFFFFFFFF);
+ } else {
+ tmphi = addrh;
+ tmplow = addrl;
+ }
+ dma_hw_desc = desc->hw_desc;
+ dma_hw_desc->sg1l = cpu_to_le32((u32)tmplow);
+ dma_hw_desc->sg1u |= cpu_to_le32((u32)tmphi);
+ break;
+ case PPC440SPE_XOR_ID:
+ xor_hw_desc = desc->hw_desc;
+ xor_hw_desc->ops[src_idx].l = addrl;
+ xor_hw_desc->ops[src_idx].h |= addrh;
+ break;
+ }
+}
+
+/**
+ * ppc440spe_desc_set_src_mult - set source address mult into the descriptor
+ */
+static void ppc440spe_desc_set_src_mult(struct ppc440spe_adma_desc_slot *desc,
+ struct ppc440spe_adma_chan *chan, u32 mult_index,
+ int sg_index, unsigned char mult_value)
+{
+ struct dma_cdb *dma_hw_desc;
+ struct xor_cb *xor_hw_desc;
+ u32 *psgu;
+
+ switch (chan->device->id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ dma_hw_desc = desc->hw_desc;
+
+ switch (sg_index) {
+ /* for RXOR operations set multiplier
+ * into source cued address
+ */
+ case DMA_CDB_SG_SRC:
+ psgu = &dma_hw_desc->sg1u;
+ break;
+ /* for WXOR operations set multiplier
+ * into destination cued address(es)
+ */
+ case DMA_CDB_SG_DST1:
+ psgu = &dma_hw_desc->sg2u;
+ break;
+ case DMA_CDB_SG_DST2:
+ psgu = &dma_hw_desc->sg3u;
+ break;
+ default:
+ BUG();
+ }
+
+ *psgu |= cpu_to_le32(mult_value << mult_index);
+ break;
+ case PPC440SPE_XOR_ID:
+ xor_hw_desc = desc->hw_desc;
+ break;
+ default:
+ BUG();
+ }
+}
+
+/**
+ * ppc440spe_desc_set_dest_addr - set destination address into the descriptor
+ */
+static void ppc440spe_desc_set_dest_addr(struct ppc440spe_adma_desc_slot *desc,
+ struct ppc440spe_adma_chan *chan,
+ dma_addr_t addrh, dma_addr_t addrl,
+ u32 dst_idx)
+{
+ struct dma_cdb *dma_hw_desc;
+ struct xor_cb *xor_hw_desc;
+ phys_addr_t addr64, tmphi, tmplow;
+ u32 *psgu, *psgl;
+
+ switch (chan->device->id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ if (!addrh) {
+ addr64 = addrl;
+ tmphi = (addr64 >> 32);
+ tmplow = (addr64 & 0xFFFFFFFF);
+ } else {
+ tmphi = addrh;
+ tmplow = addrl;
+ }
+ dma_hw_desc = desc->hw_desc;
+
+ psgu = dst_idx ? &dma_hw_desc->sg3u : &dma_hw_desc->sg2u;
+ psgl = dst_idx ? &dma_hw_desc->sg3l : &dma_hw_desc->sg2l;
+
+ *psgl = cpu_to_le32((u32)tmplow);
+ *psgu |= cpu_to_le32((u32)tmphi);
+ break;
+ case PPC440SPE_XOR_ID:
+ xor_hw_desc = desc->hw_desc;
+ xor_hw_desc->cbtal = addrl;
+ xor_hw_desc->cbtah |= addrh;
+ break;
+ }
+}
+
+/**
+ * ppc440spe_desc_set_byte_count - set number of data bytes involved
+ * into the operation
+ */
+static void ppc440spe_desc_set_byte_count(struct ppc440spe_adma_desc_slot *desc,
+ struct ppc440spe_adma_chan *chan,
+ u32 byte_count)
+{
+ struct dma_cdb *dma_hw_desc;
+ struct xor_cb *xor_hw_desc;
+
+ switch (chan->device->id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ dma_hw_desc = desc->hw_desc;
+ dma_hw_desc->cnt = cpu_to_le32(byte_count);
+ break;
+ case PPC440SPE_XOR_ID:
+ xor_hw_desc = desc->hw_desc;
+ xor_hw_desc->cbbc = byte_count;
+ break;
+ }
+}
+
+/**
+ * ppc440spe_desc_set_rxor_block_size - set RXOR block size
+ */
+static inline void ppc440spe_desc_set_rxor_block_size(u32 byte_count)
+{
+ /* assume that byte_count is aligned on the 512-boundary;
+ * thus write it directly to the register (bits 23:31 are
+ * reserved there).
+ */
+ dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_CF2H, byte_count);
+}
+
+/**
+ * ppc440spe_desc_set_dcheck - set CHECK pattern
+ */
+static void ppc440spe_desc_set_dcheck(struct ppc440spe_adma_desc_slot *desc,
+ struct ppc440spe_adma_chan *chan, u8 *qword)
+{
+ struct dma_cdb *dma_hw_desc;
+
+ switch (chan->device->id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ dma_hw_desc = desc->hw_desc;
+ iowrite32(qword[0], &dma_hw_desc->sg3l);
+ iowrite32(qword[4], &dma_hw_desc->sg3u);
+ iowrite32(qword[8], &dma_hw_desc->sg2l);
+ iowrite32(qword[12], &dma_hw_desc->sg2u);
+ break;
+ default:
+ BUG();
+ }
+}
+
+/**
+ * ppc440spe_xor_set_link - set link address in xor CB
+ */
+static void ppc440spe_xor_set_link(struct ppc440spe_adma_desc_slot *prev_desc,
+ struct ppc440spe_adma_desc_slot *next_desc)
+{
+ struct xor_cb *xor_hw_desc = prev_desc->hw_desc;
+
+ if (unlikely(!next_desc || !(next_desc->phys))) {
+ printk(KERN_ERR "%s: next_desc=0x%p; next_desc->phys=0x%llx\n",
+ __func__, next_desc,
+ next_desc ? next_desc->phys : 0);
+ BUG();
+ }
+
+ xor_hw_desc->cbs = 0;
+ xor_hw_desc->cblal = next_desc->phys;
+ xor_hw_desc->cblah = 0;
+ xor_hw_desc->cbc |= XOR_CBCR_LNK_BIT;
+}
+
+/**
+ * ppc440spe_desc_set_link - set the address of descriptor following this
+ * descriptor in chain
+ */
+static void ppc440spe_desc_set_link(struct ppc440spe_adma_chan *chan,
+ struct ppc440spe_adma_desc_slot *prev_desc,
+ struct ppc440spe_adma_desc_slot *next_desc)
+{
+ unsigned long flags;
+ struct ppc440spe_adma_desc_slot *tail = next_desc;
+
+ if (unlikely(!prev_desc || !next_desc ||
+ (prev_desc->hw_next && prev_desc->hw_next != next_desc))) {
+ /* If previous next is overwritten something is wrong.
+ * though we may refetch from append to initiate list
+ * processing; in this case - it's ok.
+ */
+ printk(KERN_ERR "%s: prev_desc=0x%p; next_desc=0x%p; "
+ "prev->hw_next=0x%p\n", __func__, prev_desc,
+ next_desc, prev_desc ? prev_desc->hw_next : 0);
+ BUG();
+ }
+
+ local_irq_save(flags);
+
+ /* do s/w chaining both for DMA and XOR descriptors */
+ prev_desc->hw_next = next_desc;
+
+ switch (chan->device->id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ break;
+ case PPC440SPE_XOR_ID:
+ /* bind descriptor to the chain */
+ while (tail->hw_next)
+ tail = tail->hw_next;
+ xor_last_linked = tail;
+
+ if (prev_desc == xor_last_submit)
+ /* do not link to the last submitted CB */
+ break;
+ ppc440spe_xor_set_link(prev_desc, next_desc);
+ break;
+ }
+
+ local_irq_restore(flags);
+}
+
+/**
+ * ppc440spe_desc_get_src_addr - extract the source address from the descriptor
+ */
+static u32 ppc440spe_desc_get_src_addr(struct ppc440spe_adma_desc_slot *desc,
+ struct ppc440spe_adma_chan *chan, int src_idx)
+{
+ struct dma_cdb *dma_hw_desc;
+ struct xor_cb *xor_hw_desc;
+
+ switch (chan->device->id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ dma_hw_desc = desc->hw_desc;
+ /* May have 0, 1, 2, or 3 sources */
+ switch (dma_hw_desc->opc) {
+ case DMA_CDB_OPC_NO_OP:
+ case DMA_CDB_OPC_DFILL128:
+ return 0;
+ case DMA_CDB_OPC_DCHECK128:
+ if (unlikely(src_idx)) {
+ printk(KERN_ERR "%s: try to get %d source for"
+ " DCHECK128\n", __func__, src_idx);
+ BUG();
+ }
+ return le32_to_cpu(dma_hw_desc->sg1l);
+ case DMA_CDB_OPC_MULTICAST:
+ case DMA_CDB_OPC_MV_SG1_SG2:
+ if (unlikely(src_idx > 2)) {
+ printk(KERN_ERR "%s: try to get %d source from"
+ " DMA descr\n", __func__, src_idx);
+ BUG();
+ }
+ if (src_idx) {
+ if (le32_to_cpu(dma_hw_desc->sg1u) &
+ DMA_CUED_XOR_WIN_MSK) {
+ u8 region;
+
+ if (src_idx == 1)
+ return le32_to_cpu(
+ dma_hw_desc->sg1l) +
+ desc->unmap_len;
+
+ region = (le32_to_cpu(
+ dma_hw_desc->sg1u)) >>
+ DMA_CUED_REGION_OFF;
+
+ region &= DMA_CUED_REGION_MSK;
+ switch (region) {
+ case DMA_RXOR123:
+ return le32_to_cpu(
+ dma_hw_desc->sg1l) +
+ (desc->unmap_len << 1);
+ case DMA_RXOR124:
+ return le32_to_cpu(
+ dma_hw_desc->sg1l) +
+ (desc->unmap_len * 3);
+ case DMA_RXOR125:
+ return le32_to_cpu(
+ dma_hw_desc->sg1l) +
+ (desc->unmap_len << 2);
+ default:
+ printk(KERN_ERR
+ "%s: try to"
+ " get src3 for region %02x"
+ "PPC440SPE_DESC_RXOR12?\n",
+ __func__, region);
+ BUG();
+ }
+ } else {
+ printk(KERN_ERR
+ "%s: try to get %d"
+ " source for non-cued descr\n",
+ __func__, src_idx);
+ BUG();
+ }
+ }
+ return le32_to_cpu(dma_hw_desc->sg1l);
+ default:
+ printk(KERN_ERR "%s: unknown OPC 0x%02x\n",
+ __func__, dma_hw_desc->opc);
+ BUG();
+ }
+ return le32_to_cpu(dma_hw_desc->sg1l);
+ case PPC440SPE_XOR_ID:
+ /* May have up to 16 sources */
+ xor_hw_desc = desc->hw_desc;
+ return xor_hw_desc->ops[src_idx].l;
+ }
+ return 0;
+}
+
+/**
+ * ppc440spe_desc_get_dest_addr - extract the destination address from the
+ * descriptor
+ */
+static u32 ppc440spe_desc_get_dest_addr(struct ppc440spe_adma_desc_slot *desc,
+ struct ppc440spe_adma_chan *chan, int idx)
+{
+ struct dma_cdb *dma_hw_desc;
+ struct xor_cb *xor_hw_desc;
+
+ switch (chan->device->id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ dma_hw_desc = desc->hw_desc;
+
+ if (likely(!idx))
+ return le32_to_cpu(dma_hw_desc->sg2l);
+ return le32_to_cpu(dma_hw_desc->sg3l);
+ case PPC440SPE_XOR_ID:
+ xor_hw_desc = desc->hw_desc;
+ return xor_hw_desc->cbtal;
+ }
+ return 0;
+}
+
+/**
+ * ppc440spe_desc_get_src_num - extract the number of source addresses from
+ * the descriptor
+ */
+static u32 ppc440spe_desc_get_src_num(struct ppc440spe_adma_desc_slot *desc,
+ struct ppc440spe_adma_chan *chan)
+{
+ struct dma_cdb *dma_hw_desc;
+ struct xor_cb *xor_hw_desc;
+
+ switch (chan->device->id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ dma_hw_desc = desc->hw_desc;
+
+ switch (dma_hw_desc->opc) {
+ case DMA_CDB_OPC_NO_OP:
+ case DMA_CDB_OPC_DFILL128:
+ return 0;
+ case DMA_CDB_OPC_DCHECK128:
+ return 1;
+ case DMA_CDB_OPC_MV_SG1_SG2:
+ case DMA_CDB_OPC_MULTICAST:
+ /*
+ * Only for RXOR operations we have more than
+ * one source
+ */
+ if (le32_to_cpu(dma_hw_desc->sg1u) &
+ DMA_CUED_XOR_WIN_MSK) {
+ /* RXOR op, there are 2 or 3 sources */
+ if (((le32_to_cpu(dma_hw_desc->sg1u) >>
+ DMA_CUED_REGION_OFF) &
+ DMA_CUED_REGION_MSK) == DMA_RXOR12) {
+ /* RXOR 1-2 */
+ return 2;
+ } else {
+ /* RXOR 1-2-3/1-2-4/1-2-5 */
+ return 3;
+ }
+ }
+ return 1;
+ default:
+ printk(KERN_ERR "%s: unknown OPC 0x%02x\n",
+ __func__, dma_hw_desc->opc);
+ BUG();
+ }
+ case PPC440SPE_XOR_ID:
+ /* up to 16 sources */
+ xor_hw_desc = desc->hw_desc;
+ return xor_hw_desc->cbc & XOR_CDCR_OAC_MSK;
+ default:
+ BUG();
+ }
+ return 0;
+}
+
+/**
+ * ppc440spe_desc_get_dst_num - get the number of destination addresses in
+ * this descriptor
+ */
+static u32 ppc440spe_desc_get_dst_num(struct ppc440spe_adma_desc_slot *desc,
+ struct ppc440spe_adma_chan *chan)
+{
+ struct dma_cdb *dma_hw_desc;
+
+ switch (chan->device->id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ /* May be 1 or 2 destinations */
+ dma_hw_desc = desc->hw_desc;
+ switch (dma_hw_desc->opc) {
+ case DMA_CDB_OPC_NO_OP:
+ case DMA_CDB_OPC_DCHECK128:
+ return 0;
+ case DMA_CDB_OPC_MV_SG1_SG2:
+ case DMA_CDB_OPC_DFILL128:
+ return 1;
+ case DMA_CDB_OPC_MULTICAST:
+ if (desc->dst_cnt == 2)
+ return 2;
+ else
+ return 1;
+ default:
+ printk(KERN_ERR "%s: unknown OPC 0x%02x\n",
+ __func__, dma_hw_desc->opc);
+ BUG();
+ }
+ case PPC440SPE_XOR_ID:
+ /* Always only 1 destination */
+ return 1;
+ default:
+ BUG();
+ }
+ return 0;
+}
+
+/**
+ * ppc440spe_desc_get_link - get the address of the descriptor that
+ * follows this one
+ */
+static inline u32 ppc440spe_desc_get_link(struct ppc440spe_adma_desc_slot *desc,
+ struct ppc440spe_adma_chan *chan)
+{
+ if (!desc->hw_next)
+ return 0;
+
+ return desc->hw_next->phys;
+}
+
+/**
+ * ppc440spe_desc_is_aligned - check alignment
+ */
+static inline int ppc440spe_desc_is_aligned(
+ struct ppc440spe_adma_desc_slot *desc, int num_slots)
+{
+ return (desc->idx & (num_slots - 1)) ? 0 : 1;
+}
+
+/**
+ * ppc440spe_chan_xor_slot_count - get the number of slots necessary for
+ * XOR operation
+ */
+static int ppc440spe_chan_xor_slot_count(size_t len, int src_cnt,
+ int *slots_per_op)
+{
+ int slot_cnt;
+
+ /* each XOR descriptor provides up to 16 source operands */
+ slot_cnt = *slots_per_op = (src_cnt + XOR_MAX_OPS - 1)/XOR_MAX_OPS;
+
+ if (likely(len <= PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT))
+ return slot_cnt;
+
+ printk(KERN_ERR "%s: len %d > max %d !!\n",
+ __func__, len, PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT);
+ BUG();
+ return slot_cnt;
+}
+
+/**
+ * ppc440spe_dma2_pq_slot_count - get the number of slots necessary for
+ * DMA2 PQ operation
+ */
+static int ppc440spe_dma2_pq_slot_count(dma_addr_t *srcs,
+ int src_cnt, size_t len)
+{
+ signed long long order = 0;
+ int state = 0;
+ int addr_count = 0;
+ int i;
+ for (i = 1; i < src_cnt; i++) {
+ dma_addr_t cur_addr = srcs[i];
+ dma_addr_t old_addr = srcs[i-1];
+ switch (state) {
+ case 0:
+ if (cur_addr == old_addr + len) {
+ /* direct RXOR */
+ order = 1;
+ state = 1;
+ if (i == src_cnt-1)
+ addr_count++;
+ } else if (old_addr == cur_addr + len) {
+ /* reverse RXOR */
+ order = -1;
+ state = 1;
+ if (i == src_cnt-1)
+ addr_count++;
+ } else {
+ state = 3;
+ }
+ break;
+ case 1:
+ if (i == src_cnt-2 || (order == -1
+ && cur_addr != old_addr - len)) {
+ order = 0;
+ state = 0;
+ addr_count++;
+ } else if (cur_addr == old_addr + len*order) {
+ state = 2;
+ if (i == src_cnt-1)
+ addr_count++;
+ } else if (cur_addr == old_addr + 2*len) {
+ state = 2;
+ if (i == src_cnt-1)
+ addr_count++;
+ } else if (cur_addr == old_addr + 3*len) {
+ state = 2;
+ if (i == src_cnt-1)
+ addr_count++;
+ } else {
+ order = 0;
+ state = 0;
+ addr_count++;
+ }
+ break;
+ case 2:
+ order = 0;
+ state = 0;
+ addr_count++;
+ break;
+ }
+ if (state == 3)
+ break;
+ }
+ if (src_cnt <= 1 || (state != 1 && state != 2)) {
+ pr_err("%s: src_cnt=%d, state=%d, addr_count=%d, order=%lld\n",
+ __func__, src_cnt, state, addr_count, order);
+ for (i = 0; i < src_cnt; i++)
+ pr_err("\t[%d] 0x%llx \n", i, srcs[i]);
+ BUG();
+ }
+
+ return (addr_count + XOR_MAX_OPS - 1) / XOR_MAX_OPS;
+}
+
+
+/******************************************************************************
+ * ADMA channel low-level routines
+ ******************************************************************************/
+
+static u32
+ppc440spe_chan_get_current_descriptor(struct ppc440spe_adma_chan *chan);
+static void ppc440spe_chan_append(struct ppc440spe_adma_chan *chan);
+
+/**
+ * ppc440spe_adma_device_clear_eot_status - interrupt ack to XOR or DMA engine
+ */
+static void ppc440spe_adma_device_clear_eot_status(
+ struct ppc440spe_adma_chan *chan)
+{
+ struct dma_regs *dma_reg;
+ struct xor_regs *xor_reg;
+ u8 *p = chan->device->dma_desc_pool_virt;
+ struct dma_cdb *cdb;
+ u32 rv, i;
+
+ switch (chan->device->id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ /* read FIFO to ack */
+ dma_reg = chan->device->dma_reg;
+ while ((rv = ioread32(&dma_reg->csfpl))) {
+ i = rv & DMA_CDB_ADDR_MSK;
+ cdb = (struct dma_cdb *)&p[i -
+ (u32)chan->device->dma_desc_pool];
+
+ /* Clear opcode to ack. This is necessary for
+ * ZeroSum operations only
+ */
+ cdb->opc = 0;
+
+ if (test_bit(PPC440SPE_RXOR_RUN,
+ &ppc440spe_rxor_state)) {
+ /* probably this is a completed RXOR op,
+ * get pointer to CDB using the fact that
+ * physical and virtual addresses of CDB
+ * in pools have the same offsets
+ */
+ if (le32_to_cpu(cdb->sg1u) &
+ DMA_CUED_XOR_BASE) {
+ /* this is a RXOR */
+ clear_bit(PPC440SPE_RXOR_RUN,
+ &ppc440spe_rxor_state);
+ }
+ }
+
+ if (rv & DMA_CDB_STATUS_MSK) {
+ /* ZeroSum check failed
+ */
+ struct ppc440spe_adma_desc_slot *iter;
+ dma_addr_t phys = rv & ~DMA_CDB_MSK;
+
+ /*
+ * Update the status of corresponding
+ * descriptor.
+ */
+ list_for_each_entry(iter, &chan->chain,
+ chain_node) {
+ if (iter->phys == phys)
+ break;
+ }
+ /*
+ * if cannot find the corresponding
+ * slot it's a bug
+ */
+ BUG_ON(&iter->chain_node == &chan->chain);
+
+ if (iter->xor_check_result) {
+ if (test_bit(PPC440SPE_DESC_PCHECK,
+ &iter->flags)) {
+ *iter->xor_check_result |=
+ SUM_CHECK_P_RESULT;
+ } else
+ if (test_bit(PPC440SPE_DESC_QCHECK,
+ &iter->flags)) {
+ *iter->xor_check_result |=
+ SUM_CHECK_Q_RESULT;
+ } else
+ BUG();
+ }
+ }
+ }
+
+ rv = ioread32(&dma_reg->dsts);
+ if (rv) {
+ pr_err("DMA%d err status: 0x%x\n",
+ chan->device->id, rv);
+ /* write back to clear */
+ iowrite32(rv, &dma_reg->dsts);
+ }
+ break;
+ case PPC440SPE_XOR_ID:
+ /* reset status bits to ack */
+ xor_reg = chan->device->xor_reg;
+ rv = ioread32be(&xor_reg->sr);
+ iowrite32be(rv, &xor_reg->sr);
+
+ if (rv & (XOR_IE_ICBIE_BIT|XOR_IE_ICIE_BIT|XOR_IE_RPTIE_BIT)) {
+ if (rv & XOR_IE_RPTIE_BIT) {
+ /* Read PLB Timeout Error.
+ * Try to resubmit the CB
+ */
+ u32 val = ioread32be(&xor_reg->ccbalr);
+
+ iowrite32be(val, &xor_reg->cblalr);
+
+ val = ioread32be(&xor_reg->crsr);
+ iowrite32be(val | XOR_CRSR_XAE_BIT,
+ &xor_reg->crsr);
+ } else
+ pr_err("XOR ERR 0x%x status\n", rv);
+ break;
+ }
+
+ /* if the XORcore is idle, but there are unprocessed CBs
+ * then refetch the s/w chain here
+ */
+ if (!(ioread32be(&xor_reg->sr) & XOR_SR_XCP_BIT) &&
+ do_xor_refetch)
+ ppc440spe_chan_append(chan);
+ break;
+ }
+}
+
+/**
+ * ppc440spe_chan_is_busy - get the channel status
+ */
+static int ppc440spe_chan_is_busy(struct ppc440spe_adma_chan *chan)
+{
+ struct dma_regs *dma_reg;
+ struct xor_regs *xor_reg;
+ int busy = 0;
+
+ switch (chan->device->id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ dma_reg = chan->device->dma_reg;
+ /* if command FIFO's head and tail pointers are equal and
+ * status tail is the same as command, then channel is free
+ */
+ if (ioread16(&dma_reg->cpfhp) != ioread16(&dma_reg->cpftp) ||
+ ioread16(&dma_reg->cpftp) != ioread16(&dma_reg->csftp))
+ busy = 1;
+ break;
+ case PPC440SPE_XOR_ID:
+ /* use the special status bit for the XORcore
+ */
+ xor_reg = chan->device->xor_reg;
+ busy = (ioread32be(&xor_reg->sr) & XOR_SR_XCP_BIT) ? 1 : 0;
+ break;
+ }
+
+ return busy;
+}
+
+/**
+ * ppc440spe_chan_set_first_xor_descriptor - init XORcore chain
+ */
+static void ppc440spe_chan_set_first_xor_descriptor(
+ struct ppc440spe_adma_chan *chan,
+ struct ppc440spe_adma_desc_slot *next_desc)
+{
+ struct xor_regs *xor_reg = chan->device->xor_reg;
+
+ if (ioread32be(&xor_reg->sr) & XOR_SR_XCP_BIT)
+ printk(KERN_INFO "%s: Warn: XORcore is running "
+ "when try to set the first CDB!\n",
+ __func__);
+
+ xor_last_submit = xor_last_linked = next_desc;
+
+ iowrite32be(XOR_CRSR_64BA_BIT, &xor_reg->crsr);
+
+ iowrite32be(next_desc->phys, &xor_reg->cblalr);
+ iowrite32be(0, &xor_reg->cblahr);
+ iowrite32be(ioread32be(&xor_reg->cbcr) | XOR_CBCR_LNK_BIT,
+ &xor_reg->cbcr);
+
+ chan->hw_chain_inited = 1;
+}
+
+/**
+ * ppc440spe_dma_put_desc - put DMA0,1 descriptor to FIFO.
+ * called with irqs disabled
+ */
+static void ppc440spe_dma_put_desc(struct ppc440spe_adma_chan *chan,
+ struct ppc440spe_adma_desc_slot *desc)
+{
+ u32 pcdb;
+ struct dma_regs *dma_reg = chan->device->dma_reg;
+
+ pcdb = desc->phys;
+ if (!test_bit(PPC440SPE_DESC_INT, &desc->flags))
+ pcdb |= DMA_CDB_NO_INT;
+
+ chan_last_sub[chan->device->id] = desc;
+
+ ADMA_LL_DBG(print_cb(chan, desc->hw_desc));
+
+ iowrite32(pcdb, &dma_reg->cpfpl);
+}
+
+/**
+ * ppc440spe_chan_append - update the h/w chain in the channel
+ */
+static void ppc440spe_chan_append(struct ppc440spe_adma_chan *chan)
+{
+ struct xor_regs *xor_reg;
+ struct ppc440spe_adma_desc_slot *iter;
+ struct xor_cb *xcb;
+ u32 cur_desc;
+ unsigned long flags;
+
+ local_irq_save(flags);
+
+ switch (chan->device->id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ cur_desc = ppc440spe_chan_get_current_descriptor(chan);
+
+ if (likely(cur_desc)) {
+ iter = chan_last_sub[chan->device->id];
+ BUG_ON(!iter);
+ } else {
+ /* first peer */
+ iter = chan_first_cdb[chan->device->id];
+ BUG_ON(!iter);
+ ppc440spe_dma_put_desc(chan, iter);
+ chan->hw_chain_inited = 1;
+ }
+
+ /* is there something new to append */
+ if (!iter->hw_next)
+ break;
+
+ /* flush descriptors from the s/w queue to fifo */
+ list_for_each_entry_continue(iter, &chan->chain, chain_node) {
+ ppc440spe_dma_put_desc(chan, iter);
+ if (!iter->hw_next)
+ break;
+ }
+ break;
+ case PPC440SPE_XOR_ID:
+ /* update h/w links and refetch */
+ if (!xor_last_submit->hw_next)
+ break;
+
+ xor_reg = chan->device->xor_reg;
+ /* the last linked CDB has to generate an interrupt
+ * that we'd be able to append the next lists to h/w
+ * regardless of the XOR engine state at the moment of
+ * appending of these next lists
+ */
+ xcb = xor_last_linked->hw_desc;
+ xcb->cbc |= XOR_CBCR_CBCE_BIT;
+
+ if (!(ioread32be(&xor_reg->sr) & XOR_SR_XCP_BIT)) {
+ /* XORcore is idle. Refetch now */
+ do_xor_refetch = 0;
+ ppc440spe_xor_set_link(xor_last_submit,
+ xor_last_submit->hw_next);
+
+ ADMA_LL_DBG(print_cb_list(chan,
+ xor_last_submit->hw_next));
+
+ xor_last_submit = xor_last_linked;
+ iowrite32be(ioread32be(&xor_reg->crsr) |
+ XOR_CRSR_RCBE_BIT | XOR_CRSR_64BA_BIT,
+ &xor_reg->crsr);
+ } else {
+ /* XORcore is running. Refetch later in the handler */
+ do_xor_refetch = 1;
+ }
+
+ break;
+ }
+
+ local_irq_restore(flags);
+}
+
+/**
+ * ppc440spe_chan_get_current_descriptor - get the currently executed descriptor
+ */
+static u32
+ppc440spe_chan_get_current_descriptor(struct ppc440spe_adma_chan *chan)
+{
+ struct dma_regs *dma_reg;
+ struct xor_regs *xor_reg;
+
+ if (unlikely(!chan->hw_chain_inited))
+ /* h/w descriptor chain is not initialized yet */
+ return 0;
+
+ switch (chan->device->id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ dma_reg = chan->device->dma_reg;
+ return ioread32(&dma_reg->acpl) & (~DMA_CDB_MSK);
+ case PPC440SPE_XOR_ID:
+ xor_reg = chan->device->xor_reg;
+ return ioread32be(&xor_reg->ccbalr);
+ }
+ return 0;
+}
+
+/**
+ * ppc440spe_chan_run - enable the channel
+ */
+static void ppc440spe_chan_run(struct ppc440spe_adma_chan *chan)
+{
+ struct xor_regs *xor_reg;
+
+ switch (chan->device->id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ /* DMAs are always enabled, do nothing */
+ break;
+ case PPC440SPE_XOR_ID:
+ /* drain write buffer */
+ xor_reg = chan->device->xor_reg;
+
+ /* fetch descriptor pointed to in <link> */
+ iowrite32be(XOR_CRSR_64BA_BIT | XOR_CRSR_XAE_BIT,
+ &xor_reg->crsr);
+ break;
+ }
+}
+
+/******************************************************************************
+ * ADMA device level
+ ******************************************************************************/
+
+static void ppc440spe_chan_start_null_xor(struct ppc440spe_adma_chan *chan);
+static int ppc440spe_adma_alloc_chan_resources(struct dma_chan *chan);
+
+static dma_cookie_t
+ppc440spe_adma_tx_submit(struct dma_async_tx_descriptor *tx);
+
+static void ppc440spe_adma_set_dest(struct ppc440spe_adma_desc_slot *tx,
+ dma_addr_t addr, int index);
+static void
+ppc440spe_adma_memcpy_xor_set_src(struct ppc440spe_adma_desc_slot *tx,
+ dma_addr_t addr, int index);
+
+static void
+ppc440spe_adma_pq_set_dest(struct ppc440spe_adma_desc_slot *tx,
+ dma_addr_t *paddr, unsigned long flags);
+static void
+ppc440spe_adma_pq_set_src(struct ppc440spe_adma_desc_slot *tx,
+ dma_addr_t addr, int index);
+static void
+ppc440spe_adma_pq_set_src_mult(struct ppc440spe_adma_desc_slot *tx,
+ unsigned char mult, int index, int dst_pos);
+static void
+ppc440spe_adma_pqzero_sum_set_dest(struct ppc440spe_adma_desc_slot *tx,
+ dma_addr_t paddr, dma_addr_t qaddr);
+
+static struct page *ppc440spe_rxor_srcs[32];
+
+/**
+ * ppc440spe_can_rxor - check if the operands may be processed with RXOR
+ */
+static int ppc440spe_can_rxor(struct page **srcs, int src_cnt, size_t len)
+{
+ int i, order = 0, state = 0;
+ int idx = 0;
+
+ if (unlikely(!(src_cnt > 1)))
+ return 0;
+
+ BUG_ON(src_cnt > ARRAY_SIZE(ppc440spe_rxor_srcs));
+
+ /* Skip holes in the source list before checking */
+ for (i = 0; i < src_cnt; i++) {
+ if (!srcs[i])
+ continue;
+ ppc440spe_rxor_srcs[idx++] = srcs[i];
+ }
+ src_cnt = idx;
+
+ for (i = 1; i < src_cnt; i++) {
+ char *cur_addr = page_address(ppc440spe_rxor_srcs[i]);
+ char *old_addr = page_address(ppc440spe_rxor_srcs[i - 1]);
+
+ switch (state) {
+ case 0:
+ if (cur_addr == old_addr + len) {
+ /* direct RXOR */
+ order = 1;
+ state = 1;
+ } else if (old_addr == cur_addr + len) {
+ /* reverse RXOR */
+ order = -1;
+ state = 1;
+ } else
+ goto out;
+ break;
+ case 1:
+ if ((i == src_cnt - 2) ||
+ (order == -1 && cur_addr != old_addr - len)) {
+ order = 0;
+ state = 0;
+ } else if ((cur_addr == old_addr + len * order) ||
+ (cur_addr == old_addr + 2 * len) ||
+ (cur_addr == old_addr + 3 * len)) {
+ state = 2;
+ } else {
+ order = 0;
+ state = 0;
+ }
+ break;
+ case 2:
+ order = 0;
+ state = 0;
+ break;
+ }
+ }
+
+out:
+ if (state == 1 || state == 2)
+ return 1;
+
+ return 0;
+}
+
+/**
+ * ppc440spe_adma_device_estimate - estimate the efficiency of processing
+ * the operation given on this channel. It's assumed that 'chan' is
+ * capable to process 'cap' type of operation.
+ * @chan: channel to use
+ * @cap: type of transaction
+ * @dst_lst: array of destination pointers
+ * @dst_cnt: number of destination operands
+ * @src_lst: array of source pointers
+ * @src_cnt: number of source operands
+ * @src_sz: size of each source operand
+ */
+static int ppc440spe_adma_estimate(struct dma_chan *chan,
+ enum dma_transaction_type cap, struct page **dst_lst, int dst_cnt,
+ struct page **src_lst, int src_cnt, size_t src_sz)
+{
+ int ef = 1;
+
+ if (cap == DMA_PQ || cap == DMA_PQ_VAL) {
+ /* If RAID-6 capabilities were not activated don't try
+ * to use them
+ */
+ if (unlikely(!ppc440spe_r6_enabled))
+ return -1;
+ }
+ /* In the current implementation of ppc440spe ADMA driver it
+ * makes sense to pick out only pq case, because it may be
+ * processed:
+ * (1) either using Biskup method on DMA2;
+ * (2) or on DMA0/1.
+ * Thus we give a favour to (1) if the sources are suitable;
+ * else let it be processed on one of the DMA0/1 engines.
+ * In the sum_product case where destination is also the
+ * source process it on DMA0/1 only.
+ */
+ if (cap == DMA_PQ && chan->chan_id == PPC440SPE_XOR_ID) {
+
+ if (dst_cnt == 1 && src_cnt == 2 && dst_lst[0] == src_lst[1])
+ ef = 0; /* sum_product case, process on DMA0/1 */
+ else if (ppc440spe_can_rxor(src_lst, src_cnt, src_sz))
+ ef = 3; /* override (DMA0/1 + idle) */
+ else
+ ef = 0; /* can't process on DMA2 if !rxor */
+ }
+
+ /* channel idleness increases the priority */
+ if (likely(ef) &&
+ !ppc440spe_chan_is_busy(to_ppc440spe_adma_chan(chan)))
+ ef++;
+
+ return ef;
+}
+
+struct dma_chan *
+ppc440spe_async_tx_find_best_channel(enum dma_transaction_type cap,
+ struct page **dst_lst, int dst_cnt, struct page **src_lst,
+ int src_cnt, size_t src_sz)
+{
+ struct dma_chan *best_chan = NULL;
+ struct ppc_dma_chan_ref *ref;
+ int best_rank = -1;
+
+ if (unlikely(!src_sz))
+ return NULL;
+ if (src_sz > PAGE_SIZE) {
+ /*
+ * should a user of the api ever pass > PAGE_SIZE requests
+ * we sort out cases where temporary page-sized buffers
+ * are used.
+ */
+ switch (cap) {
+ case DMA_PQ:
+ if (src_cnt == 1 && dst_lst[1] == src_lst[0])
+ return NULL;
+ if (src_cnt == 2 && dst_lst[1] == src_lst[1])
+ return NULL;
+ break;
+ case DMA_PQ_VAL:
+ case DMA_XOR_VAL:
+ return NULL;
+ default:
+ break;
+ }
+ }
+
+ list_for_each_entry(ref, &ppc440spe_adma_chan_list, node) {
+ if (dma_has_cap(cap, ref->chan->device->cap_mask)) {
+ int rank;
+
+ rank = ppc440spe_adma_estimate(ref->chan, cap, dst_lst,
+ dst_cnt, src_lst, src_cnt, src_sz);
+ if (rank > best_rank) {
+ best_rank = rank;
+ best_chan = ref->chan;
+ }
+ }
+ }
+
+ return best_chan;
+}
+EXPORT_SYMBOL_GPL(ppc440spe_async_tx_find_best_channel);
+
+/**
+ * ppc440spe_get_group_entry - get group entry with index idx
+ * @tdesc: is the last allocated slot in the group.
+ */
+static struct ppc440spe_adma_desc_slot *
+ppc440spe_get_group_entry(struct ppc440spe_adma_desc_slot *tdesc, u32 entry_idx)
+{
+ struct ppc440spe_adma_desc_slot *iter = tdesc->group_head;
+ int i = 0;
+
+ if (entry_idx < 0 || entry_idx >= (tdesc->src_cnt + tdesc->dst_cnt)) {
+ printk("%s: entry_idx %d, src_cnt %d, dst_cnt %d\n",
+ __func__, entry_idx, tdesc->src_cnt, tdesc->dst_cnt);
+ BUG();
+ }
+
+ list_for_each_entry(iter, &tdesc->group_list, chain_node) {
+ if (i++ == entry_idx)
+ break;
+ }
+ return iter;
+}
+
+/**
+ * ppc440spe_adma_free_slots - flags descriptor slots for reuse
+ * @slot: Slot to free
+ * Caller must hold &ppc440spe_chan->lock while calling this function
+ */
+static void ppc440spe_adma_free_slots(struct ppc440spe_adma_desc_slot *slot,
+ struct ppc440spe_adma_chan *chan)
+{
+ int stride = slot->slots_per_op;
+
+ while (stride--) {
+ slot->slots_per_op = 0;
+ slot = list_entry(slot->slot_node.next,
+ struct ppc440spe_adma_desc_slot,
+ slot_node);
+ }
+}
+
+static void ppc440spe_adma_unmap(struct ppc440spe_adma_chan *chan,
+ struct ppc440spe_adma_desc_slot *desc)
+{
+ u32 src_cnt, dst_cnt;
+ dma_addr_t addr;
+
+ /*
+ * get the number of sources & destination
+ * included in this descriptor and unmap
+ * them all
+ */
+ src_cnt = ppc440spe_desc_get_src_num(desc, chan);
+ dst_cnt = ppc440spe_desc_get_dst_num(desc, chan);
+
+ /* unmap destinations */
+ if (!(desc->async_tx.flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+ while (dst_cnt--) {
+ addr = ppc440spe_desc_get_dest_addr(
+ desc, chan, dst_cnt);
+ dma_unmap_page(chan->device->dev,
+ addr, desc->unmap_len,
+ DMA_FROM_DEVICE);
+ }
+ }
+
+ /* unmap sources */
+ if (!(desc->async_tx.flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+ while (src_cnt--) {
+ addr = ppc440spe_desc_get_src_addr(
+ desc, chan, src_cnt);
+ dma_unmap_page(chan->device->dev,
+ addr, desc->unmap_len,
+ DMA_TO_DEVICE);
+ }
+ }
+}
+
+/**
+ * ppc440spe_adma_run_tx_complete_actions - call functions to be called
+ * upon completion
+ */
+static dma_cookie_t ppc440spe_adma_run_tx_complete_actions(
+ struct ppc440spe_adma_desc_slot *desc,
+ struct ppc440spe_adma_chan *chan,
+ dma_cookie_t cookie)
+{
+ int i;
+
+ BUG_ON(desc->async_tx.cookie < 0);
+ if (desc->async_tx.cookie > 0) {
+ cookie = desc->async_tx.cookie;
+ desc->async_tx.cookie = 0;
+
+ /* call the callback (must not sleep or submit new
+ * operations to this channel)
+ */
+ if (desc->async_tx.callback)
+ desc->async_tx.callback(
+ desc->async_tx.callback_param);
+
+ /* unmap dma addresses
+ * (unmap_single vs unmap_page?)
+ *
+ * actually, ppc's dma_unmap_page() functions are empty, so
+ * the following code is just for the sake of completeness
+ */
+ if (chan && chan->needs_unmap && desc->group_head &&
+ desc->unmap_len) {
+ struct ppc440spe_adma_desc_slot *unmap =
+ desc->group_head;
+ /* assume 1 slot per op always */
+ u32 slot_count = unmap->slot_cnt;
+
+ /* Run through the group list and unmap addresses */
+ for (i = 0; i < slot_count; i++) {
+ BUG_ON(!unmap);
+ ppc440spe_adma_unmap(chan, unmap);
+ unmap = unmap->hw_next;
+ }
+ }
+ }
+
+ /* run dependent operations */
+ dma_run_dependencies(&desc->async_tx);
+
+ return cookie;
+}
+
+/**
+ * ppc440spe_adma_clean_slot - clean up CDB slot (if ack is set)
+ */
+static int ppc440spe_adma_clean_slot(struct ppc440spe_adma_desc_slot *desc,
+ struct ppc440spe_adma_chan *chan)
+{
+ /* the client is allowed to attach dependent operations
+ * until 'ack' is set
+ */
+ if (!async_tx_test_ack(&desc->async_tx))
+ return 0;
+
+ /* leave the last descriptor in the chain
+ * so we can append to it
+ */
+ if (list_is_last(&desc->chain_node, &chan->chain) ||
+ desc->phys == ppc440spe_chan_get_current_descriptor(chan))
+ return 1;
+
+ if (chan->device->id != PPC440SPE_XOR_ID) {
+ /* our DMA interrupt handler clears opc field of
+ * each processed descriptor. For all types of
+ * operations except for ZeroSum we do not actually
+ * need ack from the interrupt handler. ZeroSum is a
+ * special case since the result of this operation
+ * is available from the handler only, so if we see
+ * such type of descriptor (which is unprocessed yet)
+ * then leave it in chain.
+ */
+ struct dma_cdb *cdb = desc->hw_desc;
+ if (cdb->opc == DMA_CDB_OPC_DCHECK128)
+ return 1;
+ }
+
+ dev_dbg(chan->device->common.dev, "\tfree slot %llx: %d stride: %d\n",
+ desc->phys, desc->idx, desc->slots_per_op);
+
+ list_del(&desc->chain_node);
+ ppc440spe_adma_free_slots(desc, chan);
+ return 0;
+}
+
+/**
+ * __ppc440spe_adma_slot_cleanup - this is the common clean-up routine
+ * which runs through the channel CDBs list until reach the descriptor
+ * currently processed. When routine determines that all CDBs of group
+ * are completed then corresponding callbacks (if any) are called and slots
+ * are freed.
+ */
+static void __ppc440spe_adma_slot_cleanup(struct ppc440spe_adma_chan *chan)
+{
+ struct ppc440spe_adma_desc_slot *iter, *_iter, *group_start = NULL;
+ dma_cookie_t cookie = 0;
+ u32 current_desc = ppc440spe_chan_get_current_descriptor(chan);
+ int busy = ppc440spe_chan_is_busy(chan);
+ int seen_current = 0, slot_cnt = 0, slots_per_op = 0;
+
+ dev_dbg(chan->device->common.dev, "ppc440spe adma%d: %s\n",
+ chan->device->id, __func__);
+
+ if (!current_desc) {
+ /* There were no transactions yet, so
+ * nothing to clean
+ */
+ return;
+ }
+
+ /* free completed slots from the chain starting with
+ * the oldest descriptor
+ */
+ list_for_each_entry_safe(iter, _iter, &chan->chain,
+ chain_node) {
+ dev_dbg(chan->device->common.dev, "\tcookie: %d slot: %d "
+ "busy: %d this_desc: %#llx next_desc: %#x "
+ "cur: %#x ack: %d\n",
+ iter->async_tx.cookie, iter->idx, busy, iter->phys,
+ ppc440spe_desc_get_link(iter, chan), current_desc,
+ async_tx_test_ack(&iter->async_tx));
+ prefetch(_iter);
+ prefetch(&_iter->async_tx);
+
+ /* do not advance past the current descriptor loaded into the
+ * hardware channel,subsequent descriptors are either in process
+ * or have not been submitted
+ */
+ if (seen_current)
+ break;
+
+ /* stop the search if we reach the current descriptor and the
+ * channel is busy, or if it appears that the current descriptor
+ * needs to be re-read (i.e. has been appended to)
+ */
+ if (iter->phys == current_desc) {
+ BUG_ON(seen_current++);
+ if (busy || ppc440spe_desc_get_link(iter, chan)) {
+ /* not all descriptors of the group have
+ * been completed; exit.
+ */
+ break;
+ }
+ }
+
+ /* detect the start of a group transaction */
+ if (!slot_cnt && !slots_per_op) {
+ slot_cnt = iter->slot_cnt;
+ slots_per_op = iter->slots_per_op;
+ if (slot_cnt <= slots_per_op) {
+ slot_cnt = 0;
+ slots_per_op = 0;
+ }
+ }
+
+ if (slot_cnt) {
+ if (!group_start)
+ group_start = iter;
+ slot_cnt -= slots_per_op;
+ }
+
+ /* all the members of a group are complete */
+ if (slots_per_op != 0 && slot_cnt == 0) {
+ struct ppc440spe_adma_desc_slot *grp_iter, *_grp_iter;
+ int end_of_chain = 0;
+
+ /* clean up the group */
+ slot_cnt = group_start->slot_cnt;
+ grp_iter = group_start;
+ list_for_each_entry_safe_from(grp_iter, _grp_iter,
+ &chan->chain, chain_node) {
+
+ cookie = ppc440spe_adma_run_tx_complete_actions(
+ grp_iter, chan, cookie);
+
+ slot_cnt -= slots_per_op;
+ end_of_chain = ppc440spe_adma_clean_slot(
+ grp_iter, chan);
+ if (end_of_chain && slot_cnt) {
+ /* Should wait for ZeroSum completion */
+ if (cookie > 0)
+ chan->completed_cookie = cookie;
+ return;
+ }
+
+ if (slot_cnt == 0 || end_of_chain)
+ break;
+ }
+
+ /* the group should be complete at this point */
+ BUG_ON(slot_cnt);
+
+ slots_per_op = 0;
+ group_start = NULL;
+ if (end_of_chain)
+ break;
+ else
+ continue;
+ } else if (slots_per_op) /* wait for group completion */
+ continue;
+
+ cookie = ppc440spe_adma_run_tx_complete_actions(iter, chan,
+ cookie);
+
+ if (ppc440spe_adma_clean_slot(iter, chan))
+ break;
+ }
+
+ BUG_ON(!seen_current);
+
+ if (cookie > 0) {
+ chan->completed_cookie = cookie;
+ pr_debug("\tcompleted cookie %d\n", cookie);
+ }
+
+}
+
+/**
+ * ppc440spe_adma_tasklet - clean up watch-dog initiator
+ */
+static void ppc440spe_adma_tasklet(unsigned long data)
+{
+ struct ppc440spe_adma_chan *chan = (struct ppc440spe_adma_chan *) data;
+
+ spin_lock_nested(&chan->lock, SINGLE_DEPTH_NESTING);
+ __ppc440spe_adma_slot_cleanup(chan);
+ spin_unlock(&chan->lock);
+}
+
+/**
+ * ppc440spe_adma_slot_cleanup - clean up scheduled initiator
+ */
+static void ppc440spe_adma_slot_cleanup(struct ppc440spe_adma_chan *chan)
+{
+ spin_lock_bh(&chan->lock);
+ __ppc440spe_adma_slot_cleanup(chan);
+ spin_unlock_bh(&chan->lock);
+}
+
+/**
+ * ppc440spe_adma_alloc_slots - allocate free slots (if any)
+ */
+static struct ppc440spe_adma_desc_slot *ppc440spe_adma_alloc_slots(
+ struct ppc440spe_adma_chan *chan, int num_slots,
+ int slots_per_op)
+{
+ struct ppc440spe_adma_desc_slot *iter = NULL, *_iter;
+ struct ppc440spe_adma_desc_slot *alloc_start = NULL;
+ struct list_head chain = LIST_HEAD_INIT(chain);
+ int slots_found, retry = 0;
+
+
+ BUG_ON(!num_slots || !slots_per_op);
+ /* start search from the last allocated descrtiptor
+ * if a contiguous allocation can not be found start searching
+ * from the beginning of the list
+ */
+retry:
+ slots_found = 0;
+ if (retry == 0)
+ iter = chan->last_used;
+ else
+ iter = list_entry(&chan->all_slots,
+ struct ppc440spe_adma_desc_slot,
+ slot_node);
+ list_for_each_entry_safe_continue(iter, _iter, &chan->all_slots,
+ slot_node) {
+ prefetch(_iter);
+ prefetch(&_iter->async_tx);
+ if (iter->slots_per_op) {
+ slots_found = 0;
+ continue;
+ }
+
+ /* start the allocation if the slot is correctly aligned */
+ if (!slots_found++)
+ alloc_start = iter;
+
+ if (slots_found == num_slots) {
+ struct ppc440spe_adma_desc_slot *alloc_tail = NULL;
+ struct ppc440spe_adma_desc_slot *last_used = NULL;
+
+ iter = alloc_start;
+ while (num_slots) {
+ int i;
+ /* pre-ack all but the last descriptor */
+ if (num_slots != slots_per_op)
+ async_tx_ack(&iter->async_tx);
+
+ list_add_tail(&iter->chain_node, &chain);
+ alloc_tail = iter;
+ iter->async_tx.cookie = 0;
+ iter->hw_next = NULL;
+ iter->flags = 0;
+ iter->slot_cnt = num_slots;
+ iter->xor_check_result = NULL;
+ for (i = 0; i < slots_per_op; i++) {
+ iter->slots_per_op = slots_per_op - i;
+ last_used = iter;
+ iter = list_entry(iter->slot_node.next,
+ struct ppc440spe_adma_desc_slot,
+ slot_node);
+ }
+ num_slots -= slots_per_op;
+ }
+ alloc_tail->group_head = alloc_start;
+ alloc_tail->async_tx.cookie = -EBUSY;
+ list_splice(&chain, &alloc_tail->group_list);
+ chan->last_used = last_used;
+ return alloc_tail;
+ }
+ }
+ if (!retry++)
+ goto retry;
+
+ /* try to free some slots if the allocation fails */
+ tasklet_schedule(&chan->irq_tasklet);
+ return NULL;
+}
+
+/**
+ * ppc440spe_adma_alloc_chan_resources - allocate pools for CDB slots
+ */
+static int ppc440spe_adma_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct ppc440spe_adma_chan *ppc440spe_chan;
+ struct ppc440spe_adma_desc_slot *slot = NULL;
+ char *hw_desc;
+ int i, db_sz;
+ int init;
+
+ ppc440spe_chan = to_ppc440spe_adma_chan(chan);
+ init = ppc440spe_chan->slots_allocated ? 0 : 1;
+ chan->chan_id = ppc440spe_chan->device->id;
+
+ /* Allocate descriptor slots */
+ i = ppc440spe_chan->slots_allocated;
+ if (ppc440spe_chan->device->id != PPC440SPE_XOR_ID)
+ db_sz = sizeof(struct dma_cdb);
+ else
+ db_sz = sizeof(struct xor_cb);
+
+ for (; i < (ppc440spe_chan->device->pool_size / db_sz); i++) {
+ slot = kzalloc(sizeof(struct ppc440spe_adma_desc_slot),
+ GFP_KERNEL);
+ if (!slot) {
+ printk(KERN_INFO "SPE ADMA Channel only initialized"
+ " %d descriptor slots", i--);
+ break;
+ }
+
+ hw_desc = (char *) ppc440spe_chan->device->dma_desc_pool_virt;
+ slot->hw_desc = (void *) &hw_desc[i * db_sz];
+ dma_async_tx_descriptor_init(&slot->async_tx, chan);
+ slot->async_tx.tx_submit = ppc440spe_adma_tx_submit;
+ INIT_LIST_HEAD(&slot->chain_node);
+ INIT_LIST_HEAD(&slot->slot_node);
+ INIT_LIST_HEAD(&slot->group_list);
+ slot->phys = ppc440spe_chan->device->dma_desc_pool + i * db_sz;
+ slot->idx = i;
+
+ spin_lock_bh(&ppc440spe_chan->lock);
+ ppc440spe_chan->slots_allocated++;
+ list_add_tail(&slot->slot_node, &ppc440spe_chan->all_slots);
+ spin_unlock_bh(&ppc440spe_chan->lock);
+ }
+
+ if (i && !ppc440spe_chan->last_used) {
+ ppc440spe_chan->last_used =
+ list_entry(ppc440spe_chan->all_slots.next,
+ struct ppc440spe_adma_desc_slot,
+ slot_node);
+ }
+
+ dev_dbg(ppc440spe_chan->device->common.dev,
+ "ppc440spe adma%d: allocated %d descriptor slots\n",
+ ppc440spe_chan->device->id, i);
+
+ /* initialize the channel and the chain with a null operation */
+ if (init) {
+ switch (ppc440spe_chan->device->id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ ppc440spe_chan->hw_chain_inited = 0;
+ /* Use WXOR for self-testing */
+ if (!ppc440spe_r6_tchan)
+ ppc440spe_r6_tchan = ppc440spe_chan;
+ break;
+ case PPC440SPE_XOR_ID:
+ ppc440spe_chan_start_null_xor(ppc440spe_chan);
+ break;
+ default:
+ BUG();
+ }
+ ppc440spe_chan->needs_unmap = 1;
+ }
+
+ return (i > 0) ? i : -ENOMEM;
+}
+
+/**
+ * ppc440spe_desc_assign_cookie - assign a cookie
+ */
+static dma_cookie_t ppc440spe_desc_assign_cookie(
+ struct ppc440spe_adma_chan *chan,
+ struct ppc440spe_adma_desc_slot *desc)
+{
+ dma_cookie_t cookie = chan->common.cookie;
+
+ cookie++;
+ if (cookie < 0)
+ cookie = 1;
+ chan->common.cookie = desc->async_tx.cookie = cookie;
+ return cookie;
+}
+
+/**
+ * ppc440spe_rxor_set_region_data -
+ */
+static void ppc440spe_rxor_set_region(struct ppc440spe_adma_desc_slot *desc,
+ u8 xor_arg_no, u32 mask)
+{
+ struct xor_cb *xcb = desc->hw_desc;
+
+ xcb->ops[xor_arg_no].h |= mask;
+}
+
+/**
+ * ppc440spe_rxor_set_src -
+ */
+static void ppc440spe_rxor_set_src(struct ppc440spe_adma_desc_slot *desc,
+ u8 xor_arg_no, dma_addr_t addr)
+{
+ struct xor_cb *xcb = desc->hw_desc;
+
+ xcb->ops[xor_arg_no].h |= DMA_CUED_XOR_BASE;
+ xcb->ops[xor_arg_no].l = addr;
+}
+
+/**
+ * ppc440spe_rxor_set_mult -
+ */
+static void ppc440spe_rxor_set_mult(struct ppc440spe_adma_desc_slot *desc,
+ u8 xor_arg_no, u8 idx, u8 mult)
+{
+ struct xor_cb *xcb = desc->hw_desc;
+
+ xcb->ops[xor_arg_no].h |= mult << (DMA_CUED_MULT1_OFF + idx * 8);
+}
+
+/**
+ * ppc440spe_adma_check_threshold - append CDBs to h/w chain if threshold
+ * has been achieved
+ */
+static void ppc440spe_adma_check_threshold(struct ppc440spe_adma_chan *chan)
+{
+ dev_dbg(chan->device->common.dev, "ppc440spe adma%d: pending: %d\n",
+ chan->device->id, chan->pending);
+
+ if (chan->pending >= PPC440SPE_ADMA_THRESHOLD) {
+ chan->pending = 0;
+ ppc440spe_chan_append(chan);
+ }
+}
+
+/**
+ * ppc440spe_adma_tx_submit - submit new descriptor group to the channel
+ * (it's not necessary that descriptors will be submitted to the h/w
+ * chains too right now)
+ */
+static dma_cookie_t ppc440spe_adma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct ppc440spe_adma_desc_slot *sw_desc;
+ struct ppc440spe_adma_chan *chan = to_ppc440spe_adma_chan(tx->chan);
+ struct ppc440spe_adma_desc_slot *group_start, *old_chain_tail;
+ int slot_cnt;
+ int slots_per_op;
+ dma_cookie_t cookie;
+
+ sw_desc = tx_to_ppc440spe_adma_slot(tx);
+
+ group_start = sw_desc->group_head;
+ slot_cnt = group_start->slot_cnt;
+ slots_per_op = group_start->slots_per_op;
+
+ spin_lock_bh(&chan->lock);
+
+ cookie = ppc440spe_desc_assign_cookie(chan, sw_desc);
+
+ if (unlikely(list_empty(&chan->chain))) {
+ /* first peer */
+ list_splice_init(&sw_desc->group_list, &chan->chain);
+ chan_first_cdb[chan->device->id] = group_start;
+ } else {
+ /* isn't first peer, bind CDBs to chain */
+ old_chain_tail = list_entry(chan->chain.prev,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+ list_splice_init(&sw_desc->group_list,
+ &old_chain_tail->chain_node);
+ /* fix up the hardware chain */
+ ppc440spe_desc_set_link(chan, old_chain_tail, group_start);
+ }
+
+ /* increment the pending count by the number of operations */
+ chan->pending += slot_cnt / slots_per_op;
+ ppc440spe_adma_check_threshold(chan);
+ spin_unlock_bh(&chan->lock);
+
+ dev_dbg(chan->device->common.dev,
+ "ppc440spe adma%d: %s cookie: %d slot: %d tx %p\n",
+ chan->device->id, __func__,
+ sw_desc->async_tx.cookie, sw_desc->idx, sw_desc);
+
+ return cookie;
+}
+
+/**
+ * ppc440spe_adma_prep_dma_interrupt - prepare CDB for a pseudo DMA operation
+ */
+static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_interrupt(
+ struct dma_chan *chan, unsigned long flags)
+{
+ struct ppc440spe_adma_chan *ppc440spe_chan;
+ struct ppc440spe_adma_desc_slot *sw_desc, *group_start;
+ int slot_cnt, slots_per_op;
+
+ ppc440spe_chan = to_ppc440spe_adma_chan(chan);
+
+ dev_dbg(ppc440spe_chan->device->common.dev,
+ "ppc440spe adma%d: %s\n", ppc440spe_chan->device->id,
+ __func__);
+
+ spin_lock_bh(&ppc440spe_chan->lock);
+ slot_cnt = slots_per_op = 1;
+ sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt,
+ slots_per_op);
+ if (sw_desc) {
+ group_start = sw_desc->group_head;
+ ppc440spe_desc_init_interrupt(group_start, ppc440spe_chan);
+ group_start->unmap_len = 0;
+ sw_desc->async_tx.flags = flags;
+ }
+ spin_unlock_bh(&ppc440spe_chan->lock);
+
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+/**
+ * ppc440spe_adma_prep_dma_memcpy - prepare CDB for a MEMCPY operation
+ */
+static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_memcpy(
+ struct dma_chan *chan, dma_addr_t dma_dest,
+ dma_addr_t dma_src, size_t len, unsigned long flags)
+{
+ struct ppc440spe_adma_chan *ppc440spe_chan;
+ struct ppc440spe_adma_desc_slot *sw_desc, *group_start;
+ int slot_cnt, slots_per_op;
+
+ ppc440spe_chan = to_ppc440spe_adma_chan(chan);
+
+ if (unlikely(!len))
+ return NULL;
+
+ BUG_ON(unlikely(len > PPC440SPE_ADMA_DMA_MAX_BYTE_COUNT));
+
+ spin_lock_bh(&ppc440spe_chan->lock);
+
+ dev_dbg(ppc440spe_chan->device->common.dev,
+ "ppc440spe adma%d: %s len: %u int_en %d\n",
+ ppc440spe_chan->device->id, __func__, len,
+ flags & DMA_PREP_INTERRUPT ? 1 : 0);
+ slot_cnt = slots_per_op = 1;
+ sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt,
+ slots_per_op);
+ if (sw_desc) {
+ group_start = sw_desc->group_head;
+ ppc440spe_desc_init_memcpy(group_start, flags);
+ ppc440spe_adma_set_dest(group_start, dma_dest, 0);
+ ppc440spe_adma_memcpy_xor_set_src(group_start, dma_src, 0);
+ ppc440spe_desc_set_byte_count(group_start, ppc440spe_chan, len);
+ sw_desc->unmap_len = len;
+ sw_desc->async_tx.flags = flags;
+ }
+ spin_unlock_bh(&ppc440spe_chan->lock);
+
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+/**
+ * ppc440spe_adma_prep_dma_memset - prepare CDB for a MEMSET operation
+ */
+static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_memset(
+ struct dma_chan *chan, dma_addr_t dma_dest, int value,
+ size_t len, unsigned long flags)
+{
+ struct ppc440spe_adma_chan *ppc440spe_chan;
+ struct ppc440spe_adma_desc_slot *sw_desc, *group_start;
+ int slot_cnt, slots_per_op;
+
+ ppc440spe_chan = to_ppc440spe_adma_chan(chan);
+
+ if (unlikely(!len))
+ return NULL;
+
+ BUG_ON(unlikely(len > PPC440SPE_ADMA_DMA_MAX_BYTE_COUNT));
+
+ spin_lock_bh(&ppc440spe_chan->lock);
+
+ dev_dbg(ppc440spe_chan->device->common.dev,
+ "ppc440spe adma%d: %s cal: %u len: %u int_en %d\n",
+ ppc440spe_chan->device->id, __func__, value, len,
+ flags & DMA_PREP_INTERRUPT ? 1 : 0);
+
+ slot_cnt = slots_per_op = 1;
+ sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt,
+ slots_per_op);
+ if (sw_desc) {
+ group_start = sw_desc->group_head;
+ ppc440spe_desc_init_memset(group_start, value, flags);
+ ppc440spe_adma_set_dest(group_start, dma_dest, 0);
+ ppc440spe_desc_set_byte_count(group_start, ppc440spe_chan, len);
+ sw_desc->unmap_len = len;
+ sw_desc->async_tx.flags = flags;
+ }
+ spin_unlock_bh(&ppc440spe_chan->lock);
+
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+/**
+ * ppc440spe_adma_prep_dma_xor - prepare CDB for a XOR operation
+ */
+static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_xor(
+ struct dma_chan *chan, dma_addr_t dma_dest,
+ dma_addr_t *dma_src, u32 src_cnt, size_t len,
+ unsigned long flags)
+{
+ struct ppc440spe_adma_chan *ppc440spe_chan;
+ struct ppc440spe_adma_desc_slot *sw_desc, *group_start;
+ int slot_cnt, slots_per_op;
+
+ ppc440spe_chan = to_ppc440spe_adma_chan(chan);
+
+ ADMA_LL_DBG(prep_dma_xor_dbg(ppc440spe_chan->device->id,
+ dma_dest, dma_src, src_cnt));
+ if (unlikely(!len))
+ return NULL;
+ BUG_ON(unlikely(len > PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT));
+
+ dev_dbg(ppc440spe_chan->device->common.dev,
+ "ppc440spe adma%d: %s src_cnt: %d len: %u int_en: %d\n",
+ ppc440spe_chan->device->id, __func__, src_cnt, len,
+ flags & DMA_PREP_INTERRUPT ? 1 : 0);
+
+ spin_lock_bh(&ppc440spe_chan->lock);
+ slot_cnt = ppc440spe_chan_xor_slot_count(len, src_cnt, &slots_per_op);
+ sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt,
+ slots_per_op);
+ if (sw_desc) {
+ group_start = sw_desc->group_head;
+ ppc440spe_desc_init_xor(group_start, src_cnt, flags);
+ ppc440spe_adma_set_dest(group_start, dma_dest, 0);
+ while (src_cnt--)
+ ppc440spe_adma_memcpy_xor_set_src(group_start,
+ dma_src[src_cnt], src_cnt);
+ ppc440spe_desc_set_byte_count(group_start, ppc440spe_chan, len);
+ sw_desc->unmap_len = len;
+ sw_desc->async_tx.flags = flags;
+ }
+ spin_unlock_bh(&ppc440spe_chan->lock);
+
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static inline void
+ppc440spe_desc_set_xor_src_cnt(struct ppc440spe_adma_desc_slot *desc,
+ int src_cnt);
+static void ppc440spe_init_rxor_cursor(struct ppc440spe_rxor *cursor);
+
+/**
+ * ppc440spe_adma_init_dma2rxor_slot -
+ */
+static void ppc440spe_adma_init_dma2rxor_slot(
+ struct ppc440spe_adma_desc_slot *desc,
+ dma_addr_t *src, int src_cnt)
+{
+ int i;
+
+ /* initialize CDB */
+ for (i = 0; i < src_cnt; i++) {
+ ppc440spe_adma_dma2rxor_prep_src(desc, &desc->rxor_cursor, i,
+ desc->src_cnt, (u32)src[i]);
+ }
+}
+
+/**
+ * ppc440spe_dma01_prep_mult -
+ * for Q operation where destination is also the source
+ */
+static struct ppc440spe_adma_desc_slot *ppc440spe_dma01_prep_mult(
+ struct ppc440spe_adma_chan *ppc440spe_chan,
+ dma_addr_t *dst, int dst_cnt, dma_addr_t *src, int src_cnt,
+ const unsigned char *scf, size_t len, unsigned long flags)
+{
+ struct ppc440spe_adma_desc_slot *sw_desc = NULL;
+ unsigned long op = 0;
+ int slot_cnt;
+
+ set_bit(PPC440SPE_DESC_WXOR, &op);
+ slot_cnt = 2;
+
+ spin_lock_bh(&ppc440spe_chan->lock);
+
+ /* use WXOR, each descriptor occupies one slot */
+ sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, 1);
+ if (sw_desc) {
+ struct ppc440spe_adma_chan *chan;
+ struct ppc440spe_adma_desc_slot *iter;
+ struct dma_cdb *hw_desc;
+
+ chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
+ set_bits(op, &sw_desc->flags);
+ sw_desc->src_cnt = src_cnt;
+ sw_desc->dst_cnt = dst_cnt;
+ /* First descriptor, zero data in the destination and copy it
+ * to q page using MULTICAST transfer.
+ */
+ iter = list_first_entry(&sw_desc->group_list,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+ memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+ /* set 'next' pointer */
+ iter->hw_next = list_entry(iter->chain_node.next,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+ clear_bit(PPC440SPE_DESC_INT, &iter->flags);
+ hw_desc = iter->hw_desc;
+ hw_desc->opc = DMA_CDB_OPC_MULTICAST;
+
+ ppc440spe_desc_set_dest_addr(iter, chan,
+ DMA_CUED_XOR_BASE, dst[0], 0);
+ ppc440spe_desc_set_dest_addr(iter, chan, 0, dst[1], 1);
+ ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB,
+ src[0]);
+ ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len);
+ iter->unmap_len = len;
+
+ /*
+ * Second descriptor, multiply data from the q page
+ * and store the result in real destination.
+ */
+ iter = list_first_entry(&iter->chain_node,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+ memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+ iter->hw_next = NULL;
+ if (flags & DMA_PREP_INTERRUPT)
+ set_bit(PPC440SPE_DESC_INT, &iter->flags);
+ else
+ clear_bit(PPC440SPE_DESC_INT, &iter->flags);
+
+ hw_desc = iter->hw_desc;
+ hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+ ppc440spe_desc_set_src_addr(iter, chan, 0,
+ DMA_CUED_XOR_HB, dst[1]);
+ ppc440spe_desc_set_dest_addr(iter, chan,
+ DMA_CUED_XOR_BASE, dst[0], 0);
+
+ ppc440spe_desc_set_src_mult(iter, chan, DMA_CUED_MULT1_OFF,
+ DMA_CDB_SG_DST1, scf[0]);
+ ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len);
+ iter->unmap_len = len;
+ sw_desc->async_tx.flags = flags;
+ }
+
+ spin_unlock_bh(&ppc440spe_chan->lock);
+
+ return sw_desc;
+}
+
+/**
+ * ppc440spe_dma01_prep_sum_product -
+ * Dx = A*(P+Pxy) + B*(Q+Qxy) operation where destination is also
+ * the source.
+ */
+static struct ppc440spe_adma_desc_slot *ppc440spe_dma01_prep_sum_product(
+ struct ppc440spe_adma_chan *ppc440spe_chan,
+ dma_addr_t *dst, dma_addr_t *src, int src_cnt,
+ const unsigned char *scf, size_t len, unsigned long flags)
+{
+ struct ppc440spe_adma_desc_slot *sw_desc = NULL;
+ unsigned long op = 0;
+ int slot_cnt;
+
+ set_bit(PPC440SPE_DESC_WXOR, &op);
+ slot_cnt = 3;
+
+ spin_lock_bh(&ppc440spe_chan->lock);
+
+ /* WXOR, each descriptor occupies one slot */
+ sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, 1);
+ if (sw_desc) {
+ struct ppc440spe_adma_chan *chan;
+ struct ppc440spe_adma_desc_slot *iter;
+ struct dma_cdb *hw_desc;
+
+ chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
+ set_bits(op, &sw_desc->flags);
+ sw_desc->src_cnt = src_cnt;
+ sw_desc->dst_cnt = 1;
+ /* 1st descriptor, src[1] data to q page and zero destination */
+ iter = list_first_entry(&sw_desc->group_list,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+ memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+ iter->hw_next = list_entry(iter->chain_node.next,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+ clear_bit(PPC440SPE_DESC_INT, &iter->flags);
+ hw_desc = iter->hw_desc;
+ hw_desc->opc = DMA_CDB_OPC_MULTICAST;
+
+ ppc440spe_desc_set_dest_addr(iter, chan, DMA_CUED_XOR_BASE,
+ *dst, 0);
+ ppc440spe_desc_set_dest_addr(iter, chan, 0,
+ ppc440spe_chan->qdest, 1);
+ ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB,
+ src[1]);
+ ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len);
+ iter->unmap_len = len;
+
+ /* 2nd descriptor, multiply src[1] data and store the
+ * result in destination */
+ iter = list_first_entry(&iter->chain_node,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+ memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+ /* set 'next' pointer */
+ iter->hw_next = list_entry(iter->chain_node.next,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+ if (flags & DMA_PREP_INTERRUPT)
+ set_bit(PPC440SPE_DESC_INT, &iter->flags);
+ else
+ clear_bit(PPC440SPE_DESC_INT, &iter->flags);
+
+ hw_desc = iter->hw_desc;
+ hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+ ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB,
+ ppc440spe_chan->qdest);
+ ppc440spe_desc_set_dest_addr(iter, chan, DMA_CUED_XOR_BASE,
+ *dst, 0);
+ ppc440spe_desc_set_src_mult(iter, chan, DMA_CUED_MULT1_OFF,
+ DMA_CDB_SG_DST1, scf[1]);
+ ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len);
+ iter->unmap_len = len;
+
+ /*
+ * 3rd descriptor, multiply src[0] data and xor it
+ * with destination
+ */
+ iter = list_first_entry(&iter->chain_node,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+ memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+ iter->hw_next = NULL;
+ if (flags & DMA_PREP_INTERRUPT)
+ set_bit(PPC440SPE_DESC_INT, &iter->flags);
+ else
+ clear_bit(PPC440SPE_DESC_INT, &iter->flags);
+
+ hw_desc = iter->hw_desc;
+ hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+ ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB,
+ src[0]);
+ ppc440spe_desc_set_dest_addr(iter, chan, DMA_CUED_XOR_BASE,
+ *dst, 0);
+ ppc440spe_desc_set_src_mult(iter, chan, DMA_CUED_MULT1_OFF,
+ DMA_CDB_SG_DST1, scf[0]);
+ ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len);
+ iter->unmap_len = len;
+ sw_desc->async_tx.flags = flags;
+ }
+
+ spin_unlock_bh(&ppc440spe_chan->lock);
+
+ return sw_desc;
+}
+
+static struct ppc440spe_adma_desc_slot *ppc440spe_dma01_prep_pq(
+ struct ppc440spe_adma_chan *ppc440spe_chan,
+ dma_addr_t *dst, int dst_cnt, dma_addr_t *src, int src_cnt,
+ const unsigned char *scf, size_t len, unsigned long flags)
+{
+ int slot_cnt;
+ struct ppc440spe_adma_desc_slot *sw_desc = NULL, *iter;
+ unsigned long op = 0;
+ unsigned char mult = 1;
+
+ pr_debug("%s: dst_cnt %d, src_cnt %d, len %d\n",
+ __func__, dst_cnt, src_cnt, len);
+ /* select operations WXOR/RXOR depending on the
+ * source addresses of operators and the number
+ * of destinations (RXOR support only Q-parity calculations)
+ */
+ set_bit(PPC440SPE_DESC_WXOR, &op);
+ if (!test_and_set_bit(PPC440SPE_RXOR_RUN, &ppc440spe_rxor_state)) {
+ /* no active RXOR;
+ * do RXOR if:
+ * - there are more than 1 source,
+ * - len is aligned on 512-byte boundary,
+ * - source addresses fit to one of 4 possible regions.
+ */
+ if (src_cnt > 1 &&
+ !(len & MQ0_CF2H_RXOR_BS_MASK) &&
+ (src[0] + len) == src[1]) {
+ /* may do RXOR R1 R2 */
+ set_bit(PPC440SPE_DESC_RXOR, &op);
+ if (src_cnt != 2) {
+ /* may try to enhance region of RXOR */
+ if ((src[1] + len) == src[2]) {
+ /* do RXOR R1 R2 R3 */
+ set_bit(PPC440SPE_DESC_RXOR123,
+ &op);
+ } else if ((src[1] + len * 2) == src[2]) {
+ /* do RXOR R1 R2 R4 */
+ set_bit(PPC440SPE_DESC_RXOR124, &op);
+ } else if ((src[1] + len * 3) == src[2]) {
+ /* do RXOR R1 R2 R5 */
+ set_bit(PPC440SPE_DESC_RXOR125,
+ &op);
+ } else {
+ /* do RXOR R1 R2 */
+ set_bit(PPC440SPE_DESC_RXOR12,
+ &op);
+ }
+ } else {
+ /* do RXOR R1 R2 */
+ set_bit(PPC440SPE_DESC_RXOR12, &op);
+ }
+ }
+
+ if (!test_bit(PPC440SPE_DESC_RXOR, &op)) {
+ /* can not do this operation with RXOR */
+ clear_bit(PPC440SPE_RXOR_RUN,
+ &ppc440spe_rxor_state);
+ } else {
+ /* can do; set block size right now */
+ ppc440spe_desc_set_rxor_block_size(len);
+ }
+ }
+
+ /* Number of necessary slots depends on operation type selected */
+ if (!test_bit(PPC440SPE_DESC_RXOR, &op)) {
+ /* This is a WXOR only chain. Need descriptors for each
+ * source to GF-XOR them with WXOR, and need descriptors
+ * for each destination to zero them with WXOR
+ */
+ slot_cnt = src_cnt;
+
+ if (flags & DMA_PREP_ZERO_P) {
+ slot_cnt++;
+ set_bit(PPC440SPE_ZERO_P, &op);
+ }
+ if (flags & DMA_PREP_ZERO_Q) {
+ slot_cnt++;
+ set_bit(PPC440SPE_ZERO_Q, &op);
+ }
+ } else {
+ /* Need 1/2 descriptor for RXOR operation, and
+ * need (src_cnt - (2 or 3)) for WXOR of sources
+ * remained (if any)
+ */
+ slot_cnt = dst_cnt;
+
+ if (flags & DMA_PREP_ZERO_P)
+ set_bit(PPC440SPE_ZERO_P, &op);
+ if (flags & DMA_PREP_ZERO_Q)
+ set_bit(PPC440SPE_ZERO_Q, &op);
+
+ if (test_bit(PPC440SPE_DESC_RXOR12, &op))
+ slot_cnt += src_cnt - 2;
+ else
+ slot_cnt += src_cnt - 3;
+
+ /* Thus we have either RXOR only chain or
+ * mixed RXOR/WXOR
+ */
+ if (slot_cnt == dst_cnt)
+ /* RXOR only chain */
+ clear_bit(PPC440SPE_DESC_WXOR, &op);
+ }
+
+ spin_lock_bh(&ppc440spe_chan->lock);
+ /* for both RXOR/WXOR each descriptor occupies one slot */
+ sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, 1);
+ if (sw_desc) {
+ ppc440spe_desc_init_dma01pq(sw_desc, dst_cnt, src_cnt,
+ flags, op);
+
+ /* setup dst/src/mult */
+ pr_debug("%s: set dst descriptor 0, 1: 0x%016llx, 0x%016llx\n",
+ __func__, dst[0], dst[1]);
+ ppc440spe_adma_pq_set_dest(sw_desc, dst, flags);
+ while (src_cnt--) {
+ ppc440spe_adma_pq_set_src(sw_desc, src[src_cnt],
+ src_cnt);
+
+ /* NOTE: "Multi = 0 is equivalent to = 1" as it
+ * stated in 440SPSPe_RAID6_Addendum_UM_1_17.pdf
+ * doesn't work for RXOR with DMA0/1! Instead, multi=0
+ * leads to zeroing source data after RXOR.
+ * So, for P case set-up mult=1 explicitly.
+ */
+ if (!(flags & DMA_PREP_PQ_DISABLE_Q))
+ mult = scf[src_cnt];
+ ppc440spe_adma_pq_set_src_mult(sw_desc,
+ mult, src_cnt, dst_cnt - 1);
+ }
+
+ /* Setup byte count foreach slot just allocated */
+ sw_desc->async_tx.flags = flags;
+ list_for_each_entry(iter, &sw_desc->group_list,
+ chain_node) {
+ ppc440spe_desc_set_byte_count(iter,
+ ppc440spe_chan, len);
+ iter->unmap_len = len;
+ }
+ }
+ spin_unlock_bh(&ppc440spe_chan->lock);
+
+ return sw_desc;
+}
+
+static struct ppc440spe_adma_desc_slot *ppc440spe_dma2_prep_pq(
+ struct ppc440spe_adma_chan *ppc440spe_chan,
+ dma_addr_t *dst, int dst_cnt, dma_addr_t *src, int src_cnt,
+ const unsigned char *scf, size_t len, unsigned long flags)
+{
+ int slot_cnt, descs_per_op;
+ struct ppc440spe_adma_desc_slot *sw_desc = NULL, *iter;
+ unsigned long op = 0;
+ unsigned char mult = 1;
+
+ BUG_ON(!dst_cnt);
+ /*pr_debug("%s: dst_cnt %d, src_cnt %d, len %d\n",
+ __func__, dst_cnt, src_cnt, len);*/
+
+ spin_lock_bh(&ppc440spe_chan->lock);
+ descs_per_op = ppc440spe_dma2_pq_slot_count(src, src_cnt, len);
+ if (descs_per_op < 0) {
+ spin_unlock_bh(&ppc440spe_chan->lock);
+ return NULL;
+ }
+
+ /* depending on number of sources we have 1 or 2 RXOR chains */
+ slot_cnt = descs_per_op * dst_cnt;
+
+ sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, 1);
+ if (sw_desc) {
+ op = slot_cnt;
+ sw_desc->async_tx.flags = flags;
+ list_for_each_entry(iter, &sw_desc->group_list, chain_node) {
+ ppc440spe_desc_init_dma2pq(iter, dst_cnt, src_cnt,
+ --op ? 0 : flags);
+ ppc440spe_desc_set_byte_count(iter, ppc440spe_chan,
+ len);
+ iter->unmap_len = len;
+
+ ppc440spe_init_rxor_cursor(&(iter->rxor_cursor));
+ iter->rxor_cursor.len = len;
+ iter->descs_per_op = descs_per_op;
+ }
+ op = 0;
+ list_for_each_entry(iter, &sw_desc->group_list, chain_node) {
+ op++;
+ if (op % descs_per_op == 0)
+ ppc440spe_adma_init_dma2rxor_slot(iter, src,
+ src_cnt);
+ if (likely(!list_is_last(&iter->chain_node,
+ &sw_desc->group_list))) {
+ /* set 'next' pointer */
+ iter->hw_next =
+ list_entry(iter->chain_node.next,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+ ppc440spe_xor_set_link(iter, iter->hw_next);
+ } else {
+ /* this is the last descriptor. */
+ iter->hw_next = NULL;
+ }
+ }
+
+ /* fixup head descriptor */
+ sw_desc->dst_cnt = dst_cnt;
+ if (flags & DMA_PREP_ZERO_P)
+ set_bit(PPC440SPE_ZERO_P, &sw_desc->flags);
+ if (flags & DMA_PREP_ZERO_Q)
+ set_bit(PPC440SPE_ZERO_Q, &sw_desc->flags);
+
+ /* setup dst/src/mult */
+ ppc440spe_adma_pq_set_dest(sw_desc, dst, flags);
+
+ while (src_cnt--) {
+ /* handle descriptors (if dst_cnt == 2) inside
+ * the ppc440spe_adma_pq_set_srcxxx() functions
+ */
+ ppc440spe_adma_pq_set_src(sw_desc, src[src_cnt],
+ src_cnt);
+ if (!(flags & DMA_PREP_PQ_DISABLE_Q))
+ mult = scf[src_cnt];
+ ppc440spe_adma_pq_set_src_mult(sw_desc,
+ mult, src_cnt, dst_cnt - 1);
+ }
+ }
+ spin_unlock_bh(&ppc440spe_chan->lock);
+ ppc440spe_desc_set_rxor_block_size(len);
+ return sw_desc;
+}
+
+/**
+ * ppc440spe_adma_prep_dma_pq - prepare CDB (group) for a GF-XOR operation
+ */
+static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_pq(
+ struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
+ unsigned int src_cnt, const unsigned char *scf,
+ size_t len, unsigned long flags)
+{
+ struct ppc440spe_adma_chan *ppc440spe_chan;
+ struct ppc440spe_adma_desc_slot *sw_desc = NULL;
+ int dst_cnt = 0;
+
+ ppc440spe_chan = to_ppc440spe_adma_chan(chan);
+
+ ADMA_LL_DBG(prep_dma_pq_dbg(ppc440spe_chan->device->id,
+ dst, src, src_cnt));
+ BUG_ON(!len);
+ BUG_ON(unlikely(len > PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT));
+ BUG_ON(!src_cnt);
+
+ if (src_cnt == 1 && dst[1] == src[0]) {
+ dma_addr_t dest[2];
+
+ /* dst[1] is real destination (Q) */
+ dest[0] = dst[1];
+ /* this is the page to multicast source data to */
+ dest[1] = ppc440spe_chan->qdest;
+ sw_desc = ppc440spe_dma01_prep_mult(ppc440spe_chan,
+ dest, 2, src, src_cnt, scf, len, flags);
+ return sw_desc ? &sw_desc->async_tx : NULL;
+ }
+
+ if (src_cnt == 2 && dst[1] == src[1]) {
+ sw_desc = ppc440spe_dma01_prep_sum_product(ppc440spe_chan,
+ &dst[1], src, 2, scf, len, flags);
+ return sw_desc ? &sw_desc->async_tx : NULL;
+ }
+
+ if (!(flags & DMA_PREP_PQ_DISABLE_P)) {
+ BUG_ON(!dst[0]);
+ dst_cnt++;
+ flags |= DMA_PREP_ZERO_P;
+ }
+
+ if (!(flags & DMA_PREP_PQ_DISABLE_Q)) {
+ BUG_ON(!dst[1]);
+ dst_cnt++;
+ flags |= DMA_PREP_ZERO_Q;
+ }
+
+ BUG_ON(!dst_cnt);
+
+ dev_dbg(ppc440spe_chan->device->common.dev,
+ "ppc440spe adma%d: %s src_cnt: %d len: %u int_en: %d\n",
+ ppc440spe_chan->device->id, __func__, src_cnt, len,
+ flags & DMA_PREP_INTERRUPT ? 1 : 0);
+
+ switch (ppc440spe_chan->device->id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ sw_desc = ppc440spe_dma01_prep_pq(ppc440spe_chan,
+ dst, dst_cnt, src, src_cnt, scf,
+ len, flags);
+ break;
+
+ case PPC440SPE_XOR_ID:
+ sw_desc = ppc440spe_dma2_prep_pq(ppc440spe_chan,
+ dst, dst_cnt, src, src_cnt, scf,
+ len, flags);
+ break;
+ }
+
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+/**
+ * ppc440spe_adma_prep_dma_pqzero_sum - prepare CDB group for
+ * a PQ_ZERO_SUM operation
+ */
+static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_pqzero_sum(
+ struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
+ unsigned int src_cnt, const unsigned char *scf, size_t len,
+ enum sum_check_flags *pqres, unsigned long flags)
+{
+ struct ppc440spe_adma_chan *ppc440spe_chan;
+ struct ppc440spe_adma_desc_slot *sw_desc, *iter;
+ dma_addr_t pdest, qdest;
+ int slot_cnt, slots_per_op, idst, dst_cnt;
+
+ ppc440spe_chan = to_ppc440spe_adma_chan(chan);
+
+ if (flags & DMA_PREP_PQ_DISABLE_P)
+ pdest = 0;
+ else
+ pdest = pq[0];
+
+ if (flags & DMA_PREP_PQ_DISABLE_Q)
+ qdest = 0;
+ else
+ qdest = pq[1];
+
+ ADMA_LL_DBG(prep_dma_pqzero_sum_dbg(ppc440spe_chan->device->id,
+ src, src_cnt, scf));
+
+ /* Always use WXOR for P/Q calculations (two destinations).
+ * Need 1 or 2 extra slots to verify results are zero.
+ */
+ idst = dst_cnt = (pdest && qdest) ? 2 : 1;
+
+ /* One additional slot per destination to clone P/Q
+ * before calculation (we have to preserve destinations).
+ */
+ slot_cnt = src_cnt + dst_cnt * 2;
+ slots_per_op = 1;
+
+ spin_lock_bh(&ppc440spe_chan->lock);
+ sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt,
+ slots_per_op);
+ if (sw_desc) {
+ ppc440spe_desc_init_dma01pqzero_sum(sw_desc, dst_cnt, src_cnt);
+
+ /* Setup byte count for each slot just allocated */
+ sw_desc->async_tx.flags = flags;
+ list_for_each_entry(iter, &sw_desc->group_list, chain_node) {
+ ppc440spe_desc_set_byte_count(iter, ppc440spe_chan,
+ len);
+ iter->unmap_len = len;
+ }
+
+ if (pdest) {
+ struct dma_cdb *hw_desc;
+ struct ppc440spe_adma_chan *chan;
+
+ iter = sw_desc->group_head;
+ chan = to_ppc440spe_adma_chan(iter->async_tx.chan);
+ memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+ iter->hw_next = list_entry(iter->chain_node.next,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+ hw_desc = iter->hw_desc;
+ hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+ iter->src_cnt = 0;
+ iter->dst_cnt = 0;
+ ppc440spe_desc_set_dest_addr(iter, chan, 0,
+ ppc440spe_chan->pdest, 0);
+ ppc440spe_desc_set_src_addr(iter, chan, 0, 0, pdest);
+ ppc440spe_desc_set_byte_count(iter, ppc440spe_chan,
+ len);
+ iter->unmap_len = 0;
+ /* override pdest to preserve original P */
+ pdest = ppc440spe_chan->pdest;
+ }
+ if (qdest) {
+ struct dma_cdb *hw_desc;
+ struct ppc440spe_adma_chan *chan;
+
+ iter = list_first_entry(&sw_desc->group_list,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+ chan = to_ppc440spe_adma_chan(iter->async_tx.chan);
+
+ if (pdest) {
+ iter = list_entry(iter->chain_node.next,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+ }
+
+ memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+ iter->hw_next = list_entry(iter->chain_node.next,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+ hw_desc = iter->hw_desc;
+ hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+ iter->src_cnt = 0;
+ iter->dst_cnt = 0;
+ ppc440spe_desc_set_dest_addr(iter, chan, 0,
+ ppc440spe_chan->qdest, 0);
+ ppc440spe_desc_set_src_addr(iter, chan, 0, 0, qdest);
+ ppc440spe_desc_set_byte_count(iter, ppc440spe_chan,
+ len);
+ iter->unmap_len = 0;
+ /* override qdest to preserve original Q */
+ qdest = ppc440spe_chan->qdest;
+ }
+
+ /* Setup destinations for P/Q ops */
+ ppc440spe_adma_pqzero_sum_set_dest(sw_desc, pdest, qdest);
+
+ /* Setup zero QWORDs into DCHECK CDBs */
+ idst = dst_cnt;
+ list_for_each_entry_reverse(iter, &sw_desc->group_list,
+ chain_node) {
+ /*
+ * The last CDB corresponds to Q-parity check,
+ * the one before last CDB corresponds
+ * P-parity check
+ */
+ if (idst == DMA_DEST_MAX_NUM) {
+ if (idst == dst_cnt) {
+ set_bit(PPC440SPE_DESC_QCHECK,
+ &iter->flags);
+ } else {
+ set_bit(PPC440SPE_DESC_PCHECK,
+ &iter->flags);
+ }
+ } else {
+ if (qdest) {
+ set_bit(PPC440SPE_DESC_QCHECK,
+ &iter->flags);
+ } else {
+ set_bit(PPC440SPE_DESC_PCHECK,
+ &iter->flags);
+ }
+ }
+ iter->xor_check_result = pqres;
+
+ /*
+ * set it to zero, if check fail then result will
+ * be updated
+ */
+ *iter->xor_check_result = 0;
+ ppc440spe_desc_set_dcheck(iter, ppc440spe_chan,
+ ppc440spe_qword);
+
+ if (!(--dst_cnt))
+ break;
+ }
+
+ /* Setup sources and mults for P/Q ops */
+ list_for_each_entry_continue_reverse(iter, &sw_desc->group_list,
+ chain_node) {
+ struct ppc440spe_adma_chan *chan;
+ u32 mult_dst;
+
+ chan = to_ppc440spe_adma_chan(iter->async_tx.chan);
+ ppc440spe_desc_set_src_addr(iter, chan, 0,
+ DMA_CUED_XOR_HB,
+ src[src_cnt - 1]);
+ if (qdest) {
+ mult_dst = (dst_cnt - 1) ? DMA_CDB_SG_DST2 :
+ DMA_CDB_SG_DST1;
+ ppc440spe_desc_set_src_mult(iter, chan,
+ DMA_CUED_MULT1_OFF,
+ mult_dst,
+ scf[src_cnt - 1]);
+ }
+ if (!(--src_cnt))
+ break;
+ }
+ }
+ spin_unlock_bh(&ppc440spe_chan->lock);
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+/**
+ * ppc440spe_adma_prep_dma_xor_zero_sum - prepare CDB group for
+ * XOR ZERO_SUM operation
+ */
+static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_xor_zero_sum(
+ struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt,
+ size_t len, enum sum_check_flags *result, unsigned long flags)
+{
+ struct dma_async_tx_descriptor *tx;
+ dma_addr_t pq[2];
+
+ /* validate P, disable Q */
+ pq[0] = src[0];
+ pq[1] = 0;
+ flags |= DMA_PREP_PQ_DISABLE_Q;
+
+ tx = ppc440spe_adma_prep_dma_pqzero_sum(chan, pq, &src[1],
+ src_cnt - 1, 0, len,
+ result, flags);
+ return tx;
+}
+
+/**
+ * ppc440spe_adma_set_dest - set destination address into descriptor
+ */
+static void ppc440spe_adma_set_dest(struct ppc440spe_adma_desc_slot *sw_desc,
+ dma_addr_t addr, int index)
+{
+ struct ppc440spe_adma_chan *chan;
+
+ BUG_ON(index >= sw_desc->dst_cnt);
+
+ chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
+
+ switch (chan->device->id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ /* to do: support transfers lengths >
+ * PPC440SPE_ADMA_DMA/XOR_MAX_BYTE_COUNT
+ */
+ ppc440spe_desc_set_dest_addr(sw_desc->group_head,
+ chan, 0, addr, index);
+ break;
+ case PPC440SPE_XOR_ID:
+ sw_desc = ppc440spe_get_group_entry(sw_desc, index);
+ ppc440spe_desc_set_dest_addr(sw_desc,
+ chan, 0, addr, index);
+ break;
+ }
+}
+
+static void ppc440spe_adma_pq_zero_op(struct ppc440spe_adma_desc_slot *iter,
+ struct ppc440spe_adma_chan *chan, dma_addr_t addr)
+{
+ /* To clear destinations update the descriptor
+ * (P or Q depending on index) as follows:
+ * addr is destination (0 corresponds to SG2):
+ */
+ ppc440spe_desc_set_dest_addr(iter, chan, DMA_CUED_XOR_BASE, addr, 0);
+
+ /* ... and the addr is source: */
+ ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB, addr);
+
+ /* addr is always SG2 then the mult is always DST1 */
+ ppc440spe_desc_set_src_mult(iter, chan, DMA_CUED_MULT1_OFF,
+ DMA_CDB_SG_DST1, 1);
+}
+
+/**
+ * ppc440spe_adma_pq_set_dest - set destination address into descriptor
+ * for the PQXOR operation
+ */
+static void ppc440spe_adma_pq_set_dest(struct ppc440spe_adma_desc_slot *sw_desc,
+ dma_addr_t *addrs, unsigned long flags)
+{
+ struct ppc440spe_adma_desc_slot *iter;
+ struct ppc440spe_adma_chan *chan;
+ dma_addr_t paddr, qaddr;
+ dma_addr_t addr = 0, ppath, qpath;
+ int index = 0, i;
+
+ chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
+
+ if (flags & DMA_PREP_PQ_DISABLE_P)
+ paddr = 0;
+ else
+ paddr = addrs[0];
+
+ if (flags & DMA_PREP_PQ_DISABLE_Q)
+ qaddr = 0;
+ else
+ qaddr = addrs[1];
+
+ if (!paddr || !qaddr)
+ addr = paddr ? paddr : qaddr;
+
+ switch (chan->device->id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ /* walk through the WXOR source list and set P/Q-destinations
+ * for each slot:
+ */
+ if (!test_bit(PPC440SPE_DESC_RXOR, &sw_desc->flags)) {
+ /* This is WXOR-only chain; may have 1/2 zero descs */
+ if (test_bit(PPC440SPE_ZERO_P, &sw_desc->flags))
+ index++;
+ if (test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags))
+ index++;
+
+ iter = ppc440spe_get_group_entry(sw_desc, index);
+ if (addr) {
+ /* one destination */
+ list_for_each_entry_from(iter,
+ &sw_desc->group_list, chain_node)
+ ppc440spe_desc_set_dest_addr(iter, chan,
+ DMA_CUED_XOR_BASE, addr, 0);
+ } else {
+ /* two destinations */
+ list_for_each_entry_from(iter,
+ &sw_desc->group_list, chain_node) {
+ ppc440spe_desc_set_dest_addr(iter, chan,
+ DMA_CUED_XOR_BASE, paddr, 0);
+ ppc440spe_desc_set_dest_addr(iter, chan,
+ DMA_CUED_XOR_BASE, qaddr, 1);
+ }
+ }
+
+ if (index) {
+ /* To clear destinations update the descriptor
+ * (1st,2nd, or both depending on flags)
+ */
+ index = 0;
+ if (test_bit(PPC440SPE_ZERO_P,
+ &sw_desc->flags)) {
+ iter = ppc440spe_get_group_entry(
+ sw_desc, index++);
+ ppc440spe_adma_pq_zero_op(iter, chan,
+ paddr);
+ }
+
+ if (test_bit(PPC440SPE_ZERO_Q,
+ &sw_desc->flags)) {
+ iter = ppc440spe_get_group_entry(
+ sw_desc, index++);
+ ppc440spe_adma_pq_zero_op(iter, chan,
+ qaddr);
+ }
+
+ return;
+ }
+ } else {
+ /* This is RXOR-only or RXOR/WXOR mixed chain */
+
+ /* If we want to include destination into calculations,
+ * then make dest addresses cued with mult=1 (XOR).
+ */
+ ppath = test_bit(PPC440SPE_ZERO_P, &sw_desc->flags) ?
+ DMA_CUED_XOR_HB :
+ DMA_CUED_XOR_BASE |
+ (1 << DMA_CUED_MULT1_OFF);
+ qpath = test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags) ?
+ DMA_CUED_XOR_HB :
+ DMA_CUED_XOR_BASE |
+ (1 << DMA_CUED_MULT1_OFF);
+
+ /* Setup destination(s) in RXOR slot(s) */
+ iter = ppc440spe_get_group_entry(sw_desc, index++);
+ ppc440spe_desc_set_dest_addr(iter, chan,
+ paddr ? ppath : qpath,
+ paddr ? paddr : qaddr, 0);
+ if (!addr) {
+ /* two destinations */
+ iter = ppc440spe_get_group_entry(sw_desc,
+ index++);
+ ppc440spe_desc_set_dest_addr(iter, chan,
+ qpath, qaddr, 0);
+ }
+
+ if (test_bit(PPC440SPE_DESC_WXOR, &sw_desc->flags)) {
+ /* Setup destination(s) in remaining WXOR
+ * slots
+ */
+ iter = ppc440spe_get_group_entry(sw_desc,
+ index);
+ if (addr) {
+ /* one destination */
+ list_for_each_entry_from(iter,
+ &sw_desc->group_list,
+ chain_node)
+ ppc440spe_desc_set_dest_addr(
+ iter, chan,
+ DMA_CUED_XOR_BASE,
+ addr, 0);
+
+ } else {
+ /* two destinations */
+ list_for_each_entry_from(iter,
+ &sw_desc->group_list,
+ chain_node) {
+ ppc440spe_desc_set_dest_addr(
+ iter, chan,
+ DMA_CUED_XOR_BASE,
+ paddr, 0);
+ ppc440spe_desc_set_dest_addr(
+ iter, chan,
+ DMA_CUED_XOR_BASE,
+ qaddr, 1);
+ }
+ }
+ }
+
+ }
+ break;
+
+ case PPC440SPE_XOR_ID:
+ /* DMA2 descriptors have only 1 destination, so there are
+ * two chains - one for each dest.
+ * If we want to include destination into calculations,
+ * then make dest addresses cued with mult=1 (XOR).
+ */
+ ppath = test_bit(PPC440SPE_ZERO_P, &sw_desc->flags) ?
+ DMA_CUED_XOR_HB :
+ DMA_CUED_XOR_BASE |
+ (1 << DMA_CUED_MULT1_OFF);
+
+ qpath = test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags) ?
+ DMA_CUED_XOR_HB :
+ DMA_CUED_XOR_BASE |
+ (1 << DMA_CUED_MULT1_OFF);
+
+ iter = ppc440spe_get_group_entry(sw_desc, 0);
+ for (i = 0; i < sw_desc->descs_per_op; i++) {
+ ppc440spe_desc_set_dest_addr(iter, chan,
+ paddr ? ppath : qpath,
+ paddr ? paddr : qaddr, 0);
+ iter = list_entry(iter->chain_node.next,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+ }
+
+ if (!addr) {
+ /* Two destinations; setup Q here */
+ iter = ppc440spe_get_group_entry(sw_desc,
+ sw_desc->descs_per_op);
+ for (i = 0; i < sw_desc->descs_per_op; i++) {
+ ppc440spe_desc_set_dest_addr(iter,
+ chan, qpath, qaddr, 0);
+ iter = list_entry(iter->chain_node.next,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+ }
+ }
+
+ break;
+ }
+}
+
+/**
+ * ppc440spe_adma_pq_zero_sum_set_dest - set destination address into descriptor
+ * for the PQ_ZERO_SUM operation
+ */
+static void ppc440spe_adma_pqzero_sum_set_dest(
+ struct ppc440spe_adma_desc_slot *sw_desc,
+ dma_addr_t paddr, dma_addr_t qaddr)
+{
+ struct ppc440spe_adma_desc_slot *iter, *end;
+ struct ppc440spe_adma_chan *chan;
+ dma_addr_t addr = 0;
+ int idx;
+
+ chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
+
+ /* walk through the WXOR source list and set P/Q-destinations
+ * for each slot
+ */
+ idx = (paddr && qaddr) ? 2 : 1;
+ /* set end */
+ list_for_each_entry_reverse(end, &sw_desc->group_list,
+ chain_node) {
+ if (!(--idx))
+ break;
+ }
+ /* set start */
+ idx = (paddr && qaddr) ? 2 : 1;
+ iter = ppc440spe_get_group_entry(sw_desc, idx);
+
+ if (paddr && qaddr) {
+ /* two destinations */
+ list_for_each_entry_from(iter, &sw_desc->group_list,
+ chain_node) {
+ if (unlikely(iter == end))
+ break;
+ ppc440spe_desc_set_dest_addr(iter, chan,
+ DMA_CUED_XOR_BASE, paddr, 0);
+ ppc440spe_desc_set_dest_addr(iter, chan,
+ DMA_CUED_XOR_BASE, qaddr, 1);
+ }
+ } else {
+ /* one destination */
+ addr = paddr ? paddr : qaddr;
+ list_for_each_entry_from(iter, &sw_desc->group_list,
+ chain_node) {
+ if (unlikely(iter == end))
+ break;
+ ppc440spe_desc_set_dest_addr(iter, chan,
+ DMA_CUED_XOR_BASE, addr, 0);
+ }
+ }
+
+ /* The remaining descriptors are DATACHECK. These have no need in
+ * destination. Actually, these destinations are used there
+ * as sources for check operation. So, set addr as source.
+ */
+ ppc440spe_desc_set_src_addr(end, chan, 0, 0, addr ? addr : paddr);
+
+ if (!addr) {
+ end = list_entry(end->chain_node.next,
+ struct ppc440spe_adma_desc_slot, chain_node);
+ ppc440spe_desc_set_src_addr(end, chan, 0, 0, qaddr);
+ }
+}
+
+/**
+ * ppc440spe_desc_set_xor_src_cnt - set source count into descriptor
+ */
+static inline void ppc440spe_desc_set_xor_src_cnt(
+ struct ppc440spe_adma_desc_slot *desc,
+ int src_cnt)
+{
+ struct xor_cb *hw_desc = desc->hw_desc;
+
+ hw_desc->cbc &= ~XOR_CDCR_OAC_MSK;
+ hw_desc->cbc |= src_cnt;
+}
+
+/**
+ * ppc440spe_adma_pq_set_src - set source address into descriptor
+ */
+static void ppc440spe_adma_pq_set_src(struct ppc440spe_adma_desc_slot *sw_desc,
+ dma_addr_t addr, int index)
+{
+ struct ppc440spe_adma_chan *chan;
+ dma_addr_t haddr = 0;
+ struct ppc440spe_adma_desc_slot *iter = NULL;
+
+ chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
+
+ switch (chan->device->id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ /* DMA0,1 may do: WXOR, RXOR, RXOR+WXORs chain
+ */
+ if (test_bit(PPC440SPE_DESC_RXOR, &sw_desc->flags)) {
+ /* RXOR-only or RXOR/WXOR operation */
+ int iskip = test_bit(PPC440SPE_DESC_RXOR12,
+ &sw_desc->flags) ? 2 : 3;
+
+ if (index == 0) {
+ /* 1st slot (RXOR) */
+ /* setup sources region (R1-2-3, R1-2-4,
+ * or R1-2-5)
+ */
+ if (test_bit(PPC440SPE_DESC_RXOR12,
+ &sw_desc->flags))
+ haddr = DMA_RXOR12 <<
+ DMA_CUED_REGION_OFF;
+ else if (test_bit(PPC440SPE_DESC_RXOR123,
+ &sw_desc->flags))
+ haddr = DMA_RXOR123 <<
+ DMA_CUED_REGION_OFF;
+ else if (test_bit(PPC440SPE_DESC_RXOR124,
+ &sw_desc->flags))
+ haddr = DMA_RXOR124 <<
+ DMA_CUED_REGION_OFF;
+ else if (test_bit(PPC440SPE_DESC_RXOR125,
+ &sw_desc->flags))
+ haddr = DMA_RXOR125 <<
+ DMA_CUED_REGION_OFF;
+ else
+ BUG();
+ haddr |= DMA_CUED_XOR_BASE;
+ iter = ppc440spe_get_group_entry(sw_desc, 0);
+ } else if (index < iskip) {
+ /* 1st slot (RXOR)
+ * shall actually set source address only once
+ * instead of first <iskip>
+ */
+ iter = NULL;
+ } else {
+ /* 2nd/3d and next slots (WXOR);
+ * skip first slot with RXOR
+ */
+ haddr = DMA_CUED_XOR_HB;
+ iter = ppc440spe_get_group_entry(sw_desc,
+ index - iskip + sw_desc->dst_cnt);
+ }
+ } else {
+ int znum = 0;
+
+ /* WXOR-only operation; skip first slots with
+ * zeroing destinations
+ */
+ if (test_bit(PPC440SPE_ZERO_P, &sw_desc->flags))
+ znum++;
+ if (test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags))
+ znum++;
+
+ haddr = DMA_CUED_XOR_HB;
+ iter = ppc440spe_get_group_entry(sw_desc,
+ index + znum);
+ }
+
+ if (likely(iter)) {
+ ppc440spe_desc_set_src_addr(iter, chan, 0, haddr, addr);
+
+ if (!index &&
+ test_bit(PPC440SPE_DESC_RXOR, &sw_desc->flags) &&
+ sw_desc->dst_cnt == 2) {
+ /* if we have two destinations for RXOR, then
+ * setup source in the second descr too
+ */
+ iter = ppc440spe_get_group_entry(sw_desc, 1);
+ ppc440spe_desc_set_src_addr(iter, chan, 0,
+ haddr, addr);
+ }
+ }
+ break;
+
+ case PPC440SPE_XOR_ID:
+ /* DMA2 may do Biskup */
+ iter = sw_desc->group_head;
+ if (iter->dst_cnt == 2) {
+ /* both P & Q calculations required; set P src here */
+ ppc440spe_adma_dma2rxor_set_src(iter, index, addr);
+
+ /* this is for Q */
+ iter = ppc440spe_get_group_entry(sw_desc,
+ sw_desc->descs_per_op);
+ }
+ ppc440spe_adma_dma2rxor_set_src(iter, index, addr);
+ break;
+ }
+}
+
+/**
+ * ppc440spe_adma_memcpy_xor_set_src - set source address into descriptor
+ */
+static void ppc440spe_adma_memcpy_xor_set_src(
+ struct ppc440spe_adma_desc_slot *sw_desc,
+ dma_addr_t addr, int index)
+{
+ struct ppc440spe_adma_chan *chan;
+
+ chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
+ sw_desc = sw_desc->group_head;
+
+ if (likely(sw_desc))
+ ppc440spe_desc_set_src_addr(sw_desc, chan, index, 0, addr);
+}
+
+/**
+ * ppc440spe_adma_dma2rxor_inc_addr -
+ */
+static void ppc440spe_adma_dma2rxor_inc_addr(
+ struct ppc440spe_adma_desc_slot *desc,
+ struct ppc440spe_rxor *cursor, int index, int src_cnt)
+{
+ cursor->addr_count++;
+ if (index == src_cnt - 1) {
+ ppc440spe_desc_set_xor_src_cnt(desc, cursor->addr_count);
+ } else if (cursor->addr_count == XOR_MAX_OPS) {
+ ppc440spe_desc_set_xor_src_cnt(desc, cursor->addr_count);
+ cursor->addr_count = 0;
+ cursor->desc_count++;
+ }
+}
+
+/**
+ * ppc440spe_adma_dma2rxor_prep_src - setup RXOR types in DMA2 CDB
+ */
+static int ppc440spe_adma_dma2rxor_prep_src(
+ struct ppc440spe_adma_desc_slot *hdesc,
+ struct ppc440spe_rxor *cursor, int index,
+ int src_cnt, u32 addr)
+{
+ int rval = 0;
+ u32 sign;
+ struct ppc440spe_adma_desc_slot *desc = hdesc;
+ int i;
+
+ for (i = 0; i < cursor->desc_count; i++) {
+ desc = list_entry(hdesc->chain_node.next,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+ }
+
+ switch (cursor->state) {
+ case 0:
+ if (addr == cursor->addrl + cursor->len) {
+ /* direct RXOR */
+ cursor->state = 1;
+ cursor->xor_count++;
+ if (index == src_cnt-1) {
+ ppc440spe_rxor_set_region(desc,
+ cursor->addr_count,
+ DMA_RXOR12 << DMA_CUED_REGION_OFF);
+ ppc440spe_adma_dma2rxor_inc_addr(
+ desc, cursor, index, src_cnt);
+ }
+ } else if (cursor->addrl == addr + cursor->len) {
+ /* reverse RXOR */
+ cursor->state = 1;
+ cursor->xor_count++;
+ set_bit(cursor->addr_count, &desc->reverse_flags[0]);
+ if (index == src_cnt-1) {
+ ppc440spe_rxor_set_region(desc,
+ cursor->addr_count,
+ DMA_RXOR12 << DMA_CUED_REGION_OFF);
+ ppc440spe_adma_dma2rxor_inc_addr(
+ desc, cursor, index, src_cnt);
+ }
+ } else {
+ printk(KERN_ERR "Cannot build "
+ "DMA2 RXOR command block.\n");
+ BUG();
+ }
+ break;
+ case 1:
+ sign = test_bit(cursor->addr_count,
+ desc->reverse_flags)
+ ? -1 : 1;
+ if (index == src_cnt-2 || (sign == -1
+ && addr != cursor->addrl - 2*cursor->len)) {
+ cursor->state = 0;
+ cursor->xor_count = 1;
+ cursor->addrl = addr;
+ ppc440spe_rxor_set_region(desc,
+ cursor->addr_count,
+ DMA_RXOR12 << DMA_CUED_REGION_OFF);
+ ppc440spe_adma_dma2rxor_inc_addr(
+ desc, cursor, index, src_cnt);
+ } else if (addr == cursor->addrl + 2*sign*cursor->len) {
+ cursor->state = 2;
+ cursor->xor_count = 0;
+ ppc440spe_rxor_set_region(desc,
+ cursor->addr_count,
+ DMA_RXOR123 << DMA_CUED_REGION_OFF);
+ if (index == src_cnt-1) {
+ ppc440spe_adma_dma2rxor_inc_addr(
+ desc, cursor, index, src_cnt);
+ }
+ } else if (addr == cursor->addrl + 3*cursor->len) {
+ cursor->state = 2;
+ cursor->xor_count = 0;
+ ppc440spe_rxor_set_region(desc,
+ cursor->addr_count,
+ DMA_RXOR124 << DMA_CUED_REGION_OFF);
+ if (index == src_cnt-1) {
+ ppc440spe_adma_dma2rxor_inc_addr(
+ desc, cursor, index, src_cnt);
+ }
+ } else if (addr == cursor->addrl + 4*cursor->len) {
+ cursor->state = 2;
+ cursor->xor_count = 0;
+ ppc440spe_rxor_set_region(desc,
+ cursor->addr_count,
+ DMA_RXOR125 << DMA_CUED_REGION_OFF);
+ if (index == src_cnt-1) {
+ ppc440spe_adma_dma2rxor_inc_addr(
+ desc, cursor, index, src_cnt);
+ }
+ } else {
+ cursor->state = 0;
+ cursor->xor_count = 1;
+ cursor->addrl = addr;
+ ppc440spe_rxor_set_region(desc,
+ cursor->addr_count,
+ DMA_RXOR12 << DMA_CUED_REGION_OFF);
+ ppc440spe_adma_dma2rxor_inc_addr(
+ desc, cursor, index, src_cnt);
+ }
+ break;
+ case 2:
+ cursor->state = 0;
+ cursor->addrl = addr;
+ cursor->xor_count++;
+ if (index) {
+ ppc440spe_adma_dma2rxor_inc_addr(
+ desc, cursor, index, src_cnt);
+ }
+ break;
+ }
+
+ return rval;
+}
+
+/**
+ * ppc440spe_adma_dma2rxor_set_src - set RXOR source address; it's assumed that
+ * ppc440spe_adma_dma2rxor_prep_src() has already done prior this call
+ */
+static void ppc440spe_adma_dma2rxor_set_src(
+ struct ppc440spe_adma_desc_slot *desc,
+ int index, dma_addr_t addr)
+{
+ struct xor_cb *xcb = desc->hw_desc;
+ int k = 0, op = 0, lop = 0;
+
+ /* get the RXOR operand which corresponds to index addr */
+ while (op <= index) {
+ lop = op;
+ if (k == XOR_MAX_OPS) {
+ k = 0;
+ desc = list_entry(desc->chain_node.next,
+ struct ppc440spe_adma_desc_slot, chain_node);
+ xcb = desc->hw_desc;
+
+ }
+ if ((xcb->ops[k++].h & (DMA_RXOR12 << DMA_CUED_REGION_OFF)) ==
+ (DMA_RXOR12 << DMA_CUED_REGION_OFF))
+ op += 2;
+ else
+ op += 3;
+ }
+
+ BUG_ON(k < 1);
+
+ if (test_bit(k-1, desc->reverse_flags)) {
+ /* reverse operand order; put last op in RXOR group */
+ if (index == op - 1)
+ ppc440spe_rxor_set_src(desc, k - 1, addr);
+ } else {
+ /* direct operand order; put first op in RXOR group */
+ if (index == lop)
+ ppc440spe_rxor_set_src(desc, k - 1, addr);
+ }
+}
+
+/**
+ * ppc440spe_adma_dma2rxor_set_mult - set RXOR multipliers; it's assumed that
+ * ppc440spe_adma_dma2rxor_prep_src() has already done prior this call
+ */
+static void ppc440spe_adma_dma2rxor_set_mult(
+ struct ppc440spe_adma_desc_slot *desc,
+ int index, u8 mult)
+{
+ struct xor_cb *xcb = desc->hw_desc;
+ int k = 0, op = 0, lop = 0;
+
+ /* get the RXOR operand which corresponds to index mult */
+ while (op <= index) {
+ lop = op;
+ if (k == XOR_MAX_OPS) {
+ k = 0;
+ desc = list_entry(desc->chain_node.next,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+ xcb = desc->hw_desc;
+
+ }
+ if ((xcb->ops[k++].h & (DMA_RXOR12 << DMA_CUED_REGION_OFF)) ==
+ (DMA_RXOR12 << DMA_CUED_REGION_OFF))
+ op += 2;
+ else
+ op += 3;
+ }
+
+ BUG_ON(k < 1);
+ if (test_bit(k-1, desc->reverse_flags)) {
+ /* reverse order */
+ ppc440spe_rxor_set_mult(desc, k - 1, op - index - 1, mult);
+ } else {
+ /* direct order */
+ ppc440spe_rxor_set_mult(desc, k - 1, index - lop, mult);
+ }
+}
+
+/**
+ * ppc440spe_init_rxor_cursor -
+ */
+static void ppc440spe_init_rxor_cursor(struct ppc440spe_rxor *cursor)
+{
+ memset(cursor, 0, sizeof(struct ppc440spe_rxor));
+ cursor->state = 2;
+}
+
+/**
+ * ppc440spe_adma_pq_set_src_mult - set multiplication coefficient into
+ * descriptor for the PQXOR operation
+ */
+static void ppc440spe_adma_pq_set_src_mult(
+ struct ppc440spe_adma_desc_slot *sw_desc,
+ unsigned char mult, int index, int dst_pos)
+{
+ struct ppc440spe_adma_chan *chan;
+ u32 mult_idx, mult_dst;
+ struct ppc440spe_adma_desc_slot *iter = NULL, *iter1 = NULL;
+
+ chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
+
+ switch (chan->device->id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ if (test_bit(PPC440SPE_DESC_RXOR, &sw_desc->flags)) {
+ int region = test_bit(PPC440SPE_DESC_RXOR12,
+ &sw_desc->flags) ? 2 : 3;
+
+ if (index < region) {
+ /* RXOR multipliers */
+ iter = ppc440spe_get_group_entry(sw_desc,
+ sw_desc->dst_cnt - 1);
+ if (sw_desc->dst_cnt == 2)
+ iter1 = ppc440spe_get_group_entry(
+ sw_desc, 0);
+
+ mult_idx = DMA_CUED_MULT1_OFF + (index << 3);
+ mult_dst = DMA_CDB_SG_SRC;
+ } else {
+ /* WXOR multiplier */
+ iter = ppc440spe_get_group_entry(sw_desc,
+ index - region +
+ sw_desc->dst_cnt);
+ mult_idx = DMA_CUED_MULT1_OFF;
+ mult_dst = dst_pos ? DMA_CDB_SG_DST2 :
+ DMA_CDB_SG_DST1;
+ }
+ } else {
+ int znum = 0;
+
+ /* WXOR-only;
+ * skip first slots with destinations (if ZERO_DST has
+ * place)
+ */
+ if (test_bit(PPC440SPE_ZERO_P, &sw_desc->flags))
+ znum++;
+ if (test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags))
+ znum++;
+
+ iter = ppc440spe_get_group_entry(sw_desc, index + znum);
+ mult_idx = DMA_CUED_MULT1_OFF;
+ mult_dst = dst_pos ? DMA_CDB_SG_DST2 : DMA_CDB_SG_DST1;
+ }
+
+ if (likely(iter)) {
+ ppc440spe_desc_set_src_mult(iter, chan,
+ mult_idx, mult_dst, mult);
+
+ if (unlikely(iter1)) {
+ /* if we have two destinations for RXOR, then
+ * we've just set Q mult. Set-up P now.
+ */
+ ppc440spe_desc_set_src_mult(iter1, chan,
+ mult_idx, mult_dst, 1);
+ }
+
+ }
+ break;
+
+ case PPC440SPE_XOR_ID:
+ iter = sw_desc->group_head;
+ if (sw_desc->dst_cnt == 2) {
+ /* both P & Q calculations required; set P mult here */
+ ppc440spe_adma_dma2rxor_set_mult(iter, index, 1);
+
+ /* and then set Q mult */
+ iter = ppc440spe_get_group_entry(sw_desc,
+ sw_desc->descs_per_op);
+ }
+ ppc440spe_adma_dma2rxor_set_mult(iter, index, mult);
+ break;
+ }
+}
+
+/**
+ * ppc440spe_adma_free_chan_resources - free the resources allocated
+ */
+static void ppc440spe_adma_free_chan_resources(struct dma_chan *chan)
+{
+ struct ppc440spe_adma_chan *ppc440spe_chan;
+ struct ppc440spe_adma_desc_slot *iter, *_iter;
+ int in_use_descs = 0;
+
+ ppc440spe_chan = to_ppc440spe_adma_chan(chan);
+ ppc440spe_adma_slot_cleanup(ppc440spe_chan);
+
+ spin_lock_bh(&ppc440spe_chan->lock);
+ list_for_each_entry_safe(iter, _iter, &ppc440spe_chan->chain,
+ chain_node) {
+ in_use_descs++;
+ list_del(&iter->chain_node);
+ }
+ list_for_each_entry_safe_reverse(iter, _iter,
+ &ppc440spe_chan->all_slots, slot_node) {
+ list_del(&iter->slot_node);
+ kfree(iter);
+ ppc440spe_chan->slots_allocated--;
+ }
+ ppc440spe_chan->last_used = NULL;
+
+ dev_dbg(ppc440spe_chan->device->common.dev,
+ "ppc440spe adma%d %s slots_allocated %d\n",
+ ppc440spe_chan->device->id,
+ __func__, ppc440spe_chan->slots_allocated);
+ spin_unlock_bh(&ppc440spe_chan->lock);
+
+ /* one is ok since we left it on there on purpose */
+ if (in_use_descs > 1)
+ printk(KERN_ERR "SPE: Freeing %d in use descriptors!\n",
+ in_use_descs - 1);
+}
+
+/**
+ * ppc440spe_adma_is_complete - poll the status of an ADMA transaction
+ * @chan: ADMA channel handle
+ * @cookie: ADMA transaction identifier
+ */
+static enum dma_status ppc440spe_adma_is_complete(struct dma_chan *chan,
+ dma_cookie_t cookie, dma_cookie_t *done, dma_cookie_t *used)
+{
+ struct ppc440spe_adma_chan *ppc440spe_chan;
+ dma_cookie_t last_used;
+ dma_cookie_t last_complete;
+ enum dma_status ret;
+
+ ppc440spe_chan = to_ppc440spe_adma_chan(chan);
+ last_used = chan->cookie;
+ last_complete = ppc440spe_chan->completed_cookie;
+
+ if (done)
+ *done = last_complete;
+ if (used)
+ *used = last_used;
+
+ ret = dma_async_is_complete(cookie, last_complete, last_used);
+ if (ret == DMA_SUCCESS)
+ return ret;
+
+ ppc440spe_adma_slot_cleanup(ppc440spe_chan);
+
+ last_used = chan->cookie;
+ last_complete = ppc440spe_chan->completed_cookie;
+
+ if (done)
+ *done = last_complete;
+ if (used)
+ *used = last_used;
+
+ return dma_async_is_complete(cookie, last_complete, last_used);
+}
+
+/**
+ * ppc440spe_adma_eot_handler - end of transfer interrupt handler
+ */
+static irqreturn_t ppc440spe_adma_eot_handler(int irq, void *data)
+{
+ struct ppc440spe_adma_chan *chan = data;
+
+ dev_dbg(chan->device->common.dev,
+ "ppc440spe adma%d: %s\n", chan->device->id, __func__);
+
+ tasklet_schedule(&chan->irq_tasklet);
+ ppc440spe_adma_device_clear_eot_status(chan);
+
+ return IRQ_HANDLED;
+}
+
+/**
+ * ppc440spe_adma_err_handler - DMA error interrupt handler;
+ * do the same things as a eot handler
+ */
+static irqreturn_t ppc440spe_adma_err_handler(int irq, void *data)
+{
+ struct ppc440spe_adma_chan *chan = data;
+
+ dev_dbg(chan->device->common.dev,
+ "ppc440spe adma%d: %s\n", chan->device->id, __func__);
+
+ tasklet_schedule(&chan->irq_tasklet);
+ ppc440spe_adma_device_clear_eot_status(chan);
+
+ return IRQ_HANDLED;
+}
+
+/**
+ * ppc440spe_test_callback - called when test operation has been done
+ */
+static void ppc440spe_test_callback(void *unused)
+{
+ complete(&ppc440spe_r6_test_comp);
+}
+
+/**
+ * ppc440spe_adma_issue_pending - flush all pending descriptors to h/w
+ */
+static void ppc440spe_adma_issue_pending(struct dma_chan *chan)
+{
+ struct ppc440spe_adma_chan *ppc440spe_chan;
+
+ ppc440spe_chan = to_ppc440spe_adma_chan(chan);
+ dev_dbg(ppc440spe_chan->device->common.dev,
+ "ppc440spe adma%d: %s %d \n", ppc440spe_chan->device->id,
+ __func__, ppc440spe_chan->pending);
+
+ if (ppc440spe_chan->pending) {
+ ppc440spe_chan->pending = 0;
+ ppc440spe_chan_append(ppc440spe_chan);
+ }
+}
+
+/**
+ * ppc440spe_chan_start_null_xor - initiate the first XOR operation (DMA engines
+ * use FIFOs (as opposite to chains used in XOR) so this is a XOR
+ * specific operation)
+ */
+static void ppc440spe_chan_start_null_xor(struct ppc440spe_adma_chan *chan)
+{
+ struct ppc440spe_adma_desc_slot *sw_desc, *group_start;
+ dma_cookie_t cookie;
+ int slot_cnt, slots_per_op;
+
+ dev_dbg(chan->device->common.dev,
+ "ppc440spe adma%d: %s\n", chan->device->id, __func__);
+
+ spin_lock_bh(&chan->lock);
+ slot_cnt = ppc440spe_chan_xor_slot_count(0, 2, &slots_per_op);
+ sw_desc = ppc440spe_adma_alloc_slots(chan, slot_cnt, slots_per_op);
+ if (sw_desc) {
+ group_start = sw_desc->group_head;
+ list_splice_init(&sw_desc->group_list, &chan->chain);
+ async_tx_ack(&sw_desc->async_tx);
+ ppc440spe_desc_init_null_xor(group_start);
+
+ cookie = chan->common.cookie;
+ cookie++;
+ if (cookie <= 1)
+ cookie = 2;
+
+ /* initialize the completed cookie to be less than
+ * the most recently used cookie
+ */
+ chan->completed_cookie = cookie - 1;
+ chan->common.cookie = sw_desc->async_tx.cookie = cookie;
+
+ /* channel should not be busy */
+ BUG_ON(ppc440spe_chan_is_busy(chan));
+
+ /* set the descriptor address */
+ ppc440spe_chan_set_first_xor_descriptor(chan, sw_desc);
+
+ /* run the descriptor */
+ ppc440spe_chan_run(chan);
+ } else
+ printk(KERN_ERR "ppc440spe adma%d"
+ " failed to allocate null descriptor\n",
+ chan->device->id);
+ spin_unlock_bh(&chan->lock);
+}
+
+/**
+ * ppc440spe_test_raid6 - test are RAID-6 capabilities enabled successfully.
+ * For this we just perform one WXOR operation with the same source
+ * and destination addresses, the GF-multiplier is 1; so if RAID-6
+ * capabilities are enabled then we'll get src/dst filled with zero.
+ */
+static int ppc440spe_test_raid6(struct ppc440spe_adma_chan *chan)
+{
+ struct ppc440spe_adma_desc_slot *sw_desc, *iter;
+ struct page *pg;
+ char *a;
+ dma_addr_t dma_addr, addrs[2];
+ unsigned long op = 0;
+ int rval = 0;
+
+ set_bit(PPC440SPE_DESC_WXOR, &op);
+
+ pg = alloc_page(GFP_KERNEL);
+ if (!pg)
+ return -ENOMEM;
+
+ spin_lock_bh(&chan->lock);
+ sw_desc = ppc440spe_adma_alloc_slots(chan, 1, 1);
+ if (sw_desc) {
+ /* 1 src, 1 dsr, int_ena, WXOR */
+ ppc440spe_desc_init_dma01pq(sw_desc, 1, 1, 1, op);
+ list_for_each_entry(iter, &sw_desc->group_list, chain_node) {
+ ppc440spe_desc_set_byte_count(iter, chan, PAGE_SIZE);
+ iter->unmap_len = PAGE_SIZE;
+ }
+ } else {
+ rval = -EFAULT;
+ spin_unlock_bh(&chan->lock);
+ goto exit;
+ }
+ spin_unlock_bh(&chan->lock);
+
+ /* Fill the test page with ones */
+ memset(page_address(pg), 0xFF, PAGE_SIZE);
+ dma_addr = dma_map_page(chan->device->dev, pg, 0,
+ PAGE_SIZE, DMA_BIDIRECTIONAL);
+
+ /* Setup addresses */
+ ppc440spe_adma_pq_set_src(sw_desc, dma_addr, 0);
+ ppc440spe_adma_pq_set_src_mult(sw_desc, 1, 0, 0);
+ addrs[0] = dma_addr;
+ addrs[1] = 0;
+ ppc440spe_adma_pq_set_dest(sw_desc, addrs, DMA_PREP_PQ_DISABLE_Q);
+
+ async_tx_ack(&sw_desc->async_tx);
+ sw_desc->async_tx.callback = ppc440spe_test_callback;
+ sw_desc->async_tx.callback_param = NULL;
+
+ init_completion(&ppc440spe_r6_test_comp);
+
+ ppc440spe_adma_tx_submit(&sw_desc->async_tx);
+ ppc440spe_adma_issue_pending(&chan->common);
+
+ wait_for_completion(&ppc440spe_r6_test_comp);
+
+ /* Now check if the test page is zeroed */
+ a = page_address(pg);
+ if ((*(u32 *)a) == 0 && memcmp(a, a+4, PAGE_SIZE-4) == 0) {
+ /* page is zero - RAID-6 enabled */
+ rval = 0;
+ } else {
+ /* RAID-6 was not enabled */
+ rval = -EINVAL;
+ }
+exit:
+ __free_page(pg);
+ return rval;
+}
+
+static void ppc440spe_adma_init_capabilities(struct ppc440spe_adma_device *adev)
+{
+ switch (adev->id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ dma_cap_set(DMA_MEMCPY, adev->common.cap_mask);
+ dma_cap_set(DMA_INTERRUPT, adev->common.cap_mask);
+ dma_cap_set(DMA_MEMSET, adev->common.cap_mask);
+ dma_cap_set(DMA_PQ, adev->common.cap_mask);
+ dma_cap_set(DMA_PQ_VAL, adev->common.cap_mask);
+ dma_cap_set(DMA_XOR_VAL, adev->common.cap_mask);
+ break;
+ case PPC440SPE_XOR_ID:
+ dma_cap_set(DMA_XOR, adev->common.cap_mask);
+ dma_cap_set(DMA_PQ, adev->common.cap_mask);
+ dma_cap_set(DMA_INTERRUPT, adev->common.cap_mask);
+ adev->common.cap_mask = adev->common.cap_mask;
+ break;
+ }
+
+ /* Set base routines */
+ adev->common.device_alloc_chan_resources =
+ ppc440spe_adma_alloc_chan_resources;
+ adev->common.device_free_chan_resources =
+ ppc440spe_adma_free_chan_resources;
+ adev->common.device_is_tx_complete = ppc440spe_adma_is_complete;
+ adev->common.device_issue_pending = ppc440spe_adma_issue_pending;
+
+ /* Set prep routines based on capability */
+ if (dma_has_cap(DMA_MEMCPY, adev->common.cap_mask)) {
+ adev->common.device_prep_dma_memcpy =
+ ppc440spe_adma_prep_dma_memcpy;
+ }
+ if (dma_has_cap(DMA_MEMSET, adev->common.cap_mask)) {
+ adev->common.device_prep_dma_memset =
+ ppc440spe_adma_prep_dma_memset;
+ }
+ if (dma_has_cap(DMA_XOR, adev->common.cap_mask)) {
+ adev->common.max_xor = XOR_MAX_OPS;
+ adev->common.device_prep_dma_xor =
+ ppc440spe_adma_prep_dma_xor;
+ }
+ if (dma_has_cap(DMA_PQ, adev->common.cap_mask)) {
+ switch (adev->id) {
+ case PPC440SPE_DMA0_ID:
+ dma_set_maxpq(&adev->common,
+ DMA0_FIFO_SIZE / sizeof(struct dma_cdb), 0);
+ break;
+ case PPC440SPE_DMA1_ID:
+ dma_set_maxpq(&adev->common,
+ DMA1_FIFO_SIZE / sizeof(struct dma_cdb), 0);
+ break;
+ case PPC440SPE_XOR_ID:
+ adev->common.max_pq = XOR_MAX_OPS * 3;
+ break;
+ }
+ adev->common.device_prep_dma_pq =
+ ppc440spe_adma_prep_dma_pq;
+ }
+ if (dma_has_cap(DMA_PQ_VAL, adev->common.cap_mask)) {
+ switch (adev->id) {
+ case PPC440SPE_DMA0_ID:
+ adev->common.max_pq = DMA0_FIFO_SIZE /
+ sizeof(struct dma_cdb);
+ break;
+ case PPC440SPE_DMA1_ID:
+ adev->common.max_pq = DMA1_FIFO_SIZE /
+ sizeof(struct dma_cdb);
+ break;
+ }
+ adev->common.device_prep_dma_pq_val =
+ ppc440spe_adma_prep_dma_pqzero_sum;
+ }
+ if (dma_has_cap(DMA_XOR_VAL, adev->common.cap_mask)) {
+ switch (adev->id) {
+ case PPC440SPE_DMA0_ID:
+ adev->common.max_xor = DMA0_FIFO_SIZE /
+ sizeof(struct dma_cdb);
+ break;
+ case PPC440SPE_DMA1_ID:
+ adev->common.max_xor = DMA1_FIFO_SIZE /
+ sizeof(struct dma_cdb);
+ break;
+ }
+ adev->common.device_prep_dma_xor_val =
+ ppc440spe_adma_prep_dma_xor_zero_sum;
+ }
+ if (dma_has_cap(DMA_INTERRUPT, adev->common.cap_mask)) {
+ adev->common.device_prep_dma_interrupt =
+ ppc440spe_adma_prep_dma_interrupt;
+ }
+ pr_info("%s: AMCC(R) PPC440SP(E) ADMA Engine: "
+ "( %s%s%s%s%s%s%s)\n",
+ dev_name(adev->dev),
+ dma_has_cap(DMA_PQ, adev->common.cap_mask) ? "pq " : "",
+ dma_has_cap(DMA_PQ_VAL, adev->common.cap_mask) ? "pq_val " : "",
+ dma_has_cap(DMA_XOR, adev->common.cap_mask) ? "xor " : "",
+ dma_has_cap(DMA_XOR_VAL, adev->common.cap_mask) ? "xor_val " : "",
+ dma_has_cap(DMA_MEMCPY, adev->common.cap_mask) ? "memcpy " : "",
+ dma_has_cap(DMA_MEMSET, adev->common.cap_mask) ? "memset " : "",
+ dma_has_cap(DMA_INTERRUPT, adev->common.cap_mask) ? "intr " : "");
+}
+
+static int ppc440spe_adma_setup_irqs(struct ppc440spe_adma_device *adev,
+ struct ppc440spe_adma_chan *chan,
+ int *initcode)
+{
+ struct device_node *np;
+ int ret;
+
+ np = container_of(adev->dev, struct of_device, dev)->node;
+ if (adev->id != PPC440SPE_XOR_ID) {
+ adev->err_irq = irq_of_parse_and_map(np, 1);
+ if (adev->err_irq == NO_IRQ) {
+ dev_warn(adev->dev, "no err irq resource?\n");
+ *initcode = PPC_ADMA_INIT_IRQ2;
+ adev->err_irq = -ENXIO;
+ } else
+ atomic_inc(&ppc440spe_adma_err_irq_ref);
+ } else {
+ adev->err_irq = -ENXIO;
+ }
+
+ adev->irq = irq_of_parse_and_map(np, 0);
+ if (adev->irq == NO_IRQ) {
+ dev_err(adev->dev, "no irq resource\n");
+ *initcode = PPC_ADMA_INIT_IRQ1;
+ ret = -ENXIO;
+ goto err_irq_map;
+ }
+ dev_dbg(adev->dev, "irq %d, err irq %d\n",
+ adev->irq, adev->err_irq);
+
+ ret = request_irq(adev->irq, ppc440spe_adma_eot_handler,
+ 0, dev_driver_string(adev->dev), chan);
+ if (ret) {
+ dev_err(adev->dev, "can't request irq %d\n",
+ adev->irq);
+ *initcode = PPC_ADMA_INIT_IRQ1;
+ ret = -EIO;
+ goto err_req1;
+ }
+
+ /* only DMA engines have a separate error IRQ
+ * so it's Ok if err_irq < 0 in XOR engine case.
+ */
+ if (adev->err_irq > 0) {
+ /* both DMA engines share common error IRQ */
+ ret = request_irq(adev->err_irq,
+ ppc440spe_adma_err_handler,
+ IRQF_SHARED,
+ dev_driver_string(adev->dev),
+ chan);
+ if (ret) {
+ dev_err(adev->dev, "can't request irq %d\n",
+ adev->err_irq);
+ *initcode = PPC_ADMA_INIT_IRQ2;
+ ret = -EIO;
+ goto err_req2;
+ }
+ }
+
+ if (adev->id == PPC440SPE_XOR_ID) {
+ /* enable XOR engine interrupts */
+ iowrite32be(XOR_IE_CBCIE_BIT | XOR_IE_ICBIE_BIT |
+ XOR_IE_ICIE_BIT | XOR_IE_RPTIE_BIT,
+ &adev->xor_reg->ier);
+ } else {
+ u32 mask, enable;
+
+ np = of_find_compatible_node(NULL, NULL, "ibm,i2o-440spe");
+ if (!np) {
+ pr_err("%s: can't find I2O device tree node\n",
+ __func__);
+ ret = -ENODEV;
+ goto err_req2;
+ }
+ adev->i2o_reg = of_iomap(np, 0);
+ if (!adev->i2o_reg) {
+ pr_err("%s: failed to map I2O registers\n", __func__);
+ of_node_put(np);
+ ret = -EINVAL;
+ goto err_req2;
+ }
+ of_node_put(np);
+ /* Unmask 'CS FIFO Attention' interrupts and
+ * enable generating interrupts on errors
+ */
+ enable = (adev->id == PPC440SPE_DMA0_ID) ?
+ ~(I2O_IOPIM_P0SNE | I2O_IOPIM_P0EM) :
+ ~(I2O_IOPIM_P1SNE | I2O_IOPIM_P1EM);
+ mask = ioread32(&adev->i2o_reg->iopim) & enable;
+ iowrite32(mask, &adev->i2o_reg->iopim);
+ }
+ return 0;
+
+err_req2:
+ free_irq(adev->irq, chan);
+err_req1:
+ irq_dispose_mapping(adev->irq);
+err_irq_map:
+ if (adev->err_irq > 0) {
+ if (atomic_dec_and_test(&ppc440spe_adma_err_irq_ref))
+ irq_dispose_mapping(adev->err_irq);
+ }
+ return ret;
+}
+
+static void ppc440spe_adma_release_irqs(struct ppc440spe_adma_device *adev,
+ struct ppc440spe_adma_chan *chan)
+{
+ u32 mask, disable;
+
+ if (adev->id == PPC440SPE_XOR_ID) {
+ /* disable XOR engine interrupts */
+ mask = ioread32be(&adev->xor_reg->ier);
+ mask &= ~(XOR_IE_CBCIE_BIT | XOR_IE_ICBIE_BIT |
+ XOR_IE_ICIE_BIT | XOR_IE_RPTIE_BIT);
+ iowrite32be(mask, &adev->xor_reg->ier);
+ } else {
+ /* disable DMAx engine interrupts */
+ disable = (adev->id == PPC440SPE_DMA0_ID) ?
+ (I2O_IOPIM_P0SNE | I2O_IOPIM_P0EM) :
+ (I2O_IOPIM_P1SNE | I2O_IOPIM_P1EM);
+ mask = ioread32(&adev->i2o_reg->iopim) | disable;
+ iowrite32(mask, &adev->i2o_reg->iopim);
+ }
+ free_irq(adev->irq, chan);
+ irq_dispose_mapping(adev->irq);
+ if (adev->err_irq > 0) {
+ free_irq(adev->err_irq, chan);
+ if (atomic_dec_and_test(&ppc440spe_adma_err_irq_ref)) {
+ irq_dispose_mapping(adev->err_irq);
+ iounmap(adev->i2o_reg);
+ }
+ }
+}
+
+/**
+ * ppc440spe_adma_probe - probe the asynch device
+ */
+static int __devinit ppc440spe_adma_probe(struct of_device *ofdev,
+ const struct of_device_id *match)
+{
+ struct device_node *np = ofdev->node;
+ struct resource res;
+ struct ppc440spe_adma_device *adev;
+ struct ppc440spe_adma_chan *chan;
+ struct ppc_dma_chan_ref *ref, *_ref;
+ int ret = 0, initcode = PPC_ADMA_INIT_OK;
+ const u32 *idx;
+ int len;
+ void *regs;
+ u32 id, pool_size;
+
+ if (of_device_is_compatible(np, "amcc,xor-accelerator")) {
+ id = PPC440SPE_XOR_ID;
+ /* As far as the XOR engine is concerned, it does not
+ * use FIFOs but uses linked list. So there is no dependency
+ * between pool size to allocate and the engine configuration.
+ */
+ pool_size = PAGE_SIZE << 1;
+ } else {
+ /* it is DMA0 or DMA1 */
+ idx = of_get_property(np, "cell-index", &len);
+ if (!idx || (len != sizeof(u32))) {
+ dev_err(&ofdev->dev, "Device node %s has missing "
+ "or invalid cell-index property\n",
+ np->full_name);
+ return -EINVAL;
+ }
+ id = *idx;
+ /* DMA0,1 engines use FIFO to maintain CDBs, so we
+ * should allocate the pool accordingly to size of this
+ * FIFO. Thus, the pool size depends on the FIFO depth:
+ * how much CDBs pointers the FIFO may contain then so
+ * much CDBs we should provide in the pool.
+ * That is
+ * CDB size = 32B;
+ * CDBs number = (DMA0_FIFO_SIZE >> 3);
+ * Pool size = CDBs number * CDB size =
+ * = (DMA0_FIFO_SIZE >> 3) << 5 = DMA0_FIFO_SIZE << 2.
+ */
+ pool_size = (id == PPC440SPE_DMA0_ID) ?
+ DMA0_FIFO_SIZE : DMA1_FIFO_SIZE;
+ pool_size <<= 2;
+ }
+
+ if (of_address_to_resource(np, 0, &res)) {
+ dev_err(&ofdev->dev, "failed to get memory resource\n");
+ initcode = PPC_ADMA_INIT_MEMRES;
+ ret = -ENODEV;
+ goto out;
+ }
+
+ if (!request_mem_region(res.start, resource_size(&res),
+ dev_driver_string(&ofdev->dev))) {
+ dev_err(&ofdev->dev, "failed to request memory region "
+ "(0x%016llx-0x%016llx)\n",
+ (u64)res.start, (u64)res.end);
+ initcode = PPC_ADMA_INIT_MEMREG;
+ ret = -EBUSY;
+ goto out;
+ }
+
+ /* create a device */
+ adev = kzalloc(sizeof(*adev), GFP_KERNEL);
+ if (!adev) {
+ dev_err(&ofdev->dev, "failed to allocate device\n");
+ initcode = PPC_ADMA_INIT_ALLOC;
+ ret = -ENOMEM;
+ goto err_adev_alloc;
+ }
+
+ adev->id = id;
+ adev->pool_size = pool_size;
+ /* allocate coherent memory for hardware descriptors */
+ adev->dma_desc_pool_virt = dma_alloc_coherent(&ofdev->dev,
+ adev->pool_size, &adev->dma_desc_pool,
+ GFP_KERNEL);
+ if (adev->dma_desc_pool_virt == NULL) {
+ dev_err(&ofdev->dev, "failed to allocate %d bytes of coherent "
+ "memory for hardware descriptors\n",
+ adev->pool_size);
+ initcode = PPC_ADMA_INIT_COHERENT;
+ ret = -ENOMEM;
+ goto err_dma_alloc;
+ }
+ dev_dbg(&ofdev->dev, "allocted descriptor pool virt 0x%p phys 0x%llx\n",
+ adev->dma_desc_pool_virt, (u64)adev->dma_desc_pool);
+
+ regs = ioremap(res.start, resource_size(&res));
+ if (!regs) {
+ dev_err(&ofdev->dev, "failed to ioremap regs!\n");
+ goto err_regs_alloc;
+ }
+
+ if (adev->id == PPC440SPE_XOR_ID) {
+ adev->xor_reg = regs;
+ /* Reset XOR */
+ iowrite32be(XOR_CRSR_XASR_BIT, &adev->xor_reg->crsr);
+ iowrite32be(XOR_CRSR_64BA_BIT, &adev->xor_reg->crrr);
+ } else {
+ size_t fifo_size = (adev->id == PPC440SPE_DMA0_ID) ?
+ DMA0_FIFO_SIZE : DMA1_FIFO_SIZE;
+ adev->dma_reg = regs;
+ /* DMAx_FIFO_SIZE is defined in bytes,
+ * <fsiz> - is defined in number of CDB pointers (8byte).
+ * DMA FIFO Length = CSlength + CPlength, where
+ * CSlength = CPlength = (fsiz + 1) * 8.
+ */
+ iowrite32(DMA_FIFO_ENABLE | ((fifo_size >> 3) - 2),
+ &adev->dma_reg->fsiz);
+ /* Configure DMA engine */
+ iowrite32(DMA_CFG_DXEPR_HP | DMA_CFG_DFMPP_HP | DMA_CFG_FALGN,
+ &adev->dma_reg->cfg);
+ /* Clear Status */
+ iowrite32(~0, &adev->dma_reg->dsts);
+ }
+
+ adev->dev = &ofdev->dev;
+ adev->common.dev = &ofdev->dev;
+ INIT_LIST_HEAD(&adev->common.channels);
+ dev_set_drvdata(&ofdev->dev, adev);
+
+ /* create a channel */
+ chan = kzalloc(sizeof(*chan), GFP_KERNEL);
+ if (!chan) {
+ dev_err(&ofdev->dev, "can't allocate channel structure\n");
+ initcode = PPC_ADMA_INIT_CHANNEL;
+ ret = -ENOMEM;
+ goto err_chan_alloc;
+ }
+
+ spin_lock_init(&chan->lock);
+ INIT_LIST_HEAD(&chan->chain);
+ INIT_LIST_HEAD(&chan->all_slots);
+ chan->device = adev;
+ chan->common.device = &adev->common;
+ list_add_tail(&chan->common.device_node, &adev->common.channels);
+ tasklet_init(&chan->irq_tasklet, ppc440spe_adma_tasklet,
+ (unsigned long)chan);
+
+ /* allocate and map helper pages for async validation or
+ * async_mult/async_sum_product operations on DMA0/1.
+ */
+ if (adev->id != PPC440SPE_XOR_ID) {
+ chan->pdest_page = alloc_page(GFP_KERNEL);
+ chan->qdest_page = alloc_page(GFP_KERNEL);
+ if (!chan->pdest_page ||
+ !chan->qdest_page) {
+ if (chan->pdest_page)
+ __free_page(chan->pdest_page);
+ if (chan->qdest_page)
+ __free_page(chan->qdest_page);
+ ret = -ENOMEM;
+ goto err_page_alloc;
+ }
+ chan->pdest = dma_map_page(&ofdev->dev, chan->pdest_page, 0,
+ PAGE_SIZE, DMA_BIDIRECTIONAL);
+ chan->qdest = dma_map_page(&ofdev->dev, chan->qdest_page, 0,
+ PAGE_SIZE, DMA_BIDIRECTIONAL);
+ }
+
+ ref = kmalloc(sizeof(*ref), GFP_KERNEL);
+ if (ref) {
+ ref->chan = &chan->common;
+ INIT_LIST_HEAD(&ref->node);
+ list_add_tail(&ref->node, &ppc440spe_adma_chan_list);
+ } else {
+ dev_err(&ofdev->dev, "failed to allocate channel reference!\n");
+ ret = -ENOMEM;
+ goto err_ref_alloc;
+ }
+
+ ret = ppc440spe_adma_setup_irqs(adev, chan, &initcode);
+ if (ret)
+ goto err_irq;
+
+ ppc440spe_adma_init_capabilities(adev);
+
+ ret = dma_async_device_register(&adev->common);
+ if (ret) {
+ initcode = PPC_ADMA_INIT_REGISTER;
+ dev_err(&ofdev->dev, "failed to register dma device\n");
+ goto err_dev_reg;
+ }
+
+ goto out;
+
+err_dev_reg:
+ ppc440spe_adma_release_irqs(adev, chan);
+err_irq:
+ list_for_each_entry_safe(ref, _ref, &ppc440spe_adma_chan_list, node) {
+ if (chan == to_ppc440spe_adma_chan(ref->chan)) {
+ list_del(&ref->node);
+ kfree(ref);
+ }
+ }
+err_ref_alloc:
+ if (adev->id != PPC440SPE_XOR_ID) {
+ dma_unmap_page(&ofdev->dev, chan->pdest,
+ PAGE_SIZE, DMA_BIDIRECTIONAL);
+ dma_unmap_page(&ofdev->dev, chan->qdest,
+ PAGE_SIZE, DMA_BIDIRECTIONAL);
+ __free_page(chan->pdest_page);
+ __free_page(chan->qdest_page);
+ }
+err_page_alloc:
+ kfree(chan);
+err_chan_alloc:
+ if (adev->id == PPC440SPE_XOR_ID)
+ iounmap(adev->xor_reg);
+ else
+ iounmap(adev->dma_reg);
+err_regs_alloc:
+ dma_free_coherent(adev->dev, adev->pool_size,
+ adev->dma_desc_pool_virt,
+ adev->dma_desc_pool);
+err_dma_alloc:
+ kfree(adev);
+err_adev_alloc:
+ release_mem_region(res.start, resource_size(&res));
+out:
+ if (id < PPC440SPE_ADMA_ENGINES_NUM)
+ ppc440spe_adma_devices[id] = initcode;
+
+ return ret;
+}
+
+/**
+ * ppc440spe_adma_remove - remove the asynch device
+ */
+static int __devexit ppc440spe_adma_remove(struct of_device *ofdev)
+{
+ struct ppc440spe_adma_device *adev = dev_get_drvdata(&ofdev->dev);
+ struct device_node *np = ofdev->node;
+ struct resource res;
+ struct dma_chan *chan, *_chan;
+ struct ppc_dma_chan_ref *ref, *_ref;
+ struct ppc440spe_adma_chan *ppc440spe_chan;
+
+ dev_set_drvdata(&ofdev->dev, NULL);
+ if (adev->id < PPC440SPE_ADMA_ENGINES_NUM)
+ ppc440spe_adma_devices[adev->id] = -1;
+
+ dma_async_device_unregister(&adev->common);
+
+ list_for_each_entry_safe(chan, _chan, &adev->common.channels,
+ device_node) {
+ ppc440spe_chan = to_ppc440spe_adma_chan(chan);
+ ppc440spe_adma_release_irqs(adev, ppc440spe_chan);
+ tasklet_kill(&ppc440spe_chan->irq_tasklet);
+ if (adev->id != PPC440SPE_XOR_ID) {
+ dma_unmap_page(&ofdev->dev, ppc440spe_chan->pdest,
+ PAGE_SIZE, DMA_BIDIRECTIONAL);
+ dma_unmap_page(&ofdev->dev, ppc440spe_chan->qdest,
+ PAGE_SIZE, DMA_BIDIRECTIONAL);
+ __free_page(ppc440spe_chan->pdest_page);
+ __free_page(ppc440spe_chan->qdest_page);
+ }
+ list_for_each_entry_safe(ref, _ref, &ppc440spe_adma_chan_list,
+ node) {
+ if (ppc440spe_chan ==
+ to_ppc440spe_adma_chan(ref->chan)) {
+ list_del(&ref->node);
+ kfree(ref);
+ }
+ }
+ list_del(&chan->device_node);
+ kfree(ppc440spe_chan);
+ }
+
+ dma_free_coherent(adev->dev, adev->pool_size,
+ adev->dma_desc_pool_virt, adev->dma_desc_pool);
+ if (adev->id == PPC440SPE_XOR_ID)
+ iounmap(adev->xor_reg);
+ else
+ iounmap(adev->dma_reg);
+ of_address_to_resource(np, 0, &res);
+ release_mem_region(res.start, resource_size(&res));
+ kfree(adev);
+ return 0;
+}
+
+/*
+ * /sys driver interface to enable h/w RAID-6 capabilities
+ * Files created in e.g. /sys/devices/plb.0/400100100.dma0/driver/
+ * directory are "devices", "enable" and "poly".
+ * "devices" shows available engines.
+ * "enable" is used to enable RAID-6 capabilities or to check
+ * whether these has been activated.
+ * "poly" allows setting/checking used polynomial (for PPC440SPe only).
+ */
+
+static ssize_t show_ppc440spe_devices(struct device_driver *dev, char *buf)
+{
+ ssize_t size = 0;
+ int i;
+
+ for (i = 0; i < PPC440SPE_ADMA_ENGINES_NUM; i++) {
+ if (ppc440spe_adma_devices[i] == -1)
+ continue;
+ size += snprintf(buf + size, PAGE_SIZE - size,
+ "PPC440SP(E)-ADMA.%d: %s\n", i,
+ ppc_adma_errors[ppc440spe_adma_devices[i]]);
+ }
+ return size;
+}
+
+static ssize_t show_ppc440spe_r6enable(struct device_driver *dev, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE,
+ "PPC440SP(e) RAID-6 capabilities are %sABLED.\n",
+ ppc440spe_r6_enabled ? "EN" : "DIS");
+}
+
+static ssize_t store_ppc440spe_r6enable(struct device_driver *dev,
+ const char *buf, size_t count)
+{
+ unsigned long val;
+
+ if (!count || count > 11)
+ return -EINVAL;
+
+ if (!ppc440spe_r6_tchan)
+ return -EFAULT;
+
+ /* Write a key */
+ sscanf(buf, "%lx", &val);
+ dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_XORBA, val);
+ isync();
+
+ /* Verify whether it really works now */
+ if (ppc440spe_test_raid6(ppc440spe_r6_tchan) == 0) {
+ pr_info("PPC440SP(e) RAID-6 has been activated "
+ "successfully\n");
+ ppc440spe_r6_enabled = 1;
+ } else {
+ pr_info("PPC440SP(e) RAID-6 hasn't been activated!"
+ " Error key ?\n");
+ ppc440spe_r6_enabled = 0;
+ }
+ return count;
+}
+
+static ssize_t show_ppc440spe_r6poly(struct device_driver *dev, char *buf)
+{
+ ssize_t size = 0;
+ u32 reg;
+
+#ifdef CONFIG_440SP
+ /* 440SP has fixed polynomial */
+ reg = 0x4d;
+#else
+ reg = dcr_read(ppc440spe_mq_dcr_host, DCRN_MQ0_CFBHL);
+ reg >>= MQ0_CFBHL_POLY;
+ reg &= 0xFF;
+#endif
+
+ size = snprintf(buf, PAGE_SIZE, "PPC440SP(e) RAID-6 driver "
+ "uses 0x1%02x polynomial.\n", reg);
+ return size;
+}
+
+static ssize_t store_ppc440spe_r6poly(struct device_driver *dev,
+ const char *buf, size_t count)
+{
+ unsigned long reg, val;
+
+#ifdef CONFIG_440SP
+ /* 440SP uses default 0x14D polynomial only */
+ return -EINVAL;
+#endif
+
+ if (!count || count > 6)
+ return -EINVAL;
+
+ /* e.g., 0x14D or 0x11D */
+ sscanf(buf, "%lx", &val);
+
+ if (val & ~0x1FF)
+ return -EINVAL;
+
+ val &= 0xFF;
+ reg = dcr_read(ppc440spe_mq_dcr_host, DCRN_MQ0_CFBHL);
+ reg &= ~(0xFF << MQ0_CFBHL_POLY);
+ reg |= val << MQ0_CFBHL_POLY;
+ dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_CFBHL, reg);
+
+ return count;
+}
+
+static DRIVER_ATTR(devices, S_IRUGO, show_ppc440spe_devices, NULL);
+static DRIVER_ATTR(enable, S_IRUGO | S_IWUSR, show_ppc440spe_r6enable,
+ store_ppc440spe_r6enable);
+static DRIVER_ATTR(poly, S_IRUGO | S_IWUSR, show_ppc440spe_r6poly,
+ store_ppc440spe_r6poly);
+
+/*
+ * Common initialisation for RAID engines; allocate memory for
+ * DMAx FIFOs, perform configuration common for all DMA engines.
+ * Further DMA engine specific configuration is done at probe time.
+ */
+static int ppc440spe_configure_raid_devices(void)
+{
+ struct device_node *np;
+ struct resource i2o_res;
+ struct i2o_regs __iomem *i2o_reg;
+ dcr_host_t i2o_dcr_host;
+ unsigned int dcr_base, dcr_len;
+ int i, ret;
+
+ np = of_find_compatible_node(NULL, NULL, "ibm,i2o-440spe");
+ if (!np) {
+ pr_err("%s: can't find I2O device tree node\n",
+ __func__);
+ return -ENODEV;
+ }
+
+ if (of_address_to_resource(np, 0, &i2o_res)) {
+ of_node_put(np);
+ return -EINVAL;
+ }
+
+ i2o_reg = of_iomap(np, 0);
+ if (!i2o_reg) {
+ pr_err("%s: failed to map I2O registers\n", __func__);
+ of_node_put(np);
+ return -EINVAL;
+ }
+
+ /* Get I2O DCRs base */
+ dcr_base = dcr_resource_start(np, 0);
+ dcr_len = dcr_resource_len(np, 0);
+ if (!dcr_base && !dcr_len) {
+ pr_err("%s: can't get DCR registers base/len!\n",
+ np->full_name);
+ of_node_put(np);
+ iounmap(i2o_reg);
+ return -ENODEV;
+ }
+
+ i2o_dcr_host = dcr_map(np, dcr_base, dcr_len);
+ if (!DCR_MAP_OK(i2o_dcr_host)) {
+ pr_err("%s: failed to map DCRs!\n", np->full_name);
+ of_node_put(np);
+ iounmap(i2o_reg);
+ return -ENODEV;
+ }
+ of_node_put(np);
+
+ /* Provide memory regions for DMA's FIFOs: I2O, DMA0 and DMA1 share
+ * the base address of FIFO memory space.
+ * Actually we need twice more physical memory than programmed in the
+ * <fsiz> register (because there are two FIFOs for each DMA: CP and CS)
+ */
+ ppc440spe_dma_fifo_buf = kmalloc((DMA0_FIFO_SIZE + DMA1_FIFO_SIZE) << 1,
+ GFP_KERNEL);
+ if (!ppc440spe_dma_fifo_buf) {
+ pr_err("%s: DMA FIFO buffer allocation failed.\n", __func__);
+ iounmap(i2o_reg);
+ dcr_unmap(i2o_dcr_host, dcr_len);
+ return -ENOMEM;
+ }
+
+ /*
+ * Configure h/w
+ */
+ /* Reset I2O/DMA */
+ mtdcri(SDR0, DCRN_SDR0_SRST, DCRN_SDR0_SRST_I2ODMA);
+ mtdcri(SDR0, DCRN_SDR0_SRST, 0);
+
+ /* Setup the base address of mmaped registers */
+ dcr_write(i2o_dcr_host, DCRN_I2O0_IBAH, (u32)(i2o_res.start >> 32));
+ dcr_write(i2o_dcr_host, DCRN_I2O0_IBAL, (u32)(i2o_res.start) |
+ I2O_REG_ENABLE);
+ dcr_unmap(i2o_dcr_host, dcr_len);
+
+ /* Setup FIFO memory space base address */
+ iowrite32(0, &i2o_reg->ifbah);
+ iowrite32(((u32)__pa(ppc440spe_dma_fifo_buf)), &i2o_reg->ifbal);
+
+ /* set zero FIFO size for I2O, so the whole
+ * ppc440spe_dma_fifo_buf is used by DMAs.
+ * DMAx_FIFOs will be configured while probe.
+ */
+ iowrite32(0, &i2o_reg->ifsiz);
+ iounmap(i2o_reg);
+
+ /* To prepare WXOR/RXOR functionality we need access to
+ * Memory Queue Module DCRs (finally it will be enabled
+ * via /sys interface of the ppc440spe ADMA driver).
+ */
+ np = of_find_compatible_node(NULL, NULL, "ibm,mq-440spe");
+ if (!np) {
+ pr_err("%s: can't find MQ device tree node\n",
+ __func__);
+ ret = -ENODEV;
+ goto out_free;
+ }
+
+ /* Get MQ DCRs base */
+ dcr_base = dcr_resource_start(np, 0);
+ dcr_len = dcr_resource_len(np, 0);
+ if (!dcr_base && !dcr_len) {
+ pr_err("%s: can't get DCR registers base/len!\n",
+ np->full_name);
+ ret = -ENODEV;
+ goto out_mq;
+ }
+
+ ppc440spe_mq_dcr_host = dcr_map(np, dcr_base, dcr_len);
+ if (!DCR_MAP_OK(ppc440spe_mq_dcr_host)) {
+ pr_err("%s: failed to map DCRs!\n", np->full_name);
+ ret = -ENODEV;
+ goto out_mq;
+ }
+ of_node_put(np);
+ ppc440spe_mq_dcr_len = dcr_len;
+
+ /* Set HB alias */
+ dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_BAUH, DMA_CUED_XOR_HB);
+
+ /* Set:
+ * - LL transaction passing limit to 1;
+ * - Memory controller cycle limit to 1;
+ * - Galois Polynomial to 0x14d (default)
+ */
+ dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_CFBHL,
+ (1 << MQ0_CFBHL_TPLM) | (1 << MQ0_CFBHL_HBCL) |
+ (PPC440SPE_DEFAULT_POLY << MQ0_CFBHL_POLY));
+
+ atomic_set(&ppc440spe_adma_err_irq_ref, 0);
+ for (i = 0; i < PPC440SPE_ADMA_ENGINES_NUM; i++)
+ ppc440spe_adma_devices[i] = -1;
+
+ return 0;
+
+out_mq:
+ of_node_put(np);
+out_free:
+ kfree(ppc440spe_dma_fifo_buf);
+ return ret;
+}
+
+static struct of_device_id __devinitdata ppc440spe_adma_of_match[] = {
+ { .compatible = "ibm,dma-440spe", },
+ { .compatible = "amcc,xor-accelerator", },
+ {},
+};
+MODULE_DEVICE_TABLE(of, ppc440spe_adma_of_match);
+
+static struct of_platform_driver ppc440spe_adma_driver = {
+ .match_table = ppc440spe_adma_of_match,
+ .probe = ppc440spe_adma_probe,
+ .remove = __devexit_p(ppc440spe_adma_remove),
+ .driver = {
+ .name = "PPC440SP(E)-ADMA",
+ .owner = THIS_MODULE,
+ },
+};
+
+static __init int ppc440spe_adma_init(void)
+{
+ int ret;
+
+ ret = ppc440spe_configure_raid_devices();
+ if (ret)
+ return ret;
+
+ ret = of_register_platform_driver(&ppc440spe_adma_driver);
+ if (ret) {
+ pr_err("%s: failed to register platform driver\n",
+ __func__);
+ goto out_reg;
+ }
+
+ /* Initialization status */
+ ret = driver_create_file(&ppc440spe_adma_driver.driver,
+ &driver_attr_devices);
+ if (ret)
+ goto out_dev;
+
+ /* RAID-6 h/w enable entry */
+ ret = driver_create_file(&ppc440spe_adma_driver.driver,
+ &driver_attr_enable);
+ if (ret)
+ goto out_en;
+
+ /* GF polynomial to use */
+ ret = driver_create_file(&ppc440spe_adma_driver.driver,
+ &driver_attr_poly);
+ if (!ret)
+ return ret;
+
+ driver_remove_file(&ppc440spe_adma_driver.driver,
+ &driver_attr_enable);
+out_en:
+ driver_remove_file(&ppc440spe_adma_driver.driver,
+ &driver_attr_devices);
+out_dev:
+ /* User will not be able to enable h/w RAID-6 */
+ pr_err("%s: failed to create RAID-6 driver interface\n",
+ __func__);
+ of_unregister_platform_driver(&ppc440spe_adma_driver);
+out_reg:
+ dcr_unmap(ppc440spe_mq_dcr_host, ppc440spe_mq_dcr_len);
+ kfree(ppc440spe_dma_fifo_buf);
+ return ret;
+}
+
+static void __exit ppc440spe_adma_exit(void)
+{
+ driver_remove_file(&ppc440spe_adma_driver.driver,
+ &driver_attr_poly);
+ driver_remove_file(&ppc440spe_adma_driver.driver,
+ &driver_attr_enable);
+ driver_remove_file(&ppc440spe_adma_driver.driver,
+ &driver_attr_devices);
+ of_unregister_platform_driver(&ppc440spe_adma_driver);
+ dcr_unmap(ppc440spe_mq_dcr_host, ppc440spe_mq_dcr_len);
+ kfree(ppc440spe_dma_fifo_buf);
+}
+
+arch_initcall(ppc440spe_adma_init);
+module_exit(ppc440spe_adma_exit);
+
+MODULE_AUTHOR("Yuri Tikhonov <yur@emcraft.com>");
+MODULE_DESCRIPTION("PPC440SPE ADMA Engine Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/ppc4xx/adma.h b/drivers/dma/ppc4xx/adma.h
new file mode 100644
index 00000000000..8ada5a812e3
--- /dev/null
+++ b/drivers/dma/ppc4xx/adma.h
@@ -0,0 +1,195 @@
+/*
+ * 2006-2009 (C) DENX Software Engineering.
+ *
+ * Author: Yuri Tikhonov <yur@emcraft.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of
+ * any kind, whether express or implied.
+ */
+
+#ifndef _PPC440SPE_ADMA_H
+#define _PPC440SPE_ADMA_H
+
+#include <linux/types.h>
+#include "dma.h"
+#include "xor.h"
+
+#define to_ppc440spe_adma_chan(chan) \
+ container_of(chan, struct ppc440spe_adma_chan, common)
+#define to_ppc440spe_adma_device(dev) \
+ container_of(dev, struct ppc440spe_adma_device, common)
+#define tx_to_ppc440spe_adma_slot(tx) \
+ container_of(tx, struct ppc440spe_adma_desc_slot, async_tx)
+
+/* Default polynomial (for 440SP is only available) */
+#define PPC440SPE_DEFAULT_POLY 0x4d
+
+#define PPC440SPE_ADMA_ENGINES_NUM (XOR_ENGINES_NUM + DMA_ENGINES_NUM)
+
+#define PPC440SPE_ADMA_WATCHDOG_MSEC 3
+#define PPC440SPE_ADMA_THRESHOLD 1
+
+#define PPC440SPE_DMA0_ID 0
+#define PPC440SPE_DMA1_ID 1
+#define PPC440SPE_XOR_ID 2
+
+#define PPC440SPE_ADMA_DMA_MAX_BYTE_COUNT 0xFFFFFFUL
+/* this is the XOR_CBBCR width */
+#define PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT (1 << 31)
+#define PPC440SPE_ADMA_ZERO_SUM_MAX_BYTE_COUNT PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT
+
+#define PPC440SPE_RXOR_RUN 0
+
+#define MQ0_CF2H_RXOR_BS_MASK 0x1FF
+
+#undef ADMA_LL_DEBUG
+
+/**
+ * struct ppc440spe_adma_device - internal representation of an ADMA device
+ * @dev: device
+ * @dma_reg: base for DMAx register access
+ * @xor_reg: base for XOR register access
+ * @i2o_reg: base for I2O register access
+ * @id: HW ADMA Device selector
+ * @dma_desc_pool_virt: base of DMA descriptor region (CPU address)
+ * @dma_desc_pool: base of DMA descriptor region (DMA address)
+ * @pool_size: size of the pool
+ * @irq: DMAx or XOR irq number
+ * @err_irq: DMAx error irq number
+ * @common: embedded struct dma_device
+ */
+struct ppc440spe_adma_device {
+ struct device *dev;
+ struct dma_regs __iomem *dma_reg;
+ struct xor_regs __iomem *xor_reg;
+ struct i2o_regs __iomem *i2o_reg;
+ int id;
+ void *dma_desc_pool_virt;
+ dma_addr_t dma_desc_pool;
+ size_t pool_size;
+ int irq;
+ int err_irq;
+ struct dma_device common;
+};
+
+/**
+ * struct ppc440spe_adma_chan - internal representation of an ADMA channel
+ * @lock: serializes enqueue/dequeue operations to the slot pool
+ * @device: parent device
+ * @chain: device chain view of the descriptors
+ * @common: common dmaengine channel object members
+ * @all_slots: complete domain of slots usable by the channel
+ * @pending: allows batching of hardware operations
+ * @completed_cookie: identifier for the most recently completed operation
+ * @slots_allocated: records the actual size of the descriptor slot pool
+ * @hw_chain_inited: h/w descriptor chain initialization flag
+ * @irq_tasklet: bottom half where ppc440spe_adma_slot_cleanup runs
+ * @needs_unmap: if buffers should not be unmapped upon final processing
+ * @pdest_page: P destination page for async validate operation
+ * @qdest_page: Q destination page for async validate operation
+ * @pdest: P dma addr for async validate operation
+ * @qdest: Q dma addr for async validate operation
+ */
+struct ppc440spe_adma_chan {
+ spinlock_t lock;
+ struct ppc440spe_adma_device *device;
+ struct list_head chain;
+ struct dma_chan common;
+ struct list_head all_slots;
+ struct ppc440spe_adma_desc_slot *last_used;
+ int pending;
+ dma_cookie_t completed_cookie;
+ int slots_allocated;
+ int hw_chain_inited;
+ struct tasklet_struct irq_tasklet;
+ u8 needs_unmap;
+ struct page *pdest_page;
+ struct page *qdest_page;
+ dma_addr_t pdest;
+ dma_addr_t qdest;
+};
+
+struct ppc440spe_rxor {
+ u32 addrl;
+ u32 addrh;
+ int len;
+ int xor_count;
+ int addr_count;
+ int desc_count;
+ int state;
+};
+
+/**
+ * struct ppc440spe_adma_desc_slot - PPC440SPE-ADMA software descriptor
+ * @phys: hardware address of the hardware descriptor chain
+ * @group_head: first operation in a transaction
+ * @hw_next: pointer to the next descriptor in chain
+ * @async_tx: support for the async_tx api
+ * @slot_node: node on the iop_adma_chan.all_slots list
+ * @chain_node: node on the op_adma_chan.chain list
+ * @group_list: list of slots that make up a multi-descriptor transaction
+ * for example transfer lengths larger than the supported hw max
+ * @unmap_len: transaction bytecount
+ * @hw_desc: virtual address of the hardware descriptor chain
+ * @stride: currently chained or not
+ * @idx: pool index
+ * @slot_cnt: total slots used in an transaction (group of operations)
+ * @src_cnt: number of sources set in this descriptor
+ * @dst_cnt: number of destinations set in the descriptor
+ * @slots_per_op: number of slots per operation
+ * @descs_per_op: number of slot per P/Q operation see comment
+ * for ppc440spe_prep_dma_pqxor function
+ * @flags: desc state/type
+ * @reverse_flags: 1 if a corresponding rxor address uses reversed address order
+ * @xor_check_result: result of zero sum
+ * @crc32_result: result crc calculation
+ */
+struct ppc440spe_adma_desc_slot {
+ dma_addr_t phys;
+ struct ppc440spe_adma_desc_slot *group_head;
+ struct ppc440spe_adma_desc_slot *hw_next;
+ struct dma_async_tx_descriptor async_tx;
+ struct list_head slot_node;
+ struct list_head chain_node; /* node in channel ops list */
+ struct list_head group_list; /* list */
+ unsigned int unmap_len;
+ void *hw_desc;
+ u16 stride;
+ u16 idx;
+ u16 slot_cnt;
+ u8 src_cnt;
+ u8 dst_cnt;
+ u8 slots_per_op;
+ u8 descs_per_op;
+ unsigned long flags;
+ unsigned long reverse_flags[8];
+
+#define PPC440SPE_DESC_INT 0 /* generate interrupt on complete */
+#define PPC440SPE_ZERO_P 1 /* clear P destionaion */
+#define PPC440SPE_ZERO_Q 2 /* clear Q destination */
+#define PPC440SPE_COHERENT 3 /* src/dst are coherent */
+
+#define PPC440SPE_DESC_WXOR 4 /* WXORs are in chain */
+#define PPC440SPE_DESC_RXOR 5 /* RXOR is in chain */
+
+#define PPC440SPE_DESC_RXOR123 8 /* CDB for RXOR123 operation */
+#define PPC440SPE_DESC_RXOR124 9 /* CDB for RXOR124 operation */
+#define PPC440SPE_DESC_RXOR125 10 /* CDB for RXOR125 operation */
+#define PPC440SPE_DESC_RXOR12 11 /* CDB for RXOR12 operation */
+#define PPC440SPE_DESC_RXOR_REV 12 /* CDB has srcs in reversed order */
+
+#define PPC440SPE_DESC_PCHECK 13
+#define PPC440SPE_DESC_QCHECK 14
+
+#define PPC440SPE_DESC_RXOR_MSK 0x3
+
+ struct ppc440spe_rxor rxor_cursor;
+
+ union {
+ u32 *xor_check_result;
+ u32 *crc32_result;
+ };
+};
+
+#endif /* _PPC440SPE_ADMA_H */
diff --git a/drivers/dma/ppc4xx/dma.h b/drivers/dma/ppc4xx/dma.h
new file mode 100644
index 00000000000..bcde2df2f37
--- /dev/null
+++ b/drivers/dma/ppc4xx/dma.h
@@ -0,0 +1,223 @@
+/*
+ * 440SPe's DMA engines support header file
+ *
+ * 2006-2009 (C) DENX Software Engineering.
+ *
+ * Author: Yuri Tikhonov <yur@emcraft.com>
+ *
+ * This file is licensed under the term of the GNU General Public License
+ * version 2. The program licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#ifndef _PPC440SPE_DMA_H
+#define _PPC440SPE_DMA_H
+
+#include <linux/types.h>
+
+/* Number of elements in the array with statical CDBs */
+#define MAX_STAT_DMA_CDBS 16
+/* Number of DMA engines available on the contoller */
+#define DMA_ENGINES_NUM 2
+
+/* Maximum h/w supported number of destinations */
+#define DMA_DEST_MAX_NUM 2
+
+/* FIFO's params */
+#define DMA0_FIFO_SIZE 0x1000
+#define DMA1_FIFO_SIZE 0x1000
+#define DMA_FIFO_ENABLE (1<<12)
+
+/* DMA Configuration Register. Data Transfer Engine PLB Priority: */
+#define DMA_CFG_DXEPR_LP (0<<26)
+#define DMA_CFG_DXEPR_HP (3<<26)
+#define DMA_CFG_DXEPR_HHP (2<<26)
+#define DMA_CFG_DXEPR_HHHP (1<<26)
+
+/* DMA Configuration Register. DMA FIFO Manager PLB Priority: */
+#define DMA_CFG_DFMPP_LP (0<<23)
+#define DMA_CFG_DFMPP_HP (3<<23)
+#define DMA_CFG_DFMPP_HHP (2<<23)
+#define DMA_CFG_DFMPP_HHHP (1<<23)
+
+/* DMA Configuration Register. Force 64-byte Alignment */
+#define DMA_CFG_FALGN (1 << 19)
+
+/*UIC0:*/
+#define D0CPF_INT (1<<12)
+#define D0CSF_INT (1<<11)
+#define D1CPF_INT (1<<10)
+#define D1CSF_INT (1<<9)
+/*UIC1:*/
+#define DMAE_INT (1<<9)
+
+/* I2O IOP Interrupt Mask Register */
+#define I2O_IOPIM_P0SNE (1<<3)
+#define I2O_IOPIM_P0EM (1<<5)
+#define I2O_IOPIM_P1SNE (1<<6)
+#define I2O_IOPIM_P1EM (1<<8)
+
+/* DMA CDB fields */
+#define DMA_CDB_MSK (0xF)
+#define DMA_CDB_64B_ADDR (1<<2)
+#define DMA_CDB_NO_INT (1<<3)
+#define DMA_CDB_STATUS_MSK (0x3)
+#define DMA_CDB_ADDR_MSK (0xFFFFFFF0)
+
+/* DMA CDB OpCodes */
+#define DMA_CDB_OPC_NO_OP (0x00)
+#define DMA_CDB_OPC_MV_SG1_SG2 (0x01)
+#define DMA_CDB_OPC_MULTICAST (0x05)
+#define DMA_CDB_OPC_DFILL128 (0x24)
+#define DMA_CDB_OPC_DCHECK128 (0x23)
+
+#define DMA_CUED_XOR_BASE (0x10000000)
+#define DMA_CUED_XOR_HB (0x00000008)
+
+#ifdef CONFIG_440SP
+#define DMA_CUED_MULT1_OFF 0
+#define DMA_CUED_MULT2_OFF 8
+#define DMA_CUED_MULT3_OFF 16
+#define DMA_CUED_REGION_OFF 24
+#define DMA_CUED_XOR_WIN_MSK (0xFC000000)
+#else
+#define DMA_CUED_MULT1_OFF 2
+#define DMA_CUED_MULT2_OFF 10
+#define DMA_CUED_MULT3_OFF 18
+#define DMA_CUED_REGION_OFF 26
+#define DMA_CUED_XOR_WIN_MSK (0xF0000000)
+#endif
+
+#define DMA_CUED_REGION_MSK 0x3
+#define DMA_RXOR123 0x0
+#define DMA_RXOR124 0x1
+#define DMA_RXOR125 0x2
+#define DMA_RXOR12 0x3
+
+/* S/G addresses */
+#define DMA_CDB_SG_SRC 1
+#define DMA_CDB_SG_DST1 2
+#define DMA_CDB_SG_DST2 3
+
+/*
+ * DMAx engines Command Descriptor Block Type
+ */
+struct dma_cdb {
+ /*
+ * Basic CDB structure (Table 20-17, p.499, 440spe_um_1_22.pdf)
+ */
+ u8 pad0[2]; /* reserved */
+ u8 attr; /* attributes */
+ u8 opc; /* opcode */
+ u32 sg1u; /* upper SG1 address */
+ u32 sg1l; /* lower SG1 address */
+ u32 cnt; /* SG count, 3B used */
+ u32 sg2u; /* upper SG2 address */
+ u32 sg2l; /* lower SG2 address */
+ u32 sg3u; /* upper SG3 address */
+ u32 sg3l; /* lower SG3 address */
+};
+
+/*
+ * DMAx hardware registers (p.515 in 440SPe UM 1.22)
+ */
+struct dma_regs {
+ u32 cpfpl;
+ u32 cpfph;
+ u32 csfpl;
+ u32 csfph;
+ u32 dsts;
+ u32 cfg;
+ u8 pad0[0x8];
+ u16 cpfhp;
+ u16 cpftp;
+ u16 csfhp;
+ u16 csftp;
+ u8 pad1[0x8];
+ u32 acpl;
+ u32 acph;
+ u32 s1bpl;
+ u32 s1bph;
+ u32 s2bpl;
+ u32 s2bph;
+ u32 s3bpl;
+ u32 s3bph;
+ u8 pad2[0x10];
+ u32 earl;
+ u32 earh;
+ u8 pad3[0x8];
+ u32 seat;
+ u32 sead;
+ u32 op;
+ u32 fsiz;
+};
+
+/*
+ * I2O hardware registers (p.528 in 440SPe UM 1.22)
+ */
+struct i2o_regs {
+ u32 ists;
+ u32 iseat;
+ u32 isead;
+ u8 pad0[0x14];
+ u32 idbel;
+ u8 pad1[0xc];
+ u32 ihis;
+ u32 ihim;
+ u8 pad2[0x8];
+ u32 ihiq;
+ u32 ihoq;
+ u8 pad3[0x8];
+ u32 iopis;
+ u32 iopim;
+ u32 iopiq;
+ u8 iopoq;
+ u8 pad4[3];
+ u16 iiflh;
+ u16 iiflt;
+ u16 iiplh;
+ u16 iiplt;
+ u16 ioflh;
+ u16 ioflt;
+ u16 ioplh;
+ u16 ioplt;
+ u32 iidc;
+ u32 ictl;
+ u32 ifcpp;
+ u8 pad5[0x4];
+ u16 mfac0;
+ u16 mfac1;
+ u16 mfac2;
+ u16 mfac3;
+ u16 mfac4;
+ u16 mfac5;
+ u16 mfac6;
+ u16 mfac7;
+ u16 ifcfh;
+ u16 ifcht;
+ u8 pad6[0x4];
+ u32 iifmc;
+ u32 iodb;
+ u32 iodbc;
+ u32 ifbal;
+ u32 ifbah;
+ u32 ifsiz;
+ u32 ispd0;
+ u32 ispd1;
+ u32 ispd2;
+ u32 ispd3;
+ u32 ihipl;
+ u32 ihiph;
+ u32 ihopl;
+ u32 ihoph;
+ u32 iiipl;
+ u32 iiiph;
+ u32 iiopl;
+ u32 iioph;
+ u32 ifcpl;
+ u32 ifcph;
+ u8 pad7[0x8];
+ u32 iopt;
+};
+
+#endif /* _PPC440SPE_DMA_H */
diff --git a/drivers/dma/ppc4xx/xor.h b/drivers/dma/ppc4xx/xor.h
new file mode 100644
index 00000000000..daed7384daa
--- /dev/null
+++ b/drivers/dma/ppc4xx/xor.h
@@ -0,0 +1,110 @@
+/*
+ * 440SPe's XOR engines support header file
+ *
+ * 2006-2009 (C) DENX Software Engineering.
+ *
+ * Author: Yuri Tikhonov <yur@emcraft.com>
+ *
+ * This file is licensed under the term of the GNU General Public License
+ * version 2. The program licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#ifndef _PPC440SPE_XOR_H
+#define _PPC440SPE_XOR_H
+
+#include <linux/types.h>
+
+/* Number of XOR engines available on the contoller */
+#define XOR_ENGINES_NUM 1
+
+/* Number of operands supported in the h/w */
+#define XOR_MAX_OPS 16
+
+/*
+ * XOR Command Block Control Register bits
+ */
+#define XOR_CBCR_LNK_BIT (1<<31) /* link present */
+#define XOR_CBCR_TGT_BIT (1<<30) /* target present */
+#define XOR_CBCR_CBCE_BIT (1<<29) /* command block compete enable */
+#define XOR_CBCR_RNZE_BIT (1<<28) /* result not zero enable */
+#define XOR_CBCR_XNOR_BIT (1<<15) /* XOR/XNOR */
+#define XOR_CDCR_OAC_MSK (0x7F) /* operand address count */
+
+/*
+ * XORCore Status Register bits
+ */
+#define XOR_SR_XCP_BIT (1<<31) /* core processing */
+#define XOR_SR_ICB_BIT (1<<17) /* invalid CB */
+#define XOR_SR_IC_BIT (1<<16) /* invalid command */
+#define XOR_SR_IPE_BIT (1<<15) /* internal parity error */
+#define XOR_SR_RNZ_BIT (1<<2) /* result not Zero */
+#define XOR_SR_CBC_BIT (1<<1) /* CB complete */
+#define XOR_SR_CBLC_BIT (1<<0) /* CB list complete */
+
+/*
+ * XORCore Control Set and Reset Register bits
+ */
+#define XOR_CRSR_XASR_BIT (1<<31) /* soft reset */
+#define XOR_CRSR_XAE_BIT (1<<30) /* enable */
+#define XOR_CRSR_RCBE_BIT (1<<29) /* refetch CB enable */
+#define XOR_CRSR_PAUS_BIT (1<<28) /* pause */
+#define XOR_CRSR_64BA_BIT (1<<27) /* 64/32 CB format */
+#define XOR_CRSR_CLP_BIT (1<<25) /* continue list processing */
+
+/*
+ * XORCore Interrupt Enable Register
+ */
+#define XOR_IE_ICBIE_BIT (1<<17) /* Invalid Command Block IRQ Enable */
+#define XOR_IE_ICIE_BIT (1<<16) /* Invalid Command IRQ Enable */
+#define XOR_IE_RPTIE_BIT (1<<14) /* Read PLB Timeout Error IRQ Enable */
+#define XOR_IE_CBCIE_BIT (1<<1) /* CB complete interrupt enable */
+#define XOR_IE_CBLCI_BIT (1<<0) /* CB list complete interrupt enable */
+
+/*
+ * XOR Accelerator engine Command Block Type
+ */
+struct xor_cb {
+ /*
+ * Basic 64-bit format XOR CB (Table 19-1, p.463, 440spe_um_1_22.pdf)
+ */
+ u32 cbc; /* control */
+ u32 cbbc; /* byte count */
+ u32 cbs; /* status */
+ u8 pad0[4]; /* reserved */
+ u32 cbtah; /* target address high */
+ u32 cbtal; /* target address low */
+ u32 cblah; /* link address high */
+ u32 cblal; /* link address low */
+ struct {
+ u32 h;
+ u32 l;
+ } __attribute__ ((packed)) ops[16];
+} __attribute__ ((packed));
+
+/*
+ * XOR hardware registers Table 19-3, UM 1.22
+ */
+struct xor_regs {
+ u32 op_ar[16][2]; /* operand address[0]-high,[1]-low registers */
+ u8 pad0[352]; /* reserved */
+ u32 cbcr; /* CB control register */
+ u32 cbbcr; /* CB byte count register */
+ u32 cbsr; /* CB status register */
+ u8 pad1[4]; /* reserved */
+ u32 cbtahr; /* operand target address high register */
+ u32 cbtalr; /* operand target address low register */
+ u32 cblahr; /* CB link address high register */
+ u32 cblalr; /* CB link address low register */
+ u32 crsr; /* control set register */
+ u32 crrr; /* control reset register */
+ u32 ccbahr; /* current CB address high register */
+ u32 ccbalr; /* current CB address low register */
+ u32 plbr; /* PLB configuration register */
+ u32 ier; /* interrupt enable register */
+ u32 pecr; /* parity error count register */
+ u32 sr; /* status register */
+ u32 revidr; /* revision ID register */
+};
+
+#endif /* _PPC440SPE_XOR_H */
diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c
new file mode 100644
index 00000000000..d10cc899c46
--- /dev/null
+++ b/drivers/dma/shdma.c
@@ -0,0 +1,876 @@
+/*
+ * Renesas SuperH DMA Engine support
+ *
+ * base is drivers/dma/flsdma.c
+ *
+ * Copyright (C) 2009 Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>
+ * Copyright (C) 2009 Renesas Solutions, Inc. All rights reserved.
+ * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * - DMA of SuperH does not have Hardware DMA chain mode.
+ * - MAX DMA size is 16MB.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/platform_device.h>
+#include <cpu/dma.h>
+#include <asm/dma-sh.h>
+#include "shdma.h"
+
+/* DMA descriptor control */
+enum sh_dmae_desc_status {
+ DESC_IDLE,
+ DESC_PREPARED,
+ DESC_SUBMITTED,
+ DESC_COMPLETED, /* completed, have to call callback */
+ DESC_WAITING, /* callback called, waiting for ack / re-submit */
+};
+
+#define NR_DESCS_PER_CHANNEL 32
+/*
+ * Define the default configuration for dual address memory-memory transfer.
+ * The 0x400 value represents auto-request, external->external.
+ *
+ * And this driver set 4byte burst mode.
+ * If you want to change mode, you need to change RS_DEFAULT of value.
+ * (ex 1byte burst mode -> (RS_DUAL & ~TS_32)
+ */
+#define RS_DEFAULT (RS_DUAL)
+
+static void sh_dmae_chan_ld_cleanup(struct sh_dmae_chan *sh_chan, bool all);
+
+#define SH_DMAC_CHAN_BASE(id) (dma_base_addr[id])
+static void sh_dmae_writel(struct sh_dmae_chan *sh_dc, u32 data, u32 reg)
+{
+ ctrl_outl(data, (SH_DMAC_CHAN_BASE(sh_dc->id) + reg));
+}
+
+static u32 sh_dmae_readl(struct sh_dmae_chan *sh_dc, u32 reg)
+{
+ return ctrl_inl((SH_DMAC_CHAN_BASE(sh_dc->id) + reg));
+}
+
+static void dmae_init(struct sh_dmae_chan *sh_chan)
+{
+ u32 chcr = RS_DEFAULT; /* default is DUAL mode */
+ sh_dmae_writel(sh_chan, chcr, CHCR);
+}
+
+/*
+ * Reset DMA controller
+ *
+ * SH7780 has two DMAOR register
+ */
+static void sh_dmae_ctl_stop(int id)
+{
+ unsigned short dmaor = dmaor_read_reg(id);
+
+ dmaor &= ~(DMAOR_NMIF | DMAOR_AE);
+ dmaor_write_reg(id, dmaor);
+}
+
+static int sh_dmae_rst(int id)
+{
+ unsigned short dmaor;
+
+ sh_dmae_ctl_stop(id);
+ dmaor = dmaor_read_reg(id) | DMAOR_INIT;
+
+ dmaor_write_reg(id, dmaor);
+ if (dmaor_read_reg(id) & (DMAOR_AE | DMAOR_NMIF)) {
+ pr_warning(KERN_ERR "dma-sh: Can't initialize DMAOR.\n");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int dmae_is_busy(struct sh_dmae_chan *sh_chan)
+{
+ u32 chcr = sh_dmae_readl(sh_chan, CHCR);
+ if (chcr & CHCR_DE) {
+ if (!(chcr & CHCR_TE))
+ return -EBUSY; /* working */
+ }
+ return 0; /* waiting */
+}
+
+static inline unsigned int calc_xmit_shift(struct sh_dmae_chan *sh_chan)
+{
+ u32 chcr = sh_dmae_readl(sh_chan, CHCR);
+ return ts_shift[(chcr & CHCR_TS_MASK) >> CHCR_TS_SHIFT];
+}
+
+static void dmae_set_reg(struct sh_dmae_chan *sh_chan, struct sh_dmae_regs *hw)
+{
+ sh_dmae_writel(sh_chan, hw->sar, SAR);
+ sh_dmae_writel(sh_chan, hw->dar, DAR);
+ sh_dmae_writel(sh_chan, hw->tcr >> calc_xmit_shift(sh_chan), TCR);
+}
+
+static void dmae_start(struct sh_dmae_chan *sh_chan)
+{
+ u32 chcr = sh_dmae_readl(sh_chan, CHCR);
+
+ chcr |= CHCR_DE | CHCR_IE;
+ sh_dmae_writel(sh_chan, chcr, CHCR);
+}
+
+static void dmae_halt(struct sh_dmae_chan *sh_chan)
+{
+ u32 chcr = sh_dmae_readl(sh_chan, CHCR);
+
+ chcr &= ~(CHCR_DE | CHCR_TE | CHCR_IE);
+ sh_dmae_writel(sh_chan, chcr, CHCR);
+}
+
+static int dmae_set_chcr(struct sh_dmae_chan *sh_chan, u32 val)
+{
+ int ret = dmae_is_busy(sh_chan);
+ /* When DMA was working, can not set data to CHCR */
+ if (ret)
+ return ret;
+
+ sh_dmae_writel(sh_chan, val, CHCR);
+ return 0;
+}
+
+#define DMARS1_ADDR 0x04
+#define DMARS2_ADDR 0x08
+#define DMARS_SHIFT 8
+#define DMARS_CHAN_MSK 0x01
+static int dmae_set_dmars(struct sh_dmae_chan *sh_chan, u16 val)
+{
+ u32 addr;
+ int shift = 0;
+ int ret = dmae_is_busy(sh_chan);
+ if (ret)
+ return ret;
+
+ if (sh_chan->id & DMARS_CHAN_MSK)
+ shift = DMARS_SHIFT;
+
+ switch (sh_chan->id) {
+ /* DMARS0 */
+ case 0:
+ case 1:
+ addr = SH_DMARS_BASE;
+ break;
+ /* DMARS1 */
+ case 2:
+ case 3:
+ addr = (SH_DMARS_BASE + DMARS1_ADDR);
+ break;
+ /* DMARS2 */
+ case 4:
+ case 5:
+ addr = (SH_DMARS_BASE + DMARS2_ADDR);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ ctrl_outw((val << shift) |
+ (ctrl_inw(addr) & (shift ? 0xFF00 : 0x00FF)),
+ addr);
+
+ return 0;
+}
+
+static dma_cookie_t sh_dmae_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct sh_desc *desc = tx_to_sh_desc(tx), *chunk, *last = desc, *c;
+ struct sh_dmae_chan *sh_chan = to_sh_chan(tx->chan);
+ dma_async_tx_callback callback = tx->callback;
+ dma_cookie_t cookie;
+
+ spin_lock_bh(&sh_chan->desc_lock);
+
+ cookie = sh_chan->common.cookie;
+ cookie++;
+ if (cookie < 0)
+ cookie = 1;
+
+ sh_chan->common.cookie = cookie;
+ tx->cookie = cookie;
+
+ /* Mark all chunks of this descriptor as submitted, move to the queue */
+ list_for_each_entry_safe(chunk, c, desc->node.prev, node) {
+ /*
+ * All chunks are on the global ld_free, so, we have to find
+ * the end of the chain ourselves
+ */
+ if (chunk != desc && (chunk->mark == DESC_IDLE ||
+ chunk->async_tx.cookie > 0 ||
+ chunk->async_tx.cookie == -EBUSY ||
+ &chunk->node == &sh_chan->ld_free))
+ break;
+ chunk->mark = DESC_SUBMITTED;
+ /* Callback goes to the last chunk */
+ chunk->async_tx.callback = NULL;
+ chunk->cookie = cookie;
+ list_move_tail(&chunk->node, &sh_chan->ld_queue);
+ last = chunk;
+ }
+
+ last->async_tx.callback = callback;
+ last->async_tx.callback_param = tx->callback_param;
+
+ dev_dbg(sh_chan->dev, "submit #%d@%p on %d: %x[%d] -> %x\n",
+ tx->cookie, &last->async_tx, sh_chan->id,
+ desc->hw.sar, desc->hw.tcr, desc->hw.dar);
+
+ spin_unlock_bh(&sh_chan->desc_lock);
+
+ return cookie;
+}
+
+/* Called with desc_lock held */
+static struct sh_desc *sh_dmae_get_desc(struct sh_dmae_chan *sh_chan)
+{
+ struct sh_desc *desc;
+
+ list_for_each_entry(desc, &sh_chan->ld_free, node)
+ if (desc->mark != DESC_PREPARED) {
+ BUG_ON(desc->mark != DESC_IDLE);
+ list_del(&desc->node);
+ return desc;
+ }
+
+ return NULL;
+}
+
+static int sh_dmae_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct sh_dmae_chan *sh_chan = to_sh_chan(chan);
+ struct sh_desc *desc;
+
+ spin_lock_bh(&sh_chan->desc_lock);
+ while (sh_chan->descs_allocated < NR_DESCS_PER_CHANNEL) {
+ spin_unlock_bh(&sh_chan->desc_lock);
+ desc = kzalloc(sizeof(struct sh_desc), GFP_KERNEL);
+ if (!desc) {
+ spin_lock_bh(&sh_chan->desc_lock);
+ break;
+ }
+ dma_async_tx_descriptor_init(&desc->async_tx,
+ &sh_chan->common);
+ desc->async_tx.tx_submit = sh_dmae_tx_submit;
+ desc->mark = DESC_IDLE;
+
+ spin_lock_bh(&sh_chan->desc_lock);
+ list_add(&desc->node, &sh_chan->ld_free);
+ sh_chan->descs_allocated++;
+ }
+ spin_unlock_bh(&sh_chan->desc_lock);
+
+ return sh_chan->descs_allocated;
+}
+
+/*
+ * sh_dma_free_chan_resources - Free all resources of the channel.
+ */
+static void sh_dmae_free_chan_resources(struct dma_chan *chan)
+{
+ struct sh_dmae_chan *sh_chan = to_sh_chan(chan);
+ struct sh_desc *desc, *_desc;
+ LIST_HEAD(list);
+
+ /* Prepared and not submitted descriptors can still be on the queue */
+ if (!list_empty(&sh_chan->ld_queue))
+ sh_dmae_chan_ld_cleanup(sh_chan, true);
+
+ spin_lock_bh(&sh_chan->desc_lock);
+
+ list_splice_init(&sh_chan->ld_free, &list);
+ sh_chan->descs_allocated = 0;
+
+ spin_unlock_bh(&sh_chan->desc_lock);
+
+ list_for_each_entry_safe(desc, _desc, &list, node)
+ kfree(desc);
+}
+
+static struct dma_async_tx_descriptor *sh_dmae_prep_memcpy(
+ struct dma_chan *chan, dma_addr_t dma_dest, dma_addr_t dma_src,
+ size_t len, unsigned long flags)
+{
+ struct sh_dmae_chan *sh_chan;
+ struct sh_desc *first = NULL, *prev = NULL, *new;
+ size_t copy_size;
+ LIST_HEAD(tx_list);
+ int chunks = (len + SH_DMA_TCR_MAX) / (SH_DMA_TCR_MAX + 1);
+
+ if (!chan)
+ return NULL;
+
+ if (!len)
+ return NULL;
+
+ sh_chan = to_sh_chan(chan);
+
+ /* Have to lock the whole loop to protect against concurrent release */
+ spin_lock_bh(&sh_chan->desc_lock);
+
+ /*
+ * Chaining:
+ * first descriptor is what user is dealing with in all API calls, its
+ * cookie is at first set to -EBUSY, at tx-submit to a positive
+ * number
+ * if more than one chunk is needed further chunks have cookie = -EINVAL
+ * the last chunk, if not equal to the first, has cookie = -ENOSPC
+ * all chunks are linked onto the tx_list head with their .node heads
+ * only during this function, then they are immediately spliced
+ * back onto the free list in form of a chain
+ */
+ do {
+ /* Allocate the link descriptor from the free list */
+ new = sh_dmae_get_desc(sh_chan);
+ if (!new) {
+ dev_err(sh_chan->dev,
+ "No free memory for link descriptor\n");
+ list_for_each_entry(new, &tx_list, node)
+ new->mark = DESC_IDLE;
+ list_splice(&tx_list, &sh_chan->ld_free);
+ spin_unlock_bh(&sh_chan->desc_lock);
+ return NULL;
+ }
+
+ copy_size = min(len, (size_t)SH_DMA_TCR_MAX + 1);
+
+ new->hw.sar = dma_src;
+ new->hw.dar = dma_dest;
+ new->hw.tcr = copy_size;
+ if (!first) {
+ /* First desc */
+ new->async_tx.cookie = -EBUSY;
+ first = new;
+ } else {
+ /* Other desc - invisible to the user */
+ new->async_tx.cookie = -EINVAL;
+ }
+
+ dev_dbg(sh_chan->dev,
+ "chaining %u of %u with %p, dst %x, cookie %d\n",
+ copy_size, len, &new->async_tx, dma_dest,
+ new->async_tx.cookie);
+
+ new->mark = DESC_PREPARED;
+ new->async_tx.flags = flags;
+ new->chunks = chunks--;
+
+ prev = new;
+ len -= copy_size;
+ dma_src += copy_size;
+ dma_dest += copy_size;
+ /* Insert the link descriptor to the LD ring */
+ list_add_tail(&new->node, &tx_list);
+ } while (len);
+
+ if (new != first)
+ new->async_tx.cookie = -ENOSPC;
+
+ /* Put them back on the free list, so, they don't get lost */
+ list_splice_tail(&tx_list, &sh_chan->ld_free);
+
+ spin_unlock_bh(&sh_chan->desc_lock);
+
+ return &first->async_tx;
+}
+
+static dma_async_tx_callback __ld_cleanup(struct sh_dmae_chan *sh_chan, bool all)
+{
+ struct sh_desc *desc, *_desc;
+ /* Is the "exposed" head of a chain acked? */
+ bool head_acked = false;
+ dma_cookie_t cookie = 0;
+ dma_async_tx_callback callback = NULL;
+ void *param = NULL;
+
+ spin_lock_bh(&sh_chan->desc_lock);
+ list_for_each_entry_safe(desc, _desc, &sh_chan->ld_queue, node) {
+ struct dma_async_tx_descriptor *tx = &desc->async_tx;
+
+ BUG_ON(tx->cookie > 0 && tx->cookie != desc->cookie);
+ BUG_ON(desc->mark != DESC_SUBMITTED &&
+ desc->mark != DESC_COMPLETED &&
+ desc->mark != DESC_WAITING);
+
+ /*
+ * queue is ordered, and we use this loop to (1) clean up all
+ * completed descriptors, and to (2) update descriptor flags of
+ * any chunks in a (partially) completed chain
+ */
+ if (!all && desc->mark == DESC_SUBMITTED &&
+ desc->cookie != cookie)
+ break;
+
+ if (tx->cookie > 0)
+ cookie = tx->cookie;
+
+ if (desc->mark == DESC_COMPLETED && desc->chunks == 1) {
+ BUG_ON(sh_chan->completed_cookie != desc->cookie - 1);
+ sh_chan->completed_cookie = desc->cookie;
+ }
+
+ /* Call callback on the last chunk */
+ if (desc->mark == DESC_COMPLETED && tx->callback) {
+ desc->mark = DESC_WAITING;
+ callback = tx->callback;
+ param = tx->callback_param;
+ dev_dbg(sh_chan->dev, "descriptor #%d@%p on %d callback\n",
+ tx->cookie, tx, sh_chan->id);
+ BUG_ON(desc->chunks != 1);
+ break;
+ }
+
+ if (tx->cookie > 0 || tx->cookie == -EBUSY) {
+ if (desc->mark == DESC_COMPLETED) {
+ BUG_ON(tx->cookie < 0);
+ desc->mark = DESC_WAITING;
+ }
+ head_acked = async_tx_test_ack(tx);
+ } else {
+ switch (desc->mark) {
+ case DESC_COMPLETED:
+ desc->mark = DESC_WAITING;
+ /* Fall through */
+ case DESC_WAITING:
+ if (head_acked)
+ async_tx_ack(&desc->async_tx);
+ }
+ }
+
+ dev_dbg(sh_chan->dev, "descriptor %p #%d completed.\n",
+ tx, tx->cookie);
+
+ if (((desc->mark == DESC_COMPLETED ||
+ desc->mark == DESC_WAITING) &&
+ async_tx_test_ack(&desc->async_tx)) || all) {
+ /* Remove from ld_queue list */
+ desc->mark = DESC_IDLE;
+ list_move(&desc->node, &sh_chan->ld_free);
+ }
+ }
+ spin_unlock_bh(&sh_chan->desc_lock);
+
+ if (callback)
+ callback(param);
+
+ return callback;
+}
+
+/*
+ * sh_chan_ld_cleanup - Clean up link descriptors
+ *
+ * This function cleans up the ld_queue of DMA channel.
+ */
+static void sh_dmae_chan_ld_cleanup(struct sh_dmae_chan *sh_chan, bool all)
+{
+ while (__ld_cleanup(sh_chan, all))
+ ;
+}
+
+static void sh_chan_xfer_ld_queue(struct sh_dmae_chan *sh_chan)
+{
+ struct sh_desc *sd;
+
+ spin_lock_bh(&sh_chan->desc_lock);
+ /* DMA work check */
+ if (dmae_is_busy(sh_chan)) {
+ spin_unlock_bh(&sh_chan->desc_lock);
+ return;
+ }
+
+ /* Find the first un-transfer desciptor */
+ list_for_each_entry(sd, &sh_chan->ld_queue, node)
+ if (sd->mark == DESC_SUBMITTED) {
+ /* Get the ld start address from ld_queue */
+ dmae_set_reg(sh_chan, &sd->hw);
+ dmae_start(sh_chan);
+ break;
+ }
+
+ spin_unlock_bh(&sh_chan->desc_lock);
+}
+
+static void sh_dmae_memcpy_issue_pending(struct dma_chan *chan)
+{
+ struct sh_dmae_chan *sh_chan = to_sh_chan(chan);
+ sh_chan_xfer_ld_queue(sh_chan);
+}
+
+static enum dma_status sh_dmae_is_complete(struct dma_chan *chan,
+ dma_cookie_t cookie,
+ dma_cookie_t *done,
+ dma_cookie_t *used)
+{
+ struct sh_dmae_chan *sh_chan = to_sh_chan(chan);
+ dma_cookie_t last_used;
+ dma_cookie_t last_complete;
+
+ sh_dmae_chan_ld_cleanup(sh_chan, false);
+
+ last_used = chan->cookie;
+ last_complete = sh_chan->completed_cookie;
+ BUG_ON(last_complete < 0);
+
+ if (done)
+ *done = last_complete;
+
+ if (used)
+ *used = last_used;
+
+ return dma_async_is_complete(cookie, last_complete, last_used);
+}
+
+static irqreturn_t sh_dmae_interrupt(int irq, void *data)
+{
+ irqreturn_t ret = IRQ_NONE;
+ struct sh_dmae_chan *sh_chan = (struct sh_dmae_chan *)data;
+ u32 chcr = sh_dmae_readl(sh_chan, CHCR);
+
+ if (chcr & CHCR_TE) {
+ /* DMA stop */
+ dmae_halt(sh_chan);
+
+ ret = IRQ_HANDLED;
+ tasklet_schedule(&sh_chan->tasklet);
+ }
+
+ return ret;
+}
+
+#if defined(CONFIG_CPU_SH4)
+static irqreturn_t sh_dmae_err(int irq, void *data)
+{
+ int err = 0;
+ struct sh_dmae_device *shdev = (struct sh_dmae_device *)data;
+
+ /* IRQ Multi */
+ if (shdev->pdata.mode & SHDMA_MIX_IRQ) {
+ int cnt = 0;
+ switch (irq) {
+#if defined(DMTE6_IRQ) && defined(DMAE1_IRQ)
+ case DMTE6_IRQ:
+ cnt++;
+#endif
+ case DMTE0_IRQ:
+ if (dmaor_read_reg(cnt) & (DMAOR_NMIF | DMAOR_AE)) {
+ disable_irq(irq);
+ return IRQ_HANDLED;
+ }
+ default:
+ return IRQ_NONE;
+ }
+ } else {
+ /* reset dma controller */
+ err = sh_dmae_rst(0);
+ if (err)
+ return err;
+#ifdef SH_DMAC_BASE1
+ if (shdev->pdata.mode & SHDMA_DMAOR1) {
+ err = sh_dmae_rst(1);
+ if (err)
+ return err;
+ }
+#endif
+ disable_irq(irq);
+ return IRQ_HANDLED;
+ }
+}
+#endif
+
+static void dmae_do_tasklet(unsigned long data)
+{
+ struct sh_dmae_chan *sh_chan = (struct sh_dmae_chan *)data;
+ struct sh_desc *desc;
+ u32 sar_buf = sh_dmae_readl(sh_chan, SAR);
+
+ spin_lock(&sh_chan->desc_lock);
+ list_for_each_entry(desc, &sh_chan->ld_queue, node) {
+ if ((desc->hw.sar + desc->hw.tcr) == sar_buf &&
+ desc->mark == DESC_SUBMITTED) {
+ dev_dbg(sh_chan->dev, "done #%d@%p dst %u\n",
+ desc->async_tx.cookie, &desc->async_tx,
+ desc->hw.dar);
+ desc->mark = DESC_COMPLETED;
+ break;
+ }
+ }
+ spin_unlock(&sh_chan->desc_lock);
+
+ /* Next desc */
+ sh_chan_xfer_ld_queue(sh_chan);
+ sh_dmae_chan_ld_cleanup(sh_chan, false);
+}
+
+static unsigned int get_dmae_irq(unsigned int id)
+{
+ unsigned int irq = 0;
+ if (id < ARRAY_SIZE(dmte_irq_map))
+ irq = dmte_irq_map[id];
+ return irq;
+}
+
+static int __devinit sh_dmae_chan_probe(struct sh_dmae_device *shdev, int id)
+{
+ int err;
+ unsigned int irq = get_dmae_irq(id);
+ unsigned long irqflags = IRQF_DISABLED;
+ struct sh_dmae_chan *new_sh_chan;
+
+ /* alloc channel */
+ new_sh_chan = kzalloc(sizeof(struct sh_dmae_chan), GFP_KERNEL);
+ if (!new_sh_chan) {
+ dev_err(shdev->common.dev,
+ "No free memory for allocating dma channels!\n");
+ return -ENOMEM;
+ }
+
+ new_sh_chan->dev = shdev->common.dev;
+ new_sh_chan->id = id;
+
+ /* Init DMA tasklet */
+ tasklet_init(&new_sh_chan->tasklet, dmae_do_tasklet,
+ (unsigned long)new_sh_chan);
+
+ /* Init the channel */
+ dmae_init(new_sh_chan);
+
+ spin_lock_init(&new_sh_chan->desc_lock);
+
+ /* Init descripter manage list */
+ INIT_LIST_HEAD(&new_sh_chan->ld_queue);
+ INIT_LIST_HEAD(&new_sh_chan->ld_free);
+
+ /* copy struct dma_device */
+ new_sh_chan->common.device = &shdev->common;
+
+ /* Add the channel to DMA device channel list */
+ list_add_tail(&new_sh_chan->common.device_node,
+ &shdev->common.channels);
+ shdev->common.chancnt++;
+
+ if (shdev->pdata.mode & SHDMA_MIX_IRQ) {
+ irqflags = IRQF_SHARED;
+#if defined(DMTE6_IRQ)
+ if (irq >= DMTE6_IRQ)
+ irq = DMTE6_IRQ;
+ else
+#endif
+ irq = DMTE0_IRQ;
+ }
+
+ snprintf(new_sh_chan->dev_id, sizeof(new_sh_chan->dev_id),
+ "sh-dmae%d", new_sh_chan->id);
+
+ /* set up channel irq */
+ err = request_irq(irq, &sh_dmae_interrupt, irqflags,
+ new_sh_chan->dev_id, new_sh_chan);
+ if (err) {
+ dev_err(shdev->common.dev, "DMA channel %d request_irq error "
+ "with return %d\n", id, err);
+ goto err_no_irq;
+ }
+
+ /* CHCR register control function */
+ new_sh_chan->set_chcr = dmae_set_chcr;
+ /* DMARS register control function */
+ new_sh_chan->set_dmars = dmae_set_dmars;
+
+ shdev->chan[id] = new_sh_chan;
+ return 0;
+
+err_no_irq:
+ /* remove from dmaengine device node */
+ list_del(&new_sh_chan->common.device_node);
+ kfree(new_sh_chan);
+ return err;
+}
+
+static void sh_dmae_chan_remove(struct sh_dmae_device *shdev)
+{
+ int i;
+
+ for (i = shdev->common.chancnt - 1 ; i >= 0 ; i--) {
+ if (shdev->chan[i]) {
+ struct sh_dmae_chan *shchan = shdev->chan[i];
+ if (!(shdev->pdata.mode & SHDMA_MIX_IRQ))
+ free_irq(dmte_irq_map[i], shchan);
+
+ list_del(&shchan->common.device_node);
+ kfree(shchan);
+ shdev->chan[i] = NULL;
+ }
+ }
+ shdev->common.chancnt = 0;
+}
+
+static int __init sh_dmae_probe(struct platform_device *pdev)
+{
+ int err = 0, cnt, ecnt;
+ unsigned long irqflags = IRQF_DISABLED;
+#if defined(CONFIG_CPU_SH4)
+ int eirq[] = { DMAE0_IRQ,
+#if defined(DMAE1_IRQ)
+ DMAE1_IRQ
+#endif
+ };
+#endif
+ struct sh_dmae_device *shdev;
+
+ /* get platform data */
+ if (!pdev->dev.platform_data)
+ return -ENODEV;
+
+ shdev = kzalloc(sizeof(struct sh_dmae_device), GFP_KERNEL);
+ if (!shdev) {
+ dev_err(&pdev->dev, "No enough memory\n");
+ return -ENOMEM;
+ }
+
+ /* platform data */
+ memcpy(&shdev->pdata, pdev->dev.platform_data,
+ sizeof(struct sh_dmae_pdata));
+
+ /* reset dma controller */
+ err = sh_dmae_rst(0);
+ if (err)
+ goto rst_err;
+
+ /* SH7780/85/23 has DMAOR1 */
+ if (shdev->pdata.mode & SHDMA_DMAOR1) {
+ err = sh_dmae_rst(1);
+ if (err)
+ goto rst_err;
+ }
+
+ INIT_LIST_HEAD(&shdev->common.channels);
+
+ dma_cap_set(DMA_MEMCPY, shdev->common.cap_mask);
+ shdev->common.device_alloc_chan_resources
+ = sh_dmae_alloc_chan_resources;
+ shdev->common.device_free_chan_resources = sh_dmae_free_chan_resources;
+ shdev->common.device_prep_dma_memcpy = sh_dmae_prep_memcpy;
+ shdev->common.device_is_tx_complete = sh_dmae_is_complete;
+ shdev->common.device_issue_pending = sh_dmae_memcpy_issue_pending;
+ shdev->common.dev = &pdev->dev;
+ /* Default transfer size of 32 bytes requires 32-byte alignment */
+ shdev->common.copy_align = 5;
+
+#if defined(CONFIG_CPU_SH4)
+ /* Non Mix IRQ mode SH7722/SH7730 etc... */
+ if (shdev->pdata.mode & SHDMA_MIX_IRQ) {
+ irqflags = IRQF_SHARED;
+ eirq[0] = DMTE0_IRQ;
+#if defined(DMTE6_IRQ) && defined(DMAE1_IRQ)
+ eirq[1] = DMTE6_IRQ;
+#endif
+ }
+
+ for (ecnt = 0 ; ecnt < ARRAY_SIZE(eirq); ecnt++) {
+ err = request_irq(eirq[ecnt], sh_dmae_err, irqflags,
+ "DMAC Address Error", shdev);
+ if (err) {
+ dev_err(&pdev->dev, "DMA device request_irq"
+ "error (irq %d) with return %d\n",
+ eirq[ecnt], err);
+ goto eirq_err;
+ }
+ }
+#endif /* CONFIG_CPU_SH4 */
+
+ /* Create DMA Channel */
+ for (cnt = 0 ; cnt < MAX_DMA_CHANNELS ; cnt++) {
+ err = sh_dmae_chan_probe(shdev, cnt);
+ if (err)
+ goto chan_probe_err;
+ }
+
+ platform_set_drvdata(pdev, shdev);
+ dma_async_device_register(&shdev->common);
+
+ return err;
+
+chan_probe_err:
+ sh_dmae_chan_remove(shdev);
+
+eirq_err:
+ for (ecnt-- ; ecnt >= 0; ecnt--)
+ free_irq(eirq[ecnt], shdev);
+
+rst_err:
+ kfree(shdev);
+
+ return err;
+}
+
+static int __exit sh_dmae_remove(struct platform_device *pdev)
+{
+ struct sh_dmae_device *shdev = platform_get_drvdata(pdev);
+
+ dma_async_device_unregister(&shdev->common);
+
+ if (shdev->pdata.mode & SHDMA_MIX_IRQ) {
+ free_irq(DMTE0_IRQ, shdev);
+#if defined(DMTE6_IRQ)
+ free_irq(DMTE6_IRQ, shdev);
+#endif
+ }
+
+ /* channel data remove */
+ sh_dmae_chan_remove(shdev);
+
+ if (!(shdev->pdata.mode & SHDMA_MIX_IRQ)) {
+ free_irq(DMAE0_IRQ, shdev);
+#if defined(DMAE1_IRQ)
+ free_irq(DMAE1_IRQ, shdev);
+#endif
+ }
+ kfree(shdev);
+
+ return 0;
+}
+
+static void sh_dmae_shutdown(struct platform_device *pdev)
+{
+ struct sh_dmae_device *shdev = platform_get_drvdata(pdev);
+ sh_dmae_ctl_stop(0);
+ if (shdev->pdata.mode & SHDMA_DMAOR1)
+ sh_dmae_ctl_stop(1);
+}
+
+static struct platform_driver sh_dmae_driver = {
+ .remove = __exit_p(sh_dmae_remove),
+ .shutdown = sh_dmae_shutdown,
+ .driver = {
+ .name = "sh-dma-engine",
+ },
+};
+
+static int __init sh_dmae_init(void)
+{
+ return platform_driver_probe(&sh_dmae_driver, sh_dmae_probe);
+}
+module_init(sh_dmae_init);
+
+static void __exit sh_dmae_exit(void)
+{
+ platform_driver_unregister(&sh_dmae_driver);
+}
+module_exit(sh_dmae_exit);
+
+MODULE_AUTHOR("Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>");
+MODULE_DESCRIPTION("Renesas SH DMA Engine driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/shdma.h b/drivers/dma/shdma.h
new file mode 100644
index 00000000000..108f1cffb6f
--- /dev/null
+++ b/drivers/dma/shdma.h
@@ -0,0 +1,67 @@
+/*
+ * Renesas SuperH DMA Engine support
+ *
+ * Copyright (C) 2009 Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>
+ * Copyright (C) 2009 Renesas Solutions, Inc. All rights reserved.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+#ifndef __DMA_SHDMA_H
+#define __DMA_SHDMA_H
+
+#include <linux/dmaengine.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+
+#define SH_DMA_TCR_MAX 0x00FFFFFF /* 16MB */
+
+struct sh_dmae_regs {
+ u32 sar; /* SAR / source address */
+ u32 dar; /* DAR / destination address */
+ u32 tcr; /* TCR / transfer count */
+};
+
+struct sh_desc {
+ struct sh_dmae_regs hw;
+ struct list_head node;
+ struct dma_async_tx_descriptor async_tx;
+ dma_cookie_t cookie;
+ int chunks;
+ int mark;
+};
+
+struct device;
+
+struct sh_dmae_chan {
+ dma_cookie_t completed_cookie; /* The maximum cookie completed */
+ spinlock_t desc_lock; /* Descriptor operation lock */
+ struct list_head ld_queue; /* Link descriptors queue */
+ struct list_head ld_free; /* Link descriptors free */
+ struct dma_chan common; /* DMA common channel */
+ struct device *dev; /* Channel device */
+ struct tasklet_struct tasklet; /* Tasklet */
+ int descs_allocated; /* desc count */
+ int id; /* Raw id of this channel */
+ char dev_id[16]; /* unique name per DMAC of channel */
+
+ /* Set chcr */
+ int (*set_chcr)(struct sh_dmae_chan *sh_chan, u32 regs);
+ /* Set DMA resource */
+ int (*set_dmars)(struct sh_dmae_chan *sh_chan, u16 res);
+};
+
+struct sh_dmae_device {
+ struct dma_device common;
+ struct sh_dmae_chan *chan[MAX_DMA_CHANNELS];
+ struct sh_dmae_pdata pdata;
+};
+
+#define to_sh_chan(chan) container_of(chan, struct sh_dmae_chan, common)
+#define to_sh_desc(lh) container_of(lh, struct sh_desc, node)
+#define tx_to_sh_desc(tx) container_of(tx, struct sh_desc, async_tx)
+
+#endif /* __DMA_SHDMA_H */
diff --git a/drivers/dma/txx9dmac.c b/drivers/dma/txx9dmac.c
new file mode 100644
index 00000000000..3ebc61067e5
--- /dev/null
+++ b/drivers/dma/txx9dmac.c
@@ -0,0 +1,1361 @@
+/*
+ * Driver for the TXx9 SoC DMA Controller
+ *
+ * Copyright (C) 2009 Atsushi Nemoto
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/dma-mapping.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/scatterlist.h>
+#include "txx9dmac.h"
+
+static struct txx9dmac_chan *to_txx9dmac_chan(struct dma_chan *chan)
+{
+ return container_of(chan, struct txx9dmac_chan, chan);
+}
+
+static struct txx9dmac_cregs __iomem *__dma_regs(const struct txx9dmac_chan *dc)
+{
+ return dc->ch_regs;
+}
+
+static struct txx9dmac_cregs32 __iomem *__dma_regs32(
+ const struct txx9dmac_chan *dc)
+{
+ return dc->ch_regs;
+}
+
+#define channel64_readq(dc, name) \
+ __raw_readq(&(__dma_regs(dc)->name))
+#define channel64_writeq(dc, name, val) \
+ __raw_writeq((val), &(__dma_regs(dc)->name))
+#define channel64_readl(dc, name) \
+ __raw_readl(&(__dma_regs(dc)->name))
+#define channel64_writel(dc, name, val) \
+ __raw_writel((val), &(__dma_regs(dc)->name))
+
+#define channel32_readl(dc, name) \
+ __raw_readl(&(__dma_regs32(dc)->name))
+#define channel32_writel(dc, name, val) \
+ __raw_writel((val), &(__dma_regs32(dc)->name))
+
+#define channel_readq(dc, name) channel64_readq(dc, name)
+#define channel_writeq(dc, name, val) channel64_writeq(dc, name, val)
+#define channel_readl(dc, name) \
+ (is_dmac64(dc) ? \
+ channel64_readl(dc, name) : channel32_readl(dc, name))
+#define channel_writel(dc, name, val) \
+ (is_dmac64(dc) ? \
+ channel64_writel(dc, name, val) : channel32_writel(dc, name, val))
+
+static dma_addr_t channel64_read_CHAR(const struct txx9dmac_chan *dc)
+{
+ if (sizeof(__dma_regs(dc)->CHAR) == sizeof(u64))
+ return channel64_readq(dc, CHAR);
+ else
+ return channel64_readl(dc, CHAR);
+}
+
+static void channel64_write_CHAR(const struct txx9dmac_chan *dc, dma_addr_t val)
+{
+ if (sizeof(__dma_regs(dc)->CHAR) == sizeof(u64))
+ channel64_writeq(dc, CHAR, val);
+ else
+ channel64_writel(dc, CHAR, val);
+}
+
+static void channel64_clear_CHAR(const struct txx9dmac_chan *dc)
+{
+#if defined(CONFIG_32BIT) && !defined(CONFIG_64BIT_PHYS_ADDR)
+ channel64_writel(dc, CHAR, 0);
+ channel64_writel(dc, __pad_CHAR, 0);
+#else
+ channel64_writeq(dc, CHAR, 0);
+#endif
+}
+
+static dma_addr_t channel_read_CHAR(const struct txx9dmac_chan *dc)
+{
+ if (is_dmac64(dc))
+ return channel64_read_CHAR(dc);
+ else
+ return channel32_readl(dc, CHAR);
+}
+
+static void channel_write_CHAR(const struct txx9dmac_chan *dc, dma_addr_t val)
+{
+ if (is_dmac64(dc))
+ channel64_write_CHAR(dc, val);
+ else
+ channel32_writel(dc, CHAR, val);
+}
+
+static struct txx9dmac_regs __iomem *__txx9dmac_regs(
+ const struct txx9dmac_dev *ddev)
+{
+ return ddev->regs;
+}
+
+static struct txx9dmac_regs32 __iomem *__txx9dmac_regs32(
+ const struct txx9dmac_dev *ddev)
+{
+ return ddev->regs;
+}
+
+#define dma64_readl(ddev, name) \
+ __raw_readl(&(__txx9dmac_regs(ddev)->name))
+#define dma64_writel(ddev, name, val) \
+ __raw_writel((val), &(__txx9dmac_regs(ddev)->name))
+
+#define dma32_readl(ddev, name) \
+ __raw_readl(&(__txx9dmac_regs32(ddev)->name))
+#define dma32_writel(ddev, name, val) \
+ __raw_writel((val), &(__txx9dmac_regs32(ddev)->name))
+
+#define dma_readl(ddev, name) \
+ (__is_dmac64(ddev) ? \
+ dma64_readl(ddev, name) : dma32_readl(ddev, name))
+#define dma_writel(ddev, name, val) \
+ (__is_dmac64(ddev) ? \
+ dma64_writel(ddev, name, val) : dma32_writel(ddev, name, val))
+
+static struct device *chan2dev(struct dma_chan *chan)
+{
+ return &chan->dev->device;
+}
+static struct device *chan2parent(struct dma_chan *chan)
+{
+ return chan->dev->device.parent;
+}
+
+static struct txx9dmac_desc *
+txd_to_txx9dmac_desc(struct dma_async_tx_descriptor *txd)
+{
+ return container_of(txd, struct txx9dmac_desc, txd);
+}
+
+static dma_addr_t desc_read_CHAR(const struct txx9dmac_chan *dc,
+ const struct txx9dmac_desc *desc)
+{
+ return is_dmac64(dc) ? desc->hwdesc.CHAR : desc->hwdesc32.CHAR;
+}
+
+static void desc_write_CHAR(const struct txx9dmac_chan *dc,
+ struct txx9dmac_desc *desc, dma_addr_t val)
+{
+ if (is_dmac64(dc))
+ desc->hwdesc.CHAR = val;
+ else
+ desc->hwdesc32.CHAR = val;
+}
+
+#define TXX9_DMA_MAX_COUNT 0x04000000
+
+#define TXX9_DMA_INITIAL_DESC_COUNT 64
+
+static struct txx9dmac_desc *txx9dmac_first_active(struct txx9dmac_chan *dc)
+{
+ return list_entry(dc->active_list.next,
+ struct txx9dmac_desc, desc_node);
+}
+
+static struct txx9dmac_desc *txx9dmac_last_active(struct txx9dmac_chan *dc)
+{
+ return list_entry(dc->active_list.prev,
+ struct txx9dmac_desc, desc_node);
+}
+
+static struct txx9dmac_desc *txx9dmac_first_queued(struct txx9dmac_chan *dc)
+{
+ return list_entry(dc->queue.next, struct txx9dmac_desc, desc_node);
+}
+
+static struct txx9dmac_desc *txx9dmac_last_child(struct txx9dmac_desc *desc)
+{
+ if (!list_empty(&desc->tx_list))
+ desc = list_entry(desc->tx_list.prev, typeof(*desc), desc_node);
+ return desc;
+}
+
+static dma_cookie_t txx9dmac_tx_submit(struct dma_async_tx_descriptor *tx);
+
+static struct txx9dmac_desc *txx9dmac_desc_alloc(struct txx9dmac_chan *dc,
+ gfp_t flags)
+{
+ struct txx9dmac_dev *ddev = dc->ddev;
+ struct txx9dmac_desc *desc;
+
+ desc = kzalloc(sizeof(*desc), flags);
+ if (!desc)
+ return NULL;
+ INIT_LIST_HEAD(&desc->tx_list);
+ dma_async_tx_descriptor_init(&desc->txd, &dc->chan);
+ desc->txd.tx_submit = txx9dmac_tx_submit;
+ /* txd.flags will be overwritten in prep funcs */
+ desc->txd.flags = DMA_CTRL_ACK;
+ desc->txd.phys = dma_map_single(chan2parent(&dc->chan), &desc->hwdesc,
+ ddev->descsize, DMA_TO_DEVICE);
+ return desc;
+}
+
+static struct txx9dmac_desc *txx9dmac_desc_get(struct txx9dmac_chan *dc)
+{
+ struct txx9dmac_desc *desc, *_desc;
+ struct txx9dmac_desc *ret = NULL;
+ unsigned int i = 0;
+
+ spin_lock_bh(&dc->lock);
+ list_for_each_entry_safe(desc, _desc, &dc->free_list, desc_node) {
+ if (async_tx_test_ack(&desc->txd)) {
+ list_del(&desc->desc_node);
+ ret = desc;
+ break;
+ }
+ dev_dbg(chan2dev(&dc->chan), "desc %p not ACKed\n", desc);
+ i++;
+ }
+ spin_unlock_bh(&dc->lock);
+
+ dev_vdbg(chan2dev(&dc->chan), "scanned %u descriptors on freelist\n",
+ i);
+ if (!ret) {
+ ret = txx9dmac_desc_alloc(dc, GFP_ATOMIC);
+ if (ret) {
+ spin_lock_bh(&dc->lock);
+ dc->descs_allocated++;
+ spin_unlock_bh(&dc->lock);
+ } else
+ dev_err(chan2dev(&dc->chan),
+ "not enough descriptors available\n");
+ }
+ return ret;
+}
+
+static void txx9dmac_sync_desc_for_cpu(struct txx9dmac_chan *dc,
+ struct txx9dmac_desc *desc)
+{
+ struct txx9dmac_dev *ddev = dc->ddev;
+ struct txx9dmac_desc *child;
+
+ list_for_each_entry(child, &desc->tx_list, desc_node)
+ dma_sync_single_for_cpu(chan2parent(&dc->chan),
+ child->txd.phys, ddev->descsize,
+ DMA_TO_DEVICE);
+ dma_sync_single_for_cpu(chan2parent(&dc->chan),
+ desc->txd.phys, ddev->descsize,
+ DMA_TO_DEVICE);
+}
+
+/*
+ * Move a descriptor, including any children, to the free list.
+ * `desc' must not be on any lists.
+ */
+static void txx9dmac_desc_put(struct txx9dmac_chan *dc,
+ struct txx9dmac_desc *desc)
+{
+ if (desc) {
+ struct txx9dmac_desc *child;
+
+ txx9dmac_sync_desc_for_cpu(dc, desc);
+
+ spin_lock_bh(&dc->lock);
+ list_for_each_entry(child, &desc->tx_list, desc_node)
+ dev_vdbg(chan2dev(&dc->chan),
+ "moving child desc %p to freelist\n",
+ child);
+ list_splice_init(&desc->tx_list, &dc->free_list);
+ dev_vdbg(chan2dev(&dc->chan), "moving desc %p to freelist\n",
+ desc);
+ list_add(&desc->desc_node, &dc->free_list);
+ spin_unlock_bh(&dc->lock);
+ }
+}
+
+/* Called with dc->lock held and bh disabled */
+static dma_cookie_t
+txx9dmac_assign_cookie(struct txx9dmac_chan *dc, struct txx9dmac_desc *desc)
+{
+ dma_cookie_t cookie = dc->chan.cookie;
+
+ if (++cookie < 0)
+ cookie = 1;
+
+ dc->chan.cookie = cookie;
+ desc->txd.cookie = cookie;
+
+ return cookie;
+}
+
+/*----------------------------------------------------------------------*/
+
+static void txx9dmac_dump_regs(struct txx9dmac_chan *dc)
+{
+ if (is_dmac64(dc))
+ dev_err(chan2dev(&dc->chan),
+ " CHAR: %#llx SAR: %#llx DAR: %#llx CNTR: %#x"
+ " SAIR: %#x DAIR: %#x CCR: %#x CSR: %#x\n",
+ (u64)channel64_read_CHAR(dc),
+ channel64_readq(dc, SAR),
+ channel64_readq(dc, DAR),
+ channel64_readl(dc, CNTR),
+ channel64_readl(dc, SAIR),
+ channel64_readl(dc, DAIR),
+ channel64_readl(dc, CCR),
+ channel64_readl(dc, CSR));
+ else
+ dev_err(chan2dev(&dc->chan),
+ " CHAR: %#x SAR: %#x DAR: %#x CNTR: %#x"
+ " SAIR: %#x DAIR: %#x CCR: %#x CSR: %#x\n",
+ channel32_readl(dc, CHAR),
+ channel32_readl(dc, SAR),
+ channel32_readl(dc, DAR),
+ channel32_readl(dc, CNTR),
+ channel32_readl(dc, SAIR),
+ channel32_readl(dc, DAIR),
+ channel32_readl(dc, CCR),
+ channel32_readl(dc, CSR));
+}
+
+static void txx9dmac_reset_chan(struct txx9dmac_chan *dc)
+{
+ channel_writel(dc, CCR, TXX9_DMA_CCR_CHRST);
+ if (is_dmac64(dc)) {
+ channel64_clear_CHAR(dc);
+ channel_writeq(dc, SAR, 0);
+ channel_writeq(dc, DAR, 0);
+ } else {
+ channel_writel(dc, CHAR, 0);
+ channel_writel(dc, SAR, 0);
+ channel_writel(dc, DAR, 0);
+ }
+ channel_writel(dc, CNTR, 0);
+ channel_writel(dc, SAIR, 0);
+ channel_writel(dc, DAIR, 0);
+ channel_writel(dc, CCR, 0);
+ mmiowb();
+}
+
+/* Called with dc->lock held and bh disabled */
+static void txx9dmac_dostart(struct txx9dmac_chan *dc,
+ struct txx9dmac_desc *first)
+{
+ struct txx9dmac_slave *ds = dc->chan.private;
+ u32 sai, dai;
+
+ dev_vdbg(chan2dev(&dc->chan), "dostart %u %p\n",
+ first->txd.cookie, first);
+ /* ASSERT: channel is idle */
+ if (channel_readl(dc, CSR) & TXX9_DMA_CSR_XFACT) {
+ dev_err(chan2dev(&dc->chan),
+ "BUG: Attempted to start non-idle channel\n");
+ txx9dmac_dump_regs(dc);
+ /* The tasklet will hopefully advance the queue... */
+ return;
+ }
+
+ if (is_dmac64(dc)) {
+ channel64_writel(dc, CNTR, 0);
+ channel64_writel(dc, CSR, 0xffffffff);
+ if (ds) {
+ if (ds->tx_reg) {
+ sai = ds->reg_width;
+ dai = 0;
+ } else {
+ sai = 0;
+ dai = ds->reg_width;
+ }
+ } else {
+ sai = 8;
+ dai = 8;
+ }
+ channel64_writel(dc, SAIR, sai);
+ channel64_writel(dc, DAIR, dai);
+ /* All 64-bit DMAC supports SMPCHN */
+ channel64_writel(dc, CCR, dc->ccr);
+ /* Writing a non zero value to CHAR will assert XFACT */
+ channel64_write_CHAR(dc, first->txd.phys);
+ } else {
+ channel32_writel(dc, CNTR, 0);
+ channel32_writel(dc, CSR, 0xffffffff);
+ if (ds) {
+ if (ds->tx_reg) {
+ sai = ds->reg_width;
+ dai = 0;
+ } else {
+ sai = 0;
+ dai = ds->reg_width;
+ }
+ } else {
+ sai = 4;
+ dai = 4;
+ }
+ channel32_writel(dc, SAIR, sai);
+ channel32_writel(dc, DAIR, dai);
+ if (txx9_dma_have_SMPCHN()) {
+ channel32_writel(dc, CCR, dc->ccr);
+ /* Writing a non zero value to CHAR will assert XFACT */
+ channel32_writel(dc, CHAR, first->txd.phys);
+ } else {
+ channel32_writel(dc, CHAR, first->txd.phys);
+ channel32_writel(dc, CCR, dc->ccr);
+ }
+ }
+}
+
+/*----------------------------------------------------------------------*/
+
+static void
+txx9dmac_descriptor_complete(struct txx9dmac_chan *dc,
+ struct txx9dmac_desc *desc)
+{
+ dma_async_tx_callback callback;
+ void *param;
+ struct dma_async_tx_descriptor *txd = &desc->txd;
+ struct txx9dmac_slave *ds = dc->chan.private;
+
+ dev_vdbg(chan2dev(&dc->chan), "descriptor %u %p complete\n",
+ txd->cookie, desc);
+
+ dc->completed = txd->cookie;
+ callback = txd->callback;
+ param = txd->callback_param;
+
+ txx9dmac_sync_desc_for_cpu(dc, desc);
+ list_splice_init(&desc->tx_list, &dc->free_list);
+ list_move(&desc->desc_node, &dc->free_list);
+
+ if (!ds) {
+ dma_addr_t dmaaddr;
+ if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+ dmaaddr = is_dmac64(dc) ?
+ desc->hwdesc.DAR : desc->hwdesc32.DAR;
+ if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
+ dma_unmap_single(chan2parent(&dc->chan),
+ dmaaddr, desc->len, DMA_FROM_DEVICE);
+ else
+ dma_unmap_page(chan2parent(&dc->chan),
+ dmaaddr, desc->len, DMA_FROM_DEVICE);
+ }
+ if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+ dmaaddr = is_dmac64(dc) ?
+ desc->hwdesc.SAR : desc->hwdesc32.SAR;
+ if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
+ dma_unmap_single(chan2parent(&dc->chan),
+ dmaaddr, desc->len, DMA_TO_DEVICE);
+ else
+ dma_unmap_page(chan2parent(&dc->chan),
+ dmaaddr, desc->len, DMA_TO_DEVICE);
+ }
+ }
+
+ /*
+ * The API requires that no submissions are done from a
+ * callback, so we don't need to drop the lock here
+ */
+ if (callback)
+ callback(param);
+ dma_run_dependencies(txd);
+}
+
+static void txx9dmac_dequeue(struct txx9dmac_chan *dc, struct list_head *list)
+{
+ struct txx9dmac_dev *ddev = dc->ddev;
+ struct txx9dmac_desc *desc;
+ struct txx9dmac_desc *prev = NULL;
+
+ BUG_ON(!list_empty(list));
+ do {
+ desc = txx9dmac_first_queued(dc);
+ if (prev) {
+ desc_write_CHAR(dc, prev, desc->txd.phys);
+ dma_sync_single_for_device(chan2parent(&dc->chan),
+ prev->txd.phys, ddev->descsize,
+ DMA_TO_DEVICE);
+ }
+ prev = txx9dmac_last_child(desc);
+ list_move_tail(&desc->desc_node, list);
+ /* Make chain-completion interrupt happen */
+ if ((desc->txd.flags & DMA_PREP_INTERRUPT) &&
+ !txx9dmac_chan_INTENT(dc))
+ break;
+ } while (!list_empty(&dc->queue));
+}
+
+static void txx9dmac_complete_all(struct txx9dmac_chan *dc)
+{
+ struct txx9dmac_desc *desc, *_desc;
+ LIST_HEAD(list);
+
+ /*
+ * Submit queued descriptors ASAP, i.e. before we go through
+ * the completed ones.
+ */
+ list_splice_init(&dc->active_list, &list);
+ if (!list_empty(&dc->queue)) {
+ txx9dmac_dequeue(dc, &dc->active_list);
+ txx9dmac_dostart(dc, txx9dmac_first_active(dc));
+ }
+
+ list_for_each_entry_safe(desc, _desc, &list, desc_node)
+ txx9dmac_descriptor_complete(dc, desc);
+}
+
+static void txx9dmac_dump_desc(struct txx9dmac_chan *dc,
+ struct txx9dmac_hwdesc *desc)
+{
+ if (is_dmac64(dc)) {
+#ifdef TXX9_DMA_USE_SIMPLE_CHAIN
+ dev_crit(chan2dev(&dc->chan),
+ " desc: ch%#llx s%#llx d%#llx c%#x\n",
+ (u64)desc->CHAR, desc->SAR, desc->DAR, desc->CNTR);
+#else
+ dev_crit(chan2dev(&dc->chan),
+ " desc: ch%#llx s%#llx d%#llx c%#x"
+ " si%#x di%#x cc%#x cs%#x\n",
+ (u64)desc->CHAR, desc->SAR, desc->DAR, desc->CNTR,
+ desc->SAIR, desc->DAIR, desc->CCR, desc->CSR);
+#endif
+ } else {
+ struct txx9dmac_hwdesc32 *d = (struct txx9dmac_hwdesc32 *)desc;
+#ifdef TXX9_DMA_USE_SIMPLE_CHAIN
+ dev_crit(chan2dev(&dc->chan),
+ " desc: ch%#x s%#x d%#x c%#x\n",
+ d->CHAR, d->SAR, d->DAR, d->CNTR);
+#else
+ dev_crit(chan2dev(&dc->chan),
+ " desc: ch%#x s%#x d%#x c%#x"
+ " si%#x di%#x cc%#x cs%#x\n",
+ d->CHAR, d->SAR, d->DAR, d->CNTR,
+ d->SAIR, d->DAIR, d->CCR, d->CSR);
+#endif
+ }
+}
+
+static void txx9dmac_handle_error(struct txx9dmac_chan *dc, u32 csr)
+{
+ struct txx9dmac_desc *bad_desc;
+ struct txx9dmac_desc *child;
+ u32 errors;
+
+ /*
+ * The descriptor currently at the head of the active list is
+ * borked. Since we don't have any way to report errors, we'll
+ * just have to scream loudly and try to carry on.
+ */
+ dev_crit(chan2dev(&dc->chan), "Abnormal Chain Completion\n");
+ txx9dmac_dump_regs(dc);
+
+ bad_desc = txx9dmac_first_active(dc);
+ list_del_init(&bad_desc->desc_node);
+
+ /* Clear all error flags and try to restart the controller */
+ errors = csr & (TXX9_DMA_CSR_ABCHC |
+ TXX9_DMA_CSR_CFERR | TXX9_DMA_CSR_CHERR |
+ TXX9_DMA_CSR_DESERR | TXX9_DMA_CSR_SORERR);
+ channel_writel(dc, CSR, errors);
+
+ if (list_empty(&dc->active_list) && !list_empty(&dc->queue))
+ txx9dmac_dequeue(dc, &dc->active_list);
+ if (!list_empty(&dc->active_list))
+ txx9dmac_dostart(dc, txx9dmac_first_active(dc));
+
+ dev_crit(chan2dev(&dc->chan),
+ "Bad descriptor submitted for DMA! (cookie: %d)\n",
+ bad_desc->txd.cookie);
+ txx9dmac_dump_desc(dc, &bad_desc->hwdesc);
+ list_for_each_entry(child, &bad_desc->tx_list, desc_node)
+ txx9dmac_dump_desc(dc, &child->hwdesc);
+ /* Pretend the descriptor completed successfully */
+ txx9dmac_descriptor_complete(dc, bad_desc);
+}
+
+static void txx9dmac_scan_descriptors(struct txx9dmac_chan *dc)
+{
+ dma_addr_t chain;
+ struct txx9dmac_desc *desc, *_desc;
+ struct txx9dmac_desc *child;
+ u32 csr;
+
+ if (is_dmac64(dc)) {
+ chain = channel64_read_CHAR(dc);
+ csr = channel64_readl(dc, CSR);
+ channel64_writel(dc, CSR, csr);
+ } else {
+ chain = channel32_readl(dc, CHAR);
+ csr = channel32_readl(dc, CSR);
+ channel32_writel(dc, CSR, csr);
+ }
+ /* For dynamic chain, we should look at XFACT instead of NCHNC */
+ if (!(csr & (TXX9_DMA_CSR_XFACT | TXX9_DMA_CSR_ABCHC))) {
+ /* Everything we've submitted is done */
+ txx9dmac_complete_all(dc);
+ return;
+ }
+ if (!(csr & TXX9_DMA_CSR_CHNEN))
+ chain = 0; /* last descriptor of this chain */
+
+ dev_vdbg(chan2dev(&dc->chan), "scan_descriptors: char=%#llx\n",
+ (u64)chain);
+
+ list_for_each_entry_safe(desc, _desc, &dc->active_list, desc_node) {
+ if (desc_read_CHAR(dc, desc) == chain) {
+ /* This one is currently in progress */
+ if (csr & TXX9_DMA_CSR_ABCHC)
+ goto scan_done;
+ return;
+ }
+
+ list_for_each_entry(child, &desc->tx_list, desc_node)
+ if (desc_read_CHAR(dc, child) == chain) {
+ /* Currently in progress */
+ if (csr & TXX9_DMA_CSR_ABCHC)
+ goto scan_done;
+ return;
+ }
+
+ /*
+ * No descriptors so far seem to be in progress, i.e.
+ * this one must be done.
+ */
+ txx9dmac_descriptor_complete(dc, desc);
+ }
+scan_done:
+ if (csr & TXX9_DMA_CSR_ABCHC) {
+ txx9dmac_handle_error(dc, csr);
+ return;
+ }
+
+ dev_err(chan2dev(&dc->chan),
+ "BUG: All descriptors done, but channel not idle!\n");
+
+ /* Try to continue after resetting the channel... */
+ txx9dmac_reset_chan(dc);
+
+ if (!list_empty(&dc->queue)) {
+ txx9dmac_dequeue(dc, &dc->active_list);
+ txx9dmac_dostart(dc, txx9dmac_first_active(dc));
+ }
+}
+
+static void txx9dmac_chan_tasklet(unsigned long data)
+{
+ int irq;
+ u32 csr;
+ struct txx9dmac_chan *dc;
+
+ dc = (struct txx9dmac_chan *)data;
+ csr = channel_readl(dc, CSR);
+ dev_vdbg(chan2dev(&dc->chan), "tasklet: status=%x\n", csr);
+
+ spin_lock(&dc->lock);
+ if (csr & (TXX9_DMA_CSR_ABCHC | TXX9_DMA_CSR_NCHNC |
+ TXX9_DMA_CSR_NTRNFC))
+ txx9dmac_scan_descriptors(dc);
+ spin_unlock(&dc->lock);
+ irq = dc->irq;
+
+ enable_irq(irq);
+}
+
+static irqreturn_t txx9dmac_chan_interrupt(int irq, void *dev_id)
+{
+ struct txx9dmac_chan *dc = dev_id;
+
+ dev_vdbg(chan2dev(&dc->chan), "interrupt: status=%#x\n",
+ channel_readl(dc, CSR));
+
+ tasklet_schedule(&dc->tasklet);
+ /*
+ * Just disable the interrupts. We'll turn them back on in the
+ * softirq handler.
+ */
+ disable_irq_nosync(irq);
+
+ return IRQ_HANDLED;
+}
+
+static void txx9dmac_tasklet(unsigned long data)
+{
+ int irq;
+ u32 csr;
+ struct txx9dmac_chan *dc;
+
+ struct txx9dmac_dev *ddev = (struct txx9dmac_dev *)data;
+ u32 mcr;
+ int i;
+
+ mcr = dma_readl(ddev, MCR);
+ dev_vdbg(ddev->chan[0]->dma.dev, "tasklet: mcr=%x\n", mcr);
+ for (i = 0; i < TXX9_DMA_MAX_NR_CHANNELS; i++) {
+ if ((mcr >> (24 + i)) & 0x11) {
+ dc = ddev->chan[i];
+ csr = channel_readl(dc, CSR);
+ dev_vdbg(chan2dev(&dc->chan), "tasklet: status=%x\n",
+ csr);
+ spin_lock(&dc->lock);
+ if (csr & (TXX9_DMA_CSR_ABCHC | TXX9_DMA_CSR_NCHNC |
+ TXX9_DMA_CSR_NTRNFC))
+ txx9dmac_scan_descriptors(dc);
+ spin_unlock(&dc->lock);
+ }
+ }
+ irq = ddev->irq;
+
+ enable_irq(irq);
+}
+
+static irqreturn_t txx9dmac_interrupt(int irq, void *dev_id)
+{
+ struct txx9dmac_dev *ddev = dev_id;
+
+ dev_vdbg(ddev->chan[0]->dma.dev, "interrupt: status=%#x\n",
+ dma_readl(ddev, MCR));
+
+ tasklet_schedule(&ddev->tasklet);
+ /*
+ * Just disable the interrupts. We'll turn them back on in the
+ * softirq handler.
+ */
+ disable_irq_nosync(irq);
+
+ return IRQ_HANDLED;
+}
+
+/*----------------------------------------------------------------------*/
+
+static dma_cookie_t txx9dmac_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct txx9dmac_desc *desc = txd_to_txx9dmac_desc(tx);
+ struct txx9dmac_chan *dc = to_txx9dmac_chan(tx->chan);
+ dma_cookie_t cookie;
+
+ spin_lock_bh(&dc->lock);
+ cookie = txx9dmac_assign_cookie(dc, desc);
+
+ dev_vdbg(chan2dev(tx->chan), "tx_submit: queued %u %p\n",
+ desc->txd.cookie, desc);
+
+ list_add_tail(&desc->desc_node, &dc->queue);
+ spin_unlock_bh(&dc->lock);
+
+ return cookie;
+}
+
+static struct dma_async_tx_descriptor *
+txx9dmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+ size_t len, unsigned long flags)
+{
+ struct txx9dmac_chan *dc = to_txx9dmac_chan(chan);
+ struct txx9dmac_dev *ddev = dc->ddev;
+ struct txx9dmac_desc *desc;
+ struct txx9dmac_desc *first;
+ struct txx9dmac_desc *prev;
+ size_t xfer_count;
+ size_t offset;
+
+ dev_vdbg(chan2dev(chan), "prep_dma_memcpy d%#llx s%#llx l%#zx f%#lx\n",
+ (u64)dest, (u64)src, len, flags);
+
+ if (unlikely(!len)) {
+ dev_dbg(chan2dev(chan), "prep_dma_memcpy: length is zero!\n");
+ return NULL;
+ }
+
+ prev = first = NULL;
+
+ for (offset = 0; offset < len; offset += xfer_count) {
+ xfer_count = min_t(size_t, len - offset, TXX9_DMA_MAX_COUNT);
+ /*
+ * Workaround for ERT-TX49H2-033, ERT-TX49H3-020,
+ * ERT-TX49H4-016 (slightly conservative)
+ */
+ if (__is_dmac64(ddev)) {
+ if (xfer_count > 0x100 &&
+ (xfer_count & 0xff) >= 0xfa &&
+ (xfer_count & 0xff) <= 0xff)
+ xfer_count -= 0x20;
+ } else {
+ if (xfer_count > 0x80 &&
+ (xfer_count & 0x7f) >= 0x7e &&
+ (xfer_count & 0x7f) <= 0x7f)
+ xfer_count -= 0x20;
+ }
+
+ desc = txx9dmac_desc_get(dc);
+ if (!desc) {
+ txx9dmac_desc_put(dc, first);
+ return NULL;
+ }
+
+ if (__is_dmac64(ddev)) {
+ desc->hwdesc.SAR = src + offset;
+ desc->hwdesc.DAR = dest + offset;
+ desc->hwdesc.CNTR = xfer_count;
+ txx9dmac_desc_set_nosimple(ddev, desc, 8, 8,
+ dc->ccr | TXX9_DMA_CCR_XFACT);
+ } else {
+ desc->hwdesc32.SAR = src + offset;
+ desc->hwdesc32.DAR = dest + offset;
+ desc->hwdesc32.CNTR = xfer_count;
+ txx9dmac_desc_set_nosimple(ddev, desc, 4, 4,
+ dc->ccr | TXX9_DMA_CCR_XFACT);
+ }
+
+ /*
+ * The descriptors on tx_list are not reachable from
+ * the dc->queue list or dc->active_list after a
+ * submit. If we put all descriptors on active_list,
+ * calling of callback on the completion will be more
+ * complex.
+ */
+ if (!first) {
+ first = desc;
+ } else {
+ desc_write_CHAR(dc, prev, desc->txd.phys);
+ dma_sync_single_for_device(chan2parent(&dc->chan),
+ prev->txd.phys, ddev->descsize,
+ DMA_TO_DEVICE);
+ list_add_tail(&desc->desc_node, &first->tx_list);
+ }
+ prev = desc;
+ }
+
+ /* Trigger interrupt after last block */
+ if (flags & DMA_PREP_INTERRUPT)
+ txx9dmac_desc_set_INTENT(ddev, prev);
+
+ desc_write_CHAR(dc, prev, 0);
+ dma_sync_single_for_device(chan2parent(&dc->chan),
+ prev->txd.phys, ddev->descsize,
+ DMA_TO_DEVICE);
+
+ first->txd.flags = flags;
+ first->len = len;
+
+ return &first->txd;
+}
+
+static struct dma_async_tx_descriptor *
+txx9dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+ unsigned int sg_len, enum dma_data_direction direction,
+ unsigned long flags)
+{
+ struct txx9dmac_chan *dc = to_txx9dmac_chan(chan);
+ struct txx9dmac_dev *ddev = dc->ddev;
+ struct txx9dmac_slave *ds = chan->private;
+ struct txx9dmac_desc *prev;
+ struct txx9dmac_desc *first;
+ unsigned int i;
+ struct scatterlist *sg;
+
+ dev_vdbg(chan2dev(chan), "prep_dma_slave\n");
+
+ BUG_ON(!ds || !ds->reg_width);
+ if (ds->tx_reg)
+ BUG_ON(direction != DMA_TO_DEVICE);
+ else
+ BUG_ON(direction != DMA_FROM_DEVICE);
+ if (unlikely(!sg_len))
+ return NULL;
+
+ prev = first = NULL;
+
+ for_each_sg(sgl, sg, sg_len, i) {
+ struct txx9dmac_desc *desc;
+ dma_addr_t mem;
+ u32 sai, dai;
+
+ desc = txx9dmac_desc_get(dc);
+ if (!desc) {
+ txx9dmac_desc_put(dc, first);
+ return NULL;
+ }
+
+ mem = sg_dma_address(sg);
+
+ if (__is_dmac64(ddev)) {
+ if (direction == DMA_TO_DEVICE) {
+ desc->hwdesc.SAR = mem;
+ desc->hwdesc.DAR = ds->tx_reg;
+ } else {
+ desc->hwdesc.SAR = ds->rx_reg;
+ desc->hwdesc.DAR = mem;
+ }
+ desc->hwdesc.CNTR = sg_dma_len(sg);
+ } else {
+ if (direction == DMA_TO_DEVICE) {
+ desc->hwdesc32.SAR = mem;
+ desc->hwdesc32.DAR = ds->tx_reg;
+ } else {
+ desc->hwdesc32.SAR = ds->rx_reg;
+ desc->hwdesc32.DAR = mem;
+ }
+ desc->hwdesc32.CNTR = sg_dma_len(sg);
+ }
+ if (direction == DMA_TO_DEVICE) {
+ sai = ds->reg_width;
+ dai = 0;
+ } else {
+ sai = 0;
+ dai = ds->reg_width;
+ }
+ txx9dmac_desc_set_nosimple(ddev, desc, sai, dai,
+ dc->ccr | TXX9_DMA_CCR_XFACT);
+
+ if (!first) {
+ first = desc;
+ } else {
+ desc_write_CHAR(dc, prev, desc->txd.phys);
+ dma_sync_single_for_device(chan2parent(&dc->chan),
+ prev->txd.phys,
+ ddev->descsize,
+ DMA_TO_DEVICE);
+ list_add_tail(&desc->desc_node, &first->tx_list);
+ }
+ prev = desc;
+ }
+
+ /* Trigger interrupt after last block */
+ if (flags & DMA_PREP_INTERRUPT)
+ txx9dmac_desc_set_INTENT(ddev, prev);
+
+ desc_write_CHAR(dc, prev, 0);
+ dma_sync_single_for_device(chan2parent(&dc->chan),
+ prev->txd.phys, ddev->descsize,
+ DMA_TO_DEVICE);
+
+ first->txd.flags = flags;
+ first->len = 0;
+
+ return &first->txd;
+}
+
+static void txx9dmac_terminate_all(struct dma_chan *chan)
+{
+ struct txx9dmac_chan *dc = to_txx9dmac_chan(chan);
+ struct txx9dmac_desc *desc, *_desc;
+ LIST_HEAD(list);
+
+ dev_vdbg(chan2dev(chan), "terminate_all\n");
+ spin_lock_bh(&dc->lock);
+
+ txx9dmac_reset_chan(dc);
+
+ /* active_list entries will end up before queued entries */
+ list_splice_init(&dc->queue, &list);
+ list_splice_init(&dc->active_list, &list);
+
+ spin_unlock_bh(&dc->lock);
+
+ /* Flush all pending and queued descriptors */
+ list_for_each_entry_safe(desc, _desc, &list, desc_node)
+ txx9dmac_descriptor_complete(dc, desc);
+}
+
+static enum dma_status
+txx9dmac_is_tx_complete(struct dma_chan *chan,
+ dma_cookie_t cookie,
+ dma_cookie_t *done, dma_cookie_t *used)
+{
+ struct txx9dmac_chan *dc = to_txx9dmac_chan(chan);
+ dma_cookie_t last_used;
+ dma_cookie_t last_complete;
+ int ret;
+
+ last_complete = dc->completed;
+ last_used = chan->cookie;
+
+ ret = dma_async_is_complete(cookie, last_complete, last_used);
+ if (ret != DMA_SUCCESS) {
+ spin_lock_bh(&dc->lock);
+ txx9dmac_scan_descriptors(dc);
+ spin_unlock_bh(&dc->lock);
+
+ last_complete = dc->completed;
+ last_used = chan->cookie;
+
+ ret = dma_async_is_complete(cookie, last_complete, last_used);
+ }
+
+ if (done)
+ *done = last_complete;
+ if (used)
+ *used = last_used;
+
+ return ret;
+}
+
+static void txx9dmac_chain_dynamic(struct txx9dmac_chan *dc,
+ struct txx9dmac_desc *prev)
+{
+ struct txx9dmac_dev *ddev = dc->ddev;
+ struct txx9dmac_desc *desc;
+ LIST_HEAD(list);
+
+ prev = txx9dmac_last_child(prev);
+ txx9dmac_dequeue(dc, &list);
+ desc = list_entry(list.next, struct txx9dmac_desc, desc_node);
+ desc_write_CHAR(dc, prev, desc->txd.phys);
+ dma_sync_single_for_device(chan2parent(&dc->chan),
+ prev->txd.phys, ddev->descsize,
+ DMA_TO_DEVICE);
+ mmiowb();
+ if (!(channel_readl(dc, CSR) & TXX9_DMA_CSR_CHNEN) &&
+ channel_read_CHAR(dc) == prev->txd.phys)
+ /* Restart chain DMA */
+ channel_write_CHAR(dc, desc->txd.phys);
+ list_splice_tail(&list, &dc->active_list);
+}
+
+static void txx9dmac_issue_pending(struct dma_chan *chan)
+{
+ struct txx9dmac_chan *dc = to_txx9dmac_chan(chan);
+
+ spin_lock_bh(&dc->lock);
+
+ if (!list_empty(&dc->active_list))
+ txx9dmac_scan_descriptors(dc);
+ if (!list_empty(&dc->queue)) {
+ if (list_empty(&dc->active_list)) {
+ txx9dmac_dequeue(dc, &dc->active_list);
+ txx9dmac_dostart(dc, txx9dmac_first_active(dc));
+ } else if (txx9_dma_have_SMPCHN()) {
+ struct txx9dmac_desc *prev = txx9dmac_last_active(dc);
+
+ if (!(prev->txd.flags & DMA_PREP_INTERRUPT) ||
+ txx9dmac_chan_INTENT(dc))
+ txx9dmac_chain_dynamic(dc, prev);
+ }
+ }
+
+ spin_unlock_bh(&dc->lock);
+}
+
+static int txx9dmac_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct txx9dmac_chan *dc = to_txx9dmac_chan(chan);
+ struct txx9dmac_slave *ds = chan->private;
+ struct txx9dmac_desc *desc;
+ int i;
+
+ dev_vdbg(chan2dev(chan), "alloc_chan_resources\n");
+
+ /* ASSERT: channel is idle */
+ if (channel_readl(dc, CSR) & TXX9_DMA_CSR_XFACT) {
+ dev_dbg(chan2dev(chan), "DMA channel not idle?\n");
+ return -EIO;
+ }
+
+ dc->completed = chan->cookie = 1;
+
+ dc->ccr = TXX9_DMA_CCR_IMMCHN | TXX9_DMA_CCR_INTENE | CCR_LE;
+ txx9dmac_chan_set_SMPCHN(dc);
+ if (!txx9_dma_have_SMPCHN() || (dc->ccr & TXX9_DMA_CCR_SMPCHN))
+ dc->ccr |= TXX9_DMA_CCR_INTENC;
+ if (chan->device->device_prep_dma_memcpy) {
+ if (ds)
+ return -EINVAL;
+ dc->ccr |= TXX9_DMA_CCR_XFSZ_X8;
+ } else {
+ if (!ds ||
+ (ds->tx_reg && ds->rx_reg) || (!ds->tx_reg && !ds->rx_reg))
+ return -EINVAL;
+ dc->ccr |= TXX9_DMA_CCR_EXTRQ |
+ TXX9_DMA_CCR_XFSZ(__ffs(ds->reg_width));
+ txx9dmac_chan_set_INTENT(dc);
+ }
+
+ spin_lock_bh(&dc->lock);
+ i = dc->descs_allocated;
+ while (dc->descs_allocated < TXX9_DMA_INITIAL_DESC_COUNT) {
+ spin_unlock_bh(&dc->lock);
+
+ desc = txx9dmac_desc_alloc(dc, GFP_KERNEL);
+ if (!desc) {
+ dev_info(chan2dev(chan),
+ "only allocated %d descriptors\n", i);
+ spin_lock_bh(&dc->lock);
+ break;
+ }
+ txx9dmac_desc_put(dc, desc);
+
+ spin_lock_bh(&dc->lock);
+ i = ++dc->descs_allocated;
+ }
+ spin_unlock_bh(&dc->lock);
+
+ dev_dbg(chan2dev(chan),
+ "alloc_chan_resources allocated %d descriptors\n", i);
+
+ return i;
+}
+
+static void txx9dmac_free_chan_resources(struct dma_chan *chan)
+{
+ struct txx9dmac_chan *dc = to_txx9dmac_chan(chan);
+ struct txx9dmac_dev *ddev = dc->ddev;
+ struct txx9dmac_desc *desc, *_desc;
+ LIST_HEAD(list);
+
+ dev_dbg(chan2dev(chan), "free_chan_resources (descs allocated=%u)\n",
+ dc->descs_allocated);
+
+ /* ASSERT: channel is idle */
+ BUG_ON(!list_empty(&dc->active_list));
+ BUG_ON(!list_empty(&dc->queue));
+ BUG_ON(channel_readl(dc, CSR) & TXX9_DMA_CSR_XFACT);
+
+ spin_lock_bh(&dc->lock);
+ list_splice_init(&dc->free_list, &list);
+ dc->descs_allocated = 0;
+ spin_unlock_bh(&dc->lock);
+
+ list_for_each_entry_safe(desc, _desc, &list, desc_node) {
+ dev_vdbg(chan2dev(chan), " freeing descriptor %p\n", desc);
+ dma_unmap_single(chan2parent(chan), desc->txd.phys,
+ ddev->descsize, DMA_TO_DEVICE);
+ kfree(desc);
+ }
+
+ dev_vdbg(chan2dev(chan), "free_chan_resources done\n");
+}
+
+/*----------------------------------------------------------------------*/
+
+static void txx9dmac_off(struct txx9dmac_dev *ddev)
+{
+ dma_writel(ddev, MCR, 0);
+ mmiowb();
+}
+
+static int __init txx9dmac_chan_probe(struct platform_device *pdev)
+{
+ struct txx9dmac_chan_platform_data *cpdata = pdev->dev.platform_data;
+ struct platform_device *dmac_dev = cpdata->dmac_dev;
+ struct txx9dmac_platform_data *pdata = dmac_dev->dev.platform_data;
+ struct txx9dmac_chan *dc;
+ int err;
+ int ch = pdev->id % TXX9_DMA_MAX_NR_CHANNELS;
+ int irq;
+
+ dc = devm_kzalloc(&pdev->dev, sizeof(*dc), GFP_KERNEL);
+ if (!dc)
+ return -ENOMEM;
+
+ dc->dma.dev = &pdev->dev;
+ dc->dma.device_alloc_chan_resources = txx9dmac_alloc_chan_resources;
+ dc->dma.device_free_chan_resources = txx9dmac_free_chan_resources;
+ dc->dma.device_terminate_all = txx9dmac_terminate_all;
+ dc->dma.device_is_tx_complete = txx9dmac_is_tx_complete;
+ dc->dma.device_issue_pending = txx9dmac_issue_pending;
+ if (pdata && pdata->memcpy_chan == ch) {
+ dc->dma.device_prep_dma_memcpy = txx9dmac_prep_dma_memcpy;
+ dma_cap_set(DMA_MEMCPY, dc->dma.cap_mask);
+ } else {
+ dc->dma.device_prep_slave_sg = txx9dmac_prep_slave_sg;
+ dma_cap_set(DMA_SLAVE, dc->dma.cap_mask);
+ dma_cap_set(DMA_PRIVATE, dc->dma.cap_mask);
+ }
+
+ INIT_LIST_HEAD(&dc->dma.channels);
+ dc->ddev = platform_get_drvdata(dmac_dev);
+ if (dc->ddev->irq < 0) {
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0)
+ return irq;
+ tasklet_init(&dc->tasklet, txx9dmac_chan_tasklet,
+ (unsigned long)dc);
+ dc->irq = irq;
+ err = devm_request_irq(&pdev->dev, dc->irq,
+ txx9dmac_chan_interrupt, 0, dev_name(&pdev->dev), dc);
+ if (err)
+ return err;
+ } else
+ dc->irq = -1;
+ dc->ddev->chan[ch] = dc;
+ dc->chan.device = &dc->dma;
+ list_add_tail(&dc->chan.device_node, &dc->chan.device->channels);
+ dc->chan.cookie = dc->completed = 1;
+
+ if (is_dmac64(dc))
+ dc->ch_regs = &__txx9dmac_regs(dc->ddev)->CHAN[ch];
+ else
+ dc->ch_regs = &__txx9dmac_regs32(dc->ddev)->CHAN[ch];
+ spin_lock_init(&dc->lock);
+
+ INIT_LIST_HEAD(&dc->active_list);
+ INIT_LIST_HEAD(&dc->queue);
+ INIT_LIST_HEAD(&dc->free_list);
+
+ txx9dmac_reset_chan(dc);
+
+ platform_set_drvdata(pdev, dc);
+
+ err = dma_async_device_register(&dc->dma);
+ if (err)
+ return err;
+ dev_dbg(&pdev->dev, "TXx9 DMA Channel (dma%d%s%s)\n",
+ dc->dma.dev_id,
+ dma_has_cap(DMA_MEMCPY, dc->dma.cap_mask) ? " memcpy" : "",
+ dma_has_cap(DMA_SLAVE, dc->dma.cap_mask) ? " slave" : "");
+
+ return 0;
+}
+
+static int __exit txx9dmac_chan_remove(struct platform_device *pdev)
+{
+ struct txx9dmac_chan *dc = platform_get_drvdata(pdev);
+
+ dma_async_device_unregister(&dc->dma);
+ if (dc->irq >= 0)
+ tasklet_kill(&dc->tasklet);
+ dc->ddev->chan[pdev->id % TXX9_DMA_MAX_NR_CHANNELS] = NULL;
+ return 0;
+}
+
+static int __init txx9dmac_probe(struct platform_device *pdev)
+{
+ struct txx9dmac_platform_data *pdata = pdev->dev.platform_data;
+ struct resource *io;
+ struct txx9dmac_dev *ddev;
+ u32 mcr;
+ int err;
+
+ io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!io)
+ return -EINVAL;
+
+ ddev = devm_kzalloc(&pdev->dev, sizeof(*ddev), GFP_KERNEL);
+ if (!ddev)
+ return -ENOMEM;
+
+ if (!devm_request_mem_region(&pdev->dev, io->start, resource_size(io),
+ dev_name(&pdev->dev)))
+ return -EBUSY;
+
+ ddev->regs = devm_ioremap(&pdev->dev, io->start, resource_size(io));
+ if (!ddev->regs)
+ return -ENOMEM;
+ ddev->have_64bit_regs = pdata->have_64bit_regs;
+ if (__is_dmac64(ddev))
+ ddev->descsize = sizeof(struct txx9dmac_hwdesc);
+ else
+ ddev->descsize = sizeof(struct txx9dmac_hwdesc32);
+
+ /* force dma off, just in case */
+ txx9dmac_off(ddev);
+
+ ddev->irq = platform_get_irq(pdev, 0);
+ if (ddev->irq >= 0) {
+ tasklet_init(&ddev->tasklet, txx9dmac_tasklet,
+ (unsigned long)ddev);
+ err = devm_request_irq(&pdev->dev, ddev->irq,
+ txx9dmac_interrupt, 0, dev_name(&pdev->dev), ddev);
+ if (err)
+ return err;
+ }
+
+ mcr = TXX9_DMA_MCR_MSTEN | MCR_LE;
+ if (pdata && pdata->memcpy_chan >= 0)
+ mcr |= TXX9_DMA_MCR_FIFUM(pdata->memcpy_chan);
+ dma_writel(ddev, MCR, mcr);
+
+ platform_set_drvdata(pdev, ddev);
+ return 0;
+}
+
+static int __exit txx9dmac_remove(struct platform_device *pdev)
+{
+ struct txx9dmac_dev *ddev = platform_get_drvdata(pdev);
+
+ txx9dmac_off(ddev);
+ if (ddev->irq >= 0)
+ tasklet_kill(&ddev->tasklet);
+ return 0;
+}
+
+static void txx9dmac_shutdown(struct platform_device *pdev)
+{
+ struct txx9dmac_dev *ddev = platform_get_drvdata(pdev);
+
+ txx9dmac_off(ddev);
+}
+
+static int txx9dmac_suspend_noirq(struct device *dev)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct txx9dmac_dev *ddev = platform_get_drvdata(pdev);
+
+ txx9dmac_off(ddev);
+ return 0;
+}
+
+static int txx9dmac_resume_noirq(struct device *dev)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct txx9dmac_dev *ddev = platform_get_drvdata(pdev);
+ struct txx9dmac_platform_data *pdata = pdev->dev.platform_data;
+ u32 mcr;
+
+ mcr = TXX9_DMA_MCR_MSTEN | MCR_LE;
+ if (pdata && pdata->memcpy_chan >= 0)
+ mcr |= TXX9_DMA_MCR_FIFUM(pdata->memcpy_chan);
+ dma_writel(ddev, MCR, mcr);
+ return 0;
+
+}
+
+static const struct dev_pm_ops txx9dmac_dev_pm_ops = {
+ .suspend_noirq = txx9dmac_suspend_noirq,
+ .resume_noirq = txx9dmac_resume_noirq,
+};
+
+static struct platform_driver txx9dmac_chan_driver = {
+ .remove = __exit_p(txx9dmac_chan_remove),
+ .driver = {
+ .name = "txx9dmac-chan",
+ },
+};
+
+static struct platform_driver txx9dmac_driver = {
+ .remove = __exit_p(txx9dmac_remove),
+ .shutdown = txx9dmac_shutdown,
+ .driver = {
+ .name = "txx9dmac",
+ .pm = &txx9dmac_dev_pm_ops,
+ },
+};
+
+static int __init txx9dmac_init(void)
+{
+ int rc;
+
+ rc = platform_driver_probe(&txx9dmac_driver, txx9dmac_probe);
+ if (!rc) {
+ rc = platform_driver_probe(&txx9dmac_chan_driver,
+ txx9dmac_chan_probe);
+ if (rc)
+ platform_driver_unregister(&txx9dmac_driver);
+ }
+ return rc;
+}
+module_init(txx9dmac_init);
+
+static void __exit txx9dmac_exit(void)
+{
+ platform_driver_unregister(&txx9dmac_chan_driver);
+ platform_driver_unregister(&txx9dmac_driver);
+}
+module_exit(txx9dmac_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("TXx9 DMA Controller driver");
+MODULE_AUTHOR("Atsushi Nemoto <anemo@mba.ocn.ne.jp>");
diff --git a/drivers/dma/txx9dmac.h b/drivers/dma/txx9dmac.h
new file mode 100644
index 00000000000..365d42366b9
--- /dev/null
+++ b/drivers/dma/txx9dmac.h
@@ -0,0 +1,308 @@
+/*
+ * Driver for the TXx9 SoC DMA Controller
+ *
+ * Copyright (C) 2009 Atsushi Nemoto
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef TXX9DMAC_H
+#define TXX9DMAC_H
+
+#include <linux/dmaengine.h>
+#include <asm/txx9/dmac.h>
+
+/*
+ * Design Notes:
+ *
+ * This DMAC have four channels and one FIFO buffer. Each channel can
+ * be configured for memory-memory or device-memory transfer, but only
+ * one channel can do alignment-free memory-memory transfer at a time
+ * while the channel should occupy the FIFO buffer for effective
+ * transfers.
+ *
+ * Instead of dynamically assign the FIFO buffer to channels, I chose
+ * make one dedicated channel for memory-memory transfer. The
+ * dedicated channel is public. Other channels are private and used
+ * for slave transfer. Some devices in the SoC are wired to certain
+ * DMA channel.
+ */
+
+#ifdef CONFIG_MACH_TX49XX
+static inline bool txx9_dma_have_SMPCHN(void)
+{
+ return true;
+}
+#define TXX9_DMA_USE_SIMPLE_CHAIN
+#else
+static inline bool txx9_dma_have_SMPCHN(void)
+{
+ return false;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN
+#ifdef CONFIG_MACH_TX49XX
+#define CCR_LE TXX9_DMA_CCR_LE
+#define MCR_LE 0
+#else
+#define CCR_LE 0
+#define MCR_LE TXX9_DMA_MCR_LE
+#endif
+#else
+#define CCR_LE 0
+#define MCR_LE 0
+#endif
+
+/*
+ * Redefine this macro to handle differences between 32- and 64-bit
+ * addressing, big vs. little endian, etc.
+ */
+#ifdef __BIG_ENDIAN
+#define TXX9_DMA_REG32(name) u32 __pad_##name; u32 name
+#else
+#define TXX9_DMA_REG32(name) u32 name; u32 __pad_##name
+#endif
+
+/* Hardware register definitions. */
+struct txx9dmac_cregs {
+#if defined(CONFIG_32BIT) && !defined(CONFIG_64BIT_PHYS_ADDR)
+ TXX9_DMA_REG32(CHAR); /* Chain Address Register */
+#else
+ u64 CHAR; /* Chain Address Register */
+#endif
+ u64 SAR; /* Source Address Register */
+ u64 DAR; /* Destination Address Register */
+ TXX9_DMA_REG32(CNTR); /* Count Register */
+ TXX9_DMA_REG32(SAIR); /* Source Address Increment Register */
+ TXX9_DMA_REG32(DAIR); /* Destination Address Increment Register */
+ TXX9_DMA_REG32(CCR); /* Channel Control Register */
+ TXX9_DMA_REG32(CSR); /* Channel Status Register */
+};
+struct txx9dmac_cregs32 {
+ u32 CHAR;
+ u32 SAR;
+ u32 DAR;
+ u32 CNTR;
+ u32 SAIR;
+ u32 DAIR;
+ u32 CCR;
+ u32 CSR;
+};
+
+struct txx9dmac_regs {
+ /* per-channel registers */
+ struct txx9dmac_cregs CHAN[TXX9_DMA_MAX_NR_CHANNELS];
+ u64 __pad[9];
+ u64 MFDR; /* Memory Fill Data Register */
+ TXX9_DMA_REG32(MCR); /* Master Control Register */
+};
+struct txx9dmac_regs32 {
+ struct txx9dmac_cregs32 CHAN[TXX9_DMA_MAX_NR_CHANNELS];
+ u32 __pad[9];
+ u32 MFDR;
+ u32 MCR;
+};
+
+/* bits for MCR */
+#define TXX9_DMA_MCR_EIS(ch) (0x10000000<<(ch))
+#define TXX9_DMA_MCR_DIS(ch) (0x01000000<<(ch))
+#define TXX9_DMA_MCR_RSFIF 0x00000080
+#define TXX9_DMA_MCR_FIFUM(ch) (0x00000008<<(ch))
+#define TXX9_DMA_MCR_LE 0x00000004
+#define TXX9_DMA_MCR_RPRT 0x00000002
+#define TXX9_DMA_MCR_MSTEN 0x00000001
+
+/* bits for CCRn */
+#define TXX9_DMA_CCR_IMMCHN 0x20000000
+#define TXX9_DMA_CCR_USEXFSZ 0x10000000
+#define TXX9_DMA_CCR_LE 0x08000000
+#define TXX9_DMA_CCR_DBINH 0x04000000
+#define TXX9_DMA_CCR_SBINH 0x02000000
+#define TXX9_DMA_CCR_CHRST 0x01000000
+#define TXX9_DMA_CCR_RVBYTE 0x00800000
+#define TXX9_DMA_CCR_ACKPOL 0x00400000
+#define TXX9_DMA_CCR_REQPL 0x00200000
+#define TXX9_DMA_CCR_EGREQ 0x00100000
+#define TXX9_DMA_CCR_CHDN 0x00080000
+#define TXX9_DMA_CCR_DNCTL 0x00060000
+#define TXX9_DMA_CCR_EXTRQ 0x00010000
+#define TXX9_DMA_CCR_INTRQD 0x0000e000
+#define TXX9_DMA_CCR_INTENE 0x00001000
+#define TXX9_DMA_CCR_INTENC 0x00000800
+#define TXX9_DMA_CCR_INTENT 0x00000400
+#define TXX9_DMA_CCR_CHNEN 0x00000200
+#define TXX9_DMA_CCR_XFACT 0x00000100
+#define TXX9_DMA_CCR_SMPCHN 0x00000020
+#define TXX9_DMA_CCR_XFSZ(order) (((order) << 2) & 0x0000001c)
+#define TXX9_DMA_CCR_XFSZ_1 TXX9_DMA_CCR_XFSZ(0)
+#define TXX9_DMA_CCR_XFSZ_2 TXX9_DMA_CCR_XFSZ(1)
+#define TXX9_DMA_CCR_XFSZ_4 TXX9_DMA_CCR_XFSZ(2)
+#define TXX9_DMA_CCR_XFSZ_8 TXX9_DMA_CCR_XFSZ(3)
+#define TXX9_DMA_CCR_XFSZ_X4 TXX9_DMA_CCR_XFSZ(4)
+#define TXX9_DMA_CCR_XFSZ_X8 TXX9_DMA_CCR_XFSZ(5)
+#define TXX9_DMA_CCR_XFSZ_X16 TXX9_DMA_CCR_XFSZ(6)
+#define TXX9_DMA_CCR_XFSZ_X32 TXX9_DMA_CCR_XFSZ(7)
+#define TXX9_DMA_CCR_MEMIO 0x00000002
+#define TXX9_DMA_CCR_SNGAD 0x00000001
+
+/* bits for CSRn */
+#define TXX9_DMA_CSR_CHNEN 0x00000400
+#define TXX9_DMA_CSR_STLXFER 0x00000200
+#define TXX9_DMA_CSR_XFACT 0x00000100
+#define TXX9_DMA_CSR_ABCHC 0x00000080
+#define TXX9_DMA_CSR_NCHNC 0x00000040
+#define TXX9_DMA_CSR_NTRNFC 0x00000020
+#define TXX9_DMA_CSR_EXTDN 0x00000010
+#define TXX9_DMA_CSR_CFERR 0x00000008
+#define TXX9_DMA_CSR_CHERR 0x00000004
+#define TXX9_DMA_CSR_DESERR 0x00000002
+#define TXX9_DMA_CSR_SORERR 0x00000001
+
+struct txx9dmac_chan {
+ struct dma_chan chan;
+ struct dma_device dma;
+ struct txx9dmac_dev *ddev;
+ void __iomem *ch_regs;
+ struct tasklet_struct tasklet;
+ int irq;
+ u32 ccr;
+
+ spinlock_t lock;
+
+ /* these other elements are all protected by lock */
+ dma_cookie_t completed;
+ struct list_head active_list;
+ struct list_head queue;
+ struct list_head free_list;
+
+ unsigned int descs_allocated;
+};
+
+struct txx9dmac_dev {
+ void __iomem *regs;
+ struct tasklet_struct tasklet;
+ int irq;
+ struct txx9dmac_chan *chan[TXX9_DMA_MAX_NR_CHANNELS];
+ bool have_64bit_regs;
+ unsigned int descsize;
+};
+
+static inline bool __is_dmac64(const struct txx9dmac_dev *ddev)
+{
+ return ddev->have_64bit_regs;
+}
+
+static inline bool is_dmac64(const struct txx9dmac_chan *dc)
+{
+ return __is_dmac64(dc->ddev);
+}
+
+#ifdef TXX9_DMA_USE_SIMPLE_CHAIN
+/* Hardware descriptor definition. (for simple-chain) */
+struct txx9dmac_hwdesc {
+#if defined(CONFIG_32BIT) && !defined(CONFIG_64BIT_PHYS_ADDR)
+ TXX9_DMA_REG32(CHAR);
+#else
+ u64 CHAR;
+#endif
+ u64 SAR;
+ u64 DAR;
+ TXX9_DMA_REG32(CNTR);
+};
+struct txx9dmac_hwdesc32 {
+ u32 CHAR;
+ u32 SAR;
+ u32 DAR;
+ u32 CNTR;
+};
+#else
+#define txx9dmac_hwdesc txx9dmac_cregs
+#define txx9dmac_hwdesc32 txx9dmac_cregs32
+#endif
+
+struct txx9dmac_desc {
+ /* FIRST values the hardware uses */
+ union {
+ struct txx9dmac_hwdesc hwdesc;
+ struct txx9dmac_hwdesc32 hwdesc32;
+ };
+
+ /* THEN values for driver housekeeping */
+ struct list_head desc_node ____cacheline_aligned;
+ struct list_head tx_list;
+ struct dma_async_tx_descriptor txd;
+ size_t len;
+};
+
+#ifdef TXX9_DMA_USE_SIMPLE_CHAIN
+
+static inline bool txx9dmac_chan_INTENT(struct txx9dmac_chan *dc)
+{
+ return (dc->ccr & TXX9_DMA_CCR_INTENT) != 0;
+}
+
+static inline void txx9dmac_chan_set_INTENT(struct txx9dmac_chan *dc)
+{
+ dc->ccr |= TXX9_DMA_CCR_INTENT;
+}
+
+static inline void txx9dmac_desc_set_INTENT(struct txx9dmac_dev *ddev,
+ struct txx9dmac_desc *desc)
+{
+}
+
+static inline void txx9dmac_chan_set_SMPCHN(struct txx9dmac_chan *dc)
+{
+ dc->ccr |= TXX9_DMA_CCR_SMPCHN;
+}
+
+static inline void txx9dmac_desc_set_nosimple(struct txx9dmac_dev *ddev,
+ struct txx9dmac_desc *desc,
+ u32 sair, u32 dair, u32 ccr)
+{
+}
+
+#else /* TXX9_DMA_USE_SIMPLE_CHAIN */
+
+static inline bool txx9dmac_chan_INTENT(struct txx9dmac_chan *dc)
+{
+ return true;
+}
+
+static void txx9dmac_chan_set_INTENT(struct txx9dmac_chan *dc)
+{
+}
+
+static inline void txx9dmac_desc_set_INTENT(struct txx9dmac_dev *ddev,
+ struct txx9dmac_desc *desc)
+{
+ if (__is_dmac64(ddev))
+ desc->hwdesc.CCR |= TXX9_DMA_CCR_INTENT;
+ else
+ desc->hwdesc32.CCR |= TXX9_DMA_CCR_INTENT;
+}
+
+static inline void txx9dmac_chan_set_SMPCHN(struct txx9dmac_chan *dc)
+{
+}
+
+static inline void txx9dmac_desc_set_nosimple(struct txx9dmac_dev *ddev,
+ struct txx9dmac_desc *desc,
+ u32 sai, u32 dai, u32 ccr)
+{
+ if (__is_dmac64(ddev)) {
+ desc->hwdesc.SAIR = sai;
+ desc->hwdesc.DAIR = dai;
+ desc->hwdesc.CCR = ccr;
+ } else {
+ desc->hwdesc32.SAIR = sai;
+ desc->hwdesc32.DAIR = dai;
+ desc->hwdesc32.CCR = ccr;
+ }
+}
+
+#endif /* TXX9_DMA_USE_SIMPLE_CHAIN */
+
+#endif /* TXX9DMAC_H */