164b3fb48e8d92ce1be398088f70bff7e9051a2c
[openwrt/openwrt.git] / target / linux / ipq40xx / patches-4.14 / 040-dmaengine-qcom-bam-Process-multiple-pending-descript.patch
1 From 6b4faeac05bc0b91616b921191cb054d1376f3b4 Mon Sep 17 00:00:00 2001
2 From: Sricharan R <sricharan@codeaurora.org>
3 Date: Mon, 28 Aug 2017 20:30:24 +0530
4 Subject: [PATCH] dmaengine: qcom-bam: Process multiple pending descriptors
5
6 The bam dmaengine has a circular FIFO to which we
7 add hw descriptors that describe the transaction.
8 The FIFO has space for about 4096 hw descriptors.
9
10 Currently we add one descriptor and wait for it to
11 complete with interrupt and then add the next pending
12 descriptor. In this way, the FIFO is underutilized
13 since only one descriptor is processed at a time, although
14 there is space in FIFO for the BAM to process more.
15
16 Instead, keep adding descriptors to the FIFO until it is full,
17 which allows BAM to continue working on the next descriptor
18 immediately after signalling the completion interrupt for the
19 previous descriptor.
20
21 Also, when the client has not set DMA_PREP_INTERRUPT for
22 a descriptor, do not configure BAM to trigger an interrupt
23 upon completion of that descriptor. This way we get an interrupt
24 only for the descriptor for which DMA_PREP_INTERRUPT was
25 requested, and then signal completion of all the previously completed
26 descriptors. So we still do callbacks for all requested descriptors,
27 but the number of interrupts is reduced.
28
29 CURRENT:
30
31 ------ ------- ---------------
32 |DES 0| |DESC 1| |DESC 2 + INT |
33 ------ ------- ---------------
34 | | |
35 | | |
36 INTERRUPT: (INT) (INT) (INT)
37 CALLBACK: (CB) (CB) (CB)
38
39 MTD_SPEEDTEST READ PAGE: 3560 KiB/s
40 MTD_SPEEDTEST WRITE PAGE: 2664 KiB/s
41 IOZONE READ: 2456 KB/s
42 IOZONE WRITE: 1230 KB/s
43
44 bam dma interrupts (after tests): 96508
45
46 CHANGE:
47
48 ------ ------- -------------
49 |DES 0| |DESC 1 |DESC 2 + INT |
50 ------ ------- --------------
51 |
52 |
53 (INT)
54 (CB for 0, 1, 2)
55
56 MTD_SPEEDTEST READ PAGE: 3860 KiB/s
57 MTD_SPEEDTEST WRITE PAGE: 2837 KiB/s
58 IOZONE READ: 2677 KB/s
59 IOZONE WRITE: 1308 KB/s
60
61 bam dma interrupts (after tests): 58806
62
63 Signed-off-by: Sricharan R <sricharan@codeaurora.org>
64 Reviewed-by: Andy Gross <andy.gross@linaro.org>
65 Tested-by: Abhishek Sahu <absahu@codeaurora.org>
66 Signed-off-by: Vinod Koul <vinod.koul@intel.com>
67 ---
68 drivers/dma/qcom/bam_dma.c | 169 +++++++++++++++++++++++++++++----------------
69 1 file changed, 109 insertions(+), 60 deletions(-)
70
71 --- a/drivers/dma/qcom/bam_dma.c
72 +++ b/drivers/dma/qcom/bam_dma.c
73 @@ -46,6 +46,7 @@
74 #include <linux/of_address.h>
75 #include <linux/of_irq.h>
76 #include <linux/of_dma.h>
77 +#include <linux/circ_buf.h>
78 #include <linux/clk.h>
79 #include <linux/dmaengine.h>
80 #include <linux/pm_runtime.h>
81 @@ -78,6 +79,8 @@ struct bam_async_desc {
82
83 struct bam_desc_hw *curr_desc;
84
85 + /* list node for the desc in the bam_chan list of descriptors */
86 + struct list_head desc_node;
87 enum dma_transfer_direction dir;
88 size_t length;
89 struct bam_desc_hw desc[0];
90 @@ -347,6 +350,8 @@ static const struct reg_offset_data bam_
91 #define BAM_DESC_FIFO_SIZE SZ_32K
92 #define MAX_DESCRIPTORS (BAM_DESC_FIFO_SIZE / sizeof(struct bam_desc_hw) - 1)
93 #define BAM_FIFO_SIZE (SZ_32K - 8)
94 +#define IS_BUSY(chan) (CIRC_SPACE((chan)->tail, (chan)->head,\
95 + MAX_DESCRIPTORS + 1) == 0)
96
97 struct bam_chan {
98 struct virt_dma_chan vc;
99 @@ -356,8 +361,6 @@ struct bam_chan {
100 /* configuration from device tree */
101 u32 id;
102
103 - struct bam_async_desc *curr_txd; /* current running dma */
104 -
105 /* runtime configuration */
106 struct dma_slave_config slave;
107
108 @@ -372,6 +375,8 @@ struct bam_chan {
109 unsigned int initialized; /* is the channel hw initialized? */
110 unsigned int paused; /* is the channel paused? */
111 unsigned int reconfigure; /* new slave config? */
112 + /* list of descriptors currently processed */
113 + struct list_head desc_list;
114
115 struct list_head node;
116 };
117 @@ -540,7 +545,7 @@ static void bam_free_chan(struct dma_cha
118
119 vchan_free_chan_resources(to_virt_chan(chan));
120
121 - if (bchan->curr_txd) {
122 + if (!list_empty(&bchan->desc_list)) {
123 dev_err(bchan->bdev->dev, "Cannot free busy channel\n");
124 goto err;
125 }
126 @@ -633,8 +638,6 @@ static struct dma_async_tx_descriptor *b
127
128 if (flags & DMA_PREP_INTERRUPT)
129 async_desc->flags |= DESC_FLAG_EOT;
130 - else
131 - async_desc->flags |= DESC_FLAG_INT;
132
133 async_desc->num_desc = num_alloc;
134 async_desc->curr_desc = async_desc->desc;
135 @@ -685,29 +688,17 @@ err_out:
136 static int bam_dma_terminate_all(struct dma_chan *chan)
137 {
138 struct bam_chan *bchan = to_bam_chan(chan);
139 + struct bam_async_desc *async_desc, *tmp;
140 unsigned long flag;
141 LIST_HEAD(head);
142
143 /* remove all transactions, including active transaction */
144 spin_lock_irqsave(&bchan->vc.lock, flag);
145 - /*
146 - * If we have transactions queued, then some might be committed to the
147 - * hardware in the desc fifo. The only way to reset the desc fifo is
148 - * to do a hardware reset (either by pipe or the entire block).
149 - * bam_chan_init_hw() will trigger a pipe reset, and also reinit the
150 - * pipe. If the pipe is left disabled (default state after pipe reset)
151 - * and is accessed by a connected hardware engine, a fatal error in
152 - * the BAM will occur. There is a small window where this could happen
153 - * with bam_chan_init_hw(), but it is assumed that the caller has
154 - * stopped activity on any attached hardware engine. Make sure to do
155 - * this first so that the BAM hardware doesn't cause memory corruption
156 - * by accessing freed resources.
157 - */
158 - if (bchan->curr_txd) {
159 - bam_chan_init_hw(bchan, bchan->curr_txd->dir);
160 - list_add(&bchan->curr_txd->vd.node, &bchan->vc.desc_issued);
161 - bchan->curr_txd = NULL;
162 + list_for_each_entry_safe(async_desc, tmp,
163 + &bchan->desc_list, desc_node) {
164 + list_add(&async_desc->vd.node, &bchan->vc.desc_issued);
165 + list_del(&async_desc->desc_node);
166 }
167
168 vchan_get_all_descriptors(&bchan->vc, &head);
169 spin_unlock_irqrestore(&bchan->vc.lock, flag);
170 @@ -778,9 +768,9 @@ static int bam_resume(struct dma_chan *c
171 */
172 static u32 process_channel_irqs(struct bam_device *bdev)
173 {
174 - u32 i, srcs, pipe_stts;
175 + u32 i, srcs, pipe_stts, offset, avail;
176 unsigned long flags;
177 - struct bam_async_desc *async_desc;
178 + struct bam_async_desc *async_desc, *tmp;
179
180 srcs = readl_relaxed(bam_addr(bdev, 0, BAM_IRQ_SRCS_EE));
181
182 @@ -800,27 +790,40 @@ static u32 process_channel_irqs(struct b
183 writel_relaxed(pipe_stts, bam_addr(bdev, i, BAM_P_IRQ_CLR));
184
185 spin_lock_irqsave(&bchan->vc.lock, flags);
186 - async_desc = bchan->curr_txd;
187
188 - if (async_desc) {
189 - async_desc->num_desc -= async_desc->xfer_len;
190 - async_desc->curr_desc += async_desc->xfer_len;
191 - bchan->curr_txd = NULL;
192 + offset = readl_relaxed(bam_addr(bdev, i, BAM_P_SW_OFSTS)) &
193 + P_SW_OFSTS_MASK;
194 + offset /= sizeof(struct bam_desc_hw);
195 +
196 + /* Number of bytes available to read */
197 + avail = CIRC_CNT(offset, bchan->head, MAX_DESCRIPTORS + 1);
198 +
199 + list_for_each_entry_safe(async_desc, tmp,
200 + &bchan->desc_list, desc_node) {
201 + /* Not enough data to read */
202 + if (avail < async_desc->xfer_len)
203 + break;
204
205 /* manage FIFO */
206 bchan->head += async_desc->xfer_len;
207 bchan->head %= MAX_DESCRIPTORS;
208
209 + async_desc->num_desc -= async_desc->xfer_len;
210 + async_desc->curr_desc += async_desc->xfer_len;
211 + avail -= async_desc->xfer_len;
212 +
213 /*
214 - * if complete, process cookie. Otherwise
215 + * if complete, process cookie. Otherwise
216 * push back to front of desc_issued so that
217 * it gets restarted by the tasklet
218 */
219 - if (!async_desc->num_desc)
220 + if (!async_desc->num_desc) {
221 vchan_cookie_complete(&async_desc->vd);
222 - else
223 + } else {
224 list_add(&async_desc->vd.node,
225 - &bchan->vc.desc_issued);
226 + &bchan->vc.desc_issued);
227 + }
228 + list_del(&async_desc->desc_node);
229 }
230
231 spin_unlock_irqrestore(&bchan->vc.lock, flags);
232 @@ -882,6 +885,7 @@ static enum dma_status bam_tx_status(str
233 struct dma_tx_state *txstate)
234 {
235 struct bam_chan *bchan = to_bam_chan(chan);
236 + struct bam_async_desc *async_desc;
237 struct virt_dma_desc *vd;
238 int ret;
239 size_t residue = 0;
240 @@ -897,11 +901,17 @@ static enum dma_status bam_tx_status(str
241
242 spin_lock_irqsave(&bchan->vc.lock, flags);
243 vd = vchan_find_desc(&bchan->vc, cookie);
244 - if (vd)
245 + if (vd) {
246 residue = container_of(vd, struct bam_async_desc, vd)->length;
247 - else if (bchan->curr_txd && bchan->curr_txd->vd.tx.cookie == cookie)
248 - for (i = 0; i < bchan->curr_txd->num_desc; i++)
249 - residue += bchan->curr_txd->curr_desc[i].size;
250 + } else {
251 + list_for_each_entry(async_desc, &bchan->desc_list, desc_node) {
252 + if (async_desc->vd.tx.cookie != cookie)
253 + continue;
254 +
255 + for (i = 0; i < async_desc->num_desc; i++)
256 + residue += async_desc->curr_desc[i].size;
257 + }
258 + }
259
260 spin_unlock_irqrestore(&bchan->vc.lock, flags);
261
262 @@ -942,63 +952,86 @@ static void bam_start_dma(struct bam_cha
263 {
264 struct virt_dma_desc *vd = vchan_next_desc(&bchan->vc);
265 struct bam_device *bdev = bchan->bdev;
266 - struct bam_async_desc *async_desc;
267 + struct bam_async_desc *async_desc = NULL;
268 struct bam_desc_hw *desc;
269 struct bam_desc_hw *fifo = PTR_ALIGN(bchan->fifo_virt,
270 sizeof(struct bam_desc_hw));
271 int ret;
272 + unsigned int avail;
273 + struct dmaengine_desc_callback cb;
274
275 lockdep_assert_held(&bchan->vc.lock);
276
277 if (!vd)
278 return;
279
280 - list_del(&vd->node);
281 -
282 - async_desc = container_of(vd, struct bam_async_desc, vd);
283 - bchan->curr_txd = async_desc;
284 -
285 ret = pm_runtime_get_sync(bdev->dev);
286 if (ret < 0)
287 return;
288
289 - /* on first use, initialize the channel hardware */
290 - if (!bchan->initialized)
291 - bam_chan_init_hw(bchan, async_desc->dir);
292 -
293 - /* apply new slave config changes, if necessary */
294 - if (bchan->reconfigure)
295 - bam_apply_new_config(bchan, async_desc->dir);
296 + while (vd && !IS_BUSY(bchan)) {
297 + list_del(&vd->node);
298
299 - desc = bchan->curr_txd->curr_desc;
300 + async_desc = container_of(vd, struct bam_async_desc, vd);
301
302 - if (async_desc->num_desc > MAX_DESCRIPTORS)
303 - async_desc->xfer_len = MAX_DESCRIPTORS;
304 - else
305 - async_desc->xfer_len = async_desc->num_desc;
306 + /* on first use, initialize the channel hardware */
307 + if (!bchan->initialized)
308 + bam_chan_init_hw(bchan, async_desc->dir);
309
310 - /* set any special flags on the last descriptor */
311 - if (async_desc->num_desc == async_desc->xfer_len)
312 - desc[async_desc->xfer_len - 1].flags |=
313 - cpu_to_le16(async_desc->flags);
314 - else
315 - desc[async_desc->xfer_len - 1].flags |=
316 - cpu_to_le16(DESC_FLAG_INT);
317 + /* apply new slave config changes, if necessary */
318 + if (bchan->reconfigure)
319 + bam_apply_new_config(bchan, async_desc->dir);
320 +
321 + desc = async_desc->curr_desc;
322 + avail = CIRC_SPACE(bchan->tail, bchan->head,
323 + MAX_DESCRIPTORS + 1);
324 +
325 + if (async_desc->num_desc > avail)
326 + async_desc->xfer_len = avail;
327 + else
328 + async_desc->xfer_len = async_desc->num_desc;
329
330 - if (bchan->tail + async_desc->xfer_len > MAX_DESCRIPTORS) {
331 - u32 partial = MAX_DESCRIPTORS - bchan->tail;
332 + /* set any special flags on the last descriptor */
333 + if (async_desc->num_desc == async_desc->xfer_len)
334 + desc[async_desc->xfer_len - 1].flags |=
335 + cpu_to_le16(async_desc->flags);
336
337 - memcpy(&fifo[bchan->tail], desc,
338 - partial * sizeof(struct bam_desc_hw));
339 - memcpy(fifo, &desc[partial], (async_desc->xfer_len - partial) *
340 + vd = vchan_next_desc(&bchan->vc);
341 +
342 + dmaengine_desc_get_callback(&async_desc->vd.tx, &cb);
343 +
344 + /*
345 + * An interrupt is generated at this desc, if
346 + * - FIFO is FULL.
347 + * - No more descriptors to add.
348 + * - If a callback completion was requested for this DESC,
349 + * In this case, BAM will deliver the completion callback
350 + * for this desc and continue processing the next desc.
351 + */
352 + if (((avail <= async_desc->xfer_len) || !vd ||
353 + dmaengine_desc_callback_valid(&cb)) &&
354 + !(async_desc->flags & DESC_FLAG_EOT))
355 + desc[async_desc->xfer_len - 1].flags |=
356 + cpu_to_le16(DESC_FLAG_INT);
357 +
358 + if (bchan->tail + async_desc->xfer_len > MAX_DESCRIPTORS) {
359 + u32 partial = MAX_DESCRIPTORS - bchan->tail;
360 +
361 + memcpy(&fifo[bchan->tail], desc,
362 + partial * sizeof(struct bam_desc_hw));
363 + memcpy(fifo, &desc[partial],
364 + (async_desc->xfer_len - partial) *
365 sizeof(struct bam_desc_hw));
366 - } else {
367 - memcpy(&fifo[bchan->tail], desc,
368 - async_desc->xfer_len * sizeof(struct bam_desc_hw));
369 - }
370 + } else {
371 + memcpy(&fifo[bchan->tail], desc,
372 + async_desc->xfer_len *
373 + sizeof(struct bam_desc_hw));
374 + }
375
376 - bchan->tail += async_desc->xfer_len;
377 - bchan->tail %= MAX_DESCRIPTORS;
378 + bchan->tail += async_desc->xfer_len;
379 + bchan->tail %= MAX_DESCRIPTORS;
380 + list_add_tail(&async_desc->desc_node, &bchan->desc_list);
381 + }
382
383 /* ensure descriptor writes and dma start not reordered */
384 wmb();
385 @@ -1027,7 +1060,7 @@ static void dma_tasklet(unsigned long da
386 bchan = &bdev->channels[i];
387 spin_lock_irqsave(&bchan->vc.lock, flags);
388
389 - if (!list_empty(&bchan->vc.desc_issued) && !bchan->curr_txd)
390 + if (!list_empty(&bchan->vc.desc_issued) && !IS_BUSY(bchan))
391 bam_start_dma(bchan);
392 spin_unlock_irqrestore(&bchan->vc.lock, flags);
393 }
394 @@ -1048,7 +1081,7 @@ static void bam_issue_pending(struct dma
395 spin_lock_irqsave(&bchan->vc.lock, flags);
396
397 /* if work pending and idle, start a transaction */
398 - if (vchan_issue_pending(&bchan->vc) && !bchan->curr_txd)
399 + if (vchan_issue_pending(&bchan->vc) && !IS_BUSY(bchan))
400 bam_start_dma(bchan);
401
402 spin_unlock_irqrestore(&bchan->vc.lock, flags);
403 @@ -1152,6 +1185,7 @@ static void bam_channel_init(struct bam_
404
405 vchan_init(&bchan->vc, &bdev->common);
406 bchan->vc.desc_free = bam_dma_free_desc;
407 + INIT_LIST_HEAD(&bchan->desc_list);
408 }
409
410 static const struct of_device_id bam_of_match[] = {