1 From: Felix Fietkau <nbd@openwrt.org>
2 Subject: [PATCH 2/2] MIPS: partially inline dma ops
4 Several DMA ops are no-op on many platforms, and the indirection through
5 the mips_dma_map_ops function table is causing the compiler to emit
6 unnecessary code.
8 Inlining visibly improves network performance in my tests (on a 24Kc
9 based system), and also slightly reduces code size of a few drivers.
11 Signed-off-by: Felix Fietkau <nbd@openwrt.org>
13 --- a/arch/mips/Kconfig
14 +++ b/arch/mips/Kconfig
15 @@ -1426,6 +1426,7 @@ config CPU_CAVIUM_OCTEON
18 select USB_EHCI_BIG_ENDIAN_MMIO
19 + select SYS_HAS_DMA_OPS
21 The Cavium Octeon processor is a highly integrated chip containing
22 many ethernet hardware widgets for networking tasks. The processor
23 @@ -1646,6 +1647,9 @@ config SYS_HAS_CPU_XLR
24 config SYS_HAS_CPU_XLP
27 +config SYS_HAS_DMA_OPS
31 # CPU may reorder R->R, R->W, W->R, W->W
32 # Reordering beyond LL and SC is handled in WEAK_REORDERING_BEYOND_LLSC
33 --- a/arch/mips/include/asm/dma-mapping.h
34 +++ b/arch/mips/include/asm/dma-mapping.h
36 #ifndef _ASM_DMA_MAPPING_H
37 #define _ASM_DMA_MAPPING_H
39 +#include <linux/kmemcheck.h>
40 +#include <linux/bug.h>
41 +#include <linux/scatterlist.h>
42 +#include <linux/dma-debug.h>
43 +#include <linux/dma-attrs.h>
45 #include <asm/scatterlist.h>
46 #include <asm/dma-coherence.h>
47 #include <asm/cache.h>
49 #include <dma-coherence.h>
52 -extern struct dma_map_ops *mips_dma_map_ops;
53 +void __dma_sync(struct page *page, unsigned long offset, size_t size,
54 + enum dma_data_direction direction);
55 +void *mips_dma_alloc_coherent(struct device *dev, size_t size,
56 + dma_addr_t *dma_handle, gfp_t gfp,
57 + struct dma_attrs *attrs);
58 +void mips_dma_free_coherent(struct device *dev, size_t size, void *vaddr,
59 + dma_addr_t dma_handle, struct dma_attrs *attrs);
61 static inline struct dma_map_ops *get_dma_ops(struct device *dev)
63 +#ifdef CONFIG_SYS_HAS_DMA_OPS
64 if (dev && dev->archdata.dma_ops)
65 return dev->archdata.dma_ops;
67 return mips_dma_map_ops;
74 + * Warning on the terminology - Linux calls an uncached area coherent;
75 + * MIPS terminology calls memory areas with hardware maintained coherency
79 +static inline int cpu_is_noncoherent_r10000(struct device *dev)
81 +#ifndef CONFIG_SYS_HAS_CPU_R10000
84 + return !plat_device_is_coherent(dev) &&
85 + (current_cpu_type() == CPU_R10000 ||
86 + current_cpu_type() == CPU_R12000);
89 +static inline struct page *dma_addr_to_page(struct device *dev,
90 + dma_addr_t dma_addr)
93 + plat_dma_addr_to_phys(dev, dma_addr) >> PAGE_SHIFT);
96 static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
97 @@ -30,12 +69,309 @@ static inline bool dma_capable(struct de
99 static inline void dma_mark_clean(void *addr, size_t size) {}
101 -#include <asm-generic/dma-mapping-common.h>
102 +static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr,
104 + enum dma_data_direction dir,
105 + struct dma_attrs *attrs)
107 + struct dma_map_ops *ops = get_dma_ops(dev);
108 + unsigned long offset = (unsigned long)ptr & ~PAGE_MASK;
109 + struct page *page = virt_to_page(ptr);
112 + kmemcheck_mark_initialized(ptr, size);
113 + BUG_ON(!valid_dma_direction(dir));
115 + addr = ops->map_page(dev, page, offset, size, dir, attrs);
117 + if (!plat_device_is_coherent(dev))
118 + __dma_sync(page, offset, size, dir);
120 + addr = plat_map_dma_mem_page(dev, page) + offset;
122 + debug_dma_map_page(dev, page, offset, size, dir, addr, true);
126 +static inline void dma_unmap_single_attrs(struct device *dev, dma_addr_t addr,
128 + enum dma_data_direction dir,
129 + struct dma_attrs *attrs)
131 + struct dma_map_ops *ops = get_dma_ops(dev);
133 + BUG_ON(!valid_dma_direction(dir));
135 + ops->unmap_page(dev, addr, size, dir, attrs);
137 + if (cpu_is_noncoherent_r10000(dev))
138 + __dma_sync(dma_addr_to_page(dev, addr),
139 + addr & ~PAGE_MASK, size, dir);
141 + plat_unmap_dma_mem(dev, addr, size, dir);
143 + debug_dma_unmap_page(dev, addr, size, dir, true);
146 +static inline int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg,
147 + int nents, enum dma_data_direction dir,
148 + struct dma_attrs *attrs)
150 + struct dma_map_ops *ops = get_dma_ops(dev);
152 + struct scatterlist *s;
154 + for_each_sg(sg, s, nents, i)
155 + kmemcheck_mark_initialized(sg_virt(s), s->length);
156 + BUG_ON(!valid_dma_direction(dir));
158 + ents = ops->map_sg(dev, sg, nents, dir, attrs);
160 + for_each_sg(sg, s, nents, i) {
161 + struct page *page = sg_page(s);
163 + if (!plat_device_is_coherent(dev))
164 + __dma_sync(page, s->offset, s->length, dir);
166 + plat_map_dma_mem_page(dev, page) + s->offset;
170 + debug_dma_map_sg(dev, sg, nents, ents, dir);
175 +static inline void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg,
176 + int nents, enum dma_data_direction dir,
177 + struct dma_attrs *attrs)
179 + struct dma_map_ops *ops = get_dma_ops(dev);
180 + struct scatterlist *s;
183 + BUG_ON(!valid_dma_direction(dir));
184 + debug_dma_unmap_sg(dev, sg, nents, dir);
186 + ops->unmap_sg(dev, sg, nents, dir, attrs);
190 + for_each_sg(sg, s, nents, i) {
191 + if (!plat_device_is_coherent(dev) && dir != DMA_TO_DEVICE)
192 + __dma_sync(sg_page(s), s->offset, s->length, dir);
193 + plat_unmap_dma_mem(dev, s->dma_address, s->length, dir);
197 +static inline dma_addr_t dma_map_page(struct device *dev, struct page *page,
198 + size_t offset, size_t size,
199 + enum dma_data_direction dir)
201 + struct dma_map_ops *ops = get_dma_ops(dev);
204 + kmemcheck_mark_initialized(page_address(page) + offset, size);
205 + BUG_ON(!valid_dma_direction(dir));
207 + addr = ops->map_page(dev, page, offset, size, dir, NULL);
209 + if (!plat_device_is_coherent(dev))
210 + __dma_sync(page, offset, size, dir);
212 + addr = plat_map_dma_mem_page(dev, page) + offset;
214 + debug_dma_map_page(dev, page, offset, size, dir, addr, false);
219 +static inline void dma_unmap_page(struct device *dev, dma_addr_t addr,
220 + size_t size, enum dma_data_direction dir)
222 + struct dma_map_ops *ops = get_dma_ops(dev);
224 + BUG_ON(!valid_dma_direction(dir));
226 + ops->unmap_page(dev, addr, size, dir, NULL);
228 + if (cpu_is_noncoherent_r10000(dev))
229 + __dma_sync(dma_addr_to_page(dev, addr),
230 + addr & ~PAGE_MASK, size, dir);
232 + plat_unmap_dma_mem(dev, addr, size, dir);
234 + debug_dma_unmap_page(dev, addr, size, dir, false);
237 +static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
239 + enum dma_data_direction dir)
241 + struct dma_map_ops *ops = get_dma_ops(dev);
243 + BUG_ON(!valid_dma_direction(dir));
245 + ops->sync_single_for_cpu(dev, addr, size, dir);
246 + else if (cpu_is_noncoherent_r10000(dev))
247 + __dma_sync(dma_addr_to_page(dev, addr),
248 + addr & ~PAGE_MASK, size, dir);
249 + debug_dma_sync_single_for_cpu(dev, addr, size, dir);
252 +static inline void dma_sync_single_for_device(struct device *dev,
253 + dma_addr_t addr, size_t size,
254 + enum dma_data_direction dir)
256 + struct dma_map_ops *ops = get_dma_ops(dev);
258 + BUG_ON(!valid_dma_direction(dir));
260 + ops->sync_single_for_device(dev, addr, size, dir);
261 + else if (!plat_device_is_coherent(dev))
262 + __dma_sync(dma_addr_to_page(dev, addr),
263 + addr & ~PAGE_MASK, size, dir);
264 + debug_dma_sync_single_for_device(dev, addr, size, dir);
267 +static inline void dma_sync_single_range_for_cpu(struct device *dev,
269 + unsigned long offset,
271 + enum dma_data_direction dir)
273 + const struct dma_map_ops *ops = get_dma_ops(dev);
275 + BUG_ON(!valid_dma_direction(dir));
277 + ops->sync_single_for_cpu(dev, addr + offset, size, dir);
278 + else if (cpu_is_noncoherent_r10000(dev))
279 + __dma_sync(dma_addr_to_page(dev, addr + offset),
280 + (addr + offset) & ~PAGE_MASK, size, dir);
281 + debug_dma_sync_single_range_for_cpu(dev, addr, offset, size, dir);
284 +static inline void dma_sync_single_range_for_device(struct device *dev,
286 + unsigned long offset,
288 + enum dma_data_direction dir)
290 + const struct dma_map_ops *ops = get_dma_ops(dev);
292 + BUG_ON(!valid_dma_direction(dir));
294 + ops->sync_single_for_device(dev, addr + offset, size, dir);
295 + else if (!plat_device_is_coherent(dev))
296 + __dma_sync(dma_addr_to_page(dev, addr + offset),
297 + (addr + offset) & ~PAGE_MASK, size, dir);
298 + debug_dma_sync_single_range_for_device(dev, addr, offset, size, dir);
302 +dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
303 + int nelems, enum dma_data_direction dir)
305 + struct dma_map_ops *ops = get_dma_ops(dev);
306 + struct scatterlist *s;
309 + BUG_ON(!valid_dma_direction(dir));
311 + ops->sync_sg_for_cpu(dev, sg, nelems, dir);
312 + else if (cpu_is_noncoherent_r10000(dev)) {
313 + for_each_sg(sg, s, nelems, i)
314 + __dma_sync(sg_page(s), s->offset, s->length, dir);
316 + debug_dma_sync_sg_for_cpu(dev, sg, nelems, dir);
320 +dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
321 + int nelems, enum dma_data_direction dir)
323 + struct dma_map_ops *ops = get_dma_ops(dev);
324 + struct scatterlist *s;
327 + BUG_ON(!valid_dma_direction(dir));
329 + ops->sync_sg_for_device(dev, sg, nelems, dir);
330 + else if (!plat_device_is_coherent(dev)) {
331 + for_each_sg(sg, s, nelems, i)
332 + __dma_sync(sg_page(s), s->offset, s->length, dir);
334 + debug_dma_sync_sg_for_device(dev, sg, nelems, dir);
338 +#define dma_map_single(d, a, s, r) dma_map_single_attrs(d, a, s, r, NULL)
339 +#define dma_unmap_single(d, a, s, r) dma_unmap_single_attrs(d, a, s, r, NULL)
340 +#define dma_map_sg(d, s, n, r) dma_map_sg_attrs(d, s, n, r, NULL)
341 +#define dma_unmap_sg(d, s, n, r) dma_unmap_sg_attrs(d, s, n, r, NULL)
343 +extern int dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
344 + void *cpu_addr, dma_addr_t dma_addr, size_t size);
347 + * dma_mmap_attrs - map a coherent DMA allocation into user space
348 + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
349 + * @vma: vm_area_struct describing requested user mapping
350 + * @cpu_addr: kernel CPU-view address returned from dma_alloc_attrs
351 + * @handle: device-view address returned from dma_alloc_attrs
352 + * @size: size of memory originally requested in dma_alloc_attrs
353 + * @attrs: attributes of mapping properties requested in dma_alloc_attrs
355 + * Map a coherent DMA buffer previously allocated by dma_alloc_attrs
356 + * into user space. The coherent DMA buffer must not be freed by the
357 + * driver until the user space mapping has been released.
360 +dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma, void *cpu_addr,
361 + dma_addr_t dma_addr, size_t size, struct dma_attrs *attrs)
363 + struct dma_map_ops *ops = get_dma_ops(dev);
365 + if (ops && ops->mmap)
366 + return ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
367 + return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size);
370 +#define dma_mmap_coherent(d, v, c, h, s) dma_mmap_attrs(d, v, c, h, s, NULL)
372 +static inline int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma,
373 + void *cpu_addr, dma_addr_t dma_addr, size_t size)
375 + DEFINE_DMA_ATTRS(attrs);
376 + dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
377 + return dma_mmap_attrs(dev, vma, cpu_addr, dma_addr, size, &attrs);
381 +dma_common_get_sgtable(struct device *dev, struct sg_table *sgt,
382 + void *cpu_addr, dma_addr_t dma_addr, size_t size);
385 +dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt, void *cpu_addr,
386 + dma_addr_t dma_addr, size_t size, struct dma_attrs *attrs)
388 + struct dma_map_ops *ops = get_dma_ops(dev);
390 + if (ops && ops->get_sgtable)
391 + return ops->get_sgtable(dev, sgt, cpu_addr, dma_addr, size,
393 + return dma_common_get_sgtable(dev, sgt, cpu_addr, dma_addr, size);
396 +#define dma_get_sgtable(d, t, v, h, s) dma_get_sgtable_attrs(d, t, v, h, s, NULL)
399 static inline int dma_supported(struct device *dev, u64 mask)
401 struct dma_map_ops *ops = get_dma_ops(dev);
402 - return ops->dma_supported(dev, mask);
404 + return ops->dma_supported(dev, mask);
405 + return plat_dma_supported(dev, mask);
408 static inline int dma_mapping_error(struct device *dev, u64 mask)
409 @@ -43,7 +379,9 @@ static inline int dma_mapping_error(stru
410 struct dma_map_ops *ops = get_dma_ops(dev);
412 debug_dma_mapping_error(dev, mask);
413 - return ops->mapping_error(dev, mask);
415 + return ops->mapping_error(dev, mask);
420 @@ -69,7 +407,11 @@ static inline void *dma_alloc_attrs(stru
422 struct dma_map_ops *ops = get_dma_ops(dev);
424 - ret = ops->alloc(dev, size, dma_handle, gfp, attrs);
426 + ret = ops->alloc(dev, size, dma_handle, gfp, attrs);
428 + ret = mips_dma_alloc_coherent(dev, size, dma_handle, gfp,
431 debug_dma_alloc_coherent(dev, size, *dma_handle, ret);
433 @@ -84,7 +426,10 @@ static inline void dma_free_attrs(struct
435 struct dma_map_ops *ops = get_dma_ops(dev);
437 - ops->free(dev, size, vaddr, dma_handle, attrs);
439 + ops->free(dev, size, vaddr, dma_handle, attrs);
441 + mips_dma_free_coherent(dev, size, vaddr, dma_handle, attrs);
443 debug_dma_free_coherent(dev, size, vaddr, dma_handle);
445 --- a/arch/mips/mm/dma-default.c
446 +++ b/arch/mips/mm/dma-default.c
447 @@ -42,26 +42,6 @@ static int __init setnocoherentio(char *
449 early_param("nocoherentio", setnocoherentio);
451 -static inline struct page *dma_addr_to_page(struct device *dev,
452 - dma_addr_t dma_addr)
454 - return pfn_to_page(
455 - plat_dma_addr_to_phys(dev, dma_addr) >> PAGE_SHIFT);
459 - * Warning on the terminology - Linux calls an uncached area coherent;
460 - * MIPS terminology calls memory areas with hardware maintained coherency
464 -static inline int cpu_is_noncoherent_r10000(struct device *dev)
466 - return !plat_device_is_coherent(dev) &&
467 - (current_cpu_type() == CPU_R10000 ||
468 - current_cpu_type() == CPU_R12000);
471 static gfp_t massage_gfp_flags(const struct device *dev, gfp_t gfp)
474 @@ -117,8 +97,9 @@ void *dma_alloc_noncoherent(struct devic
476 EXPORT_SYMBOL(dma_alloc_noncoherent);
478 -static void *mips_dma_alloc_coherent(struct device *dev, size_t size,
479 - dma_addr_t * dma_handle, gfp_t gfp, struct dma_attrs *attrs)
480 +void *mips_dma_alloc_coherent(struct device *dev, size_t size,
481 + dma_addr_t *dma_handle, gfp_t gfp,
482 + struct dma_attrs *attrs)
486 @@ -142,6 +123,7 @@ static void *mips_dma_alloc_coherent(str
490 +EXPORT_SYMBOL(mips_dma_alloc_coherent);
493 void dma_free_noncoherent(struct device *dev, size_t size, void *vaddr,
494 @@ -152,8 +134,8 @@ void dma_free_noncoherent(struct device
496 EXPORT_SYMBOL(dma_free_noncoherent);
498 -static void mips_dma_free_coherent(struct device *dev, size_t size, void *vaddr,
499 - dma_addr_t dma_handle, struct dma_attrs *attrs)
500 +void mips_dma_free_coherent(struct device *dev, size_t size, void *vaddr,
501 + dma_addr_t dma_handle, struct dma_attrs *attrs)
503 unsigned long addr = (unsigned long) vaddr;
504 int order = get_order(size);
505 @@ -168,6 +150,7 @@ static void mips_dma_free_coherent(struc
507 free_pages(addr, get_order(size));
509 +EXPORT_SYMBOL(mips_dma_free_coherent);
511 static inline void __dma_sync_virtual(void *addr, size_t size,
512 enum dma_data_direction direction)
513 @@ -196,8 +179,8 @@ static inline void __dma_sync_virtual(vo
514 * If highmem is not configured then the bulk of this loop gets
517 -static inline void __dma_sync(struct page *page,
518 - unsigned long offset, size_t size, enum dma_data_direction direction)
519 +void __dma_sync(struct page *page, unsigned long offset, size_t size,
520 + enum dma_data_direction direction)
524 @@ -226,109 +209,7 @@ static inline void __dma_sync(struct pag
529 -static void mips_dma_unmap_page(struct device *dev, dma_addr_t dma_addr,
530 - size_t size, enum dma_data_direction direction, struct dma_attrs *attrs)
532 - if (cpu_is_noncoherent_r10000(dev))
533 - __dma_sync(dma_addr_to_page(dev, dma_addr),
534 - dma_addr & ~PAGE_MASK, size, direction);
536 - plat_unmap_dma_mem(dev, dma_addr, size, direction);
539 -static int mips_dma_map_sg(struct device *dev, struct scatterlist *sg,
540 - int nents, enum dma_data_direction direction, struct dma_attrs *attrs)
544 - for (i = 0; i < nents; i++, sg++) {
545 - if (!plat_device_is_coherent(dev))
546 - __dma_sync(sg_page(sg), sg->offset, sg->length,
548 - sg->dma_address = plat_map_dma_mem_page(dev, sg_page(sg)) +
555 -static dma_addr_t mips_dma_map_page(struct device *dev, struct page *page,
556 - unsigned long offset, size_t size, enum dma_data_direction direction,
557 - struct dma_attrs *attrs)
559 - if (!plat_device_is_coherent(dev))
560 - __dma_sync(page, offset, size, direction);
562 - return plat_map_dma_mem_page(dev, page) + offset;
565 -static void mips_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
566 - int nhwentries, enum dma_data_direction direction,
567 - struct dma_attrs *attrs)
571 - for (i = 0; i < nhwentries; i++, sg++) {
572 - if (!plat_device_is_coherent(dev) &&
573 - direction != DMA_TO_DEVICE)
574 - __dma_sync(sg_page(sg), sg->offset, sg->length,
576 - plat_unmap_dma_mem(dev, sg->dma_address, sg->length, direction);
580 -static void mips_dma_sync_single_for_cpu(struct device *dev,
581 - dma_addr_t dma_handle, size_t size, enum dma_data_direction direction)
583 - if (cpu_is_noncoherent_r10000(dev))
584 - __dma_sync(dma_addr_to_page(dev, dma_handle),
585 - dma_handle & ~PAGE_MASK, size, direction);
588 -static void mips_dma_sync_single_for_device(struct device *dev,
589 - dma_addr_t dma_handle, size_t size, enum dma_data_direction direction)
591 - if (!plat_device_is_coherent(dev))
592 - __dma_sync(dma_addr_to_page(dev, dma_handle),
593 - dma_handle & ~PAGE_MASK, size, direction);
596 -static void mips_dma_sync_sg_for_cpu(struct device *dev,
597 - struct scatterlist *sg, int nelems, enum dma_data_direction direction)
601 - /* Make sure that gcc doesn't leave the empty loop body. */
602 - for (i = 0; i < nelems; i++, sg++) {
603 - if (cpu_is_noncoherent_r10000(dev))
604 - __dma_sync(sg_page(sg), sg->offset, sg->length,
609 -static void mips_dma_sync_sg_for_device(struct device *dev,
610 - struct scatterlist *sg, int nelems, enum dma_data_direction direction)
614 - /* Make sure that gcc doesn't leave the empty loop body. */
615 - for (i = 0; i < nelems; i++, sg++) {
616 - if (!plat_device_is_coherent(dev))
617 - __dma_sync(sg_page(sg), sg->offset, sg->length,
622 -int mips_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
627 -int mips_dma_supported(struct device *dev, u64 mask)
629 - return plat_dma_supported(dev, mask);
631 +EXPORT_SYMBOL(__dma_sync);
633 void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
634 enum dma_data_direction direction)
635 @@ -341,23 +222,10 @@ void dma_cache_sync(struct device *dev,
637 EXPORT_SYMBOL(dma_cache_sync);
639 -static struct dma_map_ops mips_default_dma_map_ops = {
640 - .alloc = mips_dma_alloc_coherent,
641 - .free = mips_dma_free_coherent,
642 - .map_page = mips_dma_map_page,
643 - .unmap_page = mips_dma_unmap_page,
644 - .map_sg = mips_dma_map_sg,
645 - .unmap_sg = mips_dma_unmap_sg,
646 - .sync_single_for_cpu = mips_dma_sync_single_for_cpu,
647 - .sync_single_for_device = mips_dma_sync_single_for_device,
648 - .sync_sg_for_cpu = mips_dma_sync_sg_for_cpu,
649 - .sync_sg_for_device = mips_dma_sync_sg_for_device,
650 - .mapping_error = mips_dma_mapping_error,
651 - .dma_supported = mips_dma_supported
654 -struct dma_map_ops *mips_dma_map_ops = &mips_default_dma_map_ops;
655 +#ifdef CONFIG_SYS_HAS_DMA_OPS
656 +struct dma_map_ops *mips_dma_map_ops = NULL;
657 EXPORT_SYMBOL(mips_dma_map_ops);
660 #define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)