kernel: bump 5.4 to 5.4.36
target/linux/bcm27xx/patches-5.4/950-0456-dma-mapping-treat-dev-bus_dma_mask-as-a-DMA-limit.patch
From d5430c466b3c3b5f631ee37be333a40924575b72 Mon Sep 17 00:00:00 2001
From: Nicolas Saenz Julienne <nsaenzjulienne@suse.de>
Date: Thu, 21 Nov 2019 10:26:44 +0100
Subject: [PATCH] dma-mapping: treat dev->bus_dma_mask as a DMA limit

commit a7ba70f1787f977f970cd116076c6fce4b9e01cc upstream.

Using a mask to represent bus DMA constraints has a set of limitations.
The biggest one is that it can only hold a power of two (minus one). The
DMA mapping code is already aware of this and treats dev->bus_dma_mask
as a limit. A few architectures already rely on this quirk, though it
is still rare.

With the introduction of the Raspberry Pi 4 we've found a new contender
for the use of bus DMA limits, as its PCIe bus can only address the
lower 3GB of memory (out of a total of 4GB). This is impossible to
represent with a mask. To make things worse, the device-tree code
rounds non-power-of-two bus DMA limits up to the next power of two,
which is unacceptable in this case.
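
To make the rounding problem concrete, here is a minimal user-space C
sketch (not part of the patch; DMA_BIT_MASK() and ilog2_u64() are
stand-ins mirroring the kernel's DMA_BIT_MASK() and ilog2()) showing
how a 3GB limit gets inflated into a 4GB mask:

	#include <stdint.h>
	#include <stdio.h>

	/* Stand-in for the kernel's DMA_BIT_MASK() macro. */
	#define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL << (n)) - 1))

	/* Stand-in for the kernel's ilog2() on a nonzero 64-bit value. */
	static int ilog2_u64(uint64_t v)
	{
		return 63 - __builtin_clzll(v);
	}

	int main(void)
	{
		/* Highest bus address the Pi 4 PCIe can reach: 3GB - 1. */
		uint64_t end = 0xC0000000ULL - 1;
		/* The pre-patch rounding, as in the old of_dma_configure(). */
		uint64_t mask = DMA_BIT_MASK(ilog2_u64(end) + 1);

		/* Prints 0xbfffffff -> 0xffffffff: the mask claims the
		 * full 4GB, 1GB more than the bus can actually address. */
		printf("limit 0x%llx rounds to mask 0x%llx\n",
		       (unsigned long long)end, (unsigned long long)mask);
		return 0;
	}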

In light of this, rename dev->bus_dma_mask to dev->bus_dma_limit all
over the tree and treat it as such. Note that dev->bus_dma_limit should
contain the highest accessible DMA address.
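
As a sketch of the resulting semantics (mirroring the min_not_zero()
combination used in the dma_capable() and dma-iommu hunks below; the
values are illustrative), the effective limit is the smaller nonzero
of the device mask and the bus limit:

	#include <stdint.h>
	#include <stdio.h>

	/* Stand-in for the kernel's min_not_zero(): the smaller of two
	 * values, with 0 meaning "no constraint". */
	static uint64_t min_not_zero_u64(uint64_t a, uint64_t b)
	{
		if (!a)
			return b;
		if (!b)
			return a;
		return a < b ? a : b;
	}

	int main(void)
	{
		uint64_t dma_mask = ~0ULL;		/* device claims 64-bit DMA */
		uint64_t bus_dma_limit = 0xBFFFFFFFULL;	/* bridge stops at 3GB - 1 */

		/* Prints 0xbfffffff: a transfer is allowed only if its
		 * highest address fits under this combined limit. */
		printf("effective limit 0x%llx\n", (unsigned long long)
		       min_not_zero_u64(dma_mask, bus_dma_limit));
		return 0;
	}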

Signed-off-by: Nicolas Saenz Julienne <nsaenzjulienne@suse.de>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/mips/pci/fixup-sb1250.c  | 16 ++++++++--------
 arch/powerpc/sysdev/fsl_pci.c |  6 +++---
 arch/x86/kernel/pci-dma.c     |  2 +-
 arch/x86/mm/mem_encrypt.c     |  2 +-
 arch/x86/pci/sta2x11-fixup.c  |  2 +-
 drivers/acpi/arm64/iort.c     | 20 +++++++-------------
 drivers/ata/ahci.c            |  2 +-
 drivers/iommu/dma-iommu.c     |  3 +--
 drivers/of/device.c           |  9 +++++----
 include/linux/device.h        |  6 +++---
 include/linux/dma-direct.h    |  2 +-
 include/linux/dma-mapping.h   |  2 +-
 kernel/dma/direct.c           | 27 +++++++++++++--------------
 13 files changed, 46 insertions(+), 53 deletions(-)

--- a/arch/mips/pci/fixup-sb1250.c
+++ b/arch/mips/pci/fixup-sb1250.c
@@ -21,22 +21,22 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SI
 
 /*
  * The BCM1250, etc. PCI host bridge does not support DAC on its 32-bit
- * bus, so we set the bus's DMA mask accordingly. However the HT link
+ * bus, so we set the bus's DMA limit accordingly. However the HT link
  * down the artificial PCI-HT bridge supports 40-bit addressing and the
  * SP1011 HT-PCI bridge downstream supports both DAC and a 64-bit bus
  * width, so we record the PCI-HT bridge's secondary and subordinate bus
- * numbers and do not set the mask for devices present in the inclusive
+ * numbers and do not set the limit for devices present in the inclusive
  * range of those.
  */
-struct sb1250_bus_dma_mask_exclude {
+struct sb1250_bus_dma_limit_exclude {
 	bool set;
 	unsigned char start;
 	unsigned char end;
 };
 
-static int sb1250_bus_dma_mask(struct pci_dev *dev, void *data)
+static int sb1250_bus_dma_limit(struct pci_dev *dev, void *data)
 {
-	struct sb1250_bus_dma_mask_exclude *exclude = data;
+	struct sb1250_bus_dma_limit_exclude *exclude = data;
 	bool exclude_this;
 	bool ht_bridge;
 
@@ -55,7 +55,7 @@ static int sb1250_bus_dma_mask(struct pc
 			exclude->start, exclude->end);
 	} else {
 		dev_dbg(&dev->dev, "disabling DAC for device");
-		dev->dev.bus_dma_mask = DMA_BIT_MASK(32);
+		dev->dev.bus_dma_limit = DMA_BIT_MASK(32);
 	}
 
 	return 0;
@@ -63,9 +63,9 @@ static int sb1250_bus_dma_mask(struct pc
 
 static void quirk_sb1250_pci_dac(struct pci_dev *dev)
 {
-	struct sb1250_bus_dma_mask_exclude exclude = { .set = false };
+	struct sb1250_bus_dma_limit_exclude exclude = { .set = false };
 
-	pci_walk_bus(dev->bus, sb1250_bus_dma_mask, &exclude);
+	pci_walk_bus(dev->bus, sb1250_bus_dma_limit, &exclude);
 }
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_SIBYTE, PCI_DEVICE_ID_BCM1250_PCI,
 		quirk_sb1250_pci_dac);
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -115,8 +115,8 @@ static void pci_dma_dev_setup_swiotlb(st
 {
 	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
 
-	pdev->dev.bus_dma_mask =
-		hose->dma_window_base_cur + hose->dma_window_size;
+	pdev->dev.bus_dma_limit =
+		hose->dma_window_base_cur + hose->dma_window_size - 1;
 }
 
 static void setup_swiotlb_ops(struct pci_controller *hose)
@@ -135,7 +135,7 @@ static void fsl_pci_dma_set_mask(struct
 	 * mapping that allows addressing any RAM address from across PCI.
 	 */
 	if (dev_is_pci(dev) && dma_mask >= pci64_dma_offset * 2 - 1) {
-		dev->bus_dma_mask = 0;
+		dev->bus_dma_limit = 0;
 		dev->archdata.dma_offset = pci64_dma_offset;
 	}
 }
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -146,7 +146,7 @@ rootfs_initcall(pci_iommu_init);
 
 static int via_no_dac_cb(struct pci_dev *pdev, void *data)
 {
-	pdev->dev.bus_dma_mask = DMA_BIT_MASK(32);
+	pdev->dev.bus_dma_limit = DMA_BIT_MASK(32);
 	return 0;
 }
 
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -367,7 +367,7 @@ bool force_dma_unencrypted(struct device
 	if (sme_active()) {
 		u64 dma_enc_mask = DMA_BIT_MASK(__ffs64(sme_me_mask));
 		u64 dma_dev_mask = min_not_zero(dev->coherent_dma_mask,
-						dev->bus_dma_mask);
+						dev->bus_dma_limit);
 
 		if (dma_dev_mask <= dma_enc_mask)
 			return true;
--- a/arch/x86/pci/sta2x11-fixup.c
+++ b/arch/x86/pci/sta2x11-fixup.c
@@ -143,7 +143,7 @@ static void sta2x11_map_ep(struct pci_de
 
 	dev->dma_pfn_offset = PFN_DOWN(-amba_base);
 
-	dev->bus_dma_mask = max_amba_addr;
+	dev->bus_dma_limit = max_amba_addr;
 	pci_set_consistent_dma_mask(pdev, max_amba_addr);
 	pci_set_dma_mask(pdev, max_amba_addr);
 
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -1057,8 +1057,8 @@ static int rc_dma_get_range(struct devic
  */
 void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size)
 {
-	u64 mask, dmaaddr = 0, size = 0, offset = 0;
-	int ret, msb;
+	u64 end, mask, dmaaddr = 0, size = 0, offset = 0;
+	int ret;
 
 	/*
 	 * If @dev is expected to be DMA-capable then the bus code that created
@@ -1085,19 +1085,13 @@ void iort_dma_setup(struct device *dev,
 	}
 
 	if (!ret) {
-		msb = fls64(dmaaddr + size - 1);
 		/*
-		 * Round-up to the power-of-two mask or set
-		 * the mask to the whole 64-bit address space
-		 * in case the DMA region covers the full
-		 * memory window.
+		 * Limit coherent and dma mask based on size retrieved from
+		 * firmware.
 		 */
-		mask = msb == 64 ? U64_MAX : (1ULL << msb) - 1;
-		/*
-		 * Limit coherent and dma mask based on size
-		 * retrieved from firmware.
-		 */
-		dev->bus_dma_mask = mask;
+		end = dmaaddr + size - 1;
+		mask = DMA_BIT_MASK(ilog2(end) + 1);
+		dev->bus_dma_limit = end;
 		dev->coherent_dma_mask = mask;
 		*dev->dma_mask = mask;
 	}
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -900,7 +900,7 @@ static int ahci_configure_dma_masks(stru
 	 * value, don't extend it here. This happens on STA2X11, for example.
 	 *
 	 * XXX: manipulating the DMA mask from platform code is completely
-	 * bogus, platform code should use dev->bus_dma_mask instead..
+	 * bogus, platform code should use dev->bus_dma_limit instead..
 	 */
 	if (pdev->dma_mask && pdev->dma_mask < DMA_BIT_MASK(32))
 		return 0;
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -404,8 +404,7 @@ static dma_addr_t iommu_dma_alloc_iova(s
 	if (iova_len < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1)))
 		iova_len = roundup_pow_of_two(iova_len);
 
-	if (dev->bus_dma_mask)
-		dma_limit &= dev->bus_dma_mask;
+	dma_limit = min_not_zero(dma_limit, dev->bus_dma_limit);
 
 	if (domain->geometry.force_aperture)
 		dma_limit = min(dma_limit, domain->geometry.aperture_end);
--- a/drivers/of/device.c
+++ b/drivers/of/device.c
@@ -93,7 +93,7 @@ int of_dma_configure(struct device *dev,
 	bool coherent;
 	unsigned long offset;
 	const struct iommu_ops *iommu;
-	u64 mask;
+	u64 mask, end;
 
 	ret = of_dma_get_range(np, &dma_addr, &paddr, &size);
 	if (ret < 0) {
@@ -148,12 +148,13 @@ int of_dma_configure(struct device *dev,
 	 * Limit coherent and dma mask based on size and default mask
 	 * set by the driver.
 	 */
-	mask = DMA_BIT_MASK(ilog2(dma_addr + size - 1) + 1);
+	end = dma_addr + size - 1;
+	mask = DMA_BIT_MASK(ilog2(end) + 1);
 	dev->coherent_dma_mask &= mask;
 	*dev->dma_mask &= mask;
-	/* ...but only set bus mask if we found valid dma-ranges earlier */
+	/* ...but only set bus limit if we found valid dma-ranges earlier */
 	if (!ret)
-		dev->bus_dma_mask = mask;
+		dev->bus_dma_limit = end;
 
 	coherent = of_dma_is_coherent(np);
 	dev_dbg(dev, "device is%sdma coherent\n",
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -1186,8 +1186,8 @@ struct dev_links_info {
  * @coherent_dma_mask: Like dma_mask, but for alloc_coherent mapping as not all
  *		hardware supports 64-bit addresses for consistent allocations
  *		such descriptors.
- * @bus_dma_mask: Mask of an upstream bridge or bus which imposes a smaller DMA
- *		limit than the device itself supports.
+ * @bus_dma_limit: Limit of an upstream bridge or bus which imposes a smaller
+ *		DMA limit than the device itself supports.
 * @dma_pfn_offset: offset of DMA memory range relatively of RAM
 * @dma_parms: A low level driver may set these to teach IOMMU code about
 *		segment limitations.
@@ -1270,7 +1270,7 @@ struct device {
					     not all hardware supports
					     64 bit addresses for consistent
					     allocations such descriptors. */
-	u64		bus_dma_mask;	/* upstream dma_mask constraint */
+	u64		bus_dma_limit;	/* upstream dma constraint */
 	unsigned long	dma_pfn_offset;
 
 	struct device_dma_parameters *dma_parms;
--- a/include/linux/dma-direct.h
+++ b/include/linux/dma-direct.h
@@ -63,7 +63,7 @@ static inline bool dma_capable(struct de
 	    min(addr, end) < phys_to_dma(dev, PFN_PHYS(min_low_pfn)))
 		return false;
 
-	return end <= min_not_zero(*dev->dma_mask, dev->bus_dma_mask);
+	return end <= min_not_zero(*dev->dma_mask, dev->bus_dma_limit);
 }
 
 u64 dma_direct_get_required_mask(struct device *dev);
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -697,7 +697,7 @@ static inline int dma_coerce_mask_and_co
  */
 static inline bool dma_addressing_limited(struct device *dev)
 {
-	return min_not_zero(dma_get_mask(dev), dev->bus_dma_mask) <
+	return min_not_zero(dma_get_mask(dev), dev->bus_dma_limit) <
			    dma_get_required_mask(dev);
 }
 
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -26,10 +26,10 @@ static void report_addr(struct device *d
 {
 	if (!dev->dma_mask) {
 		dev_err_once(dev, "DMA map on device without dma_mask\n");
-	} else if (*dev->dma_mask >= DMA_BIT_MASK(32) || dev->bus_dma_mask) {
+	} else if (*dev->dma_mask >= DMA_BIT_MASK(32) || dev->bus_dma_limit) {
 		dev_err_once(dev,
-			"overflow %pad+%zu of DMA mask %llx bus mask %llx\n",
-			&dma_addr, size, *dev->dma_mask, dev->bus_dma_mask);
+			"overflow %pad+%zu of DMA mask %llx bus limit %llx\n",
+			&dma_addr, size, *dev->dma_mask, dev->bus_dma_limit);
 	}
 	WARN_ON_ONCE(1);
 }
@@ -51,15 +51,14 @@ u64 dma_direct_get_required_mask(struct
 }
 
 static gfp_t __dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask,
-		u64 *phys_mask)
+		u64 *phys_limit)
 {
-	if (dev->bus_dma_mask && dev->bus_dma_mask < dma_mask)
-		dma_mask = dev->bus_dma_mask;
+	u64 dma_limit = min_not_zero(dma_mask, dev->bus_dma_limit);
 
 	if (force_dma_unencrypted(dev))
-		*phys_mask = __dma_to_phys(dev, dma_mask);
+		*phys_limit = __dma_to_phys(dev, dma_limit);
 	else
-		*phys_mask = dma_to_phys(dev, dma_mask);
+		*phys_limit = dma_to_phys(dev, dma_limit);
 
 	/*
 	 * Optimistically try the zone that the physical address mask falls
@@ -69,9 +68,9 @@ static gfp_t __dma_direct_optimal_gfp_ma
 	 * Note that GFP_DMA32 and GFP_DMA are no ops without the corresponding
 	 * zones.
 	 */
-	if (*phys_mask <= DMA_BIT_MASK(zone_dma_bits))
+	if (*phys_limit <= DMA_BIT_MASK(zone_dma_bits))
 		return GFP_DMA;
-	if (*phys_mask <= DMA_BIT_MASK(32))
+	if (*phys_limit <= DMA_BIT_MASK(32))
 		return GFP_DMA32;
 	return 0;
 }
@@ -79,7 +78,7 @@ static gfp_t __dma_direct_optimal_gfp_ma
 static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
 {
 	return phys_to_dma_direct(dev, phys) + size - 1 <=
-			min_not_zero(dev->coherent_dma_mask, dev->bus_dma_mask);
+			min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit);
 }
 
 struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
@@ -88,7 +87,7 @@ struct page *__dma_direct_alloc_pages(st
 	size_t alloc_size = PAGE_ALIGN(size);
 	int node = dev_to_node(dev);
 	struct page *page = NULL;
-	u64 phys_mask;
+	u64 phys_limit;
 
 	if (attrs & DMA_ATTR_NO_WARN)
 		gfp |= __GFP_NOWARN;
@@ -96,7 +95,7 @@ struct page *__dma_direct_alloc_pages(st
 	/* we always manually zero the memory once we are done: */
 	gfp &= ~__GFP_ZERO;
 	gfp |= __dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
-			&phys_mask);
+			&phys_limit);
 	page = dma_alloc_contiguous(dev, alloc_size, gfp);
 	if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
 		dma_free_contiguous(dev, page, alloc_size);
@@ -110,7 +109,7 @@ again:
 	page = NULL;
 
 	if (IS_ENABLED(CONFIG_ZONE_DMA32) &&
-	    phys_mask < DMA_BIT_MASK(64) &&
+	    phys_limit < DMA_BIT_MASK(64) &&
 	    !(gfp & (GFP_DMA32 | GFP_DMA))) {
 		gfp |= GFP_DMA32;
 		goto again;