X-Git-Url: http://git.openwrt.org/?a=blobdiff_plain;f=target%2Flinux%2Fbrcm2708%2Fpatches-4.19%2F950-0457-PCI-brcmstb-Add-dma-range-mapping-for-inbound-traffi.patch;fp=target%2Flinux%2Fbrcm2708%2Fpatches-4.19%2F950-0457-PCI-brcmstb-Add-dma-range-mapping-for-inbound-traffi.patch;h=0000000000000000000000000000000000000000;hb=7d7aa2fd924c27829ec25f825481554dd81bce97;hp=75615e372f5d487bb0ab4bb47b62d8dd4ae2ea43;hpb=e7bfda2c243e66a75ff966ba04c28b1590b5d24c;p=openwrt%2Fstaging%2Fchunkeey.git diff --git a/target/linux/brcm2708/patches-4.19/950-0457-PCI-brcmstb-Add-dma-range-mapping-for-inbound-traffi.patch b/target/linux/brcm2708/patches-4.19/950-0457-PCI-brcmstb-Add-dma-range-mapping-for-inbound-traffi.patch deleted file mode 100644 index 75615e372f..0000000000 --- a/target/linux/brcm2708/patches-4.19/950-0457-PCI-brcmstb-Add-dma-range-mapping-for-inbound-traffi.patch +++ /dev/null @@ -1,569 +0,0 @@ -From d3cc1c713b9436a7dc72788caa1d8de63ac3a01b Mon Sep 17 00:00:00 2001 -From: Phil Elwell -Date: Tue, 19 Feb 2019 22:06:59 +0000 -Subject: [PATCH] PCI: brcmstb: Add dma-range mapping for inbound - traffic - -The Broadcom STB PCIe host controller is intimately related to the -memory subsystem. This close relationship adds complexity to how cpu -system memory is mapped to PCIe memory. Ideally, this mapping is an -identity mapping, or an identity mapping off by a constant. Not so in -this case. - -Consider the Broadcom reference board BCM97445LCC_4X8 which has 6 GB -of system memory. Here is how the PCIe controller maps the -system memory to PCIe memory: - - memc0-a@[ 0....3fffffff] <=> pci@[ 0....3fffffff] - memc0-b@[100000000...13fffffff] <=> pci@[ 40000000....7fffffff] - memc1-a@[ 40000000....7fffffff] <=> pci@[ 80000000....bfffffff] - memc1-b@[300000000...33fffffff] <=> pci@[ c0000000....ffffffff] - memc2-a@[ 80000000....bfffffff] <=> pci@[100000000...13fffffff] - memc2-b@[c00000000...c3fffffff] <=> pci@[140000000...17fffffff] - -Although there are some "gaps" that can be added between the -individual mappings by software, the permutation of memory regions for -the most part is fixed by HW. The solution of having something close -to an identity mapping is not possible. - -The idea behind this HW design is that the same PCIe module can -act as an RC or EP, and if it acts as an EP it concatenates all -of system memory into a BAR so anything can be accessed. Unfortunately, -when the PCIe block is in the role of an RC it also presents this -"BAR" to downstream PCIe devices, rather than offering an identity map -between its system memory and PCIe space. - -Suppose that an endpoint driver allocs some DMA memory. Suppose this -memory is located at 0x6000_0000, which is in the middle of memc1-a. -The driver wants a dma_addr_t value that it can pass on to the EP to -use. Without doing any custom mapping, the EP will use this value for -DMA: the driver will get a dma_addr_t equal to 0x6000_0000. But this -won't work; the device needs a dma_addr_t that reflects the PCIe space -address, namely 0xa000_0000. - -So, essentially the solution to this problem must modify the -dma_addr_t returned by the DMA routines routines. There are two -ways (I know of) of doing this: - -(a) overriding/redefining the dma_to_phys() and phys_to_dma() calls -that are used by the dma_ops routines. This is the approach of - - arch/mips/cavium-octeon/dma-octeon.c - -In ARM and ARM64 these two routines are defiend in asm/dma-mapping.h -as static inline functions. - -(b) Subscribe to a notifier that notifies when a device is added to a -bus. When this happens, set_dma_ops() can be called for the device. -This method is mentioned in: - - http://lxr.free-electrons.com/source/drivers/of/platform.c?v=3.16#L152 - -where it says as a comment - - "In case if platform code need to use own special DMA - configuration, it can use Platform bus notifier and - handle BUS_NOTIFY_ADD_DEVICE event to fix up DMA - configuration." - -Solution (b) is what this commit does. It uses its own set of -dma_ops which are wrappers around the arch_dma_ops. The -wrappers translate the dma addresses before/after invoking -the arch_dma_ops, as appropriate. - -Signed-off-by: Jim Quinlan ---- - drivers/pci/controller/pcie-brcmstb.c | 420 +++++++++++++++++++++++++- - 1 file changed, 411 insertions(+), 9 deletions(-) - ---- a/drivers/pci/controller/pcie-brcmstb.c -+++ b/drivers/pci/controller/pcie-brcmstb.c -@@ -4,6 +4,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -319,11 +320,307 @@ static struct pci_ops brcm_pcie_ops = { - ((val & ~reg##_##field##_MASK) | \ - (reg##_##field##_MASK & (field_val << reg##_##field##_SHIFT))) - -+static const struct dma_map_ops *arch_dma_ops; -+static const struct dma_map_ops *brcm_dma_ops_ptr; -+static struct of_pci_range *dma_ranges; -+static int num_dma_ranges; -+ - static phys_addr_t scb_size[BRCM_MAX_SCB]; - static int num_memc; - static int num_pcie; - static DEFINE_MUTEX(brcm_pcie_lock); - -+static dma_addr_t brcm_to_pci(dma_addr_t addr) -+{ -+ struct of_pci_range *p; -+ -+ if (!num_dma_ranges) -+ return addr; -+ -+ for (p = dma_ranges; p < &dma_ranges[num_dma_ranges]; p++) -+ if (addr >= p->cpu_addr && addr < (p->cpu_addr + p->size)) -+ return addr - p->cpu_addr + p->pci_addr; -+ -+ return addr; -+} -+ -+static dma_addr_t brcm_to_cpu(dma_addr_t addr) -+{ -+ struct of_pci_range *p; -+ -+ if (!num_dma_ranges) -+ return addr; -+ -+ for (p = dma_ranges; p < &dma_ranges[num_dma_ranges]; p++) -+ if (addr >= p->pci_addr && addr < (p->pci_addr + p->size)) -+ return addr - p->pci_addr + p->cpu_addr; -+ -+ return addr; -+} -+ -+static void *brcm_alloc(struct device *dev, size_t size, dma_addr_t *handle, -+ gfp_t gfp, unsigned long attrs) -+{ -+ void *ret; -+ -+ ret = arch_dma_ops->alloc(dev, size, handle, gfp, attrs); -+ if (ret) -+ *handle = brcm_to_pci(*handle); -+ return ret; -+} -+ -+static void brcm_free(struct device *dev, size_t size, void *cpu_addr, -+ dma_addr_t handle, unsigned long attrs) -+{ -+ handle = brcm_to_cpu(handle); -+ arch_dma_ops->free(dev, size, cpu_addr, handle, attrs); -+} -+ -+static int brcm_mmap(struct device *dev, struct vm_area_struct *vma, -+ void *cpu_addr, dma_addr_t dma_addr, size_t size, -+ unsigned long attrs) -+{ -+ dma_addr = brcm_to_cpu(dma_addr); -+ return arch_dma_ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs); -+} -+ -+static int brcm_get_sgtable(struct device *dev, struct sg_table *sgt, -+ void *cpu_addr, dma_addr_t handle, size_t size, -+ unsigned long attrs) -+{ -+ handle = brcm_to_cpu(handle); -+ return arch_dma_ops->get_sgtable(dev, sgt, cpu_addr, handle, size, -+ attrs); -+} -+ -+static dma_addr_t brcm_map_page(struct device *dev, struct page *page, -+ unsigned long offset, size_t size, -+ enum dma_data_direction dir, -+ unsigned long attrs) -+{ -+ return brcm_to_pci(arch_dma_ops->map_page(dev, page, offset, size, -+ dir, attrs)); -+} -+ -+static void brcm_unmap_page(struct device *dev, dma_addr_t handle, -+ size_t size, enum dma_data_direction dir, -+ unsigned long attrs) -+{ -+ handle = brcm_to_cpu(handle); -+ arch_dma_ops->unmap_page(dev, handle, size, dir, attrs); -+} -+ -+static int brcm_map_sg(struct device *dev, struct scatterlist *sgl, -+ int nents, enum dma_data_direction dir, -+ unsigned long attrs) -+{ -+ int i, j; -+ struct scatterlist *sg; -+ -+ for_each_sg(sgl, sg, nents, i) { -+#ifdef CONFIG_NEED_SG_DMA_LENGTH -+ sg->dma_length = sg->length; -+#endif -+ sg->dma_address = -+ brcm_dma_ops_ptr->map_page(dev, sg_page(sg), sg->offset, -+ sg->length, dir, attrs); -+ if (dma_mapping_error(dev, sg->dma_address)) -+ goto bad_mapping; -+ } -+ return nents; -+ -+bad_mapping: -+ for_each_sg(sgl, sg, i, j) -+ brcm_dma_ops_ptr->unmap_page(dev, sg_dma_address(sg), -+ sg_dma_len(sg), dir, attrs); -+ return 0; -+} -+ -+static void brcm_unmap_sg(struct device *dev, -+ struct scatterlist *sgl, int nents, -+ enum dma_data_direction dir, -+ unsigned long attrs) -+{ -+ int i; -+ struct scatterlist *sg; -+ -+ for_each_sg(sgl, sg, nents, i) -+ brcm_dma_ops_ptr->unmap_page(dev, sg_dma_address(sg), -+ sg_dma_len(sg), dir, attrs); -+} -+ -+static void brcm_sync_single_for_cpu(struct device *dev, -+ dma_addr_t handle, size_t size, -+ enum dma_data_direction dir) -+{ -+ handle = brcm_to_cpu(handle); -+ arch_dma_ops->sync_single_for_cpu(dev, handle, size, dir); -+} -+ -+static void brcm_sync_single_for_device(struct device *dev, -+ dma_addr_t handle, size_t size, -+ enum dma_data_direction dir) -+{ -+ handle = brcm_to_cpu(handle); -+ arch_dma_ops->sync_single_for_device(dev, handle, size, dir); -+} -+ -+static dma_addr_t brcm_map_resource(struct device *dev, phys_addr_t phys, -+ size_t size, -+ enum dma_data_direction dir, -+ unsigned long attrs) -+{ -+ if (arch_dma_ops->map_resource) -+ return brcm_to_pci(arch_dma_ops->map_resource -+ (dev, phys, size, dir, attrs)); -+ return brcm_to_pci((dma_addr_t)phys); -+} -+ -+static void brcm_unmap_resource(struct device *dev, dma_addr_t handle, -+ size_t size, enum dma_data_direction dir, -+ unsigned long attrs) -+{ -+ if (arch_dma_ops->unmap_resource) -+ arch_dma_ops->unmap_resource(dev, brcm_to_cpu(handle), size, -+ dir, attrs); -+} -+ -+void brcm_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, -+ int nents, enum dma_data_direction dir) -+{ -+ struct scatterlist *sg; -+ int i; -+ -+ for_each_sg(sgl, sg, nents, i) -+ brcm_dma_ops_ptr->sync_single_for_cpu(dev, sg_dma_address(sg), -+ sg->length, dir); -+} -+ -+void brcm_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, -+ int nents, enum dma_data_direction dir) -+{ -+ struct scatterlist *sg; -+ int i; -+ -+ for_each_sg(sgl, sg, nents, i) -+ brcm_dma_ops_ptr->sync_single_for_device(dev, -+ sg_dma_address(sg), -+ sg->length, dir); -+} -+ -+static int brcm_mapping_error(struct device *dev, dma_addr_t dma_addr) -+{ -+ return arch_dma_ops->mapping_error(dev, dma_addr); -+} -+ -+static int brcm_dma_supported(struct device *dev, u64 mask) -+{ -+ if (num_dma_ranges) { -+ /* -+ * It is our translated addresses that the EP will "see", so -+ * we check all of the ranges for the largest possible value. -+ */ -+ int i; -+ -+ for (i = 0; i < num_dma_ranges; i++) -+ if (dma_ranges[i].pci_addr + dma_ranges[i].size - 1 -+ > mask) -+ return 0; -+ return 1; -+ } -+ -+ return arch_dma_ops->dma_supported(dev, mask); -+} -+ -+#ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK -+u64 brcm_get_required_mask)(struct device *dev) -+{ -+ return arch_dma_ops->get_required_mask(dev); -+} -+#endif -+ -+static const struct dma_map_ops brcm_dma_ops = { -+ .alloc = brcm_alloc, -+ .free = brcm_free, -+ .mmap = brcm_mmap, -+ .get_sgtable = brcm_get_sgtable, -+ .map_page = brcm_map_page, -+ .unmap_page = brcm_unmap_page, -+ .map_sg = brcm_map_sg, -+ .unmap_sg = brcm_unmap_sg, -+ .map_resource = brcm_map_resource, -+ .unmap_resource = brcm_unmap_resource, -+ .sync_single_for_cpu = brcm_sync_single_for_cpu, -+ .sync_single_for_device = brcm_sync_single_for_device, -+ .sync_sg_for_cpu = brcm_sync_sg_for_cpu, -+ .sync_sg_for_device = brcm_sync_sg_for_device, -+ .mapping_error = brcm_mapping_error, -+ .dma_supported = brcm_dma_supported, -+#ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK -+ .get_required_mask = brcm_get_required_mask, -+#endif -+}; -+ -+static void brcm_set_dma_ops(struct device *dev) -+{ -+ int ret; -+ -+ if (IS_ENABLED(CONFIG_ARM64)) { -+ /* -+ * We are going to invoke get_dma_ops(). That -+ * function, at this point in time, invokes -+ * get_arch_dma_ops(), and for ARM64 that function -+ * returns a pointer to dummy_dma_ops. So then we'd -+ * like to call arch_setup_dma_ops(), but that isn't -+ * exported. Instead, we call of_dma_configure(), -+ * which is exported, and this calls -+ * arch_setup_dma_ops(). Once we do this the call to -+ * get_dma_ops() will work properly because -+ * dev->dma_ops will be set. -+ */ -+ ret = of_dma_configure(dev, dev->of_node, true); -+ if (ret) { -+ dev_err(dev, "of_dma_configure() failed: %d\n", ret); -+ return; -+ } -+ } -+ -+ arch_dma_ops = get_dma_ops(dev); -+ if (!arch_dma_ops) { -+ dev_err(dev, "failed to get arch_dma_ops\n"); -+ return; -+ } -+ -+ set_dma_ops(dev, &brcm_dma_ops); -+} -+ -+static int brcmstb_platform_notifier(struct notifier_block *nb, -+ unsigned long event, void *__dev) -+{ -+ struct device *dev = __dev; -+ -+ brcm_dma_ops_ptr = &brcm_dma_ops; -+ if (event != BUS_NOTIFY_ADD_DEVICE) -+ return NOTIFY_DONE; -+ -+ brcm_set_dma_ops(dev); -+ return NOTIFY_OK; -+} -+ -+static struct notifier_block brcmstb_platform_nb = { -+ .notifier_call = brcmstb_platform_notifier, -+}; -+ -+static int brcm_register_notifier(void) -+{ -+ return bus_register_notifier(&pci_bus_type, &brcmstb_platform_nb); -+} -+ -+static int brcm_unregister_notifier(void) -+{ -+ return bus_unregister_notifier(&pci_bus_type, &brcmstb_platform_nb); -+} -+ - static u32 rd_fld(void __iomem *p, u32 mask, int shift) - { - return (bcm_readl(p) & mask) >> shift; -@@ -597,9 +894,71 @@ static inline void brcm_pcie_perst_set(s - WR_FLD_RB(pcie->base, PCIE_MISC_PCIE_CTRL, PCIE_PERSTB, !val); - } - -+static int pci_dma_range_parser_init(struct of_pci_range_parser *parser, -+ struct device_node *node) -+{ -+ const int na = 3, ns = 2; -+ int rlen; -+ -+ parser->node = node; -+ parser->pna = of_n_addr_cells(node); -+ parser->np = parser->pna + na + ns; -+ -+ parser->range = of_get_property(node, "dma-ranges", &rlen); -+ if (!parser->range) -+ return -ENOENT; -+ -+ parser->end = parser->range + rlen / sizeof(__be32); -+ -+ return 0; -+} -+ -+static int brcm_pcie_parse_map_dma_ranges(struct brcm_pcie *pcie) -+{ -+ int i; -+ struct of_pci_range_parser parser; -+ struct device_node *dn = pcie->dn; -+ -+ /* -+ * Parse dma-ranges property if present. If there are multiple -+ * PCIe controllers, we only have to parse from one of them since -+ * the others will have an identical mapping. -+ */ -+ if (!pci_dma_range_parser_init(&parser, dn)) { -+ unsigned int max_ranges -+ = (parser.end - parser.range) / parser.np; -+ -+ dma_ranges = kcalloc(max_ranges, sizeof(struct of_pci_range), -+ GFP_KERNEL); -+ if (!dma_ranges) -+ return -ENOMEM; -+ -+ for (i = 0; of_pci_range_parser_one(&parser, dma_ranges + i); -+ i++) -+ num_dma_ranges++; -+ } -+ -+ for (i = 0, num_memc = 0; i < BRCM_MAX_SCB; i++) { -+ u64 size = brcmstb_memory_memc_size(i); -+ -+ if (size == (u64)-1) { -+ dev_err(pcie->dev, "cannot get memc%d size", i); -+ return -EINVAL; -+ } else if (size) { -+ scb_size[i] = roundup_pow_of_two_64(size); -+ num_memc++; -+ } else { -+ break; -+ } -+ } -+ -+ return 0; -+} -+ - static int brcm_pcie_add_controller(struct brcm_pcie *pcie) - { - int i, ret = 0; -+ struct device *dev = pcie->dev; - - mutex_lock(&brcm_pcie_lock); - if (num_pcie > 0) { -@@ -607,12 +966,21 @@ static int brcm_pcie_add_controller(stru - goto done; - } - -+ ret = brcm_register_notifier(); -+ if (ret) { -+ dev_err(dev, "failed to register pci bus notifier\n"); -+ goto done; -+ } -+ ret = brcm_pcie_parse_map_dma_ranges(pcie); -+ if (ret) -+ goto done; -+ - /* Determine num_memc and their sizes */ - for (i = 0, num_memc = 0; i < BRCM_MAX_SCB; i++) { - u64 size = brcmstb_memory_memc_size(i); - - if (size == (u64)-1) { -- dev_err(pcie->dev, "cannot get memc%d size\n", i); -+ dev_err(dev, "cannot get memc%d size\n", i); - ret = -EINVAL; - goto done; - } else if (size) { -@@ -636,8 +1004,16 @@ done: - static void brcm_pcie_remove_controller(struct brcm_pcie *pcie) - { - mutex_lock(&brcm_pcie_lock); -- if (--num_pcie == 0) -- num_memc = 0; -+ if (--num_pcie > 0) -+ goto out; -+ -+ if (brcm_unregister_notifier()) -+ dev_err(pcie->dev, "failed to unregister pci bus notifier\n"); -+ kfree(dma_ranges); -+ dma_ranges = NULL; -+ num_dma_ranges = 0; -+ num_memc = 0; -+out: - mutex_unlock(&brcm_pcie_lock); - } - -@@ -757,6 +1133,38 @@ static int brcm_pcie_setup(struct brcm_p - */ - rc_bar2_offset = 0; - -+ if (dma_ranges) { -+ /* -+ * The best-case scenario is to place the inbound -+ * region in the first 4GB of pci-space, as some -+ * legacy devices can only address 32bits. -+ * We would also like to put the MSI under 4GB -+ * as well, since some devices require a 32bit -+ * MSI target address. -+ */ -+ if (total_mem_size <= 0xc0000000ULL && -+ rc_bar2_size <= 0x100000000ULL) { -+ rc_bar2_offset = 0; -+ } else { -+ /* -+ * The system memory is 4GB or larger so we -+ * cannot start the inbound region at location -+ * 0 (since we have to allow some space for -+ * outbound memory @ 3GB). So instead we -+ * start it at the 1x multiple of its size -+ */ -+ rc_bar2_offset = rc_bar2_size; -+ } -+ -+ } else { -+ /* -+ * Set simple configuration based on memory sizes -+ * only. We always start the viewport at address 0, -+ * and set the MSI target address accordingly. -+ */ -+ rc_bar2_offset = 0; -+ } -+ - tmp = lower_32_bits(rc_bar2_offset); - tmp = INSERT_FIELD(tmp, PCIE_MISC_RC_BAR2_CONFIG_LO, SIZE, - encode_ibar_size(rc_bar2_size)); -@@ -967,7 +1375,6 @@ static int brcm_pcie_probe(struct platfo - struct brcm_pcie *pcie; - struct resource *res; - void __iomem *base; -- u32 tmp; - struct pci_host_bridge *bridge; - struct pci_bus *child; - -@@ -984,11 +1391,6 @@ static int brcm_pcie_probe(struct platfo - return -EINVAL; - } - -- if (of_property_read_u32(dn, "dma-ranges", &tmp) == 0) { -- dev_err(&pdev->dev, "cannot yet handle dma-ranges\n"); -- return -EINVAL; -- } -- - data = of_id->data; - pcie->reg_offsets = data->offsets; - pcie->reg_field_info = data->reg_field_info;