1 From 1cff637824f8e5626a754cbbb97bc8f3730a88f0 Mon Sep 17 00:00:00 2001
2 From: =?UTF-8?q?Noralf=20Tr=C3=B8nnes?= <noralf@tronnes.org>
3 Date: Thu, 1 Jul 2021 19:07:48 +0200
4 Subject: [PATCH] drm/gud: Use scatter-gather USB bulk transfer
6 Content-Type: text/plain; charset=UTF-8
7 Content-Transfer-Encoding: 8bit
9 [ drm-misc commit 2eecd93b743b5611cd3654698794b4d0cefdc9ee ]
11 There's a limit to how big a kmalloc buffer can be, and as memory gets
12 fragmented it becomes more difficult to get big buffers. The downside of
13 smaller buffers is that the driver has to split the transfer up which
14 hampers performance. Compression might also take a hit because of the
17 Solve this by allocating the transfer buffer using vmalloc and create a
18 SG table to be passed on to the USB subsystem. vmalloc_32() is used to
19 avoid DMA bounce buffers on USB controllers that can only access 32-bit
22 This also solves the problem that split transfers can give host side
23 tearing since flushing is decoupled from rendering.
25 usb_sg_wait() doesn't have timeout handling built in, so it is wrapped in
26 a timer like 4 out of 6 users in the kernel have done.
29 - Use DIV_ROUND_UP (Linus)
30 - Add timeout note to the commit log (Linus)
31 - Expand note about upper buffer limit (Linus)
32 - Change var name s/timer/ctx/ in gud_usb_bulk_timeout()
34 Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
35 Signed-off-by: Noralf Trønnes <noralf@tronnes.org>
36 Link: https://patchwork.freedesktop.org/patch/msgid/20210701170748.58009-2-noralf@tronnes.org
38 drivers/gpu/drm/gud/gud_drv.c | 50 +++++++++++++++++++++---------
39 drivers/gpu/drm/gud/gud_internal.h | 2 ++
40 drivers/gpu/drm/gud/gud_pipe.c | 47 ++++++++++++++++++++++++----
41 3 files changed, 78 insertions(+), 21 deletions(-)
43 --- a/drivers/gpu/drm/gud/gud_drv.c
44 +++ b/drivers/gpu/drm/gud/gud_drv.c
45 @@ -407,13 +407,40 @@ static struct drm_driver gud_drm_driver
49 +static int gud_alloc_bulk_buffer(struct gud_device *gdrm)
51 + unsigned int i, num_pages;
52 + struct page **pages;
56 + gdrm->bulk_buf = vmalloc_32(gdrm->bulk_len);
57 + if (!gdrm->bulk_buf)
60 + num_pages = DIV_ROUND_UP(gdrm->bulk_len, PAGE_SIZE);
61 + pages = kmalloc_array(num_pages, sizeof(struct page *), GFP_KERNEL);
65 + for (i = 0, ptr = gdrm->bulk_buf; i < num_pages; i++, ptr += PAGE_SIZE)
66 + pages[i] = vmalloc_to_page(ptr);
68 + ret = sg_alloc_table_from_pages(&gdrm->bulk_sgt, pages, num_pages,
69 + 0, gdrm->bulk_len, GFP_KERNEL);
75 static void gud_free_buffers_and_mutex(void *data)
77 struct gud_device *gdrm = data;
79 vfree(gdrm->compress_buf);
80 gdrm->compress_buf = NULL;
81 - kfree(gdrm->bulk_buf);
82 + sg_free_table(&gdrm->bulk_sgt);
83 + vfree(gdrm->bulk_buf);
84 gdrm->bulk_buf = NULL;
85 mutex_destroy(&gdrm->ctrl_lock);
87 @@ -550,24 +577,17 @@ static int gud_probe(struct usb_interfac
89 if (desc.max_buffer_size)
90 max_buffer_size = le32_to_cpu(desc.max_buffer_size);
93 - * Use plain kmalloc here since devm_kmalloc() places struct devres at the beginning
94 - * of the buffer it allocates. This wastes a lot of memory when allocating big buffers.
95 - * Asking for 2M would actually allocate 4M. This would also prevent getting the biggest
96 - * possible buffer potentially leading to split transfers.
98 - gdrm->bulk_buf = kmalloc(max_buffer_size, GFP_KERNEL | __GFP_NOWARN);
99 - if (!gdrm->bulk_buf) {
100 - max_buffer_size = roundup_pow_of_two(max_buffer_size) / 2;
101 - if (max_buffer_size < SZ_512K)
105 + /* Prevent a misbehaving device from allocating loads of RAM. 4096x4096@XRGB8888 = 64 MB */
106 + if (max_buffer_size > SZ_64M)
107 + max_buffer_size = SZ_64M;
109 gdrm->bulk_pipe = usb_sndbulkpipe(interface_to_usbdev(intf), usb_endpoint_num(bulk_out));
110 gdrm->bulk_len = max_buffer_size;
112 + ret = gud_alloc_bulk_buffer(gdrm);
116 if (gdrm->compression & GUD_COMPRESSION_LZ4) {
117 gdrm->lz4_comp_mem = devm_kmalloc(dev, LZ4_MEM_COMPRESS, GFP_KERNEL);
118 if (!gdrm->lz4_comp_mem)
119 --- a/drivers/gpu/drm/gud/gud_internal.h
120 +++ b/drivers/gpu/drm/gud/gud_internal.h
123 #include <linux/list.h>
124 #include <linux/mutex.h>
125 +#include <linux/scatterlist.h>
126 #include <linux/usb.h>
127 #include <linux/workqueue.h>
128 #include <uapi/drm/drm_fourcc.h>
129 @@ -26,6 +27,7 @@ struct gud_device {
130 unsigned int bulk_pipe;
133 + struct sg_table bulk_sgt;
137 --- a/drivers/gpu/drm/gud/gud_pipe.c
138 +++ b/drivers/gpu/drm/gud/gud_pipe.c
139 @@ -219,13 +219,51 @@ vunmap:
143 +struct gud_usb_bulk_context {
144 + struct timer_list timer;
145 + struct usb_sg_request sgr;
148 +static void gud_usb_bulk_timeout(struct timer_list *t)
150 + struct gud_usb_bulk_context *ctx = from_timer(ctx, t, timer);
152 + usb_sg_cancel(&ctx->sgr);
155 +static int gud_usb_bulk(struct gud_device *gdrm, size_t len)
157 + struct gud_usb_bulk_context ctx;
160 + ret = usb_sg_init(&ctx.sgr, gud_to_usb_device(gdrm), gdrm->bulk_pipe, 0,
161 + gdrm->bulk_sgt.sgl, gdrm->bulk_sgt.nents, len, GFP_KERNEL);
165 + timer_setup_on_stack(&ctx.timer, gud_usb_bulk_timeout, 0);
166 + mod_timer(&ctx.timer, jiffies + msecs_to_jiffies(3000));
168 + usb_sg_wait(&ctx.sgr);
170 + if (!del_timer_sync(&ctx.timer))
172 + else if (ctx.sgr.status < 0)
173 + ret = ctx.sgr.status;
174 + else if (ctx.sgr.bytes != len)
177 + destroy_timer_on_stack(&ctx.timer);
182 static int gud_flush_rect(struct gud_device *gdrm, struct drm_framebuffer *fb,
183 const struct drm_format_info *format, struct drm_rect *rect)
185 - struct usb_device *usb = gud_to_usb_device(gdrm);
186 struct gud_set_buffer_req req;
187 - int ret, actual_length;
191 drm_dbg(&gdrm->drm, "Flushing [FB:%d] " DRM_RECT_FMT "\n", fb->base.id, DRM_RECT_ARG(rect));
193 @@ -254,10 +292,7 @@ static int gud_flush_rect(struct gud_dev
197 - ret = usb_bulk_msg(usb, gdrm->bulk_pipe, gdrm->bulk_buf, trlen,
198 - &actual_length, msecs_to_jiffies(3000));
199 - if (!ret && trlen != actual_length)
201 + ret = gud_usb_bulk(gdrm, trlen);
203 gdrm->stats_num_errors++;