X-Git-Url: http://git.openwrt.org/?a=blobdiff_plain;f=target%2Flinux%2Fbrcm2708%2Fpatches-4.19%2F950-0042-Speed-up-console-framebuffer-imageblit-function.patch;fp=target%2Flinux%2Fbrcm2708%2Fpatches-4.19%2F950-0042-Speed-up-console-framebuffer-imageblit-function.patch;h=0000000000000000000000000000000000000000;hb=c2308a7e4adbb2acc8ff149f91d1ca46801c135e;hp=17d6137728d1275ffb287433633da449850370af;hpb=67dcc43f3a22dc3a7ac07a7065971b426feeb043;p=openwrt%2Fstaging%2Fchunkeey.git

diff --git a/target/linux/brcm2708/patches-4.19/950-0042-Speed-up-console-framebuffer-imageblit-function.patch b/target/linux/brcm2708/patches-4.19/950-0042-Speed-up-console-framebuffer-imageblit-function.patch
deleted file mode 100644
index 17d6137728..0000000000
--- a/target/linux/brcm2708/patches-4.19/950-0042-Speed-up-console-framebuffer-imageblit-function.patch
+++ /dev/null
@@ -1,209 +0,0 @@
-From f4489532d7a73ded68e1b8a815a71b0fe25e9e21 Mon Sep 17 00:00:00 2001
-From: Harm Hanemaaijer <fgenfb@yahoo.com>
-Date: Thu, 20 Jun 2013 20:21:39 +0200
-Subject: [PATCH] Speed up console framebuffer imageblit function
-
-Especially on platforms with a slower CPU but a relatively high
-framebuffer fill bandwidth, like current ARM devices, the existing
-console monochrome imageblit function used to draw console text is
-suboptimal for common pixel depths such as 16bpp and 32bpp. The existing
-code is quite general and can deal with several pixel depths. By creating
-special case functions for 16bpp and 32bpp, by far the most common pixel
-formats used on modern systems, a significant speed-up is attained
-which can be readily felt on ARM-based devices like the Raspberry Pi
-and the Allwinner platform, but should help any platform using the
-fb layer.
-
-The special case functions allow constant folding, eliminating a number
-of instructions including divide operations, and allow the use of an
-unrolled loop, eliminating instructions with a variable shift size,
-reducing source memory access instructions, and eliminating excessive
-branching. These unrolled loops also allow much better code optimization
-by the C compiler. The code that selects which optimized variant is used
-is also simplified, eliminating integer divide instructions.
-
-The speed-up, measured by timing 'cat file.txt' in the console, varies
-between 40% and 70%, when testing on the Raspberry Pi and Allwinner
-ARM-based platforms, depending on font size and the pixel depth, with
-the greater benefit for 32bpp.
-
-Signed-off-by: Harm Hanemaaijer <fgenfb@yahoo.com>
----
- drivers/video/fbdev/core/cfbimgblt.c | 152 ++++++++++++++++++++++++++-
- 1 file changed, 147 insertions(+), 5 deletions(-)
-
---- a/drivers/video/fbdev/core/cfbimgblt.c
-+++ b/drivers/video/fbdev/core/cfbimgblt.c
-@@ -28,6 +28,11 @@
-  *
-  *  Also need to add code to deal with cards endians that are different than
-  *  the native cpu endians. I also need to deal with MSB position in the word.
-+ *  Modified by Harm Hanemaaijer (fgenfb@yahoo.com) 2013:
-+ *  - Provide optimized versions of fast_imageblit for 16 and 32bpp that are
-+ *    significantly faster than the previous implementation.
-+ *  - Simplify the fast/slow_imageblit selection code, avoiding integer
-+ *    divides.
-  */
- #include <linux/module.h>
- #include <linux/string.h>
-@@ -262,6 +267,133 @@ static inline void fast_imageblit(const
- 	}
- }	
- 	
-+/*
-+ * Optimized fast_imageblit for bpp == 16. ppw = 2, bit_mask = 3 folded
-+ * into the code, main loop unrolled.
-+ */
-+
-+static inline void fast_imageblit16(const struct fb_image *image,
-+				    struct fb_info *p, u8 __iomem * dst1,
-+				    u32 fgcolor, u32 bgcolor)
-+{
-+	u32 fgx = fgcolor, bgx = bgcolor;
-+	u32 spitch = (image->width + 7) / 8;
-+	u32 end_mask, eorx;
-+	const char *s = image->data, *src;
-+	u32 __iomem *dst;
-+	const u32 *tab = NULL;
-+	int i, j, k;
-+
-+	tab = fb_be_math(p) ? cfb_tab16_be : cfb_tab16_le;
-+
-+	fgx <<= 16;
-+	bgx <<= 16;
-+	fgx |= fgcolor;
-+	bgx |= bgcolor;
-+
-+	eorx = fgx ^ bgx;
-+	k = image->width / 2;
-+
-+	for (i = image->height; i--;) {
-+		dst = (u32 __iomem *) dst1;
-+		src = s;
-+
-+		j = k;
-+		while (j >= 4) {
-+			u8 bits = *src;
-+			end_mask = tab[(bits >> 6) & 3];
-+			FB_WRITEL((end_mask & eorx) ^ bgx, dst++);
-+			end_mask = tab[(bits >> 4) & 3];
-+			FB_WRITEL((end_mask & eorx) ^ bgx, dst++);
-+			end_mask = tab[(bits >> 2) & 3];
-+			FB_WRITEL((end_mask & eorx) ^ bgx, dst++);
-+			end_mask = tab[bits & 3];
-+			FB_WRITEL((end_mask & eorx) ^ bgx, dst++);
-+			src++;
-+			j -= 4;
-+		}
-+		if (j != 0) {
-+			u8 bits = *src;
-+			end_mask = tab[(bits >> 6) & 3];
-+			FB_WRITEL((end_mask & eorx) ^ bgx, dst++);
-+			if (j >= 2) {
-+				end_mask = tab[(bits >> 4) & 3];
-+				FB_WRITEL((end_mask & eorx) ^ bgx, dst++);
-+				if (j == 3) {
-+					end_mask = tab[(bits >> 2) & 3];
-+					FB_WRITEL((end_mask & eorx) ^ bgx, dst);
-+				}
-+			}
-+		}
-+		dst1 += p->fix.line_length;
-+		s += spitch;
-+	}
-+}
-+
-+/*
-+ * Optimized fast_imageblit for bpp == 32. ppw = 1, bit_mask = 1 folded
-+ * into the code, main loop unrolled.
-+ */
-+
-+static inline void fast_imageblit32(const struct fb_image *image,
-+				    struct fb_info *p, u8 __iomem * dst1,
-+				    u32 fgcolor, u32 bgcolor)
-+{
-+	u32 fgx = fgcolor, bgx = bgcolor;
-+	u32 spitch = (image->width + 7) / 8;
-+	u32 end_mask, eorx;
-+	const char *s = image->data, *src;
-+	u32 __iomem *dst;
-+	const u32 *tab = NULL;
-+	int i, j, k;
-+
-+	tab = cfb_tab32;
-+
-+	eorx = fgx ^ bgx;
-+	k = image->width;
-+
-+	for (i = image->height; i--;) {
-+		dst = (u32 __iomem *) dst1;
-+		src = s;
-+
-+		j = k;
-+		while (j >= 8) {
-+			u8 bits = *src;
-+			end_mask = tab[(bits >> 7) & 1];
-+			FB_WRITEL((end_mask & eorx) ^ bgx, dst++);
-+			end_mask = tab[(bits >> 6) & 1];
-+			FB_WRITEL((end_mask & eorx) ^ bgx, dst++);
-+			end_mask = tab[(bits >> 5) & 1];
-+			FB_WRITEL((end_mask & eorx) ^ bgx, dst++);
-+			end_mask = tab[(bits >> 4) & 1];
-+			FB_WRITEL((end_mask & eorx) ^ bgx, dst++);
-+			end_mask = tab[(bits >> 3) & 1];
-+			FB_WRITEL((end_mask & eorx) ^ bgx, dst++);
-+			end_mask = tab[(bits >> 2) & 1];
-+			FB_WRITEL((end_mask & eorx) ^ bgx, dst++);
-+			end_mask = tab[(bits >> 1) & 1];
-+			FB_WRITEL((end_mask & eorx) ^ bgx, dst++);
-+			end_mask = tab[bits & 1];
-+			FB_WRITEL((end_mask & eorx) ^ bgx, dst++);
-+			src++;
-+			j -= 8;
-+		}
-+		if (j != 0) {
-+			u32 bits = (u32) * src;
-+			while (j > 1) {
-+				end_mask = tab[(bits >> 7) & 1];
-+				FB_WRITEL((end_mask & eorx) ^ bgx, dst++);
-+				bits <<= 1;
-+				j--;
-+			}
-+			end_mask = tab[(bits >> 7) & 1];
-+			FB_WRITEL((end_mask & eorx) ^ bgx, dst);
-+		}
-+		dst1 += p->fix.line_length;
-+		s += spitch;
-+	}
-+}
-+
- void cfb_imageblit(struct fb_info *p, const struct fb_image *image)
- {
- 	u32 fgcolor, bgcolor, start_index, bitstart, pitch_index = 0;
-@@ -294,11 +426,21 @@ void cfb_imageblit(struct fb_info *p, co
- 			bgcolor = image->bg_color;
- 		}	
- 		
--		if (32 % bpp == 0 && !start_index && !pitch_index && 
--		    ((width & (32/bpp-1)) == 0) &&
--		    bpp >= 8 && bpp <= 32) 			
--			fast_imageblit(image, p, dst1, fgcolor, bgcolor);
--		else 
-+		if (!start_index && !pitch_index) {
-+			if (bpp == 32)
-+				fast_imageblit32(image, p, dst1, fgcolor,
-+						 bgcolor);
-+			else if (bpp == 16 && (width & 1) == 0)
-+				fast_imageblit16(image, p, dst1, fgcolor,
-+						 bgcolor);
-+			else if (bpp == 8 && (width & 3) == 0)
-+				fast_imageblit(image, p, dst1, fgcolor,
-+					       bgcolor);
-+			else
-+				slow_imageblit(image, p, dst1, fgcolor,
-+					       bgcolor,
-+					       start_index, pitch_index);
-+		} else
- 			slow_imageblit(image, p, dst1, fgcolor, bgcolor,
- 					start_index, pitch_index);
- 	} else