summary refs log tree commit diff stats
path: root/utils/bash/patches/007-bash-5.3-patch-8.patch
blob: ea49e910deea0232bdb5eaccd647da27d2cddb2d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
From 11ff9325c1497c5ef1de7cf6fde84e1094eae178 Mon Sep 17 00:00:00 2001
From: Chet Ramey <chet.ramey@case.edu>
Date: Wed, 26 Nov 2025 12:52:29 -0500
Subject: Bash-5.3 patch 8: fix issue with calling mbrtowc too much when
 translating ansic-single-quoted strings

--- a/lib/sh/strtrans.c
+++ b/lib/sh/strtrans.c
@@ -55,7 +55,7 @@ ansicstr (const char *string, size_t len
   const char *s;
   unsigned long v;
   size_t clen;
-  int mb_cur_max;
+  size_t mb_cur_max;
 #if defined (HANDLE_MULTIBYTE)
   wchar_t wc;
 #endif
@@ -63,7 +63,7 @@ ansicstr (const char *string, size_t len
   if (string == 0 || *string == '\0')
     return ((char *)0);
 
-  mb_cur_max = MB_CUR_MAX;
+  mb_cur_max = locale_mb_cur_max;
 #if defined (HANDLE_MULTIBYTE)
   temp = 4*len + 4;
   if (temp < 12)
@@ -79,10 +79,14 @@ ansicstr (const char *string, size_t len
 	{
 	  clen = 1;
 #if defined (HANDLE_MULTIBYTE)
-	  if ((locale_utf8locale && (c & 0x80)) ||
-	      (locale_utf8locale == 0 && mb_cur_max > 0 && is_basic (c) == 0))
+	  /* We read an entire multibyte character at a time if we are in a
+	     locale where a backslash can possibly appear as part of a
+	     multibyte character. UTF-8 encodings prohibit this. */
+	  if (locale_utf8locale == 0 && mb_cur_max > 1 && is_basic (c) == 0)
 	    {
 	      clen = mbrtowc (&wc, s - 1, mb_cur_max, 0);
+	      if (MB_NULLWCH (clen))
+		break;			/* it apparently can happen */
 	      if (MB_INVALIDCH (clen))
 		clen = 1;
 	    }
@@ -227,30 +231,24 @@ ansic_quote (const char *str, int flags,
 {
   char *r, *ret;
   const char  *s;
-  size_t l, rsize;
   unsigned char c;
+#if defined (HANDLE_MULTIBYTE)
   size_t clen;
   int b;
-#if defined (HANDLE_MULTIBYTE)
   wchar_t wc;
+  DECLARE_MBSTATE;
 #endif
 
   if (str == 0 || *str == 0)
     return ((char *)0);
 
-  l = strlen (str);
-  rsize = 4 * l + 4;
-  r = ret = (char *)xmalloc (rsize);
+  r = ret = (char *)xmalloc (4 * strlen (str) + 4);
 
   *r++ = '$';
   *r++ = '\'';
 
   for (s = str; c = *s; s++)
     {
-      b = 1;		/* 1 == add backslash; 0 == no backslash */
-      l = 1;
-      clen = 1;
-
       switch (c)
 	{
 	case ESC: c = 'E'; break;
@@ -266,39 +264,42 @@ ansic_quote (const char *str, int flags,
 	  break;
 	default:
 #if defined (HANDLE_MULTIBYTE)
-	  b = is_basic (c);
-	  /* XXX - clen comparison to 0 is dicey */
-	  if ((b == 0 && ((clen = mbrtowc (&wc, s, MB_CUR_MAX, 0)) < 0 || MB_INVALIDCH (clen) || iswprint (wc) == 0)) ||
-	      (b == 1 && ISPRINT (c) == 0))
-#else
-	  if (ISPRINT (c) == 0)
-#endif
+	  if ((locale_utf8locale && (c & 0x80)) ||
+	      (locale_utf8locale == 0 && locale_mb_cur_max > 1 && is_basic (c) == 0))
 	    {
-	      *r++ = '\\';
-	      *r++ = TOCHAR ((c >> 6) & 07);
-	      *r++ = TOCHAR ((c >> 3) & 07);
-	      *r++ = TOCHAR (c & 07);
-	      continue;
+	      clen = mbrtowc (&wc, s, locale_mb_cur_max, &state);
+	      if (MB_NULLWCH (clen))
+		goto quote_end;
+	      if (MB_INVALIDCH (clen))
+		INITIALIZE_MBSTATE;
+	      else if (iswprint (wc))
+		{
+		  for (b = 0; b < (int)clen; b++)
+		    *r++ = (unsigned char)s[b];
+		  s += clen - 1;	/* -1 because of the increment above */
+		  continue;
+		}
 	    }
-	  l = 0;
-	  break;
+	  else
+#endif
+	    if (ISPRINT (c))
+	      {
+		*r++ = c;
+		continue;
+	      }
+
+	  *r++ = '\\';
+	  *r++ = TOCHAR ((c >> 6) & 07);
+	  *r++ = TOCHAR ((c >> 3) & 07);
+	  *r++ = TOCHAR (c & 07);
+	  continue;
 	}
-      if (b == 0 && clen == 0)
-	break;
 
-      if (l)
-	*r++ = '\\';
-
-      if (clen == 1)
-	*r++ = c;
-      else
-	{
-	  for (b = 0; b < (int)clen; b++)
-	    *r++ = (unsigned char)s[b];
-	  s += clen - 1;	/* -1 because of the increment above */
-	}
+      *r++ = '\\';
+      *r++ = c;
     }
 
+quote_end:
   *r++ = '\'';
   *r = '\0';
   if (rlen)
@@ -348,7 +349,8 @@ ansic_shouldquote (const char *string)
   for (s = string; c = *s; s++)
     {
 #if defined (HANDLE_MULTIBYTE)
-      if (is_basic (c) == 0)
+      if ((locale_utf8locale && (c & 0x80)) ||
+	  (locale_utf8locale == 0 && locale_mb_cur_max > 1 && is_basic (c) == 0))
 	return (ansic_wshouldquote (s));
 #endif
       if (ISPRINT (c) == 0)
--- a/patchlevel.h
+++ b/patchlevel.h
@@ -25,6 +25,6 @@
    regexp `^#define[ 	]*PATCHLEVEL', since that's what support/mkversion.sh
    looks for to find the patch level (for the sccs version string). */
 
-#define PATCHLEVEL 7
+#define PATCHLEVEL 8
 
 #endif /* _PATCHLEVEL_H_ */