1 From b7ecff22e77887626fd8e8608c4dd73bc7b7366f Mon Sep 17 00:00:00 2001
2 From: George Joseph <gjoseph@sangoma.com>
3 Date: Tue, 18 Jan 2022 06:14:31 -0700
4 Subject: [PATCH] Additional multipart improvements
6 Added the following APIs:
7 pjsip_multipart_find_part_by_header()
8 pjsip_multipart_find_part_by_header_str()
9 pjsip_multipart_find_part_by_cid_str()
10 pjsip_multipart_find_part_by_cid_uri()
12 pjsip/include/pjsip/sip_multipart.h | 83 ++++++++++
13 pjsip/src/pjsip/sip_multipart.c | 223 +++++++++++++++++++++++++++
14 pjsip/src/test/multipart_test.c | 225 +++++++++++++++++++++++++++-
15 3 files changed, 530 insertions(+), 1 deletion(-)
17 --- a/pjsip/include/pjsip/sip_multipart.h
18 +++ b/pjsip/include/pjsip/sip_multipart.h
19 @@ -154,6 +154,89 @@ pjsip_multipart_find_part( const pjsip_m
20 const pjsip_multipart_part *start);
23 + * Find a body inside multipart bodies which has a header matching the
24 + * supplied one. Most useful for finding a part with a specific Content-ID.
26 + * @param pool Memory pool to use for temp space.
27 + * @param mp The multipart body.
28 + * @param search_hdr Header to search for.
29 + * @param start If specified, the search will begin at
30 + * start->next part. Otherwise it will begin at
31 + * the first part in the multipart bodies.
33 + * @return The first part which has a header matching the
34 + * specified one, or NULL if not found.
36 +PJ_DECL(pjsip_multipart_part*)
37 +pjsip_multipart_find_part_by_header(pj_pool_t *pool,
38 + const pjsip_msg_body *mp,
40 + const pjsip_multipart_part *start);
43 + * Find a body inside multipart bodies which has a header matching the
44 + * supplied name and value. Most useful for finding a part with a specific
47 + * @param pool Memory pool to use for temp space.
48 + * @param mp The multipart body.
49 + * @param hdr_name Header name to search for.
50 + * @param hdr_value Header value to search for.
51 + * @param start If specified, the search will begin at
52 + * start->next part. Otherwise it will begin at
53 + * the first part in the multipart bodies.
55 + * @return The first part which has a header matching the
56 + * specified one, or NULL if not found.
58 +PJ_DECL(pjsip_multipart_part*)
59 +pjsip_multipart_find_part_by_header_str(pj_pool_t *pool,
60 + const pjsip_msg_body *mp,
61 + const pj_str_t *hdr_name,
62 + const pj_str_t *hdr_value,
63 + const pjsip_multipart_part *start);
68 + * Find a body inside multipart bodies which has a Content-ID value matching the
69 + * supplied "cid" URI in pj_str form. The "cid:" scheme will be assumed if the
70 + * URL doesn't start with it. Enclosing angle brackets will also be handled
71 + * correctly if they exist.
73 + * @see RFC2392 Content-ID and Message-ID Uniform Resource Locators
75 + * @param pool Memory pool to use for temp space.
76 + * @param mp The multipart body.
77 + * @param cid The "cid" URI to search for in pj_str form.
79 + * @return The first part which has a Content-ID header matching the
80 + * specified "cid" URI, or NULL if not found.
82 +PJ_DECL(pjsip_multipart_part*)
83 +pjsip_multipart_find_part_by_cid_str(pj_pool_t *pool,
84 + const pjsip_msg_body *mp,
88 + * Find a body inside multipart bodies which has a Content-ID value matching the
89 + * supplied "cid" URI.
91 + * @see RFC2392 Content-ID and Message-ID Uniform Resource Locators
93 + * @param pool Memory pool to use for temp space.
94 + * @param mp The multipart body.
95 + * @param cid The "cid" URI to search for.
97 + * @return The first part which has a Content-ID header matching the
98 + * specified "cid" URI, or NULL if not found.
100 +PJ_DECL(pjsip_multipart_part*)
101 +pjsip_multipart_find_part_by_cid_uri(pj_pool_t *pool,
102 + const pjsip_msg_body *mp,
103 + pjsip_other_uri *cid_uri);
106 * Parse multipart message.
108 * @param pool Memory pool.
109 --- a/pjsip/src/pjsip/sip_multipart.c
110 +++ b/pjsip/src/pjsip/sip_multipart.c
112 #include <pjsip/sip_multipart.h>
113 #include <pjsip/sip_parser.h>
114 #include <pjlib-util/scanner.h>
115 +#include <pjlib-util/string.h>
116 #include <pj/assert.h>
117 #include <pj/ctype.h>
118 #include <pj/errno.h>
119 @@ -416,6 +417,220 @@ pjsip_multipart_find_part( const pjsip_m
124 + * Find a body inside multipart bodies which has the header and value.
126 +PJ_DEF(pjsip_multipart_part*)
127 +pjsip_multipart_find_part_by_header_str(pj_pool_t *pool,
128 + const pjsip_msg_body *mp,
129 + const pj_str_t *hdr_name,
130 + const pj_str_t *hdr_value,
131 + const pjsip_multipart_part *start)
133 + struct multipart_data *m_data;
134 + pjsip_multipart_part *part;
135 + pjsip_hdr *found_hdr;
136 + pj_str_t found_hdr_str;
137 + pj_str_t found_hdr_value;
138 + pj_size_t expected_hdr_slen;
139 + pj_size_t buf_size;
141 +#define REASONABLE_PADDING 32
142 +#define SEPARATOR_LEN 2
143 + /* Must specify mandatory params */
144 + PJ_ASSERT_RETURN(mp && hdr_name && hdr_value, NULL);
146 + /* mp must really point to an actual multipart msg body */
147 + PJ_ASSERT_RETURN(mp->print_body==&multipart_print_body, NULL);
150 + * We'll need to "print" each header we find to test it but
151 + * allocating a buffer of PJSIP_MAX_URL_SIZE is overkill.
152 + * Instead, we'll allocate one large enough to hold the search
153 + * header name, the ": " separator, the search hdr value, and
154 + * the NULL terminator. If we can't print the found header
155 + * into that buffer then it can't be a match.
157 + * Some header print functions such as generic_int require enough
158 + * space to print the maximum possible header length so we'll
159 + * add a reasonable amount to the print buffer size.
161 + expected_hdr_slen = hdr_name->slen + SEPARATOR_LEN + hdr_value->slen;
162 + buf_size = expected_hdr_slen + REASONABLE_PADDING;
163 + found_hdr_str.ptr = pj_pool_alloc(pool, buf_size);
164 + found_hdr_str.slen = 0;
165 + hdr_name_len = hdr_name->slen + SEPARATOR_LEN;
167 + m_data = (struct multipart_data*)mp->data;
170 + part = start->next;
172 + part = m_data->part_head.next;
174 + while (part != &m_data->part_head) {
176 + while ((found_hdr = pjsip_hdr_find_by_name(&part->hdr, hdr_name,
177 + (found_hdr ? found_hdr->next : NULL))) != NULL) {
179 + found_hdr_str.slen = pjsip_hdr_print_on((void*) found_hdr, found_hdr_str.ptr, buf_size);
181 + * If the buffer was too small (slen = -1) or the result wasn't
182 + * the same length as the search header, it can't be a match.
184 + if (found_hdr_str.slen != expected_hdr_slen) {
188 + * Set the value overlay to start at the found header value...
190 + found_hdr_value.ptr = found_hdr_str.ptr + hdr_name_len;
191 + found_hdr_value.slen = found_hdr_str.slen - hdr_name_len;
192 + /* ...and compare it to the supplied header value. */
193 + if (pj_strcmp(hdr_value, &found_hdr_value) == 0) {
200 +#undef SEPARATOR_LEN
201 +#undef REASONABLE_PADDING
204 +PJ_DEF(pjsip_multipart_part*)
205 +pjsip_multipart_find_part_by_header(pj_pool_t *pool,
206 + const pjsip_msg_body *mp,
208 + const pjsip_multipart_part *start)
210 + struct multipart_data *m_data;
211 + pjsip_hdr *search_hdr = search_for;
212 + pj_str_t search_buf;
214 + /* Must specify mandatory params */
215 + PJ_ASSERT_RETURN(mp && search_hdr, NULL);
217 + /* mp must really point to an actual multipart msg body */
218 + PJ_ASSERT_RETURN(mp->print_body==&multipart_print_body, NULL);
221 + * Unfortunately, there isn't enough information to determine
222 + * the maximum printed size of search_hdr at this point so we
223 + * have to allocate a reasonable max.
225 + search_buf.ptr = pj_pool_alloc(pool, PJSIP_MAX_URL_SIZE);
226 + search_buf.slen = pjsip_hdr_print_on(search_hdr, search_buf.ptr, PJSIP_MAX_URL_SIZE - 1);
227 + if (search_buf.slen <= 0) {
231 + * Set the header value to start after the header name plus the ":", then
232 + * strip leading and trailing whitespace.
234 + search_buf.ptr += (search_hdr->name.slen + 1);
235 + search_buf.slen -= (search_hdr->name.slen + 1);
236 + pj_strtrim(&search_buf);
238 + return pjsip_multipart_find_part_by_header_str(pool, mp, &search_hdr->name, &search_buf, start);
242 + * Convert a Content-ID URI to its corresponding header value.
244 + * A "cid" URL is converted to the corresponding Content-ID message
245 + * header by removing the "cid:" prefix, converting the % encoded
246 + * character(s) to their equivalent US-ASCII characters, and enclosing
247 + * the remaining parts with an angle bracket pair, "<" and ">".
249 + * This implementation will accept URIs with or without the "cid:"
250 + * scheme and optional angle brackets.
252 +static pj_str_t cid_uri_to_hdr_value(pj_pool_t *pool, pj_str_t *cid_uri)
254 + pj_size_t cid_len = pj_strlen(cid_uri);
255 + pj_size_t alloc_len = cid_len + 2 /* for the leading and trailing angle brackets */;
256 + pj_str_t uri_overlay;
258 + pj_str_t hdr_overlay;
260 + pj_strassign(&uri_overlay, cid_uri);
261 + /* If the URI is already enclosed in angle brackets, remove them. */
262 + if (uri_overlay.ptr[0] == '<') {
264 + uri_overlay.slen -= 2;
266 + /* If the URI starts with the "cid:" scheme, skip over it. */
267 + if (pj_strncmp2(&uri_overlay, "cid:", 4) == 0) {
268 + uri_overlay.ptr += 4;
269 + uri_overlay.slen -= 4;
271 + /* Start building */
272 + cid_hdr.ptr = pj_pool_alloc(pool, alloc_len);
273 + cid_hdr.ptr[0] = '<';
275 + hdr_overlay.ptr = cid_hdr.ptr + 1;
276 + hdr_overlay.slen = 0;
277 + pj_strcpy_unescape(&hdr_overlay, &uri_overlay);
278 + cid_hdr.slen += hdr_overlay.slen;
279 + cid_hdr.ptr[cid_hdr.slen] = '>';
285 +PJ_DEF(pjsip_multipart_part*)
286 +pjsip_multipart_find_part_by_cid_str(pj_pool_t *pool,
287 + const pjsip_msg_body *mp,
290 + struct multipart_data *m_data;
291 + pjsip_multipart_part *part;
292 + pjsip_generic_string_hdr *found_hdr;
293 + pj_str_t found_hdr_value;
294 + static pj_str_t hdr_name = { "Content-ID", 10};
295 + pj_str_t hdr_value;
297 + PJ_ASSERT_RETURN(pool && mp && cid && (pj_strlen(cid) > 0), NULL);
299 + hdr_value = cid_uri_to_hdr_value(pool, cid);
300 + if (pj_strlen(&hdr_value) == 0) {
304 + m_data = (struct multipart_data*)mp->data;
305 + part = m_data->part_head.next;
307 + while (part != &m_data->part_head) {
309 + while ((found_hdr = pjsip_hdr_find_by_name(&part->hdr, &hdr_name,
310 + (found_hdr ? found_hdr->next : NULL))) != NULL) {
311 + if (pj_strcmp(&hdr_value, &found_hdr->hvalue) == 0) {
320 +PJ_DEF(pjsip_multipart_part*)
321 +pjsip_multipart_find_part_by_cid_uri(pj_pool_t *pool,
322 + const pjsip_msg_body *mp,
323 + pjsip_other_uri *cid_uri)
325 + PJ_ASSERT_RETURN(pool && mp && cid_uri, NULL);
327 + if (pj_strcmp2(&cid_uri->scheme, "cid") != 0) {
331 + * We only need to pass the URI content so we
332 + * can do that directly.
334 + return pjsip_multipart_find_part_by_cid_str(pool, mp, &cid_uri->content);
337 /* Parse a multipart part. "pct" is parent content-type */
338 static pjsip_multipart_part *parse_multipart_part(pj_pool_t *pool,
340 @@ -584,6 +799,7 @@ PJ_DEF(pjsip_msg_body*) pjsip_multipart_
341 (int)boundary.slen, boundary.ptr));
345 /* Build the delimiter:
346 * delimiter = "--" boundary
348 @@ -630,6 +846,8 @@ PJ_DEF(pjsip_msg_body*) pjsip_multipart_
349 if (*curptr=='\r') ++curptr;
351 /* Expecting a newline here */
352 + PJ_LOG(2, (THIS_FILE, "Failed to find newline"));
357 @@ -645,6 +863,7 @@ PJ_DEF(pjsip_msg_body*) pjsip_multipart_
358 curptr = pj_strstr(&subbody, &delim);
360 /* We're really expecting end delimiter to be found. */
361 + PJ_LOG(2, (THIS_FILE, "Failed to find end-delimiter"));
365 @@ -670,9 +889,13 @@ PJ_DEF(pjsip_msg_body*) pjsip_multipart_
366 part = parse_multipart_part(pool, start_body, end_body - start_body,
369 + TRACE_((THIS_FILE, "Adding part"));
370 pjsip_multipart_add_part(pool, body, part);
372 + PJ_LOG(2, (THIS_FILE, "Failed to add part"));
375 + TRACE_((THIS_FILE, "pjsip_multipart_parse finished: %p", body));
379 --- a/pjsip/src/test/multipart_test.c
380 +++ b/pjsip/src/test/multipart_test.c
382 typedef pj_status_t (*verify_ptr)(pj_pool_t*,pjsip_msg_body*);
384 static pj_status_t verify1(pj_pool_t *pool, pjsip_msg_body *body);
385 +static pj_status_t verify2(pj_pool_t *pool, pjsip_msg_body *body);
389 @@ -68,7 +69,41 @@ static struct test_t
390 "This is epilogue, which should be ignored too",
396 + "multipart", "mixed", "12345",
399 + "This is the prolog, which should be ignored.\r\n"
401 + "Content-Type: text/plain\r\n"
402 + "Content-ID: <header1@example.org>\r\n"
403 + "Content-ID: <\"header1\"@example.org>\r\n"
404 + "Content-Length: 13\r\n"
408 + "Content-Type: application/pidf+xml\r\n"
409 + "Content-ID: <my header2@example.org>\r\n"
410 + "Content-ID: <my\xffheader2@example.org>\r\n"
411 + "Content-Length: 13\r\n"
415 + "Content-Type: text/plain\r\n"
416 + "Content-ID: <my header3@example.org>\r\n"
417 + "Content-ID: <header1@example.org>\r\n"
418 + "Content-ID: <my header4@example.org>\r\n"
419 + "Content-Length: 13\r\n"
423 + "This is epilogue, which should be ignored too",
430 static void init_media_type(pjsip_media_type *mt,
431 @@ -87,6 +122,192 @@ static void init_media_type(pjsip_media_
435 +static int verify_hdr(pj_pool_t *pool, pjsip_msg_body *multipart_body,
436 + void *hdr, char *part_body)
438 + pjsip_media_type mt;
439 + pjsip_multipart_part *part;
443 + part = pjsip_multipart_find_part_by_header(pool, multipart_body, hdr, NULL);
448 + the_body.ptr = (char*)part->body->data;
449 + the_body.slen = part->body->len;
451 + if (pj_strcmp2(&the_body, part_body) != 0) {
458 +static int verify_cid_str(pj_pool_t *pool, pjsip_msg_body *multipart_body,
459 + pj_str_t cid_url, char *part_body)
461 + pjsip_media_type mt;
462 + pjsip_multipart_part *part;
465 + part = pjsip_multipart_find_part_by_cid_str(pool, multipart_body, &cid_url);
470 + the_body.ptr = (char*)part->body->data;
471 + the_body.slen = part->body->len;
473 + if (pj_strcmp2(&the_body, part_body) != 0) {
480 +static int verify_cid_uri(pj_pool_t *pool, pjsip_msg_body *multipart_body,
481 + pjsip_other_uri *cid_uri, char *part_body)
483 + pjsip_media_type mt;
484 + pjsip_multipart_part *part;
487 + part = pjsip_multipart_find_part_by_cid_uri(pool, multipart_body, cid_uri);
492 + the_body.ptr = (char*)part->body->data;
493 + the_body.slen = part->body->len;
495 + if (pj_strcmp2(&the_body, part_body) != 0) {
502 +static pj_status_t verify2(pj_pool_t *pool, pjsip_msg_body *body)
506 + pjsip_other_uri *cid_uri;
507 + pjsip_ctype_hdr *ctype_hdr = pjsip_ctype_hdr_create(pool);
509 + ctype_hdr->media.type = pj_str("application");
510 + ctype_hdr->media.subtype = pj_str("pidf+xml");
512 + rc = verify_hdr(pool, body, ctype_hdr, "has header2");
514 + return (rc - rcbase);
518 + rc = verify_cid_str(pool, body, pj_str("cid:header1@example.org"), "has header1");
520 + return (rc - rcbase);
524 + rc = verify_cid_str(pool, body, pj_str("%22header1%22@example.org"), "has header1");
526 + return (rc - rcbase);
529 + cid_uri = pjsip_uri_get_uri(pjsip_parse_uri(pool, "<cid:%22header1%22@example.org>",
530 + strlen("<cid:%22header1%22@example.org>"), 0));
532 + rc = verify_cid_uri(pool, body, cid_uri, "has header1");
534 + return (rc - rcbase);
538 + rc = verify_cid_str(pool, body, pj_str("<cid:my%20header2@example.org>"), "has header2");
540 + return (rc - rcbase);
544 + rc = verify_cid_str(pool, body, pj_str("cid:my%ffheader2@example.org"), "has header2");
546 + return (rc - rcbase);
549 + cid_uri = pjsip_uri_get_uri(pjsip_parse_uri(pool, "<cid:my%ffheader2@example.org>",
550 + strlen("<cid:my%ffheader2@example.org>"), 0));
552 + rc = verify_cid_uri(pool, body, cid_uri, "has header2");
554 + return (rc - rcbase);
558 + rc = verify_cid_str(pool, body, pj_str("cid:my%20header3@example.org"), "has header4");
560 + return (rc - rcbase);
564 + rc = verify_cid_str(pool, body, pj_str("<cid:my%20header4@example.org>"), "has header4");
566 + return (rc - rcbase);
569 + cid_uri = pjsip_uri_get_uri(pjsip_parse_uri(pool, "<cid:my%20header4@example.org>",
570 + strlen("<cid:my%20header4@example.org>"), 0));
572 + rc = verify_cid_uri(pool, body, cid_uri, "has header4");
574 + return (rc - rcbase);
578 + rc = verify_cid_str(pool, body, pj_str("<my%20header3@example.org>"), "has header4");
580 + return (rc - rcbase);
583 + /* These should all fail for malformed or missing URI */
585 + rc = verify_cid_str(pool, body, pj_str("cid:"), "has header4");
587 + return (rc - rcbase);
591 + rc = verify_cid_str(pool, body, pj_str(""), "has header4");
593 + return (rc - rcbase);
597 + rc = verify_cid_str(pool, body, pj_str("<>"), "has header4");
599 + return (rc - rcbase);
603 + rc = verify_cid_str(pool, body, pj_str("<cid>"), "has header4");
605 + return (rc - rcbase);
609 + * This is going to pass but the ' ' in the uri is un-encoded which is invalid
610 + * so we should never see it.
613 + rc = verify_cid_str(pool, body, pj_str("cid:my header3@example.org"), "has header4");
615 + return (rc - rcbase);
621 static int verify_part(pjsip_multipart_part *part,
622 char *h_content_type,
623 char *h_content_subtype,
624 @@ -236,8 +457,10 @@ static int parse_test(void)
626 pj_strdup2_with_null(pool, &str, p_tests[i].msg);
627 body = pjsip_multipart_parse(pool, str.ptr, str.slen, &ctype, 0);
630 + pj_pool_release(pool);
634 if (p_tests[i].verify) {
635 rc = p_tests[i].verify(pool, body);