From edde06f261ac807a89a6086b7f03460867675f95 Mon Sep 17 00:00:00 2001 From: Nanang Izzuddin Date: Tue, 1 Jul 2025 15:13:36 +0700 Subject: [PATCH] Avoid deadlock between transport and transaction (#4453) --- pjsip/include/pjsip/sip_transaction.h | 1 + pjsip/src/pjsip/sip_transaction.c | 101 ++++++++++++++++++++++---- 2 files changed, 88 insertions(+), 14 deletions(-) --- a/pjsip/include/pjsip/sip_transaction.h +++ b/pjsip/include/pjsip/sip_transaction.h @@ -141,6 +141,7 @@ struct pjsip_transaction int retransmit_count;/**< Retransmission count. */ pj_timer_entry retransmit_timer;/**< Retransmit timer. */ pj_timer_entry timeout_timer; /**< Timeout timer. */ + pj_timer_entry misc_timer; /**< Miscellaneous timer. */ /** Module specific data. */ void *mod_data[PJSIP_MAX_MODULE]; --- a/pjsip/src/pjsip/sip_transaction.c +++ b/pjsip/src/pjsip/sip_transaction.c @@ -140,6 +140,7 @@ static int max_retrans_count = -1; #define TRANSPORT_ERR_TIMER 3 #define TRANSPORT_DISC_TIMER 4 #define TERMINATE_TIMER 5 +#define TRANSPORT_CB_TIMER 6 /* Flags for tsx_set_state() */ enum @@ -2265,23 +2266,21 @@ static void send_msg_callback( pjsip_sen } -/* Transport callback. */ -static void transport_callback(void *token, pjsip_tx_data *tdata, - pj_ssize_t sent) -{ - pjsip_transaction *tsx = (pjsip_transaction*) token; +/* Transport callback parameter. */ +struct tp_cb_param { + pjsip_transaction* tsx; + pjsip_tx_data* tdata; + pj_ssize_t sent; +}; - /* Check if the transaction layer has been shutdown. */ - if (mod_tsx_layer.mod.id < 0) - return; - /* In other circumstances, locking tsx->grp_lock AFTER transport mutex - * will introduce deadlock if another thread is currently sending a - * SIP message to the transport. But this should be safe as there should - * be no way this callback could be called while another thread is - * sending a message. - */ +/* Transport callback actual implementation. */ +static void transport_callback_impl(pjsip_transaction *tsx, + pjsip_tx_data* tdata, + pj_ssize_t sent) +{ pj_grp_lock_acquire(tsx->grp_lock); + tsx->transport_flag &= ~(TSX_HAS_PENDING_TRANSPORT); if (sent > 0 || tsx->role == PJSIP_ROLE_UAS) { @@ -2299,6 +2298,7 @@ static void transport_callback(void *tok tsx_set_state( tsx, PJSIP_TSX_STATE_DESTROYED, PJSIP_EVENT_UNKNOWN, NULL, 0 ); pj_grp_lock_release(tsx->grp_lock); + pj_grp_lock_dec_ref(tsx->grp_lock); return; } @@ -2354,6 +2354,79 @@ static void transport_callback(void *tok } +/* Timer callback for transport callback. + * This is currently only used to avoid deadlock due to inversed locking order + * between transport and transaction. + */ +static void tsx_misc_timer_callback(pj_timer_heap_t *theap, + pj_timer_entry *entry) +{ + PJ_UNUSED_ARG(theap); + + if (entry->id == TRANSPORT_CB_TIMER) { + struct tp_cb_param* param = (struct tp_cb_param*)entry->user_data; + + /* Check if the transaction layer has been shutdown. */ + if (mod_tsx_layer.mod.id >= 0) { + /* Call transport callback implementation */ + transport_callback_impl(param->tsx, param->tdata, param->sent); + } + + /* Release tdata */ + pjsip_tx_data_dec_ref(param->tdata); + } +} + + +/* Transport callback. */ +static void transport_callback(void *token, pjsip_tx_data *tdata, + pj_ssize_t sent) +{ + pjsip_transaction *tsx = (pjsip_transaction*) token; + pj_status_t status; + + /* Check if the transaction layer has been shutdown. */ + if (mod_tsx_layer.mod.id < 0) + return; + + /* In other circumstances, locking tsx->grp_lock AFTER transport mutex + * will introduce deadlock if another thread is currently sending a + * SIP message to the transport. But this should be safe as there should + * be no way this callback could be called while another thread is + * sending a message. + */ + // Deadlock does happen, see #4453. + // So now, to avoid deadlock, we'll try to acquire the group lock first, + // and if it fails, we'll schedule the processing via timer. + status = pj_grp_lock_tryacquire(tsx->grp_lock); + if (status != PJ_SUCCESS) { + pj_time_val delay = { 0, 0 }; + struct tp_cb_param *param = NULL; + + lock_timer(tsx); + tsx_cancel_timer(tsx, &tsx->misc_timer); + + /* Increment tdata ref count to avoid premature destruction. + * Note that tsx ref count is already handled by tsx_schedule_timer(). + */ + pjsip_tx_data_add_ref(tdata); + + param = PJ_POOL_ZALLOC_T(tsx->pool, struct tp_cb_param); + param->sent = sent; + param->tdata = tdata; + param->tsx = tsx; + pj_timer_entry_init(&tsx->misc_timer, TIMER_INACTIVE, param, + &tsx_misc_timer_callback); + tsx_schedule_timer(tsx, &tsx->misc_timer, &delay, TRANSPORT_CB_TIMER); + unlock_timer(tsx); + return; + } + + transport_callback_impl(tsx, tdata, sent); + pj_grp_lock_release(tsx->grp_lock); +} + + /* * Callback when transport state changes. */