tcp_main.c
5b532c7f
 /*
  * $Id$
  *
53c7e0f1
  * Copyright (C) 2001-2003 FhG Fokus
5b532c7f
  *
  * This file is part of ser, a free SIP server.
  *
  * ser is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
  * (at your option) any later version
  *
  * For a license to use the ser software under conditions
  * other than those described here, or to purchase support for this
  * software, please contact iptel.org by e-mail at the following addresses:
  *    info@iptel.org
  *
  * ser is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
b33736bc
 /*
  * History:
  * --------
  *  2002-11-29  created by andrei
  *  2002-12-11  added tcp_send (andrei)
  *  2003-01-20  locking fixes, hashtables (andrei)
  *  2003-02-20  s/lock_t/gen_lock_t/ to avoid a conflict on solaris (andrei)
  *  2003-02-25  Nagle is disabled if -DDISABLE_NAGLE (andrei)
e6509c23
  *  2003-03-29  SO_REUSEADDR before calling bind to allow
  *              server restart, Nagle set on the (hopefully)
f15bede1
  *              correct socket (jiri)
  *  2003-03-31  always try to find the corresponding tcp listen socket for
  *               a temp. socket and store it in *->bind_address: added
  *               find_tcp_si, modified tcpconn_connect (andrei)
39546e5f
  *  2003-04-14  set sockopts to TOS low delay (andrei)
f6e50f08
  *  2003-06-30  moved tcp new connect checking & handling to
  *               handle_new_connect (andrei)
f535fd9a
  *  2003-07-09  tls_close called before closing the tcp connection (andrei)
9f4c52ce
  *  2003-10-24  converted to the new socket_info lists (andrei)
59653eb8
  *  2003-10-27  tcp port aliases support added (andrei)
06aaa54f
  *  2003-11-04  always lock before manipulating refcnt; sendchild
  *              does not inc refcnt by itself anymore (andrei)
  *  2003-11-07  different unix sockets are used for fd passing
  *              to/from readers/writers (andrei)
76b0ff49
  *  2003-11-17  handle_new_connect & tcp_connect will close the 
  *              new socket if tcpconn_new return 0 (e.g. out of mem) (andrei)
c55f3308
  *  2003-11-28  tcp_blocking_write & tcp_blocking_connect added (andrei)
a35ad52c
  *  2004-11-08  dropped find_tcp_si and replaced with find_si (andrei)
0ba367ec
  *  2005-06-07  new tcp optimized code, supports epoll (LT), sigio + real time
  *               signals, poll & select (andrei)
741a9937
  *  2005-06-26  *bsd kqueue support (andrei)
9eda5956
  *  2005-07-04  solaris /dev/poll support (andrei)
55d8155e
  *  2005-07-08  tcp_max_connections, tcp_connection_lifetime, don't accept
  *               more connections if tcp_max_connections is exceeded (andrei)
32f8b7ed
  *  2005-10-21  cleanup all the open connections on exit
98f3d5e2
  *              decrement the no. of open connections on timeout too (andrei)
  *  2006-01-30  queue send_fd request and execute them at the end of the
  *              poll loop  (#ifdef) (andrei)
  *              process all children requests, before attempting to send
  *              them new stuff (fixes some deadlocks) (andrei)
1ce28841
  *  2006-02-03  timers are run only once per s (andrei)
746f7674
  *              tcp children fds can be non-blocking; send fds are queued on
  *              EAGAIN; lots of bug fixes (andrei)
d9515405
  *  2006-02-06  better tcp_max_connections checks, tcp_connections_no moved to
  *              shm (andrei)
e6a2b12e
  *  2006-04-12  tcp_send() changed to use struct dest_info (andrei)
be7401cc
  *  2006-11-02  switched to atomic ops for refcnt, locking improvements 
  *               (andrei)
d8b11bbc
  *  2006-11-04  switched to raw ticks (to fix conversion errors which could
  *               result in inf. lifetime) (andrei)
205fcb02
  *  2007-07-25  tcpconn_connect can now bind the socket on a specified
  *                source addr/port (andrei)
a288ee34
  *  2007-07-26   tcp_send() and tcpconn_get() can now use a specified source
  *                addr./port (andrei)
a6357e25
  *  2007-08-23   getsockname() for INADDR_ANY(SI_IS_ANY) sockets (andrei)
2aa5c1f7
  *  2007-08-27   split init_sock_opt into a lightweight init_sock_opt_accept() 
  *               used when accepting connections and init_sock_opt used for 
  *               connect/ new sockets (andrei)
5702d448
  *  2007-11-22  always add the connection & clear the corresponding flags
  *               before io_watch_add-ing its fd - it's safer this way (andrei)
ccb7fda2
  *  2007-11-26  improved tcp timers: switched to local_timer (andrei)
5c5cd736
  *  2007-11-27  added send fd cache and reader fd reuse (andrei)
20c64cc6
  *  2007-11-28  added support for TCP_DEFER_ACCEPT, KEEPALIVE, KEEPINTVL,
  *               KEEPCNT, QUICKACK, SYNCNT, LINGER2 (andrei)
885b9f62
  *  2007-12-04  support for queueing write requests (andrei)
20863813
  *  2007-12-12  destroy connection asap on wbuf. timeout (andrei)
19782e1c
  *  2007-12-13  changed the refcnt and destroy scheme, now refcnt is 1 if
db237b38
  *                linked into the hash tables (was 0) (andrei)
d22b82a0
  *  2007-12-21  support for pending connects (connections are added to the
  *               hash immediately and writes on them are buffered) (andrei)
7498b4dc
  *  2008-02-05  handle POLLRDHUP (if supported), POLLERR and
  *               POLLHUP (andrei)
  *              on write error check if there's still data in the socket 
  *               read buffer and process it first (andrei)
38429f23
  *  2009-02-26  direct blacklist support (andrei)
ffc72fcf
  *  2009-03-20  s/wq_timeout/send_timeout ; send_timeout is now in ticks
  *              (andrei)
340ce466
  *  2009-04-09  tcp ev and tcp stats macros added (andrei)
  *  2009-09-15  support for force connection reuse and close after send
  *               send flags (andrei)
ce51fbb8
  *  2010-03-23  tcp_send() split in 3 smaller functions (andrei)
b33736bc
  */
5b532c7f
 
ce51fbb8
 /** tcp main/dispatcher and tcp send functions.
  * @file tcp_main.c
  * @ingroup core
  * Module: @ref core
1d0661db
  */
 
5b532c7f
 
 #ifdef USE_TCP
 
6bc40dea
 
 #ifndef SHM_MEM
 #error "shared memory support needed (add -DSHM_MEM to Makefile.defs)"
 #endif
 
7498b4dc
 #define HANDLE_IO_INLINE
 #include "io_wait.h" /* include first to make sure the needed features are
 						turned on (e.g. _GNU_SOURCE for POLLRDHUP) */
 
5b532c7f
 #include <sys/time.h>
 #include <sys/types.h>
b988daef
 #include <sys/select.h>
5b532c7f
 #include <sys/socket.h>
0d2d4018
 #ifdef HAVE_FILIO_H
 #include <sys/filio.h> /* needed on solaris 2.x for FIONREAD */
 #elif defined __OS_solaris
 #define BSD_COMP  /* needed on older solaris for FIONREAD */
 #endif /* HAVE_FILIO_H / __OS_solaris */
7498b4dc
 #include <sys/ioctl.h>  /* ioctl() used on write error */
1f2c924e
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
39546e5f
 #include <netinet/ip.h>
b33736bc
 #include <netinet/tcp.h>
ab130758
 #include <sys/uio.h>  /* writev*/
b33736bc
 #include <netdb.h>
0ba367ec
 #include <stdlib.h> /*exit() */
5b532c7f
 
 #include <unistd.h>
 
 #include <errno.h>
 #include <string.h>
 
0ba367ec
 #ifdef HAVE_SELECT
 #include <sys/select.h>
 #endif
 #include <sys/poll.h>
5b532c7f
 
 
 #include "ip_addr.h"
 #include "pass_fd.h"
6bc40dea
 #include "tcp_conn.h"
5b532c7f
 #include "globals.h"
6ee62314
 #include "pt.h"
8aeb47e2
 #include "locking.h"
5b532c7f
 #include "mem/mem.h"
6ee62314
 #include "mem/shm_mem.h"
 #include "timer.h"
7ec958f3
 #include "sr_module.h"
0c5da34b
 #include "tcp_server.h"
f2f969dd
 #include "tcp_init.h"
ce51fbb8
 #include "tcp_int_send.h"
7bb2b4ca
 #include "tcp_stats.h"
 #include "tcp_ev.h"
518c9339
 #include "tsend.h"
746f7674
 #include "timer_ticks.h"
ccb7fda2
 #include "local_timer.h"
6c53d41a
 #ifdef CORE_TLS
28427aa4
 #include "tls/tls_server.h"
6c53d41a
 #define tls_loaded() 1
 #else
 #include "tls_hooks_init.h"
 #include "tls_hooks.h"
38429f23
 #endif /* CORE_TLS*/
 #ifdef USE_DST_BLACKLIST
 #include "dst_blacklist.h"
 #endif /* USE_DST_BLACKLIST */
6c53d41a
 
d9515405
 #include "tcp_info.h"
20c64cc6
 #include "tcp_options.h"
885b9f62
 #include "ut.h"
9188021a
 #include "cfg/cfg_struct.h"
8aeb47e2
 
5b532c7f
 #define local_malloc pkg_malloc
 #define local_free   pkg_free
 
0ba367ec
 #include <fcntl.h> /* must be included after io_wait.h if SIGIO_RT is used */
 
22f06258
 
92c0024c
 #ifdef NO_MSG_DONTWAIT
 #ifndef MSG_DONTWAIT
 /* should work inside tcp_main */
 #define MSG_DONTWAIT 0
 #endif
 #endif /*NO_MSG_DONTWAIT */
 
 
746f7674
 #define TCP_PASS_NEW_CONNECTION_ON_DATA /* don't pass a new connection
 										   immediately to a child, wait for
 										   some data on it first */
1ce28841
 #define TCP_LISTEN_BACKLOG 1024
28260509
 #define SEND_FD_QUEUE /* queue send fd requests on EAGAIN, instead of sending 
 							them immediately */
 #define TCP_CHILD_NON_BLOCKING 
98f3d5e2
 #ifdef SEND_FD_QUEUE
28260509
 #ifndef TCP_CHILD_NON_BLOCKING
 #define TCP_CHILD_NON_BLOCKING
 #endif
3e999281
 #define MAX_SEND_FD_QUEUE_SIZE	tcp_main_max_fd_no
98f3d5e2
 #define SEND_FD_QUEUE_SIZE		128  /* initial size */
746f7674
 #define SEND_FD_QUEUE_TIMEOUT	MS_TO_TICKS(2000)  /* 2 s */
98f3d5e2
 #endif
0ba367ec
 
ccb7fda2
 /* minimum interval local_timer_run() is allowed to run, in ticks */
 #define TCPCONN_TIMEOUT_MIN_RUN 1  /* once per tick */
 #define TCPCONN_WAIT_TIMEOUT 1 /* 1 tick */
0ba367ec
 
76cb799e
 #ifdef TCP_ASYNC
885b9f62
 /* bytes currently queued in all the connection write queues: increased by
  * _wbufq_add()/_wbufq_insert(), decreased by wbufq_run()/_wbufq_destroy()
  * and checked against the tcp_wq_max config value before queueing */
 static unsigned int* tcp_total_wq=0;
 #endif
 
 
0ba367ec
 /* fd type tags (values are assigned implicitly, starting at 0) */
 enum fd_types {
 	F_NONE,
 	F_SOCKINFO, /* a tcp_listen fd */
 	F_TCPCONN,
 	F_TCPCHILD,
 	F_PROC
 };
 
5c5cd736
 
 #ifdef TCP_FD_CACHE
 
 #define TCP_FD_CACHE_SIZE 8 /* number of slots in fd_cache[] */
 
 /* one slot of the fd cache (only compiled in if TCP_FD_CACHE is defined) */
 struct fd_cache_entry{
 	struct tcp_connection* con; /* connection the slot refers to */
 	int id; /* NOTE(review): presumably the connection id - confirm */
 	int fd; /* NOTE(review): presumably the cached fd for con - confirm */
 };
 
 
 /* file-local table with TCP_FD_CACHE_SIZE slots */
 static struct fd_cache_entry fd_cache[TCP_FD_CACHE_SIZE];
 #endif /* TCP_FD_CACHE */
 
d8b11bbc
 static int is_tcp_main=0;
0ba367ec
 
3dc4f620
 
0ba367ec
 enum poll_types tcp_poll_method=0; /* by default choose the best method */
3e999281
 int tcp_main_max_fd_no=0;
3dc4f620
 int tcp_max_connections=DEFAULT_TCP_MAX_CONNECTIONS;
61f8b970
 int tls_max_connections=DEFAULT_TLS_MAX_CONNECTIONS;
55d8155e
 
205fcb02
 static union sockaddr_union tcp_source_ipv4_addr; /* saved bind/src v4 addr. */
 /* 0 until tcp_set_src_addr() stores a v4 address, afterwards it points to
  * tcp_source_ipv4_addr */
 static union sockaddr_union* tcp_source_ipv4=0;
761bd309
 #ifdef USE_IPV6
205fcb02
 static union sockaddr_union tcp_source_ipv6_addr; /* saved bind/src v6 addr. */
 /* 0 until tcp_set_src_addr() stores a v6 address, afterwards it points to
  * tcp_source_ipv6_addr */
 static union sockaddr_union* tcp_source_ipv6=0;
761bd309
 #endif
 
61f8b970
 static int* tcp_connections_no=0; /* current tcp (+tls) open connections */
 static int* tls_connections_no=0; /* current tls open connections */
5b532c7f
 
59653eb8
 /* connection hash table (after ip&port) , includes also aliases */
 struct tcp_conn_alias** tcpconn_aliases_hash=0;
8aeb47e2
 /* connection hash table (after connection id) */
 struct tcp_connection** tcpconn_id_hash=0;
0bd53297
 gen_lock_t* tcpconn_lock=0;
8aeb47e2
 
edf5e385
 struct tcp_child* tcp_children=0;
2719e69b
 static int* connection_id=0; /*  unique for each connection, used for
 								quickly finding the corresponding connection
 								for a reply */
 int unix_tcp_sock;
5b532c7f
 
0ba367ec
 static int tcp_proto_no=-1; /* tcp protocol number as returned by
 							   getprotobyname */
 
 static io_wait_h io_h;
5b532c7f
 
ccb7fda2
 static struct local_timer tcp_main_ltimer;
057063e6
 static ticks_t tcp_main_prev_ticks;
5b532c7f
 
edf5e385
 /* tell if there are tcp workers that should handle only specific socket
  * - used to optimize the search of least loaded worker for a tcp socket
  * - 0 - no workers per tcp sockets have been set
  * - 1 + generic_workers - when there are workers per tcp sockets
  */
 static int tcp_sockets_gworkers = 0;
bc977837
 
ccb7fda2
 static ticks_t tcpconn_main_timeout(ticks_t , struct timer_ln* , void* );
 
a288ee34
 inline static int _tcpconn_add_alias_unsafe(struct tcp_connection* c, int port,
22f06258
 										struct ip_addr* l_ip, int l_port,
 										int flags);
a288ee34
 
 
 
205fcb02
 /* sets the source address used when opening new sockets and no source is
  *  specified (by default the address is chosen by the kernel).
  * The address is converted (with port 0) into the file-local saved address
  *  of the matching family and the corresponding tcp_source_ipv4 /
  *  tcp_source_ipv6 pointer is set to it. AF_INET6 is handled only if
  *  compiled with USE_IPV6.
  * Should be used only on init.
  * returns 0 on success, -1 on error (unsupported address family) */
 int tcp_set_src_addr(struct ip_addr* ip)
 {
 	if (ip->af==AF_INET){
 		ip_addr2su(&tcp_source_ipv4_addr, ip, 0);
 		tcp_source_ipv4=&tcp_source_ipv4_addr;
 		return 0;
 	}
 #ifdef USE_IPV6
 	if (ip->af==AF_INET6){
 		ip_addr2su(&tcp_source_ipv6_addr, ip, 0);
 		tcp_source_ipv6=&tcp_source_ipv6_addr;
 		return 0;
 	}
 #endif
 	/* unknown/unsupported address family */
 	return -1;
 }
 
 
 
20c64cc6
 /* applies the configured keepalive settings to socket s.
  * Each option is attempted only if the corresponding HAVE_* macro is defined
  * and its tcp config value is non-zero:
  *  keepalive -> SO_KEEPALIVE (enabled), keepintvl -> TCP_KEEPINTVL,
  *  keepidle -> TCP_KEEPIDLE, keepcnt -> TCP_KEEPCNT.
  * returns -1 if enabling SO_KEEPALIVE fails (the remaining options are then
  * skipped), 0 otherwise; failures of the other options are only logged */
 static inline int init_sock_keepalive(int s)
 {
 	int val;
 	
 #ifdef HAVE_SO_KEEPALIVE
 	if (cfg_get(tcp, tcp_cfg, keepalive)){
 		val=1;
 		if (setsockopt(s, SOL_SOCKET, SO_KEEPALIVE, &val, sizeof(val))<0){
 			LOG(L_WARN, "WARNING: init_sock_keepalive: failed to enable"
 						" SO_KEEPALIVE: %s\n", strerror(errno));
 			return -1;
 		}
 	}
 #endif
 #ifdef HAVE_TCP_KEEPINTVL
 	val=cfg_get(tcp, tcp_cfg, keepintvl);
 	if (val){
 		if (setsockopt(s, IPPROTO_TCP, TCP_KEEPINTVL, &val, sizeof(val))<0){
 			LOG(L_WARN, "WARNING: init_sock_keepalive: failed to set"
 						" keepalive probes interval: %s\n", strerror(errno));
 		}
 	}
 #endif
 #ifdef HAVE_TCP_KEEPIDLE
 	val=cfg_get(tcp, tcp_cfg, keepidle);
 	if (val){
 		if (setsockopt(s, IPPROTO_TCP, TCP_KEEPIDLE, &val, sizeof(val))<0){
 			LOG(L_WARN, "WARNING: init_sock_keepalive: failed to set"
 						" keepalive idle interval: %s\n", strerror(errno));
 		}
 	}
 #endif
 #ifdef HAVE_TCP_KEEPCNT
 	val=cfg_get(tcp, tcp_cfg, keepcnt);
 	if (val){
 		if (setsockopt(s, IPPROTO_TCP, TCP_KEEPCNT, &val, sizeof(val))<0){
 			LOG(L_WARN, "WARNING: init_sock_keepalive: failed to set"
 						" maximum keepalive count: %s\n", strerror(errno));
 		}
 	}
 #endif
 	return 0;
 }
 
 
 
2aa5c1f7
 /* set all socket/fd options for new sockets (e.g. before connect): 
  *  disable nagle, tos lowdelay, reuseaddr, non-blocking
  *
bc977837
  * return -1 on error */
 static int init_sock_opt(int s)
 {
 	int flags;
 	int optval;
 	
 #ifdef DISABLE_NAGLE
 	flags=1;
 	if ( (tcp_proto_no!=-1) && (setsockopt(s, tcp_proto_no , TCP_NODELAY,
 					&flags, sizeof(flags))<0) ){
 		LOG(L_WARN, "WARNING: init_sock_opt: could not disable Nagle: %s\n",
 				strerror(errno));
 	}
 #endif
 	/* tos*/
fe09f315
 	optval = tos;
bc977837
 	if (setsockopt(s, IPPROTO_IP, IP_TOS, (void*)&optval,sizeof(optval)) ==-1){
 		LOG(L_WARN, "WARNING: init_sock_opt: setsockopt tos: %s\n",
 				strerror(errno));
 		/* continue since this is not critical */
 	}
2aa5c1f7
 #if  !defined(TCP_DONT_REUSEADDR) 
 	optval=1;
 	if (setsockopt(s, SOL_SOCKET, SO_REUSEADDR,
 						(void*)&optval, sizeof(optval))==-1){
 		LOG(L_ERR, "ERROR: setsockopt SO_REUSEADDR %s\n",
 				strerror(errno));
 		/* continue, not critical */
 	}
 #endif /* !TCP_DONT_REUSEADDR */
20c64cc6
 #ifdef HAVE_TCP_SYNCNT
22db42e4
 	if ((optval=cfg_get(tcp, tcp_cfg, syncnt))){
20c64cc6
 		if (setsockopt(s, IPPROTO_TCP, TCP_SYNCNT, &optval,
 						sizeof(optval))<0){
 			LOG(L_WARN, "WARNING: init_sock_opt: failed to set"
 						" maximum SYN retr. count: %s\n", strerror(errno));
 		}
 	}
 #endif
 #ifdef HAVE_TCP_LINGER2
22db42e4
 	if ((optval=cfg_get(tcp, tcp_cfg, linger2))){
20c64cc6
 		if (setsockopt(s, IPPROTO_TCP, TCP_LINGER2, &optval,
 						sizeof(optval))<0){
 			LOG(L_WARN, "WARNING: init_sock_opt: failed to set"
 						" maximum LINGER2 timeout: %s\n", strerror(errno));
 		}
 	}
 #endif
 #ifdef HAVE_TCP_QUICKACK
22db42e4
 	if (cfg_get(tcp, tcp_cfg, delayed_ack)){
20c64cc6
 		optval=0; /* reset quick ack => delayed ack */
 		if (setsockopt(s, IPPROTO_TCP, TCP_QUICKACK, &optval,
 						sizeof(optval))<0){
 			LOG(L_WARN, "WARNING: init_sock_opt: failed to reset"
 						" TCP_QUICKACK: %s\n", strerror(errno));
 		}
 	}
 #endif /* HAVE_TCP_QUICKACK */
 	init_sock_keepalive(s);
2aa5c1f7
 	
bc977837
 	/* non-blocking */
 	flags=fcntl(s, F_GETFL);
 	if (flags==-1){
 		LOG(L_ERR, "ERROR: init_sock_opt: fnctl failed: (%d) %s\n",
 				errno, strerror(errno));
 		goto error;
 	}
 	if (fcntl(s, F_SETFL, flags|O_NONBLOCK)==-1){
 		LOG(L_ERR, "ERROR: init_sock_opt: fcntl: set non-blocking failed:"
 				" (%d) %s\n", errno, strerror(errno));
 		goto error;
 	}
 	return 0;
 error:
 	return -1;
 }
 
 
 
2aa5c1f7
 /* set all socket/fd options for "accepted" sockets 
  *  only nonblocking is set since the rest is inherited from the
  *  "parent" (listening) socket
  *  Note: setting O_NONBLOCK is required on linux but it's not needed on
  *        BSD and possibly solaris (where the flag is inherited from the 
  *        parent socket). However since there is no standard document 
  *        requiring a specific behaviour in this case it's safer to always set
  *        it (at least for now)  --andrei
  *  TODO: check on which OSes  O_NONBLOCK is inherited and make this 
  *        function a nop.
  *
  * return -1 on error */
 static int init_sock_opt_accept(int s)
 {
 	int flags;
 	
 	/* non-blocking */
 	flags=fcntl(s, F_GETFL);
 	if (flags==-1){
 		LOG(L_ERR, "ERROR: init_sock_opt_accept: fnctl failed: (%d) %s\n",
 				errno, strerror(errno));
 		goto error;
 	}
 	if (fcntl(s, F_SETFL, flags|O_NONBLOCK)==-1){
 		LOG(L_ERR, "ERROR: init_sock_opt_accept: "
 					"fcntl: set non-blocking failed: (%d) %s\n",
 					errno, strerror(errno));
 		goto error;
 	}
 	return 0;
 error:
 	return -1;
 }
 
 
 
ab88df95
 /** close a socket, handling errno.
  * close() is repeated for as long as it fails with EINTR.
  * Expected socket level errors (EPIPE, ENOTCONN, ECONNRESET, ECONNREFUSED,
  * ENETUNREACH, EHOSTUNREACH) are filtered: success is returned for them.
  * Note that they really happen on *BSDs (on linux close() does not fail
  * for socket level errors).
  * NOTE(review): the EINTR retry assumes the fd is still open after the
  * interrupted close(); the Linux close(2) page says the fd is released even
  * then - confirm the retry is wanted on that platform.
  * @param s - open valid socket.
  * @return - 0 on success, < 0 on error (whatever close() returns). On error
  *           errno is set.
  */
 static int tcp_safe_close(int s)
 {
 	int rc;
 	
 	/* repeat while interrupted by a signal */
 	do{
 		rc=close(s);
 	}while(unlikely(rc<0) && (errno==EINTR));
 	
 	if (rc<0 &&
 			(errno==EPIPE || errno==ENOTCONN || errno==ECONNRESET ||
 			 errno==ECONNREFUSED || errno==ENETUNREACH ||
 			 errno==EHOSTUNREACH)){
 		/* on *BSD we really get these errors at close() time 
 		   => ignore them */
 		rc=0;
 	}
 	return rc;
 }
 
 
 
0ba367ec
 /* blocking connect on a non-blocking fd; it will timeout after
  * tcp_connect_timeout 
  * if BLOCKING_USE_SELECT and HAVE_SELECT are defined it will internally
  * use select() instead of poll (bad if fd > FD_SET_SIZE, poll is preferred)
  */
5d6752dc
 static int tcp_blocking_connect(int fd, int type, snd_flags_t* send_flags,
38429f23
 								const struct sockaddr *servaddr,
bc977837
 								socklen_t addrlen)
 {
 	int n;
0ba367ec
 #if defined(HAVE_SELECT) && defined(BLOCKING_USE_SELECT)
bc977837
 	fd_set sel_set;
0ba367ec
 	fd_set orig_set;
bc977837
 	struct timeval timeout;
0ba367ec
 #else
 	struct pollfd pf;
 #endif
 	int elapsed;
 	int to;
bc977837
 	int ticks;
 	int err;
a9fae28b
 	unsigned int err_len;
a31a6e8c
 	int poll_err;
bc977837
 	
a31a6e8c
 	poll_err=0;
3dc4f620
 	to=cfg_get(tcp, tcp_cfg, connect_timeout_s);
0ba367ec
 	ticks=get_ticks();
bc977837
 again:
 	n=connect(fd, servaddr, addrlen);
 	if (n==-1){
0ba367ec
 		if (errno==EINTR){
 			elapsed=(get_ticks()-ticks)*TIMER_TICK;
 			if (elapsed<to)		goto again;
 			else goto error_timeout;
 		}
bc977837
 		if (errno!=EINPROGRESS && errno!=EALREADY){
30745c9d
 			goto error_errno;
bc977837
 		}
 	}else goto end;
 	
0ba367ec
 	/* poll/select loop */
 #if defined(HAVE_SELECT) && defined(BLOCKING_USE_SELECT)
 		FD_ZERO(&orig_set);
 		FD_SET(fd, &orig_set);
 #else
 		pf.fd=fd;
 		pf.events=POLLOUT;
 #endif
bc977837
 	while(1){
0ba367ec
 		elapsed=(get_ticks()-ticks)*TIMER_TICK;
98d282a6
 		if (elapsed>=to)
0ba367ec
 			goto error_timeout;
 #if defined(HAVE_SELECT) && defined(BLOCKING_USE_SELECT)
 		sel_set=orig_set;
98d282a6
 		timeout.tv_sec=to-elapsed;
bc977837
 		timeout.tv_usec=0;
 		n=select(fd+1, 0, &sel_set, 0, &timeout);
0ba367ec
 #else
98d282a6
 		n=poll(&pf, 1, (to-elapsed)*1000);
0ba367ec
 #endif
bc977837
 		if (n<0){
 			if (errno==EINTR) continue;
edcdea00
 			LOG(L_ERR, "ERROR: tcp_blocking_connect %s: poll/select failed:"
 					" (%d) %s\n",
 					su2a((union sockaddr_union*)servaddr, addrlen),
 					errno, strerror(errno));
bc977837
 			goto error;
0ba367ec
 		}else if (n==0) /* timeout */ continue;
 #if defined(HAVE_SELECT) && defined(BLOCKING_USE_SELECT)
a31a6e8c
 		if (FD_ISSET(fd, &sel_set))
0ba367ec
 #else
 		if (pf.revents&(POLLERR|POLLHUP|POLLNVAL)){ 
edcdea00
 			LOG(L_ERR, "ERROR: tcp_blocking_connect %s: poll error: "
 					"flags %x\n",
 					su2a((union sockaddr_union*)servaddr, addrlen),
0ba367ec
 					pf.revents);
a31a6e8c
 			poll_err=1;
 		}
0ba367ec
 #endif
a31a6e8c
 		{
bc977837
 			err_len=sizeof(err);
 			getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &err_len);
a31a6e8c
 			if ((err==0) && (poll_err==0)) goto end;
bc977837
 			if (err!=EINPROGRESS && err!=EALREADY){
edcdea00
 				LOG(L_ERR, "ERROR: tcp_blocking_connect %s: SO_ERROR (%d) "
 						"%s\n",
 						su2a((union sockaddr_union*)servaddr, addrlen),
bc977837
 						err, strerror(err));
30745c9d
 				errno=err;
 				goto error_errno;
bc977837
 			}
 		}
 	}
30745c9d
 error_errno:
 	switch(errno){
 		case ENETUNREACH:
 		case EHOSTUNREACH:
 #ifdef USE_DST_BLACKLIST
5d6752dc
 			dst_blacklist_su(BLST_ERR_CONNECT, type,
 							 (union sockaddr_union*)servaddr, send_flags, 0);
30745c9d
 #endif /* USE_DST_BLACKLIST */
 			TCP_EV_CONNECT_UNREACHABLE(errno, 0, 0,
 							(union sockaddr_union*)servaddr, type);
 			break;
 		case ETIMEDOUT:
 #ifdef USE_DST_BLACKLIST
5d6752dc
 			dst_blacklist_su(BLST_ERR_CONNECT, type,
 							 (union sockaddr_union*)servaddr, send_flags, 0);
30745c9d
 #endif /* USE_DST_BLACKLIST */
 			TCP_EV_CONNECT_TIMEOUT(errno, 0, 0,
 							(union sockaddr_union*)servaddr, type);
 			break;
 		case ECONNREFUSED:
 		case ECONNRESET:
 #ifdef USE_DST_BLACKLIST
5d6752dc
 			dst_blacklist_su(BLST_ERR_CONNECT, type,
 							 (union sockaddr_union*)servaddr, send_flags, 0);
30745c9d
 #endif /* USE_DST_BLACKLIST */
 			TCP_EV_CONNECT_RST(errno, 0, 0,
 							(union sockaddr_union*)servaddr, type);
 			break;
 		case EAGAIN: /* not posix, but supported on linux and bsd */
 			TCP_EV_CONNECT_NO_MORE_PORTS(errno, 0, 0,
 							(union sockaddr_union*)servaddr, type);
 			break;
 		default:
 			TCP_EV_CONNECT_ERR(errno, 0, 0,
 								(union sockaddr_union*)servaddr, type);
 	}
 	LOG(L_ERR, "ERROR: tcp_blocking_connect %s: (%d) %s\n",
 			su2a((union sockaddr_union*)servaddr, addrlen),
 			errno, strerror(errno));
 	goto error;
0ba367ec
 error_timeout:
 	/* timeout */
38429f23
 #ifdef USE_DST_BLACKLIST
5d6752dc
 	dst_blacklist_su(BLST_ERR_CONNECT, type,
 						(union sockaddr_union*)servaddr, send_flags, 0);
38429f23
 #endif /* USE_DST_BLACKLIST */
7bb2b4ca
 	TCP_EV_CONNECT_TIMEOUT(0, 0, 0, (union sockaddr_union*)servaddr, type);
edcdea00
 	LOG(L_ERR, "ERROR: tcp_blocking_connect %s: timeout %d s elapsed "
 				"from %d s\n", su2a((union sockaddr_union*)servaddr, addrlen),
3dc4f620
 				elapsed, cfg_get(tcp, tcp_cfg, connect_timeout_s));
bc977837
 error:
30745c9d
 	TCP_STATS_CONNECT_FAILED();
bc977837
 	return -1;
 end:
 	return 0;
 }
 
 
 
76cb799e
 #ifdef TCP_ASYNC
885b9f62
 
 
20863813
 /* unsafe version */
 #define _wbufq_empty(con) ((con)->wbuf_q.first==0)
 /* unsafe version */
 #define _wbufq_non_empty(con) ((con)->wbuf_q.first!=0)
 
 
 /* unsafe version, call while holding the connection write lock */
5f653089
 inline static int _wbufq_add(struct  tcp_connection* c, const char* data, 
885b9f62
 							unsigned int size)
 {
 	struct tcp_wbuffer_queue* q;
 	struct tcp_wbuffer* wb;
 	unsigned int last_free;
 	unsigned int wb_size;
 	unsigned int crt_size;
 	ticks_t t;
 	
 	q=&c->wbuf_q;
 	t=get_ticks_raw();
22db42e4
 	if (unlikely(	((q->queued+size)>cfg_get(tcp, tcp_cfg, tcpconn_wq_max)) ||
 					((*tcp_total_wq+size)>cfg_get(tcp, tcp_cfg, tcp_wq_max)) ||
885b9f62
 					(q->first &&
20863813
 					TICKS_LT(q->wr_timeout, t)) )){
885b9f62
 		LOG(L_ERR, "ERROR: wbufq_add(%d bytes): write queue full or timeout "
 					" (%d, total %d, last write %d s ago)\n",
 					size, q->queued, *tcp_total_wq,
5f653089
 					TICKS_TO_S(t-(q->wr_timeout-
 								cfg_get(tcp, tcp_cfg, send_timeout))));
7bb2b4ca
 		if (q->first && TICKS_LT(q->wr_timeout, t)){
 			if (unlikely(c->state==S_CONN_CONNECT)){
38429f23
 #ifdef USE_DST_BLACKLIST
a6c250c0
 				(void)dst_blacklist_su( BLST_ERR_CONNECT, c->rcv.proto,
5d6752dc
 										&c->rcv.src_su, &c->send_flags, 0);
7bb2b4ca
 #endif /* USE_DST_BLACKLIST */
 				TCP_EV_CONNECT_TIMEOUT(0, TCP_LADDR(c), TCP_LPORT(c),
 											TCP_PSU(c), TCP_PROTO(c));
 				TCP_STATS_CONNECT_FAILED();
 			}else{
 #ifdef USE_DST_BLACKLIST
a6c250c0
 				(void)dst_blacklist_su( BLST_ERR_SEND, c->rcv.proto,
5d6752dc
 									&c->rcv.src_su, &c->send_flags, 0);
38429f23
 #endif /* USE_DST_BLACKLIST */
7bb2b4ca
 				TCP_EV_SEND_TIMEOUT(0, &c->rcv);
 				TCP_STATS_SEND_TIMEOUT();
 			}
 		}else{
 			/* if it's not a timeout => queue full */
 			TCP_EV_SENDQ_FULL(0, &c->rcv);
 			TCP_STATS_SENDQ_FULL();
 		}
885b9f62
 		goto error;
 	}
 	
 	if (unlikely(q->last==0)){
e655392a
 		wb_size=MAX_unsigned(cfg_get(tcp, tcp_cfg, wq_blk_size), size);
885b9f62
 		wb=shm_malloc(sizeof(*wb)+wb_size-1);
 		if (unlikely(wb==0))
 			goto error;
 		wb->b_size=wb_size;
 		wb->next=0;
 		q->last=wb;
 		q->first=wb;
 		q->last_used=0;
 		q->offset=0;
efc23dce
 		q->wr_timeout=get_ticks_raw()+
 			((c->state==S_CONN_CONNECT)?
 					S_TO_TICKS(cfg_get(tcp, tcp_cfg, connect_timeout_s)):
ffc72fcf
 					cfg_get(tcp, tcp_cfg, send_timeout));
885b9f62
 	}else{
 		wb=q->last;
 	}
 	
 	while(size){
 		last_free=wb->b_size-q->last_used;
 		if (last_free==0){
e655392a
 			wb_size=MAX_unsigned(cfg_get(tcp, tcp_cfg, wq_blk_size), size);
885b9f62
 			wb=shm_malloc(sizeof(*wb)+wb_size-1);
 			if (unlikely(wb==0))
 				goto error;
 			wb->b_size=wb_size;
 			wb->next=0;
 			q->last->next=wb;
 			q->last=wb;
 			q->last_used=0;
 			last_free=wb->b_size;
 		}
 		crt_size=MIN_unsigned(last_free, size);
74c6d280
 		memcpy(wb->buf+q->last_used, data, crt_size);
885b9f62
 		q->last_used+=crt_size;
 		size-=crt_size;
 		data+=crt_size;
 		q->queued+=crt_size;
 		atomic_add_int((int*)tcp_total_wq, crt_size);
 	}
 	return 0;
 error:
 	return -1;
 }
 
 
 
5b892e3e
 /* unsafe version, call while holding the connection write lock
  * inserts data at the beginning, it ignores the max queue size checks and
  * the timeout (use sparingly)
  * Note: it should never be called on a write buffer after wbufq_run() */
5f653089
 inline static int _wbufq_insert(struct  tcp_connection* c, const char* data, 
5b892e3e
 							unsigned int size)
 {
 	struct tcp_wbuffer_queue* q;
 	struct tcp_wbuffer* wb;
 	
 	q=&c->wbuf_q;
 	if (likely(q->first==0)) /* if empty, use wbufq_add */
 		return _wbufq_add(c, data, size);
 	
22db42e4
 	if (unlikely((*tcp_total_wq+size)>cfg_get(tcp, tcp_cfg, tcp_wq_max))){
38429f23
 		LOG(L_ERR, "ERROR: wbufq_insert(%d bytes): write queue full"
5b892e3e
 					" (%d, total %d, last write %d s ago)\n",
 					size, q->queued, *tcp_total_wq,
 					TICKS_TO_S(get_ticks_raw()-q->wr_timeout-
ffc72fcf
 									cfg_get(tcp, tcp_cfg, send_timeout)));
5b892e3e
 		goto error;
 	}
 	if (unlikely(q->offset)){
 		LOG(L_CRIT, "BUG: wbufq_insert: non-null offset %d (bad call, should"
 				"never be called after the wbufq_run())\n", q->offset);
 		goto error;
 	}
 	if ((q->first==q->last) && ((q->last->b_size-q->last_used)>=size)){
 		/* one block with enough space in it for size bytes */
 		memmove(q->first->buf+size, q->first->buf, size);
 		memcpy(q->first->buf, data, size);
 		q->last_used+=size;
 	}else{
 		/* create a size bytes block directly */
 		wb=shm_malloc(sizeof(*wb)+size-1);
 		if (unlikely(wb==0))
 			goto error;
 		wb->b_size=size;
 		/* insert it */
 		wb->next=q->first;
 		q->first=wb;
 		memcpy(wb->buf, data, size);
 	}
 	
 	q->queued+=size;
 	atomic_add_int((int*)tcp_total_wq, size);
 	return 0;
 error:
 	return -1;
 }
 
 
 
20863813
 /* unsafe version, call while holding the connection write lock */
 inline static void _wbufq_destroy( struct  tcp_wbuffer_queue* q)
885b9f62
 {
 	struct tcp_wbuffer* wb;
 	struct tcp_wbuffer* next_wb;
 	int unqueued;
 	
 	unqueued=0;
 	if (likely(q->first)){
 		wb=q->first;
 		do{
 			next_wb=wb->next;
 			unqueued+=(wb==q->last)?q->last_used:wb->b_size;
 			if (wb==q->first)
 				unqueued-=q->offset;
 			shm_free(wb);
 			wb=next_wb;
 		}while(wb);
 	}
 	memset(q, 0, sizeof(*q));
 	atomic_add_int((int*)tcp_total_wq, -unqueued);
 }
 
 
 
20863813
 /* tries to empty the queue  (safe version, c->write_lock must not be hold)
885b9f62
  * returns -1 on error, bytes written on success (>=0) 
  * if the whole queue is emptied => sets *empty*/
 inline static int wbufq_run(int fd, struct tcp_connection* c, int* empty)
 {
 	struct tcp_wbuffer_queue* q;
 	struct tcp_wbuffer* wb;
 	int n;
 	int ret;
 	int block_size;
 	char* buf;
 	
 	*empty=0;
 	ret=0;
 	lock_get(&c->write_lock);
 	q=&c->wbuf_q;
 	while(q->first){
 		block_size=((q->first==q->last)?q->last_used:q->first->b_size)-
 						q->offset;
 		buf=q->first->buf+q->offset;
 		n=_tcpconn_write_nb(fd, c, buf, block_size);
 		if (likely(n>0)){
 			ret+=n;
 			if (likely(n==block_size)){
 				wb=q->first;
 				q->first=q->first->next; 
 				shm_free(wb);
 				q->offset=0;
 				q->queued-=block_size;
 				atomic_add_int((int*)tcp_total_wq, -block_size);
 			}else{
 				q->offset+=n;
 				q->queued-=n;
 				atomic_add_int((int*)tcp_total_wq, -n);
 				break;
 			}
 		}else{
 			if (n<0){
 				/* EINTR is handled inside _tcpconn_write_nb */
 				if (!(errno==EAGAIN || errno==EWOULDBLOCK)){
7bb2b4ca
 					if (unlikely(c->state==S_CONN_CONNECT)){
38429f23
 						switch(errno){
 							case ENETUNREACH:
7bb2b4ca
 							case EHOSTUNREACH: /* not posix for send() */
 #ifdef USE_DST_BLACKLIST
5d6752dc
 								dst_blacklist_su(BLST_ERR_CONNECT,
 													c->rcv.proto,
 													&c->rcv.src_su,
 													&c->send_flags, 0);
7bb2b4ca
 #endif /* USE_DST_BLACKLIST */
 								TCP_EV_CONNECT_UNREACHABLE(errno, TCP_LADDR(c),
 													TCP_LPORT(c), TCP_PSU(c),
 													TCP_PROTO(c));
 								break;
 							case ECONNREFUSED:
38429f23
 							case ECONNRESET:
7bb2b4ca
 #ifdef USE_DST_BLACKLIST
5d6752dc
 								dst_blacklist_su(BLST_ERR_CONNECT,
 													c->rcv.proto,
 													&c->rcv.src_su,
 													&c->send_flags, 0);
7bb2b4ca
 #endif /* USE_DST_BLACKLIST */
 								TCP_EV_CONNECT_RST(0, TCP_LADDR(c),
 													TCP_LPORT(c), TCP_PSU(c),
 													TCP_PROTO(c));
 								break;
 							default:
 								TCP_EV_CONNECT_ERR(errno, TCP_LADDR(c),
 													TCP_LPORT(c), TCP_PSU(c),
 													TCP_PROTO(c));
 						}
 						TCP_STATS_CONNECT_FAILED();
 					}else{
 						switch(errno){
 							case ECONNREFUSED:
 							case ECONNRESET:
 								TCP_STATS_CON_RESET();
 								/* no break */
 							case ENETUNREACH:
 							case EHOSTUNREACH: /* not posix for send() */
 #ifdef USE_DST_BLACKLIST
5d6752dc
 								dst_blacklist_su(BLST_ERR_SEND,
 													c->rcv.proto,
 													&c->rcv.src_su,
 													&c->send_flags, 0);
7bb2b4ca
 #endif /* USE_DST_BLACKLIST */
38429f23
 								break;
 						}
7bb2b4ca
 					}
885b9f62
 					ret=-1;
 					LOG(L_ERR, "ERROR: wbuf_runq: %s [%d]\n",
 						strerror(errno), errno);
 				}
 			}
 			break;
 		}
 	}
 	if (likely(q->first==0)){
 		q->last=0;
 		q->last_used=0;
 		q->offset=0;
 		*empty=1;
 	}
 	lock_release(&c->write_lock);
efc23dce
 	if (likely(ret>0)){
ffc72fcf
 		q->wr_timeout=get_ticks_raw()+cfg_get(tcp, tcp_cfg, send_timeout);
7bb2b4ca
 		if (unlikely(c->state==S_CONN_CONNECT || c->state==S_CONN_ACCEPT)){
 			TCP_STATS_ESTABLISHED(c->state);
efc23dce
 			c->state=S_CONN_OK;
7bb2b4ca
 		}
efc23dce
 	}
885b9f62
 	return ret;
 }
 
76cb799e
 #endif /* TCP_ASYNC */
885b9f62
 
 
 
518c9339
 #if 0
bc977837
 /* Blocking write, usable even on non-blocking sockets.
  * (this function sits inside an "#if 0" region, so it is currently
  *  compiled out)
  * Tries to send len bytes from buf on fd; whenever the send would block
  * or only a part was written it waits with select() until fd becomes
  * writable and retries.
  * Returns the initial len on success and -1 on a send/select error or when
  * a select() wait of tcp_send_timeout expires. */
 static int tcp_blocking_write(struct tcp_connection* c, int fd, char* buf,
 								unsigned int len)
 {
 	int n;
 	fd_set sel_set;
 	struct timeval timeout;
 	int ticks;
 	int initial_len;
 	
 	/* remember the requested size, len shrinks on partial writes */
 	initial_len=len;
 again:
 	
 	n=send(fd, buf, len,
 #ifdef HAVE_MSG_NOSIGNAL
 			MSG_NOSIGNAL
 #else
 			0
 #endif
 		);
 	if (n<0){
 		/* interrupted by a signal => retry immediately */
 		if (errno==EINTR)	goto again;
 		else if (errno!=EAGAIN && errno!=EWOULDBLOCK){
 			/* hard error (anything but "would block") */
 			LOG(L_ERR, "tcp_blocking_write: failed to send: (%d) %s\n",
 					errno, strerror(errno));
30745c9d
 			TCP_EV_SEND_TIMEOUT(errno, &c->rcv);
 			TCP_STATS_SEND_TIMEOUT();
bc977837
 			goto error;
 		}
 		/* EAGAIN/EWOULDBLOCK: fall through and wait for writability */
 	}else if (n<len){
 		/* partial write */
 		buf+=n;
 		len-=n;
 	}else{
 		/* success: full write */
 		goto end;
 	}
 	/* wait until the socket is writable again, or the timeout expires */
 	while(1){
 		FD_ZERO(&sel_set);
 		FD_SET(fd, &sel_set);
 		timeout.tv_sec=tcp_send_timeout;
 		timeout.tv_usec=0;
 		/* the reference tick is re-read before every select() call */
 		ticks=get_ticks();
 		n=select(fd+1, 0, &sel_set, 0, &timeout);
 		if (n<0){
 			if (errno==EINTR) continue; /* signal, ignore */
 			LOG(L_ERR, "ERROR: tcp_blocking_write: select failed: "
 					" (%d) %s\n", errno, strerror(errno));
 			goto error;
 		}else if (n==0){
 			/* timeout */
 			if (get_ticks()-ticks>=tcp_send_timeout){
 				LOG(L_ERR, "ERROR: tcp_blocking_write: send timeout (%d)\n",
 						tcp_send_timeout);
 				goto error;
 			}
 			continue;
 		}
 		if (FD_ISSET(fd, &sel_set)){
 			/* we can write again */
 			goto again;
 		}
 	}
 error:
 		return -1;
 end:
 		return initial_len;
 }
518c9339
 #endif
bc977837
 
 
 
f2f969dd
 struct tcp_connection* tcpconn_new(int sock, union sockaddr_union* su,
205fcb02
 									union sockaddr_union* local_addr,
f6e50f08
 									struct socket_info* ba, int type, 
 									int state)
5b532c7f
 {
 	struct tcp_connection *c;
e655392a
 	int rd_b_size;
5b532c7f
 	
e655392a
 	rd_b_size=cfg_get(tcp, tcp_cfg, rd_buf_size);
 	c=shm_malloc(sizeof(struct tcp_connection) + rd_b_size);
5b532c7f
 	if (c==0){
f6e50f08
 		LOG(L_ERR, "ERROR: tcpconn_new: mem. allocation failure\n");
5b532c7f
 		goto error;
 	}
e655392a
 	memset(c, 0, sizeof(struct tcp_connection)); /* zero init (skip rd buf)*/
5b532c7f
 	c->s=sock;
e29d027b
 	c->fd=-1; /* not initialized */
ab130758
 	if (lock_init(&c->write_lock)==0){
f6e50f08
 		LOG(L_ERR, "ERROR: tcpconn_new: init lock failed\n");
ab130758
 		goto error;
 	}
 	
f2f969dd
 	c->rcv.src_su=*su;
 	
be7401cc
 	atomic_set(&c->refcnt, 0);
8b0472d7
 	local_timer_init(&c->timer, tcpconn_main_timeout, c, 0);
f2f969dd
 	su2ip_addr(&c->rcv.src_ip, su);
 	c->rcv.src_port=su_getport(su);
 	c->rcv.bind_address=ba;
205fcb02
 	if (likely(local_addr)){
 		su2ip_addr(&c->rcv.dst_ip, local_addr);
 		c->rcv.dst_port=su_getport(local_addr);
 	}else if (ba){
f2f969dd
 		c->rcv.dst_ip=ba->address;
 		c->rcv.dst_port=ba->port_no;
 	}
9c01c860
 	print_ip("tcpconn_new: new tcp connection: ", &c->rcv.src_ip, "\n");
 	DBG(     "tcpconn_new: on port %d, type %d\n", c->rcv.src_port, type);
e655392a
 	init_tcp_req(&c->req, (char*)c+sizeof(struct tcp_connection), rd_b_size);
2719e69b
 	c->id=(*connection_id)++;
f2f969dd
 	c->rcv.proto_reserved1=0; /* this will be filled before receive_message*/
 	c->rcv.proto_reserved2=0;
f6e50f08
 	c->state=state;
 	c->extra_data=0;
 #ifdef USE_TLS
 	if (type==PROTO_TLS){
28427aa4
 		if (tls_tcpconn_init(c, sock)==-1) goto error;
f6e50f08
 	}else
 #endif /* USE_TLS*/
 	{
 		c->type=PROTO_TCP;
 		c->rcv.proto=PROTO_TCP;
3dc4f620
 		c->timeout=get_ticks_raw()+cfg_get(tcp, tcp_cfg, con_lifetime);
f6e50f08
 	}
7564dcab
 	
5b532c7f
 	return c;
 	
 error:
f6e50f08
 	if (c) shm_free(c);
5b532c7f
 	return 0;
 }
 
 
 
d22b82a0
 /* do the actual connect, set sock. options a.s.o
  * Params:
  *   server         - destination address
  *   from           - optional source address to bind to (can be 0); a
  *                     failed bind is only logged as a warning
  *   type           - protocol; used for blacklisting/events and, when
  *                     USE_TLS is defined, for choosing between the
  *                     PROTO_TLS and PROTO_TCP listening socket lookup
  *   send_flags     - passed to the blacklist and blocking connect helpers
  *   res_local_addr - filled with the local address: "from" if it holds a
  *                     non-"any" ip, else the getsockname() result
  *   res_si         - filled with the listening socket found for the local
  *                     ip, or with the default tcp send socket
  *   state          - S_CONN_CONNECT for an unfinished connect and
  *                     S_CONN_OK for a finished one
  * returns socket on success, -1 on error (the socket is closed, if it was
  *  opened) */
 inline static int tcp_do_connect(	union sockaddr_union* server,
 									union sockaddr_union* from,
 									int type,
5d6752dc
 									snd_flags_t* send_flags,
d22b82a0
 									union sockaddr_union* res_local_addr,
 									struct socket_info** res_si,
 									enum tcp_conn_states *state
 									)
0c5da34b
 {
 	int s;
f15bede1
 	union sockaddr_union my_name;
56b80d50
 	socklen_t my_name_len;
a35ad52c
 	struct ip_addr ip;
76cb799e
 #ifdef TCP_ASYNC
885b9f62
 	int n;
76cb799e
 #endif /* TCP_ASYNC */
0c5da34b
 
 	s=socket(AF2PF(server->s.sa_family), SOCK_STREAM, 0);
d22b82a0
 	if (unlikely(s==-1)){
edcdea00
 		LOG(L_ERR, "ERROR: tcp_do_connect %s: socket: (%d) %s\n",
 				su2a(server, sizeof(*server)), errno, strerror(errno));
0c5da34b
 		goto error;
 	}
bc977837
 	if (init_sock_opt(s)<0){
edcdea00
 		LOG(L_ERR, "ERROR: tcp_do_connect %s: init_sock_opt failed\n",
 					su2a(server, sizeof(*server)));
bc977837
 		goto error;
39546e5f
 	}
205fcb02
 	
 	/* bind to the requested source address; a failure is not fatal, the
 	 * connect is still attempted */
d22b82a0
 	if (unlikely(from && bind(s, &from->s, sockaddru_len(*from)) != 0)){
 		LOG(L_WARN, "WARNING: tcp_do_connect: binding to source address"
edcdea00
 					" %s failed: %s [%d]\n", su2a(from, sizeof(*from)),
 					strerror(errno), errno);
d22b82a0
 	}
 	/* default: connect finished; changed below only on EINPROGRESS */
 	*state=S_CONN_OK;
76cb799e
 #ifdef TCP_ASYNC
 	if (likely(cfg_get(tcp, tcp_cfg, async))){
885b9f62
 again:
 		n=connect(s, &server->s, sockaddru_len(*server));
575a0b47
 		if (likely(n==-1)){ /*non-blocking => most probable EINPROGRESS*/
d22b82a0
 			if (likely(errno==EINPROGRESS))
 				*state=S_CONN_CONNECT;
575a0b47
 			else if (errno==EINTR) goto again;
d22b82a0
 			else if (errno!=EALREADY){
 				/* real connect error: blacklist (where applicable), raise
 				 * the matching event, update the stats and fail */
7bb2b4ca
 				switch(errno){
 					case ENETUNREACH:
 					case EHOSTUNREACH:
38429f23
 #ifdef USE_DST_BLACKLIST
5d6752dc
 						dst_blacklist_su(BLST_ERR_CONNECT, type, server,
 											send_flags, 0);
7bb2b4ca
 #endif /* USE_DST_BLACKLIST */
 						TCP_EV_CONNECT_UNREACHABLE(errno, 0, 0, server, type);
 						break;
 					case ETIMEDOUT:
 #ifdef USE_DST_BLACKLIST
5d6752dc
 						dst_blacklist_su(BLST_ERR_CONNECT, type, server,
 											send_flags, 0);
38429f23
 #endif /* USE_DST_BLACKLIST */
7bb2b4ca
 						TCP_EV_CONNECT_TIMEOUT(errno, 0, 0, server, type);
 						break;
 					case ECONNREFUSED:
 					case ECONNRESET:
 #ifdef USE_DST_BLACKLIST
5d6752dc
 						dst_blacklist_su(BLST_ERR_CONNECT, type, server,
 											send_flags, 0);
7bb2b4ca
 #endif /* USE_DST_BLACKLIST */
 						TCP_EV_CONNECT_RST(errno, 0, 0, server, type);
 						break;
 					case EAGAIN:/* not posix, but supported on linux and bsd */
 						TCP_EV_CONNECT_NO_MORE_PORTS(errno, 0, 0, server,type);
 						break;
 					default:
 						TCP_EV_CONNECT_ERR(errno, 0, 0, server, type);
 				}
 				TCP_STATS_CONNECT_FAILED();
 				/* NOTE(review): errno is read here after the blacklist/event
 				 * calls above; verify they cannot change it */
edcdea00
 				LOG(L_ERR, "ERROR: tcp_do_connect: connect %s: (%d) %s\n",
 							su2a(server, sizeof(*server)),
 							errno, strerror(errno));
885b9f62
 				goto error;
 			}
 		}
 	}else{
76cb799e
 #endif /* TCP_ASYNC */
 		/* non-async mode (or TCP_ASYNC not compiled in): blocking connect */
5d6752dc
 		if (tcp_blocking_connect(s, type,  send_flags, &server->s,
38429f23
 									sockaddru_len(*server))<0){
edcdea00
 			LOG(L_ERR, "ERROR: tcp_do_connect: tcp_blocking_connect %s"
 						" failed\n", su2a(server, sizeof(*server)));
885b9f62
 			goto error;
 		}
76cb799e
 #ifdef TCP_ASYNC
0c5da34b
 	}
76cb799e
 #endif /* TCP_ASYNC */
205fcb02
 	if (from){
 		su2ip_addr(&ip, from);
 		if (!ip_addr_any(&ip))
 			/* we already know the source ip, skip the sys. call */
 			goto find_socket;
 	}
 	/* no usable source ip => ask the kernel for the local address */
f15bede1
 	my_name_len=sizeof(my_name);
d22b82a0
 	if (unlikely(getsockname(s, &my_name.s, &my_name_len)!=0)){
 		LOG(L_ERR, "ERROR: tcp_do_connect: getsockname failed: %s(%d)\n",
f15bede1
 				strerror(errno), errno);
d22b82a0
 		*res_si=0;
 		goto error;
f15bede1
 	}
205fcb02
 	from=&my_name; /* update from with the real "from" address */
a35ad52c
 	su2ip_addr(&ip, &my_name);
205fcb02
 find_socket:
 	/* look up the listening socket that corresponds to the local ip */
06aaa54f
 #ifdef USE_TLS
d22b82a0
 	if (unlikely(type==PROTO_TLS))
 		*res_si=find_si(&ip, 0, PROTO_TLS);
06aaa54f
 	else
 #endif
d22b82a0
 		*res_si=find_si(&ip, 0, PROTO_TCP);
 	
 	if (unlikely(*res_si==0)){
edcdea00
 		LOG(L_WARN, "WARNING: tcp_do_connect %s: could not find corresponding"
 				" listening socket for %s, using default...\n",
 					su2a(server, sizeof(*server)), ip_addr2a(&ip));
 		/* fall back to the default send socket for the address family */
d22b82a0
 		if (server->s.sa_family==AF_INET) *res_si=sendipv4_tcp;
f15bede1
 #ifdef USE_IPV6
d22b82a0
 		else *res_si=sendipv6_tcp;
f15bede1
 #endif
 	}
d22b82a0
 	*res_local_addr=*from;
 	return s;
 error:
 	/* s is -1 only if socket() itself failed */
ab88df95
 	if (s!=-1) tcp_safe_close(s);
d22b82a0
 	return -1;
 }
 
 
 
5d6752dc
 /* Opens a new connection to "server" and wraps it in a tcp_connection.
  * The global (and, for PROTO_TLS, the tls) connection limits are checked
  * first; then tcp_do_connect() creates the socket and tcpconn_new() the
  * connection structure, on which *send_flags are set.
  * Returns the new connection or 0 on error (if the socket was already
  *  opened, it is closed). */
 struct tcp_connection* tcpconn_connect( union sockaddr_union* server,
 										union sockaddr_union* from,
 										int type, snd_flags_t* send_flags)
 {
 	int sock;
 	struct socket_info* bind_si;
 	union sockaddr_union local_su;
 	struct tcp_connection* c;
 	enum tcp_conn_states conn_state;
 
 	/* connection limits: nothing is opened yet, so just bail out */
 	if (*tcp_connections_no >= cfg_get(tcp, tcp_cfg, max_connections)){
 		LOG(L_ERR, "ERROR: tcpconn_connect: maximum number of connections"
 					" exceeded (%d/%d)\n",
 					*tcp_connections_no,
 					cfg_get(tcp, tcp_cfg, max_connections));
 		return 0;
 	}
 	if (unlikely(type==PROTO_TLS) &&
 			(*tls_connections_no >=
 				cfg_get(tcp, tcp_cfg, max_tls_connections))){
 		LM_ERR("ERROR: maximum number of tls connections"
 					" exceeded (%d/%d)\n",
 					*tls_connections_no,
 					cfg_get(tcp, tcp_cfg, max_tls_connections));
 		return 0;
 	}
 
 	sock=tcp_do_connect(server, from, type,  send_flags, &local_su, &bind_si,
 							&conn_state);
 	if (sock==-1){
 		LOG(L_ERR, "ERROR: tcp_do_connect %s: failed (%d) %s\n",
 				su2a(server, sizeof(*server)), errno, strerror(errno));
 		return 0;
 	}
 	c=tcpconn_new(sock, server, &local_su, bind_si, type, conn_state);
 	if (c==0){
 		LOG(L_ERR, "ERROR: tcp_connect %s: tcpconn_new failed, closing the "
 				 " socket\n", su2a(server, sizeof(*server)));
 		tcp_safe_close(sock); /* close the opened socket */
 		return 0;
 	}
 	tcpconn_set_send_flags(c, *send_flags);
 	return c;
 }
 
 
 
d22b82a0
 #ifdef TCP_CONNECT_WAIT
 int tcpconn_finish_connect( struct tcp_connection* c,
 												union sockaddr_union* from)
 {
 	int s;
 	int r;
 	union sockaddr_union local_addr;
 	struct socket_info* si;
 	enum tcp_conn_states state;
 	struct tcp_conn_alias* a;
3dc4f620
 	int new_conn_alias_flags;
d22b82a0
 	
5d6752dc
 	s=tcp_do_connect(&c->rcv.src_su, from, c->type, &c->send_flags,
 						&local_addr, &si, &state);
d22b82a0
 	if (unlikely(s==-1)){
edcdea00
 		LOG(L_ERR, "ERROR: tcpconn_finish_connect %s: tcp_do_connect for %p"
 					" failed\n", su2a(&c->rcv.src_su, sizeof(c->rcv.src_su)),
 					c);
d22b82a0
 		return -1;
 	}
 	c->rcv.bind_address=si;
 	su2ip_addr(&c->rcv.dst_ip, &local_addr);
 	c->rcv.dst_port=su_getport(&local_addr);
 	/* update aliases if needed */
 	if (likely(from==0)){
3dc4f620
 		new_conn_alias_flags=cfg_get(tcp, tcp_cfg, new_conn_alias_flags);
d22b82a0
 		/* add aliases */
 		TCPCONN_LOCK;
 		_tcpconn_add_alias_unsafe(c, c->rcv.src_port, &c->rcv.dst_ip, 0,
3dc4f620
 													new_conn_alias_flags);
d22b82a0
 		_tcpconn_add_alias_unsafe(c, c->rcv.src_port, &c->rcv.dst_ip,
3dc4f620
 									c->rcv.dst_port, new_conn_alias_flags);
d22b82a0
 		TCPCONN_UNLOCK;
 	}else if (su_cmp(from, &local_addr)!=1){
3dc4f620
 		new_conn_alias_flags=cfg_get(tcp, tcp_cfg, new_conn_alias_flags);
d22b82a0
 		TCPCONN_LOCK;
 			/* remove all the aliases except the first one and re-add them
 			 * (there shouldn't be more then the 3 default aliases at this 
 			 * stage) */
 			for (r=1; r<c->aliases; r++){
 				a=&c->con_aliases[r];
 				tcpconn_listrm(tcpconn_aliases_hash[a->hash], a, next, prev);
 			}
 			c->aliases=1;
 			/* add the local_ip:0 and local_ip:local_port aliases */
 			_tcpconn_add_alias_unsafe(c, c->rcv.src_port, &c->rcv.dst_ip,
3dc4f620
 												0, new_conn_alias_flags);
d22b82a0
 			_tcpconn_add_alias_unsafe(c, c->rcv.src_port, &c->rcv.dst_ip,
3dc4f620
 									c->rcv.dst_port, new_conn_alias_flags);
d22b82a0
 		TCPCONN_UNLOCK;
 	}
 	
 	return s;
 }
 #endif /* TCP_CONNECT_WAIT */
 
 
 
d8b11bbc
 /* adds a tcp connection to the tcpconn hashes
  * Note: it's called _only_ from the tcp_main process */
a288ee34
 inline static struct tcp_connection*  tcpconn_add(struct tcp_connection *c)
0c5da34b
 {
a288ee34
 	struct ip_addr zero_ip;
3dc4f620
 	int new_conn_alias_flags;
8aeb47e2
 
a288ee34
 	if (likely(c)){
 		ip_addr_mk_any(c->rcv.src_ip.af, &zero_ip);
be7401cc
 		c->id_hash=tcp_id_hash(c->id);
a288ee34
 		c->aliases=0;
3dc4f620
 		new_conn_alias_flags=cfg_get(tcp, tcp_cfg, new_conn_alias_flags);
8aeb47e2
 		TCPCONN_LOCK;
19782e1c
 		c->flags|=F_CONN_HASHED;
8aeb47e2
 		/* add it at the begining of the list*/
be7401cc
 		tcpconn_listadd(tcpconn_id_hash[c->id_hash], c, id_next, id_prev);
a288ee34
 		/* set the aliases */
 		/* first alias is for (peer_ip, peer_port, 0 ,0) -- for finding
 		 *  any connection to peer_ip, peer_port
 		 * the second alias is for (peer_ip, peer_port, local_addr, 0) -- for
 		 *  finding any conenction to peer_ip, peer_port from local_addr 
 		 * the third alias is for (peer_ip, peer_port, local_addr, local_port) 
 		 *   -- for finding if a fully specified connection exists */
22f06258
 		_tcpconn_add_alias_unsafe(c, c->rcv.src_port, &zero_ip, 0,
3dc4f620
 													new_conn_alias_flags);
d22b82a0
 		if (likely(c->rcv.dst_ip.af && ! ip_addr_any(&c->rcv.dst_ip))){
 			_tcpconn_add_alias_unsafe(c, c->rcv.src_port, &c->rcv.dst_ip, 0,
3dc4f620
 													new_conn_alias_flags);
d22b82a0
 			_tcpconn_add_alias_unsafe(c, c->rcv.src_port, &c->rcv.dst_ip,
3dc4f620
 									c->rcv.dst_port, new_conn_alias_flags);
d22b82a0
 		}
a288ee34
 		/* ignore add_alias errors, there are some valid cases when one
 		 *  of the add_alias would fail (e.g. first add_alias for 2 connections
 		 *   with the same destination but different src. ip*/
8aeb47e2
 		TCPCONN_UNLOCK;
a288ee34
 		DBG("tcpconn_add: hashes: %d:%d:%d, %d\n",
 												c->con_aliases[0].hash,
 												c->con_aliases[1].hash,
 												c->con_aliases[2].hash,
be7401cc
 												c->id_hash);
8aeb47e2
 		return c;
 	}else{
 		LOG(L_CRIT, "tcpconn_add: BUG: null connection pointer\n");
 		return 0;
 	}
0c5da34b
 }
 
 
ccb7fda2
 static inline void _tcpconn_detach(struct tcp_connection *c)
ab130758
 {
59653eb8
 	int r;
ab130758
 	tcpconn_listrm(tcpconn_id_hash[c->id_hash], c, id_next, id_prev);
59653eb8
 	/* remove all the aliases */
 	for (r=0; r<c->aliases; r++)
 		tcpconn_listrm(tcpconn_aliases_hash[c->con_aliases[r].hash], 
 						&c->con_aliases[r], next, prev);
42d1a155
 	c->aliases = 0;
ccb7fda2
 }
 
 
 
 static inline void _tcpconn_free(struct tcp_connection* c)
 {
76cb799e
 #ifdef TCP_ASYNC
20863813
 	if (unlikely(_wbufq_non_empty(c)))
 		_wbufq_destroy(&c->wbuf_q);
885b9f62
 #endif
ab130758
 	lock_destroy(&c->write_lock);
f6e50f08
 #ifdef USE_TLS
3d4a77d8
 	if (unlikely(c->type==PROTO_TLS || c->type==PROTO_WSS)) tls_tcpconn_clean(c);
f6e50f08
 #endif
ab130758
 	shm_free(c);
 }
 
 
0c5da34b
 
ccb7fda2
 /* unsafe tcpconn_rm version (nolocks):
  * detaches c from the id and alias hashes (_tcpconn_detach) and then frees
  * it (_tcpconn_free); TCPCONN_LOCK is not taken here */
 void _tcpconn_rm(struct tcp_connection* c)
 {
 	_tcpconn_detach(c);
 	_tcpconn_free(c);
 }
 
 
 
5b532c7f
 void tcpconn_rm(struct tcp_connection* c)
 {
59653eb8
 	int r;
0c5da34b
 	TCPCONN_LOCK;
8aeb47e2
 	tcpconn_listrm(tcpconn_id_hash[c->id_hash], c, id_next, id_prev);
59653eb8
 	/* remove all the aliases */
 	for (r=0; r<c->aliases; r++)
 		tcpconn_listrm(tcpconn_aliases_hash[c->con_aliases[r].hash], 
 						&c->con_aliases[r], next, prev);
42d1a155
 	c->aliases = 0;
0c5da34b
 	TCPCONN_UNLOCK;
ab130758
 	lock_destroy(&c->write_lock);
f6e50f08
 #ifdef USE_TLS
3d4a77d8
 	if ((c->type==PROTO_TLS || c->type==PROTO_WSS)&&(c->extra_data)) tls_tcpconn_clean(c);
f6e50f08
 #endif
6bc40dea
 	shm_free(c);
 }
 
 
a288ee34
 /* finds a connection, if id=0 uses the ip addr, port, local_ip and local port
  *  (host byte order) and tries to find the connection that matches all of
  *   them. Wild cards can be used for local_ip and local_port (a 0 filled
  *   ip address and/or a 0 local port).
8aeb47e2
  * WARNING: unprotected (locks) use tcpconn_get unless you really
  * know what you are doing */
a288ee34
 struct tcp_connection* _tcpconn_find(int id, struct ip_addr* ip, int port,
 										struct ip_addr* l_ip, int l_port)
0c5da34b
 {
 
 	struct tcp_connection *c;
59653eb8
 	struct tcp_conn_alias* a;
8aeb47e2
 	unsigned hash;
a288ee34
 	int is_local_ip_any;
0c5da34b
 	
f2e456c3
 #ifdef EXTRA_DEBUG
9c01c860
 	DBG("tcpconn_find: %d  port %d\n",id, port);
59653eb8
 	if (ip) print_ip("tcpconn_find: ip ", ip, "\n");
f2e456c3
 #endif
00f45c49
 	if (likely(id)){
8aeb47e2
 		hash=tcp_id_hash(id);
 		for (c=tcpconn_id_hash[hash]; c; c=c->id_next){
f2e456c3
 #ifdef EXTRA_DEBUG
9c01c860
 			DBG("c=%p, c->id=%d, port=%d\n",c, c->id, c->rcv.src_port);
 			print_ip("ip=", &c->rcv.src_ip, "\n");
f2e456c3
 #endif
f6e50f08
 			if ((id==c->id)&&(c->state!=S_CONN_BAD)) return c;
8aeb47e2
 		}
00f45c49
 	}else if (likely(ip)){
a288ee34
 		hash=tcp_addr_hash(ip, port, l_ip, l_port);
 		is_local_ip_any=ip_addr_any(l_ip);
59653eb8
 		for (a=tcpconn_aliases_hash[hash]; a; a=a->next){
f2e456c3
 #ifdef EXTRA_DEBUG
59653eb8
 			DBG("a=%p, c=%p, c->id=%d, alias port= %d port=%d\n", a, a->parent,
 					a->parent->id, a->port, a->parent->rcv.src_port);
 			print_ip("ip=",&a->parent->rcv.src_ip,"\n");
f2e456c3
 #endif
59653eb8
 			if ( (a->parent->state!=S_CONN_BAD) && (port==a->port) &&
a288ee34
 					((l_port==0) || (l_port==a->parent->rcv.dst_port)) &&
 					(ip_addr_cmp(ip, &a->parent->rcv.src_ip)) &&
 					(is_local_ip_any ||
 						ip_addr_cmp(l_ip, &a->parent->rcv.dst_ip))
 				)
59653eb8
 				return a->parent;
8aeb47e2
 		}
0c5da34b
 	}
 	return 0;
 }
 
 
 
a288ee34
 /* _tcpconn_find with locks and timeout
  * local_addr contains the desired local ip:port. If null any local address 
  * will be used.  IN*ADDR_ANY or 0 port are wild cards.
  */
ab130758
 struct tcp_connection* tcpconn_get(int id, struct ip_addr* ip, int port,
a288ee34
 									union sockaddr_union* local_addr,
d8b11bbc
 									ticks_t timeout)
0c5da34b
 {
 	struct tcp_connection* c;
a288ee34
 	struct ip_addr local_ip;
 	int local_port;
 	
 	local_port=0;
00f45c49
 	if (likely(ip)){
 		if (unlikely(local_addr)){
a288ee34
 			su2ip_addr(&local_ip, local_addr);
 			local_port=su_getport(local_addr);
 		}else{
 			ip_addr_mk_any(ip->af, &local_ip);
 			local_port=0;
 		}
 	}
0c5da34b
 	TCPCONN_LOCK;
a288ee34
 	c=_tcpconn_find(id, ip, port, &local_ip, local_port);
00f45c49
 	if (likely(c)){ 
be7401cc
 			atomic_inc(&c->refcnt);
00f45c49
 			/* update the timeout only if the connection is not handled
74a32e90
 			 * by a tcp reader _and_the timeout is non-zero  (the tcp
 			 * reader process uses c->timeout for its own internal
 			 * timeout and c->timeout will be overwritten * anyway on
 			 * return to tcp_main) */
 			if (likely(c->reader_pid==0 && timeout != 0))
00f45c49
 				c->timeout=get_ticks_raw()+timeout;
ab130758
 	}
0c5da34b
 	TCPCONN_UNLOCK;
 	return c;
 }
 
 
 
a288ee34
 /* add c->dst:port, local_addr as an alias for the "id" connection, 
22f06258
  * flags: TCP_ALIAS_FORCE_ADD  - add an alias even if a previous one exists
  *        TCP_ALIAS_REPLACE    - if a prev. alias exists, replace it with the
  *                                new one
a288ee34
  * returns 0 on success, <0 on failure ( -1  - null c, -2 too many aliases,
  *  -3 alias already present and pointing to another connection)
  * (-1 is also returned if, while replacing, the found alias is not in its
  *  parent's alias array - logged as a BUG)
  * The alias is hashed on (c->rcv.src_ip, port, l_ip, l_port); if c already
  *  owns a matching alias nothing is added and 0 is returned.
  * WARNING: must be called with TCPCONN_LOCK held */
 inline static int _tcpconn_add_alias_unsafe(struct tcp_connection* c, int port,
22f06258
 										struct ip_addr* l_ip, int l_port,
 										int flags)
59653eb8
 {
 	unsigned hash;
 	struct tcp_conn_alias* a;
22f06258
 	struct tcp_conn_alias* nxt;
55cdf20a
 	struct tcp_connection* p;
a288ee34
 	int is_local_ip_any;
55cdf20a
 	int i;
 	int r;
59653eb8
 	
 	a=0;
a288ee34
 	is_local_ip_any=ip_addr_any(l_ip);
00f45c49
 	if (likely(c)){
a288ee34
 		hash=tcp_addr_hash(&c->rcv.src_ip, port, l_ip, l_port);
59653eb8
 		/* search the aliases for an already existing one */
 		/* (the next pointer is saved up front because the current alias
 		 *  might be unlinked inside the loop body) */
f0302ef7
 		for (a=tcpconn_aliases_hash[hash], nxt=0; a; a=nxt){
 			nxt=a->next;
59653eb8
 			if ( (a->parent->state!=S_CONN_BAD) && (port==a->port) &&
a288ee34
 					( (l_port==0) || (l_port==a->parent->rcv.dst_port)) &&
 					(ip_addr_cmp(&c->rcv.src_ip, &a->parent->rcv.src_ip)) &&
 					( is_local_ip_any || 
 					  ip_addr_cmp(&a->parent->rcv.dst_ip, l_ip))
 					){
59653eb8
 				/* found */
00f45c49
 				if (unlikely(a->parent!=c)){
 					/* the matching alias belongs to another connection */
22f06258
 					if (flags & TCP_ALIAS_FORCE_ADD)
 						/* still have to walk the whole list to check if
 						 * the alias was not already added */
 						continue;
 					else if (flags & TCP_ALIAS_REPLACE){
55cdf20a
 						/* remove the alias =>
 						 * remove the current alias and all the following
 						 *  ones from the corresponding connection, shift the 
 						 *  connection aliases array and re-add the other 
 						 *  aliases (!= current one) */
 						p=a->parent;
 						/* find the index of a in its parent's alias array */
 						for (i=0; (i<p->aliases) && (&(p->con_aliases[i])!=a);
 								i++);
 						if (unlikely(i==p->aliases)){
 							LOG(L_CRIT, "BUG: _tcpconn_add_alias_unsafe: "
 									" alias %p not found in con %p (id %d)\n",
 									a, p, p->id);
 							goto error_not_found;
 						}
 						/* unlink a and all the aliases stored after it */
 						for (r=i; r<p->aliases; r++){
 							tcpconn_listrm(
 								tcpconn_aliases_hash[p->con_aliases[r].hash],
 								&p->con_aliases[r], next, prev);
 						}
 						/* close the gap left by a in the array */
 						/* NOTE(review): nxt was saved before this shift; if
 						 * it points into p->con_aliases at an index > i the
 						 * entry it refers to is moved by the memmove -
 						 * confirm that the list walk stays valid */
 						if (likely((i+1)<p->aliases)){
 							memmove(&p->con_aliases[i], &p->con_aliases[i+1],
 											(p->aliases-i-1)*
 												sizeof(p->con_aliases[0]));
 						}
 						p->aliases--;
 						/* re-add the remaining aliases */
 						for (r=i; r<p->aliases; r++){
 							tcpconn_listadd(
 								tcpconn_aliases_hash[p->con_aliases[r].hash], 
 								&p->con_aliases[r], next, prev);
 						}
22f06258
 					}else
 						goto error_sec;
 				}else goto ok; /* c already has this alias */
59653eb8
 			}
 		}
 		/* not found (or forced/replaced) => append a new alias to c */
00f45c49
 		if (unlikely(c->aliases>=TCP_CON_MAX_ALIASES)) goto error_aliases;
59653eb8
 		c->con_aliases[c->aliases].parent=c;
 		c->con_aliases[c->aliases].port=port;
 		c->con_aliases[c->aliases].hash=hash;
 		tcpconn_listadd(tcpconn_aliases_hash[hash], 
 								&c->con_aliases[c->aliases], next, prev);
 		c->aliases++;
 	}else goto error_not_found;
 ok:
 #ifdef EXTRA_DEBUG
 	/* a is non-null here only when the loop exited through "goto ok" */
a288ee34
 	if (a) DBG("_tcpconn_add_alias_unsafe: alias already present\n");
 	else   DBG("_tcpconn_add_alias_unsafe: alias port %d for hash %d, id %d\n",
59653eb8
 			port, hash, c->id);
 #endif
 	return 0;
 error_aliases:
a288ee34
 	/* too many aliases */
 	return -2;
 error_not_found:
 	/* null connection */
59653eb8
 	return -1;
a288ee34
 error_sec:
 	/* alias already present and pointing to a different connection
 	 * (hijack attempt?) */
 	return -3;
 }
 
 
 
 /* add port as an alias for the "id" connection, 
  * returns 0 on success,-1 on failure */
 int tcpconn_add_alias(int id, int port, int proto)
 {
 	struct tcp_connection* c;
 	int ret;
 	struct ip_addr zero_ip;
23dc5abf
 	int r;
3dc4f620
 	int alias_flags;
a288ee34
 	
 	/* fix the port */
 	port=port?port:((proto==PROTO_TLS)?SIPS_PORT:SIP_PORT);
 	TCPCONN_LOCK;
 	/* check if alias already exists */
 	c=_tcpconn_find(id, 0, 0, 0, 0);
00f45c49
 	if (likely(c)){
a288ee34
 		ip_addr_mk_any(c->rcv.src_ip.af, &zero_ip);
3dc4f620
 		alias_flags=cfg_get(tcp, tcp_cfg, alias_flags);
a288ee34
 		/* alias src_ip:port, 0, 0 */
22f06258
 		ret=_tcpconn_add_alias_unsafe(c, port,  &zero_ip, 0, 
3dc4f620
 										alias_flags);
a288ee34
 		if (ret<0 && ret!=-3) goto error;
 		/* alias src_ip:port, local_ip, 0 */
22f06258
 		ret=_tcpconn_add_alias_unsafe(c, port,  &c->rcv.dst_ip, 0, 
3dc4f620
 										alias_flags);
a288ee34
 		if (ret<0 && ret!=-3) goto error;
 		/* alias src_ip:port, local_ip, local_port */
22f06258
 		ret=_tcpconn_add_alias_unsafe(c, port, &c->rcv.dst_ip, c->rcv.dst_port,
3dc4f620
 										alias_flags);
00f45c49
 		if (unlikely(ret<0)) goto error;
a288ee34
 	}else goto error_not_found;
 	TCPCONN_UNLOCK;
 	return 0;
59653eb8
 error_not_found:
 	TCPCONN_UNLOCK;
 	LOG(L_ERR, "ERROR: tcpconn_add_alias: no connection found for id %d\n",id);
 	return -1;
a288ee34
 error:
59653eb8
 	TCPCONN_UNLOCK;
a288ee34
 	switch(ret){
 		case -2:
23dc5abf
 			LOG(L_ERR, "ERROR: tcpconn_add_alias: too many aliases (%d)"
 					" for connection %p (id %d) %s:%d <- %d\n",
 					c->aliases, c, c->id, ip_addr2a(&c->rcv.src_ip),
 					c->rcv.src_port, port);
 			for (r=0; r<c->aliases; r++){
 				LOG(L_ERR, "ERROR: tcpconn_add_alias: alias %d: for %p (%d)"
 						" %s:%d <-%d hash %x\n",  r, c, c->id, 
 						 ip_addr2a(&c->rcv.src_ip), c->rcv.src_port, 
 						c->con_aliases[r].port, c->con_aliases[r].hash);
 			}
a288ee34
 			break;
 		case -3:
 			LOG(L_ERR, "ERROR: tcpconn_add_alias: possible port"
 					" hijack attempt\n");
 			LOG(L_ERR, "ERROR: tcpconn_add_alias: alias for %d port %d already"
 						" present and points to another connection \n",
 						c->id, port);
 			break;
 		default:
adc585e4
 			LOG(L_ERR, "ERROR: tcpconn_add_alias: unknown error %d\n", ret);
a288ee34
 	}
59653eb8
 	return -1;
 }
 
 
 
5c5cd736
 #ifdef TCP_FD_CACHE
 
b264d2c6
 static void tcp_fd_cache_init(void)
5c5cd736
 {
 	int r;
 	for (r=0; r<TCP_FD_CACHE_SIZE; r++)
 		fd_cache[r].fd=-1;
 }
 
 
 /* Returns the fd cache entry for connection c, or 0 if the slot selected
  * by c->id is empty (fd <= 0) or holds a different connection (both the
  * stored id and the stored connection pointer must match). */
 inline static struct fd_cache_entry* tcp_fd_cache_get(struct tcp_connection *c)
 {
 	unsigned h;
 	
 	/* compute the slot on the unsigned id: a negative id would otherwise
 	 * give a negative remainder and so an out of bounds index (for ids
 	 * >= 0 the slot is unchanged) */
 	h=(unsigned)c->id%TCP_FD_CACHE_SIZE;
 	if ((fd_cache[h].fd>0) && (fd_cache[h].id==c->id) && (fd_cache[h].con==c))
 		return &fd_cache[h];
 	return 0;
 }
 
 
 /* Marks the cache entry e as empty (fd set to -1); the fd itself is not
  * closed here. */
 inline static void tcp_fd_cache_rm(struct fd_cache_entry* e)
 {
 	e->fd=-1;
 }
 
 
 /* Stores fd as the cached descriptor for connection c in the slot selected
  * by c->id. If the slot already holds a descriptor (fd > 0) that one is
  * closed first. */
 inline static void tcp_fd_cache_add(struct tcp_connection *c, int fd)
 {
 	unsigned h;
 	
 	/* compute the slot on the unsigned id: a negative id would otherwise
 	 * give a negative remainder and so an out of bounds index (for ids
 	 * >= 0 the slot is unchanged) */
 	h=(unsigned)c->id%TCP_FD_CACHE_SIZE;
 	if (likely(fd_cache[h].fd>0))
 		tcp_safe_close(fd_cache[h].fd); /* evict the previous occupant */
 	fd_cache[h].fd=fd;
 	fd_cache[h].id=c->id;
 	fd_cache[h].con=c;
 }
 
 #endif /* TCP_FD_CACHE */
 
 
19782e1c
 
 inline static int tcpconn_chld_put(struct tcp_connection* tcpconn);
 
5f653089
 static int tcpconn_send_put(struct tcp_connection* c, const char* buf,
 							unsigned len, snd_flags_t send_flags);
 static int tcpconn_do_send(int fd, struct tcp_connection* c,
 							const char* buf, unsigned len,
 							snd_flags_t send_flags, long* resp, int locked);
 
 static int tcpconn_1st_send(int fd, struct tcp_connection* c,
 							const char* buf, unsigned len,
 							snd_flags_t send_flags, long* resp, int locked);
19782e1c
 
bf458eea
 /* finds a tcpconn & sends on it
a288ee34
  * uses the dst members to, proto (TCP|TLS) and id and tries to send
  *  from the "from" address (if non null and id==0)
bf458eea
  * returns: number of bytes written (>=0) on success
  *          <0 on error */
a288ee34
 int tcp_send(struct dest_info* dst, union sockaddr_union* from,
5f653089
 					const char* buf, unsigned len)
0c5da34b
 {
 	struct tcp_connection *c;
 	struct ip_addr ip;
 	int port;
e29d027b
 	int fd;
0c5da34b
 	long response[2];
 	int n;
3dc4f620
 	ticks_t con_lifetime;
e3163126
 #ifdef USE_TLS
 	const char* rest_buf;
 	const char* t_buf;
 	unsigned rest_len, t_len;
 	long resp;
 	snd_flags_t t_send_flags;
 #endif /* USE_TLS */
0c5da34b
 	
e6a2b12e
 	port=su_getport(&dst->to);
3dc4f620
 	con_lifetime=cfg_get(tcp, tcp_cfg, con_lifetime);
5c5cd736
 	if (likely(port)){
e6a2b12e
 		su2ip_addr(&ip, &dst->to);
3dc4f620
 		c=tcpconn_get(dst->id, &ip, port, from, con_lifetime); 
5c5cd736
 	}else if (likely(dst->id)){
3dc4f620
 		c=tcpconn_get(dst->id, 0, 0, 0, con_lifetime);
f2f969dd
 	}else{
 		LOG(L_CRIT, "BUG: tcp_send called with null id & to\n");
 		return -1;
 	}
0c5da34b
 	
5c5cd736
 	if (likely(dst->id)){
 		if (unlikely(c==0)) {
 			if (likely(port)){
ab130758
 				/* try again w/o id */
3dc4f620
 				c=tcpconn_get(0, &ip, port, from, con_lifetime);
f2f969dd
 			}else{
 				LOG(L_ERR, "ERROR: tcp_send: id %d not found, dropping\n",
e6a2b12e
 						dst->id);
f2f969dd
 				return -1;
 			}
340ce466
 		}
f2f969dd
 	}
ce51fbb8
 	/* connection not found or unusable => open a new one and send on it */
 	if (unlikely((c==0) || tcpconn_close_after_send(c))){
 		if (unlikely(c)){
 			/* can't use c if it's marked as close-after-send  =>
 			   release it and try opening new one */
 			tcpconn_chld_put(c); /* release c (dec refcnt & free on 0) */
 			c=0;
 		}
 		/* check if connect() is disabled */
 		if (unlikely((dst->send_flags.f & SND_F_FORCE_CON_REUSE) ||
 						cfg_get(tcp, tcp_cfg, no_connect)))
 			return -1;
 		DBG("tcp_send: no open tcp connection found, opening new one\n");
 		/* create tcp connection */
 		if (likely(from==0)){
 			/* check to see if we have to use a specific source addr. */
 			switch (dst->to.s.sa_family) {
 				case AF_INET:
 						from = tcp_source_ipv4;
 					break;
205fcb02
 #ifdef USE_IPV6
ce51fbb8
 				case AF_INET6:
 						from = tcp_source_ipv6;
 					break;
205fcb02
 #endif
ce51fbb8
 				default:
 					/* error, bad af, ignore ... */
 					break;
a288ee34
 			}
ce51fbb8
 		}
76cb799e
 #if defined(TCP_CONNECT_WAIT) && defined(TCP_ASYNC)
ce51fbb8
 		if (likely(cfg_get(tcp, tcp_cfg, tcp_connect_wait) && 
 					cfg_get(tcp, tcp_cfg, async) )){
 			if (unlikely(*tcp_connections_no >=
 							cfg_get(tcp, tcp_cfg, max_connections))){
 				LOG(L_ERR, "ERROR: tcp_send %s: maximum number of"
 							" connections exceeded (%d/%d)\n",
 							su2a(&dst->to, sizeof(dst->to)),
 							*tcp_connections_no,
 							cfg_get(tcp, tcp_cfg, max_connections));
 				return -1;
d22b82a0
 			}
61f8b970
 			if (unlikely(dst->proto==PROTO_TLS)) {
 				if (unlikely(*tls_connections_no >=
 							cfg_get(tcp, tcp_cfg, max_tls_connections))){
 					LM_ERR("tcp_send %s: maximum number of"
 							" tls connections exceeded (%d/%d)\n",
 							su2a(&dst->to, sizeof(dst->to)),
 							*tls_connections_no,
 							cfg_get(tcp, tcp_cfg, max_tls_connections));
 					return -1;
 				}
 			}
ce51fbb8
 			c=tcpconn_new(-1, &dst->to, from, 0, dst->proto,
 							S_CONN_CONNECT);
 			if (unlikely(c==0)){
 				LOG(L_ERR, "ERROR: tcp_send %s: could not create new"
 						" connection\n",
 						su2a(&dst->to, sizeof(dst->to)));
534092ba
 				return -1;
0c5da34b
 			}
ce51fbb8
 			c->flags|=F_CONN_PENDING|F_CONN_FD_CLOSED;
340ce466
 			tcpconn_set_send_flags(c, dst->send_flags);
ce51fbb8
 			atomic_set(&c->refcnt, 2); /* ref from here and from main hash
 										 table */
 			/* add it to id hash and aliases */
 			if (unlikely(tcpconn_add(c)==0)){
 				LOG(L_ERR, "ERROR: tcp_send %s: could not add "
 							"connection %p\n",
 							su2a(&dst->to, sizeof(dst->to)),
 								c);
19782e1c
 				_tcpconn_free(c);
06aaa54f
 				n=-1;
19782e1c
 				goto end_no_conn;
 			}
ce51fbb8
 			/* do connect and if src ip or port changed, update the 
 			 * aliases */
 			if (unlikely((fd=tcpconn_finish_connect(c, from))<0)){
 				/* tcpconn_finish_connect will automatically blacklist
 				   on error => no need to do it here */
 				LOG(L_ERR, "ERROR: tcp_send %s: tcpconn_finish_connect(%p)"
 						" failed\n", su2a(&dst->to, sizeof(dst->to)),
 							c);
 				goto conn_wait_error;
 			}
 			/* ? TODO: it might be faster just to queue the write directly
 			 *  and send to main CONN_NEW_PENDING_WRITE */
 			/* delay sending the fd to main after the send */
 			
 			/* NOTE: no lock here, because the connection is marked as
 			 * pending and nobody else will try to write on it. However
 			 * this might produce out-of-order writes. If this is not
 			 * desired either lock before the write or use 
5f653089
 			 * _wbufq_insert(...)
 			 * NOTE2: _wbufq_insert() is used now (no out-of-order).
 			 */
ce51fbb8
 #ifdef USE_TLS
5f653089
 			if (unlikely(c->type==PROTO_TLS)) {
 			/* for TLS the TLS processing and the send must happen
 			   atomically w/ respect to other sends on the same connection
 			   (otherwise reordering might occur which would break TLS) =>
e3163126
 			   lock. However in this case this send will always be the first.
 			   We can have the send() outside the lock only if this is the
 			   first and only send (tls_encode is not called again), or
 			   this is the last send for a tls_encode() loop and all the
 			   previous ones did return CONN_NEW_COMPLETE or CONN_EOF.
5f653089
 			*/
e3163126
 				response[1] = CONN_NOP;
 				t_buf = buf;
 				t_len = len;
5f653089
 				lock_get(&c->write_lock);
e3163126
 redo_tls_encode:
 					t_send_flags = dst->send_flags;
 					n = tls_encode(c, &t_buf, &t_len, &rest_buf, &rest_len,
 									&t_send_flags);
 					/* There are 4 cases:
 					   1. entire buffer consumed from the first try
 					     (rest_len == rest_buf == 0)
 					   2. rest_buf & first call
 					   3. rest_buf & not first call
 						  3a. CONN_NEW_COMPLETE or CONN_EOF
 						  3b. CONN_NEW_PENDING_WRITE
 					   4. entire buffer consumed, but not first call
 					       4a. CONN_NEW_COMPLETE or CONN_EOF
 						   4b. CONN_NEW_PENDING_WRITE
 						We misuse response[1] == CONN_NOP to test for the
 						first call.
 					*/
 					if (unlikely(n < 0)) {
 						lock_release(&c->write_lock);
 						goto conn_wait_error;
 					}
 					if (likely(rest_len == 0)) {
 						/* 1 or 4*: CONN_NEW_COMPLETE, CONN_EOF,  CONN_NOP
 						    or CONN_NEW_PENDING_WRITE (*rest_len == 0) */
 						if (likely(response[1] != CONN_NEW_PENDING_WRITE)) {
 							/* 1 or 4a => it's safe to do the send outside the
 							   lock (it will either send directly or
 							   wbufq_insert())
 							*/
 							lock_release(&c->write_lock);
 							if (likely(t_len != 0)) {
 								n=tcpconn_1st_send(fd, c, t_buf, t_len,
 													t_send_flags,
 													&response[1], 0);
 							} else { /* t_len == 0 */
 								if (response[1] == CONN_NOP) {
 									/* nothing to send (e.g  parallel send
 									   tls_encode queues some data and then
 									   WANT_READ => this tls_encode will queue
 									   the cleartext too and will have nothing
 									   to send right now) and initial send =>
 									   behave as if the send was successful
 									   (but never return EOF here) */
 									response[1] = CONN_NEW_COMPLETE;
 								}
 							}
 							/* exit */
 						} else {
 							/* CONN_NEW_PENDING_WRITE:  4b: it was a
 							   repeated tls_encode() (or otherwise we would
 							   have here CONN_NOP) => add to the queue */
 							if (unlikely(t_len &&
 											_wbufq_add(c, t_buf, t_len) < 0)) {
 								response[1] = CONN_ERROR;
 								n = -1;
 							}
 							lock_release(&c->write_lock);
 							/* exit (no send) */
 						}
 					} else {  /* rest_len != 0 */
 						/* 2 or 3*: if tls_encode hasn't finished, we have to
 						   call tcpconn_1st_send() under lock (otherwise if it
 						   returns CONN_NEW_PENDING_WRITE, there is no way
 						   to find the right place to add the new queued
 						   data from the 2nd tls_encode()) */
 						if (likely((response[1] == CONN_NOP /*2*/ ||
 									response[1] == CONN_NEW_COMPLETE /*3a*/ ||
 									response[1] == CONN_EOF /*3a*/) && t_len))
 							n = tcpconn_1st_send(fd, c, t_buf, t_len,
 													t_send_flags,
 													&response[1], 1);
 						else if (unlikely(t_len &&
 											_wbufq_add(c, t_buf, t_len) < 0)) {
 							/*3b: CONN_NEW_PENDING_WRITE*/
 							response[1] = CONN_ERROR;
 							n = -1;
 						}
 						if (likely(n >= 0)) {
 							/* if t_len == 0 => nothing was sent => previous
 							   response will be kept */
 							t_buf = rest_buf;
 							t_len = rest_len;
 							goto redo_tls_encode;
 						} else {
 							lock_release(&c->write_lock);
 							/* error exit */
 						}
 					}
5f653089
 			} else
ce51fbb8
 #endif /* USE_TLS */
 				n=tcpconn_1st_send(fd, c, buf, len, dst->send_flags,
 									&response[1], 0);
5f653089
 			if (unlikely(n<0)) /* this will catch CONN_ERROR too */
ce51fbb8
 				goto conn_wait_error;
 			if (unlikely(response[1]==CONN_EOF)){
 				/* if close-after-send requested, don't bother
 				   sending the fd back to tcp_main, try closing it
 				   immediately (no other tcp_send should use it,
 				   because it is marked as close-after-send before
5f653089
 				   being added to the hash) */
ce51fbb8
 				goto conn_wait_close;
 			}
 			/* send to tcp_main */
 			response[0]=(long)c;
5f653089
 			if (unlikely(send_fd(unix_tcp_sock, response,
 									sizeof(response), fd) <= 0)){
ce51fbb8
 				LOG(L_ERR, "BUG: tcp_send %s: %ld for %p"
 							" failed:" " %s (%d)\n",
 							su2a(&dst->to, sizeof(dst->to)),
 							response[1], c, strerror(errno), errno);
 				goto conn_wait_error;
 			}
 			goto conn_wait_success;
 		}
 #endif /* TCP_CONNECT_WAIT  && TCP_ASYNC */
 		if (unlikely((c=tcpconn_connect(&dst->to, from, dst->proto,
 										&dst->send_flags))==0)){
 			LOG(L_ERR, "ERROR: tcp_send %s: connect failed\n",
 							su2a(&dst->to, sizeof(dst->to)));
 			return -1;
 		}
 		tcpconn_set_send_flags(c, dst->send_flags);
 		if (likely(c->state==S_CONN_OK))
 			TCP_STATS_ESTABLISHED(S_CONN_CONNECT);
 		atomic_set(&c->refcnt, 2); /* ref. from here and it will also
 									  be added in the tcp_main hash */
 		fd=c->s;
 		c->flags|=F_CONN_FD_CLOSED; /* not yet opened in main */
 		/* ? TODO: it might be faster just to queue the write and
 		 * send to main a CONN_NEW_PENDING_WRITE */
 		
 		/* send the new tcpconn to "tcp main" */
 		response[0]=(long)c;
 		response[1]=CONN_NEW;
 		n=send_fd(unix_tcp_sock, response, sizeof(response), c->s);
 		if (unlikely(n<=0)){
 			LOG(L_ERR, "BUG: tcp_send %s: failed send_fd: %s (%d)\n",
 					su2a(&dst->to, sizeof(dst->to)),
 					strerror(errno), errno);
 			/* we can safely delete it, it's not referenced by anybody */
 			_tcpconn_free(c);
 			n=-1;
 			goto end_no_conn;
 		}
 		/* new connection => send on it directly */
 #ifdef USE_TLS
 		if (unlikely(c->type==PROTO_TLS)) {
5f653089
 			/* for TLS the TLS processing and the send must happen
 			   atomically w/ respect to other sends on the same connection
 			   (otherwise reordering might occur which would break TLS) =>
 			   lock.
 			*/
e3163126
 			response[1] = CONN_NOP;
 			t_buf = buf;
 			t_len = len;
5f653089
 			lock_get(&c->write_lock);
e3163126
 				do {
 					t_send_flags = dst->send_flags;
 					n = tls_encode(c, &t_buf, &t_len, &rest_buf, &rest_len,
 									&t_send_flags);
 					if (likely(n > 0)) {
 						n = tcpconn_do_send(fd, c, t_buf, t_len, t_send_flags,
 												&resp, 1);
 						if (likely(response[1] != CONN_QUEUED_WRITE ||
 									resp == CONN_ERROR))
 							/* don't overwrite a previous CONN_QUEUED_WRITE
 							   unless error */
 							response[1] = resp;
 					} else  if (unlikely(n < 0)) {
 						response[1] = CONN_ERROR;
 						break;
 					}
 					/* else do nothing for n (t_len) == 0, keep
 					   the last reponse */
 					t_buf = rest_buf;
 					t_len = rest_len;
 				} while(unlikely(rest_len && n > 0));
5f653089
 			lock_release(&c->write_lock);
ce51fbb8
 		} else
 #endif /* USE_TLS */
 			n = tcpconn_do_send(fd, c, buf, len, dst->send_flags,
 									&response[1], 0);
 		if (unlikely(response[1] != CONN_NOP)) {
 			response[0]=(long)c;
 			if (send_all(unix_tcp_sock, response, sizeof(response)) <= 0) {
 				BUG("tcp_main command %ld sending failed (write):"
 						"%s (%d)\n", response[1], strerror(errno), errno);
 				/* all commands != CONN_NOP returned by tcpconn_do_send()
 				   (CONN_EOF, CONN_ERROR, CONN_QUEUED_WRITE) will auto-dec
 				   refcnt => if sending the command fails we have to
 				   dec. refcnt by hand */
 				tcpconn_chld_put(c); /* deref. it manually */
 				n=-1;
 			}
 			/* here refcnt for c is already decremented => c contents can
 			   no longer be used and refcnt _must_ _not_ be decremented
 			   again on exit */
 			if (unlikely(n < 0 || response[1] == CONN_EOF)) {
 				/* on error or eof, close fd */
ab88df95
 				tcp_safe_close(fd);
ce51fbb8
 			} else if (response[1] == CONN_QUEUED_WRITE) {
0c7e84ff
 #ifdef TCP_FD_CACHE
ce51fbb8
 				if (cfg_get(tcp, tcp_cfg, fd_cache)) {
 					tcp_fd_cache_add(c, fd);
 				} else
0c7e84ff
 #endif /* TCP_FD_CACHE */
ab88df95
 					tcp_safe_close(fd);
ce51fbb8
 			} else {
 				BUG("unexpected tcpconn_do_send() return & response:"
 						" %d, %ld\n", n, response[1]);
0c7e84ff
 			}
ce51fbb8
 			goto end_no_deref;
 		}
0c7e84ff
 #ifdef TCP_FD_CACHE
ce51fbb8
 		if (cfg_get(tcp, tcp_cfg, fd_cache)) {
 			tcp_fd_cache_add(c, fd);
 		}else
0c7e84ff
 #endif /* TCP_FD_CACHE */
ab88df95
 			tcp_safe_close(fd);
ce51fbb8
 	/* here we can have only commands that _do_ _not_ dec refcnt.
 	   (CONN_EOF, CON_ERROR, CON_QUEUED_WRITE are all treated above) */
 		goto release_c;
 	} /* if (c==0 or unusable) new connection */
0c7e84ff
 	/* existing connection, send on it */
 	n = tcpconn_send_put(c, buf, len, dst->send_flags);
 	/* no deref needed (automatically done inside tcpconn_send_put() */
 	return n;
 #ifdef TCP_CONNECT_WAIT
 conn_wait_success:
 #ifdef TCP_FD_CACHE
 	if (cfg_get(tcp, tcp_cfg, fd_cache)) {
 		tcp_fd_cache_add(c, fd);
 	} else
 #endif /* TCP_FD_CACHE */
ab88df95
 		if (unlikely (tcp_safe_close(fd) < 0))
 			LOG(L_ERR, "closing temporary send fd for %p: %s: "
 					"close(%d) failed (flags 0x%x): %s (%d)\n", c,
 					su2a(&c->rcv.src_su, sizeof(c->rcv.src_su)),
 					fd, c->flags, strerror(errno), errno);
0c7e84ff
 	tcpconn_chld_put(c); /* release c (dec refcnt & free on 0) */
 	return n;
 conn_wait_error:
 	n=-1;
 conn_wait_close:
 	/* connect or send failed or immediate close-after-send was requested on
 	 * newly created connection which was not yet sent to tcp_main (but was
 	 * already hashed) => don't send to main, unhash and destroy directly
 	 * (if refcnt>2 it will be destroyed when the last sender releases the
 	 * connection (tcpconn_chld_put(c))) or when tcp_main receives a
 	 * CONN_ERROR it*/
 	c->state=S_CONN_BAD;
 	/* we are here only if we opened a new fd (and not reused a cached or
 	   a reader one) => if the connect was successful close the fd */
ab88df95
 	if (fd>=0) {
 		if (unlikely(tcp_safe_close(fd) < 0 ))
 			LOG(L_ERR, "closing temporary send fd for %p: %s: "
 					"close(%d) failed (flags 0x%x): %s (%d)\n", c,
 					su2a(&c->rcv.src_su, sizeof(c->rcv.src_su)),
 					fd, c->flags, strerror(errno), errno);
 	}
76cb63c6
 	/* here the connection is for sure in the hash (tcp_main will not
 	   remove it because it's marked as PENDing) and the refcnt is at least
 	   2
 	 */
0c7e84ff
 	TCPCONN_LOCK;
76cb63c6
 		_tcpconn_detach(c);
 		c->flags&=~F_CONN_HASHED;
 		tcpconn_put(c);
 	TCPCONN_UNLOCK;
0c7e84ff
 	/* dec refcnt -> mark it for destruction */
 	tcpconn_chld_put(c);
 	return n;
76cb63c6
 #endif /* TCP_CONNECT_WAIT */
0c7e84ff
 release_c:
 	tcpconn_chld_put(c); /* release c (dec refcnt & free on 0) */
 end_no_deref:
 end_no_conn:
 	return n;
 }
 
 
 
ce51fbb8
 /** sends on an existing tcpconn and auto-dec. con. ref counter.
0c7e84ff
  * As opposed to tcp_send(), this function requires an existing
  * tcp connection.
  * WARNING: the tcp_connection will be de-referenced.
  * Outline (as implemented below):
  *  - async mode with a non-empty write queue (or, with TCP_CONNECT_WAIT,
  *    a connection flagged F_CONN_PENDING): the data is only appended to
  *    the write queue under the write lock (TLS/WSS data is first passed
  *    through tls_encode()); no fd is needed.
  *  - otherwise an fd is obtained (c->fd if the caller is the reader
  *    process of c, the fd cache, or a CONN_GET_FD request to tcp_main)
  *    and the data is sent through tcpconn_do_send().
  *  - if the send produced a command other than CONN_NOP, the command is
  *    forwarded to tcp_main and c is released here only if that
  *    forwarding fails.
  * @param c - existing tcp connection pointer.
  * @param buf - data to be sent.
  * @param len - data length,
  * @param send_flags - send flags, handed to tls_encode() and
  *                     tcpconn_do_send().
  * @return >=0 on success, -1 on error.
  */
5f653089
 static int tcpconn_send_put(struct tcp_connection* c, const char* buf,
 								unsigned len, snd_flags_t send_flags)
0c7e84ff
 {
 	struct tcp_connection *tmp;
 	int fd;
 	long response[2]; /* [0] = connection pointer, [1] = command */
 	int n;
 	int do_close_fd;
e3163126
 #ifdef USE_TLS
 	const char* rest_buf;
 	const char* t_buf;
 	unsigned rest_len, t_len;
 	long resp;
 	snd_flags_t t_send_flags;
 #endif /* USE_TLS */
0c7e84ff
 #ifdef TCP_FD_CACHE
 	struct fd_cache_entry* fd_cache_e;
 	int use_fd_cache;
 	
 	use_fd_cache=cfg_get(tcp, tcp_cfg, fd_cache);
 	fd_cache_e=0;
 #endif /* TCP_FD_CACHE */
 	do_close_fd=1; /* close the fd on exit */
 	response[1] = CONN_NOP;
76cb799e
 #ifdef TCP_ASYNC
0c7e84ff
 	/* if data is already queued, we don't need the fd */
d22b82a0
 #ifdef TCP_CONNECT_WAIT
b0c1c3fb
 		if (unlikely(cfg_get(tcp, tcp_cfg, async) &&
 						(_wbufq_non_empty(c) || (c->flags&F_CONN_PENDING)) ))
 #else /* ! TCP_CONNECT_WAIT */
 		if (unlikely(cfg_get(tcp, tcp_cfg, async) && (_wbufq_non_empty(c)) ))
d22b82a0
 #endif /* TCP_CONNECT_WAIT */
b0c1c3fb
 		{
885b9f62
 			/* the test above ran without the write lock; repeat it with the
 			   lock held before touching the queue */
 			lock_get(&c->write_lock);
d22b82a0
 #ifdef TCP_CONNECT_WAIT
b0c1c3fb
 				if (likely(_wbufq_non_empty(c) || (c->flags&F_CONN_PENDING)))
 #else /* ! TCP_CONNECT_WAIT */
 				if (likely(_wbufq_non_empty(c)))
d22b82a0
 #endif /* TCP_CONNECT_WAIT */
b0c1c3fb
 				{
885b9f62
 					/* no fd was acquired on this path => nothing to close */
 					do_close_fd=0;
5f653089
 #ifdef USE_TLS
3d4a77d8
 					if (unlikely(c->type==PROTO_TLS || c->type==PROTO_WSS)) {
e3163126
 						/* encode chunk by chunk and queue every non-empty
 						   encoded chunk; rest_buf/rest_len (filled by
 						   tls_encode()) become the input of the next
 						   iteration */
 						t_buf = buf;
 						t_len = len;
 						do {
 							t_send_flags = send_flags;
 							n = tls_encode(c, &t_buf, &t_len,
 											&rest_buf, &rest_len,
 											&t_send_flags);
 							if (unlikely((n < 0) || (t_len &&
 									 (_wbufq_add(c, t_buf, t_len) < 0)))) {
 								lock_release(&c->write_lock);
 								n=-1;
 								response[1] = CONN_ERROR;
 								c->state=S_CONN_BAD;
 								c->timeout=get_ticks_raw(); /* force timeout */
 								goto error;
 							}
 							t_buf = rest_buf;
 							t_len = rest_len;
 						} while(unlikely(rest_len && n > 0));
 					} else
 #endif /* USE_TLS */
 						if (unlikely(len && (_wbufq_add(c, buf, len)<0))){
5f653089
 							lock_release(&c->write_lock);
e3163126
 							n=-1;
5f653089
 							response[1] = CONN_ERROR;
 							c->state=S_CONN_BAD;
 							c->timeout=get_ticks_raw(); /* force timeout */
 							goto error;
 						}
885b9f62
 					/* everything was queued => report the whole buffer as
 					   sent */
 					n=len;
 					lock_release(&c->write_lock);
 					goto release_c;
 				}
 			/* queue got empty in the meantime => fall through to a direct
 			   send */
 			lock_release(&c->write_lock);
 		}
76cb799e
 #endif /* TCP_ASYNC */
5c5cd736
 		/* check if this is not the same reader process holding
 		 *  c  and if so send directly on c->fd */
 		if (c->reader_pid==my_pid()){
00f45c49
 			DBG("tcp_send: send from reader (%d (%d)), reusing fd\n",
5c5cd736
 					my_pid(), process_no);
 			fd=c->fd;
 			do_close_fd=0; /* don't close the fd on exit, it's in use */
 #ifdef TCP_FD_CACHE
b4fa727e
 			use_fd_cache=0; /* don't cache: problems would arise due to the
 							   close() on cache eviction (if the fd is still 
 							   used). If it has to be cached then dup() _must_ 
 							   be used */
 		}else if (likely(use_fd_cache && 
20c64cc6
 							((fd_cache_e=tcp_fd_cache_get(c))!=0))){
5c5cd736
 			/* cached fd: it stays in the cache, so it is not closed on
 			   exit */
 			fd=fd_cache_e->fd;
 			do_close_fd=0;
00f45c49
 			DBG("tcp_send: found fd in cache ( %d, %p, %d)\n",
5c5cd736
 					fd, c, fd_cache_e->id);
 #endif /* TCP_FD_CACHE */
 		}else{
06aaa54f
 			DBG("tcp_send: tcp connection found (%p), acquiring fd\n", c);
0c5da34b
 			/* get the fd */
 			response[0]=(long)c;
 			response[1]=CONN_GET_FD;
06aaa54f
 			n=send_all(unix_tcp_sock, response, sizeof(response));
5c5cd736
 			if (unlikely(n<=0)){
534092ba
 				LOG(L_ERR, "BUG: tcp_send: failed to get fd(write):%s (%d)\n",
 						strerror(errno), errno);
06aaa54f
 				n=-1;
534092ba
 				goto release_c;
 			}
f2f969dd
 			DBG("tcp_send, c= %p, n=%d\n", c, n);
b8116f04
 			/* the reply carries a connection pointer (stored in tmp) and
 			   the fd */
 			n=receive_fd(unix_tcp_sock, &tmp, sizeof(tmp), &fd, MSG_WAITALL);
5c5cd736
 			if (unlikely(n<=0)){
534092ba
 				LOG(L_ERR, "BUG: tcp_send: failed to get fd(receive_fd):"
 							" %s (%d)\n", strerror(errno), errno);
06aaa54f
 				n=-1;
885b9f62
 				do_close_fd=0;
06aaa54f
 				goto release_c;
 			}
9da6fae7
 			/* handle fd closed or bad connection/error
 				(it's possible that this happened in the time between
 				we found the initial connection and the time when we get
 				the fd)
 			 */
 			if (unlikely(c!=tmp || fd==-1 || c->state==S_CONN_BAD)){
 				if (unlikely(c!=tmp && tmp!=0))
 					BUG("tcp_send: get_fd: got different connection:"
b8116f04
 						"  %p (id= %d, refcnt=%d state=%d) != "
 						"  %p (n=%d)\n",
be7401cc
 						  c,   c->id,   atomic_get(&c->refcnt),   c->state,
b8116f04
 						  tmp, n
9da6fae7
 						);
06aaa54f
 				n=-1; /* fail */
0c7e84ff
 				/* don't cache fd & close it */
9da6fae7
 				do_close_fd = (fd==-1)?0:1;
0c7e84ff
 #ifdef TCP_FD_CACHE
 				use_fd_cache = 0;
 #endif /* TCP_FD_CACHE */
76b0ff49
 				goto end;
534092ba
 			}
e29d027b
 			DBG("tcp_send: after receive_fd: c= %p n=%d fd=%d\n",c, n, fd);
5c5cd736
 		}
0c5da34b
 	
ce51fbb8
 #ifdef USE_TLS
3d4a77d8
 		if (unlikely(c->type==PROTO_TLS || c->type==PROTO_WSS)) {
5f653089
 			/* for TLS the TLS processing and the send must happen
 			   atomically w/ respect to other sends on the same connection
 			   (otherwise reordering might occur which would break TLS) =>
 			   lock.
 			*/
e3163126
 			response[1] = CONN_NOP;
 			t_buf = buf;
 			t_len = len;
5f653089
 			lock_get(&c->write_lock);
e3163126
 				do {
 					t_send_flags = send_flags;
 					n = tls_encode(c, &t_buf, &t_len, &rest_buf, &rest_len,
 									&t_send_flags);
 					if (likely(n > 0)) {
 						/* last argument 1: the write lock is already held
 						   here */
 						n = tcpconn_do_send(fd, c, t_buf, t_len, t_send_flags,
 												&resp, 1);
 						if (likely(response[1] != CONN_QUEUED_WRITE ||
 									resp == CONN_ERROR))
 							/* don't overwrite a previous CONN_QUEUED_WRITE
 							   unless error */
 							response[1] = resp;
 					} else if (unlikely(n < 0)) {
 						response[1] = CONN_ERROR;
 						break;
 					}
 					/* else do nothing for n (t_len) == 0, keep
 					   the last response */
 					t_buf = rest_buf;
 					t_len = rest_len;
 				} while(unlikely(rest_len && n > 0));
5f653089
 			lock_release(&c->write_lock);
ce51fbb8
 		} else
 #endif
 			n = tcpconn_do_send(fd, c, buf, len, send_flags, &response[1], 0);
0c7e84ff
 	if (unlikely(response[1] != CONN_NOP)) {
 		/* "error" is also reached by goto from the queueing path above, with
 		   n==-1 and response[1]==CONN_ERROR; on that path do_close_fd is 0
 		   and fd_cache_e (if compiled in) is 0, so the never-assigned fd is
 		   not used below */
 error:
 		response[0]=(long)c;
 		if (send_all(unix_tcp_sock, response, sizeof(response)) <= 0) {
 			BUG("tcp_main command %ld sending failed (write):%s (%d)\n",
 					response[1], strerror(errno), errno);
 			/* all commands != CONN_NOP returned by tcpconn_do_send()
 			   (CONN_EOF, CONN_ERROR, CONN_QUEUED_WRITE) will auto-dec refcnt
 			   => if sending the command fails we have to dec. refcnt by hand
 			 */
 			tcpconn_chld_put(c); /* deref. it manually */
 			n=-1;
 		}
 		/* here refcnt for c is already decremented => c contents can no
 		   longer be used and refcnt _must_ _not_ be decremented again
 		   on exit */
 		if (unlikely(n < 0 || response[1] == CONN_EOF)) {
 			/* on error or eof, remove from cache or close fd */
 #ifdef TCP_FD_CACHE
 			if (unlikely(fd_cache_e)){
 				tcp_fd_cache_rm(fd_cache_e);
 				fd_cache_e = 0;
ab88df95
 				tcp_safe_close(fd);
0c7e84ff
 			}else
 #endif /* TCP_FD_CACHE */
ab88df95
 				if (do_close_fd) tcp_safe_close(fd);
0c7e84ff
 		} else if (response[1] == CONN_QUEUED_WRITE) {
 			/* a freshly acquired fd is handed to the cache when caching is
 			   enabled, otherwise it is closed if it is ours to close */
 #ifdef TCP_FD_CACHE
 			if (unlikely((fd_cache_e==0) && use_fd_cache)){
 				tcp_fd_cache_add(c, fd);
 			}else
 #endif /* TCP_FD_CACHE */
ab88df95
 				if (do_close_fd) tcp_safe_close(fd);
0c7e84ff
 		} else {
 			BUG("unexpected tcpconn_do_send() return & response: %d, %ld\n",
 					n, response[1]);
 		}
 		return n; /* no tcpconn_put */
 	}
 end:
 	/* CONN_NOP exit (also used for a failed CONN_GET_FD reply): cache or
 	   close the fd, then release c */
 #ifdef TCP_FD_CACHE
 	if (unlikely((fd_cache_e==0) && use_fd_cache)){
 		tcp_fd_cache_add(c, fd);
 	}else
 #endif /* TCP_FD_CACHE */
ab88df95
 	if (do_close_fd) {
 		if (unlikely(tcp_safe_close(fd) < 0))
 			LOG(L_ERR, "closing temporary send fd for %p: %s: "
 					"close(%d) failed (flags 0x%x): %s (%d)\n", c,
 					su2a(&c->rcv.src_su, sizeof(c->rcv.src_su)),
 					fd, c->flags, strerror(errno), errno);
 	}
0c7e84ff
 	/* here we can have only commands that _do_ _not_ dec refcnt.
 	   (CONN_EOF, CONN_ERROR, CONN_QUEUED_WRITE are all treated above) */
 release_c:
 	tcpconn_chld_put(c); /* release c (dec refcnt & free on 0) */
 	return n;
 }
 
 
 
ce51fbb8
 /** unlocked ("unsafe") send on a known tcp connection.
  * Sends buf on the given fd through tcpconn_do_send() with its `locked`
  * argument set, so no connection write lock is taken or released here
  * (the caller is assumed to hold the connection locks already).
  * Side effects: if tcpconn_do_send() reports a command other than
  *  CONN_NOP (CONN_EOF, CONN_ERROR, CONN_QUEUED_WRITE), that command is
  *  forwarded to tcp_main.
  * @param fd - fd used for sending.
  * @param c - existing tcp connection pointer (state and flags might be
  *            changed).
  * @param buf - data to be sent.
  * @param len - data length.
  * @param send_flags
  * @return <0 on error, number of bytes sent on success.
  */
 int tcpconn_send_unsafe(int fd, struct tcp_connection *c,
5f653089
 						const char* buf, unsigned len, snd_flags_t send_flags)
ce51fbb8
 {
 	long cmd[2]; /* cmd[0] = connection pointer, cmd[1] = tcp_main command */
 	int ret;
 
 	ret = tcpconn_do_send(fd, c, buf, len, send_flags, &cmd[1], 1);
 	if (likely(cmd[1] == CONN_NOP))
 		return ret; /* nothing has to be reported to tcp_main */
 
 	/* every command other than CONN_NOP makes tcp_main drop one reference
 	   when it is processed => take an extra reference first, so that the
 	   caller's own reference stays valid */
 	atomic_inc(&c->refcnt);
 	cmd[0] = (long)c;
 	if (send_all(unix_tcp_sock, cmd, sizeof(cmd)) <= 0) {
 		BUG("connection %p command %ld sending failed (write):%s (%d)\n",
 				c, cmd[1], strerror(errno), errno);
 		/* the command never reached tcp_main => give the extra reference
 		   back by hand */
 		tcpconn_chld_put(c);
 		ret = -1;
 	}
 	return ret;
 }
 
 
 
0c7e84ff
 /** lower level send (connection and fd should be known).
  * It takes care of possible write-queueing, blacklisting a.s.o.
  * It expects a valid tcp connection. It doesn't touch the ref. cnts.
ce51fbb8
  * It will also set the connection flags from send_flags (it's better
  * to do it here, because it's guaranteed to be under lock).
  * The errno left by the write attempt is saved immediately after the
  * write; the EAGAIN/EWOULDBLOCK test, the error classification and the
  * error log all use the saved value, so intermediate calls (debug
  * logging, lock release, blacklisting) cannot change the outcome.
  * @param fd - fd used for sending.
0c7e84ff
  * @param c - existing tcp connection pointer (state and flags might be
  *            changed).
  * @param buf - data to be sent.
  * @param len - data length.
  * @param send_flags
  * @param resp - filled with a cmd. for tcp_main:
  *                      CONN_NOP - nothing needs to be done (do not send
  *                                 anything to tcp_main).
  *                      CONN_ERROR - error, connection should be closed.
  *                      CONN_EOF - no error, but connection should be closed.
  *                      CONN_QUEUED_WRITE - new write queue (connection
  *                                 should be watched for write and the wr.
  *                                 queue flushed).
ce51fbb8
  * @param locked - if set assume the connection is already locked (call from
  *                  tls) and do not lock/unlock the connection.
0c7e84ff
  * @return >=0 on success, < 0 on error && *resp == CONN_ERROR.
  *
  */
5f653089
 static int tcpconn_do_send(int fd, struct tcp_connection* c,
 							const char* buf, unsigned len,
ce51fbb8
 							snd_flags_t send_flags, long* resp,
 							int locked)
0c7e84ff
 {
 	int  n;
 	int  write_errno = 0; /* errno as left by the write attempt */
 #ifdef TCP_ASYNC
 	int enable_write_watch;
 #endif /* TCP_ASYNC */
 
0c5da34b
 	DBG("tcp_send: sending...\n");
0c7e84ff
 	*resp = CONN_NOP;
ce51fbb8
 	if (likely(!locked)) lock_get(&c->write_lock);
340ce466
 	/* update connection send flags with the current ones */
0c7e84ff
 	tcpconn_set_send_flags(c, send_flags);
76cb799e
 #ifdef TCP_ASYNC
 	if (likely(cfg_get(tcp, tcp_cfg, async))){
d22b82a0
 		/* data already queued (or connect still pending) => only append
 		   to the write queue, no write attempt */
 		if (_wbufq_non_empty(c)
 #ifdef TCP_CONNECT_WAIT
efc23dce
 			|| (c->flags&F_CONN_PENDING) 
d22b82a0
 #endif /* TCP_CONNECT_WAIT */
 			){
20863813
 			if (unlikely(_wbufq_add(c, buf, len)<0)){
ce51fbb8
 				if (likely(!locked)) lock_release(&c->write_lock);
885b9f62
 				n=-1;
 				goto error;
 			}
ce51fbb8
 			if (likely(!locked)) lock_release(&c->write_lock);
885b9f62
 			n=len;
 			goto end;
 		}
 		n=_tcpconn_write_nb(fd, c, buf, len);
 		write_errno=errno; /* save before anything else can change it */
 	}else{
76cb799e
 #endif /* TCP_ASYNC */
518c9339
 		/* n=tcp_blocking_write(c, fd, buf, len); */
ce51fbb8
 		n=tsend_stream(fd, buf, len,
 						TICKS_TO_S(cfg_get(tcp, tcp_cfg, send_timeout)) *
 						1000);
 		write_errno=errno; /* save before anything else can change it */
76cb799e
 #ifdef TCP_ASYNC
885b9f62
 	}
76cb799e
 #else /* ! TCP_ASYNC */
ce51fbb8
 	if (likely(!locked)) lock_release(&c->write_lock);
76cb799e
 #endif /* TCP_ASYNC */
d22b82a0
 	
 	DBG("tcp_send: after real write: c= %p n=%d fd=%d\n",c, n, fd);
ab130758
 	DBG("tcp_send: buf=\n%.*s\n", (int)len, buf);
36ac6749
 	if (unlikely(n<(int)len)){
76cb799e
 #ifdef TCP_ASYNC
ce51fbb8
 		/* async mode: a partial write or a "would block" error is not a
 		   failure, the unsent part is queued */
 		if (cfg_get(tcp, tcp_cfg, async) &&
5b892e3e
 				((n>=0) || write_errno==EAGAIN || write_errno==EWOULDBLOCK)){
20863813
 			/* remember whether the queue was empty before adding: only
 			   then a CONN_QUEUED_WRITE is reported */
 			enable_write_watch=_wbufq_empty(c);
5b892e3e
 			if (n<0) n=0;
efc23dce
 			else if (unlikely(c->state==S_CONN_CONNECT ||
7bb2b4ca
 						c->state==S_CONN_ACCEPT)){
 				TCP_STATS_ESTABLISHED(c->state);
efc23dce
 				c->state=S_CONN_OK; /* something was written */
7bb2b4ca
 			}
5b892e3e
 			if (unlikely(_wbufq_add(c, buf+n, len-n)<0)){
ce51fbb8
 				if (likely(!locked)) lock_release(&c->write_lock);
885b9f62
 				n=-1;
 				goto error;
 			}
ce51fbb8
 			if (likely(!locked)) lock_release(&c->write_lock);
885b9f62
 			n=len;
0c7e84ff
 			if (likely(enable_write_watch))
 				*resp=CONN_QUEUED_WRITE;
885b9f62
 			goto end;
19782e1c
 		}else{
ce51fbb8
 			if (likely(!locked)) lock_release(&c->write_lock);
885b9f62
 		}
76cb799e
 #endif /* TCP_ASYNC */
7bb2b4ca
 		/* hard write error: classify it using the saved write errno */
 		if (unlikely(c->state==S_CONN_CONNECT)){
38429f23
 			switch(write_errno){
 				case ENETUNREACH:
7bb2b4ca
 				case EHOSTUNREACH: /* not posix for send() */
 #ifdef USE_DST_BLACKLIST
5d6752dc
 					dst_blacklist_su(BLST_ERR_CONNECT, c->rcv.proto,
 										&c->rcv.src_su, &c->send_flags, 0);
7bb2b4ca
 #endif /* USE_DST_BLACKLIST */
 					TCP_EV_CONNECT_UNREACHABLE(write_errno, TCP_LADDR(c),
 									TCP_LPORT(c), TCP_PSU(c), TCP_PROTO(c));
 					break;
 				case ECONNREFUSED:
 				case ECONNRESET:
 #ifdef USE_DST_BLACKLIST
5d6752dc
 					dst_blacklist_su(BLST_ERR_CONNECT, c->rcv.proto,
 										&c->rcv.src_su, &c->send_flags, 0);
7bb2b4ca
 #endif /* USE_DST_BLACKLIST */
 					TCP_EV_CONNECT_RST(write_errno, TCP_LADDR(c), TCP_LPORT(c),
 										TCP_PSU(c), TCP_PROTO(c));
 					break;
 				default:
 					TCP_EV_CONNECT_ERR(write_errno, TCP_LADDR(c), TCP_LPORT(c),
 										TCP_PSU(c), TCP_PROTO(c));
 				}
 			TCP_STATS_CONNECT_FAILED();
 		}else{
 			switch(write_errno){
 				case ECONNREFUSED:
38429f23
 				case ECONNRESET:
7bb2b4ca
 					TCP_STATS_CON_RESET();
 					/* no break */
 				case ENETUNREACH:
38429f23
 				/*case EHOSTUNREACH: -- not posix */
7bb2b4ca
 #ifdef USE_DST_BLACKLIST
5d6752dc
 					dst_blacklist_su(BLST_ERR_SEND, c->rcv.proto,
 										&c->rcv.src_su, &c->send_flags, 0);
7bb2b4ca
 #endif /* USE_DST_BLACKLIST */
38429f23
 					break;
 			}
7bb2b4ca
 		}
edcdea00
 		LOG(L_ERR, "ERROR: tcp_send: failed to send on %p (%s:%d->%s): %s (%d)"
 					"\n", c, ip_addr2a(&c->rcv.dst_ip), c->rcv.dst_port,
e88c2542
 					su2a(&c->rcv.src_su, sizeof(c->rcv.src_su)),
edcdea00
 					strerror(write_errno), write_errno);
0c7e84ff
 		n = -1;
76cb799e
 #ifdef TCP_ASYNC
885b9f62
 		/* reached by goto when queueing fails (write lock already
 		   released, n == -1); write_errno is not used from here on */
 error:
76cb799e
 #endif /* TCP_ASYNC */
ab130758
 		/* error on the connection , mark it as bad and set 0 timeout */
f6e50f08
 		c->state=S_CONN_BAD;
d8b11bbc
 		c->timeout=get_ticks_raw();
ab130758
 		/* tell "main" it should drop this (optional it will t/o anyway?)*/
0c7e84ff
 		*resp=CONN_ERROR;
1c93f767
 		return n; /* error return, no tcpconn_put */
ab130758
 	}
19782e1c
 	
76cb799e
 #ifdef TCP_ASYNC
ce51fbb8
 	if (likely(!locked)) lock_release(&c->write_lock);
76cb799e
 #endif /* TCP_ASYNC */
efc23dce
 	/* in non-async mode here we're either in S_CONN_OK or S_CONN_ACCEPT*/
7bb2b4ca
 	if (unlikely(c->state==S_CONN_CONNECT || c->state==S_CONN_ACCEPT)){
 			TCP_STATS_ESTABLISHED(c->state);
efc23dce
 			c->state=S_CONN_OK;
7bb2b4ca
 	}
0c7e84ff
 	if (unlikely(send_flags.f & SND_F_CON_CLOSE)){
340ce466
 		/* close after write => send EOF request to tcp_main */
 		c->state=S_CONN_BAD;
 		c->timeout=get_ticks_raw();
 		/* tell "main" it should drop this*/
0c7e84ff
 		*resp=CONN_EOF;
 		return n;
340ce466
 	}
534092ba
 end:
340ce466
 	return n;
0c5da34b
 }
 
 
 
ce51fbb8
 /** low level 1st send on a new connection.
  * It takes care of possible write-queueing, blacklisting a.s.o.
  * It expects a valid just-opened tcp connection. It doesn't touch the 
  * ref. counters. It's used only in the async first send case.
  * The write is attempted without taking the write lock; the lock is taken
  * (unless `locked` is set) only around the insertion of unsent data into
  * the write queue.
  * @param fd - fd used for sending.
  * @param c - existing tcp connection pointer (state and flags might be
  *            changed). The connection must be new (no previous send on it).
  * @param buf - data to be sent.
  * @param len - data length.
  * @param send_flags - only SND_F_CON_CLOSE is looked at here (after a
  *                     complete write).
5f653089
  * @param resp - filled with a fd sending cmd. for tcp_main on success. It
  *                      _must_ be one of the commands listed below:
ce51fbb8
  *                      CONN_NEW_PENDING_WRITE - new connection, first write
  *                                 was partially successful (or EAGAIN) and
  *                                 was queued (connection should be watched
  *                                 for write and the write queue flushed).
  *                                 The fd should be sent to tcp_main.
  *                      CONN_NEW_COMPLETE - new connection, first write
5f653089
  *                                 completed successfully and no data is
  *                                 queued. The fd should be sent to tcp_main.
ce51fbb8
  *                      CONN_EOF - no error, but the connection should be
  *                                  closed (e.g. SND_F_CON_CLOSE send flag).
5f653089
  *                      CONN_ERROR - error, _must_ return < 0.
ce51fbb8
  * @param locked - if set assume the connection is already locked (call from
  *                  tls) and do not lock/unlock the connection.
  * @return >=0 on success, < 0 on error (callers should not rely on *resp
  *         on error; the code below sets it to CONN_ERROR on every error
  *         path).
  *
  */
5f653089
 static int tcpconn_1st_send(int fd, struct tcp_connection* c,
 							const char* buf, unsigned len,
ce51fbb8
 							snd_flags_t send_flags, long* resp,
 							int locked)
 {
 	int n;
 	
 	n=_tcpconn_write_nb(fd, c, buf, len);
 	if (unlikely(n<(int)len)){
3d4a5942
 		/* on EAGAIN or ENOTCONN return success.
 		   ENOTCONN appears on newer FreeBSD versions (non-blocking socket,
 		   connect() & send immediately) */
 		/* errno is examined right after the write, before any other call */
 		if ((n>=0) || errno==EAGAIN || errno==EWOULDBLOCK || errno==ENOTCONN){
ce51fbb8
 			DBG("pending write on new connection %p "
 				" (%d/%d bytes written)\n", c, n, len);
 			/* n<0: nothing was written, queue the whole buffer */
 			if (unlikely(n<0)) n=0;
 			else{
e3163126
 				if (likely(c->state == S_CONN_CONNECT))
 					TCP_STATS_ESTABLISHED(S_CONN_CONNECT);
ce51fbb8
 				c->state=S_CONN_OK; /* partial write => connect()
 												ended */
 			}
 			/* add to the write queue */
 			if (likely(!locked)) lock_get(&c->write_lock);
 				/* only the unsent tail (buf+n, len-n) is queued */
 				if (unlikely(_wbufq_insert(c, buf+n, len-n)<0)){
 					if (likely(!locked)) lock_release(&c->write_lock);
 					n=-1;
 					LOG(L_ERR, "%s: EAGAIN and"
 							" write queue full or failed for %p\n",
 							su2a(&c->rcv.src_su, sizeof(c->rcv.src_su)), c);
 					goto error;
 				}
 			if (likely(!locked)) lock_release(&c->write_lock);
 			/* send to tcp_main */
 			*resp=CONN_NEW_PENDING_WRITE;
 			/* sent + queued => report the whole buffer as handled */
 			n=len;
 			goto end;
 		}
 		/* n < 0 and not EAGAIN => write error */
 		/* if first write failed it's most likely a
 		   connect error */
 		switch(errno){
 			case ENETUNREACH:
 			case EHOSTUNREACH:  /* not posix for send() */
 #ifdef USE_DST_BLACKLIST
 				dst_blacklist_su( BLST_ERR_CONNECT, c->rcv.proto,
 									&c->rcv.src_su, &c->send_flags, 0);
 #endif /* USE_DST_BLACKLIST */
 				TCP_EV_CONNECT_UNREACHABLE(errno, TCP_LADDR(c),
 								TCP_LPORT(c), TCP_PSU(c), TCP_PROTO(c));
 				break;
 			case ECONNREFUSED:
 			case ECONNRESET:
 #ifdef USE_DST_BLACKLIST
 				dst_blacklist_su( BLST_ERR_CONNECT, c->rcv.proto,
 									&c->rcv.src_su, &c->send_flags, 0);
 #endif /* USE_DST_BLACKLIST */
 				TCP_EV_CONNECT_RST(errno, TCP_LADDR(c),
 								TCP_LPORT(c), TCP_PSU(c), TCP_PROTO(c));
 				break;
 			default:
 				TCP_EV_CONNECT_ERR(errno, TCP_LADDR(c),
 								TCP_LPORT(c), TCP_PSU(c), TCP_PROTO(c));
 		}
 		/* error: destroy it directly */
 		TCP_STATS_CONNECT_FAILED();
 		/* NOTE(review): errno is read again here, after the blacklist and
 		   event calls above; if any of them changes errno the logged text
 		   may not be the write's error -- confirm */
 		LOG(L_ERR, "%s: connect & send  for %p failed:" " %s (%d)\n",
 					su2a(&c->rcv.src_su, sizeof(c->rcv.src_su)),
 					c, strerror(errno), errno);
 		goto error;
 	}
 	/* the whole buffer was written at the first attempt */
 	LOG(L_INFO, "quick connect for %p\n", c);
e3163126
 	if (likely(c->state == S_CONN_CONNECT))
 		TCP_STATS_ESTABLISHED(S_CONN_CONNECT);
ce51fbb8
 	if (unlikely(send_flags.f & SND_F_CON_CLOSE)){
 		/* close after write =>  EOF => close immediately */
 		c->state=S_CONN_BAD;
 		/* tell our caller that it should drop this*/
 		*resp=CONN_EOF;
 	}else{
 		c->state=S_CONN_OK;
 		/* send to tcp_main */
 		*resp=CONN_NEW_COMPLETE;
 	}
 end:
 	return n; /* >= 0 */
 error:
 	/* common error exit: always reports CONN_ERROR and returns -1 */
 	*resp=CONN_ERROR;
 	return -1;
 }
 
 
 
f2f969dd
 int tcp_init(struct socket_info* sock_info)
5b532c7f
 {
 	union sockaddr_union* addr;
e6509c23
 	int optval;
20c64cc6
 #ifdef HAVE_TCP_ACCEPT_FILTER
 	struct accept_filter_arg afa;
 #endif /* HAVE_TCP_ACCEPT_FILTER */
b33736bc
 #ifdef DISABLE_NAGLE
 	int flag;
 	struct protoent* pe;
7ec958f3
 
 	if (tcp_proto_no==-1){ /* if not already set */
 		pe=getprotobyname("tcp");
 		if (pe==0){
 			LOG(L_ERR, "ERROR: tcp_init: could not get TCP protocol number\n");
 			tcp_proto_no=-1;
 		}else{
 			tcp_proto_no=pe->p_proto;
 		}
b33736bc
 	}
 #endif
5b532c7f
 	
 	addr=&sock_info->su;
faa66933
 	/* sock_info->proto=PROTO_TCP; */
6eacb2bc
 	if (init_su(addr, &sock_info->address, sock_info->port_no)<0){
5b532c7f
 		LOG(L_ERR, "ERROR: tcp_init: could no init sockaddr_union\n");
 		goto error;
 	}
edcdea00
 	DBG("tcp_init: added %s\n", su2a(addr, sizeof(*addr)));
5b532c7f
 	sock_info->socket=socket(AF2PF(addr->s.sa_family), SOCK_STREAM, 0);
 	if (sock_info->socket==-1){
 		LOG(L_ERR, "ERROR: tcp_init: socket: %s\n", strerror(errno));
 		goto error;
 	}
b33736bc
 #ifdef DISABLE_NAGLE
 	flag=1;
 	if ( (tcp_proto_no!=-1) &&
 		 (setsockopt(sock_info->socket, tcp_proto_no , TCP_NODELAY,
 					 &flag, sizeof(flag))<0) ){
 		LOG(L_ERR, "ERROR: tcp_init: could not disable Nagle: %s\n",
 				strerror(errno));
 	}
 #endif
e6509c23
 
 
39546e5f
 #if  !defined(TCP_DONT_REUSEADDR) 
e6509c23
 	/* Stevens, "Network Programming", Section 7.5, "Generic Socket
      * Options": "...server started,..a child continues..on existing
 	 * connection..listening server is restarted...call to bind fails
 	 * ... ALL TCP servers should specify the SO_REUSEADDRE option 
 	 * to allow the server to be restarted in this situation
 	 *
 	 * Indeed, without this option, the server can't restart.
 	 *   -jiri
 	 */
 	optval=1;
 	if (setsockopt(sock_info->socket, SOL_SOCKET, SO_REUSEADDR,
 				(void*)&optval, sizeof(optval))==-1) {
 		LOG(L_ERR, "ERROR: tcp_init: setsockopt %s\n",
 			strerror(errno));
 		goto error;
 	}
 #endif
39546e5f
 	/* tos */
fe09f315
 	optval = tos;
39546e5f
 	if (setsockopt(sock_info->socket, IPPROTO_IP, IP_TOS, (void*)&optval, 
 				sizeof(optval)) ==-1){
 		LOG(L_WARN, "WARNING: tcp_init: setsockopt tos: %s\n", strerror(errno));
 		/* continue since this is not critical */
 	}
20c64cc6
 #ifdef HAVE_TCP_DEFER_ACCEPT
 	/* linux only */
22db42e4
 	if ((optval=cfg_get(tcp, tcp_cfg, defer_accept))){
20c64cc6
 		if (setsockopt(sock_info->socket, IPPROTO_TCP, TCP_DEFER_ACCEPT,
 					(void*)&optval, sizeof(optval)) ==-1){
 			LOG(L_WARN, "WARNING: tcp_init: setsockopt TCP_DEFER_ACCEPT %s\n",
 						strerror(errno));
 		/* continue since this is not critical */
 		}
 	}
 #endif /* HAVE_TCP_DEFFER_ACCEPT */
 #ifdef HAVE_TCP_SYNCNT
22db42e4
 	if ((optval=cfg_get(tcp, tcp_cfg, syncnt))){
20c64cc6
 		if (setsockopt(sock_info->socket, IPPROTO_TCP, TCP_SYNCNT, &optval,
 						sizeof(optval))<0){
 			LOG(L_WARN, "WARNING: tcp_init: failed to set"
 						" maximum SYN retr. count: %s\n", strerror(errno));
 		}
 	}
 #endif
 #ifdef HAVE_TCP_LINGER2
22db42e4
 	if ((optval=cfg_get(tcp, tcp_cfg, linger2))){
20c64cc6
 		if (setsockopt(sock_info->socket, IPPROTO_TCP, TCP_LINGER2, &optval,
 						sizeof(optval))<0){
 			LOG(L_WARN, "WARNING: tcp_init: failed to set"
 						" maximum LINGER2 timeout: %s\n", strerror(errno));
 		}
 	}
 #endif
 	init_sock_keepalive(sock_info->socket);
5b532c7f
 	if (bind(sock_info->socket, &addr->s, sockaddru_len(*addr))==-1){
0ba367ec
 		LOG(L_ERR, "ERROR: tcp_init: bind(%x, %p, %d) on %s:%d : %s\n",
 				sock_info->socket,  &addr->s, 
4d080f49
 				(unsigned)sockaddru_len(*addr),
5b532c7f
 				sock_info->address_str.s,
0ba367ec
 				sock_info->port_no,
5b532c7f
 				strerror(errno));
 		goto error;
 	}
1ce28841
 	if (listen(sock_info->socket, TCP_LISTEN_BACKLOG)==-1){
5b532c7f
 		LOG(L_ERR, "ERROR: tcp_init: listen(%x, %p, %d) on %s: %s\n",
 				sock_info->socket, &addr->s, 
4d080f49
 				(unsigned)sockaddru_len(*addr),
5b532c7f
 				sock_info->address_str.s,
 				strerror(errno));
 		goto error;
 	}
a0b4a4b9
 #ifdef HAVE_TCP_ACCEPT_FILTER
 	/* freebsd */
22db42e4
 	if (cfg_get(tcp, tcp_cfg, defer_accept)){
a0b4a4b9
 		memset(&afa, 0, sizeof(afa));
 		strcpy(afa.af_name, "dataready");
 		if (setsockopt(sock_info->socket, SOL_SOCKET, SO_ACCEPTFILTER,
 					(void*)&afa, sizeof(afa)) ==-1){
 			LOG(L_WARN, "WARNING: tcp_init: setsockopt SO_ACCEPTFILTER %s\n",
 						strerror(errno));
 		/* continue since this is not critical */
 		}
 	}
 #endif /* HAVE_TCP_ACCEPT_FILTER */
5b532c7f
 	
 	return 0;
 error:
 	if (sock_info->socket!=-1){
ab88df95
 		tcp_safe_close(sock_info->socket);
5b532c7f
 		sock_info->socket=-1;
 	}
 	return -1;
 }
 
 
 
19782e1c
 /* close tcp_main's fd from a tcpconn
db237b38
  * WARNING: call only in tcp_main context */
19782e1c
 inline static void tcpconn_close_main_fd(struct tcp_connection* tcpconn)
 {
 	int fd;
 	
 	
 	fd=tcpconn->s;
 #ifdef USE_TLS
3d4a77d8
 	if (tcpconn->type==PROTO_TLS || tcpconn->type==PROTO_WSS)
19782e1c
 		tls_close(tcpconn, fd);
 #endif
 #ifdef TCP_FD_CACHE
22db42e4
 	if (likely(cfg_get(tcp, tcp_cfg, fd_cache))) shutdown(fd, SHUT_RDWR);
19782e1c
 #endif /* TCP_FD_CACHE */
ab88df95
 	if (unlikely(tcp_safe_close(fd)<0))
5f653089
 		LOG(L_ERR, "ERROR: tcpconn_close_main_fd(%p): %s "
 					"close(%d) failed (flags 0x%x): %s (%d)\n", tcpconn,
 					su2a(&tcpconn->rcv.src_su, sizeof(tcpconn->rcv.src_su)),
 					fd, tcpconn->flags, strerror(errno), errno);
d89437a3
 	tcpconn->s=-1;
19782e1c
 }
 
 
 
 /* dec refcnt & frees the connection if refcnt==0
  * returns 1 if the connection is freed, 0 otherwise
  *
  * WARNING: use only from child processes */
 inline static int tcpconn_chld_put(struct tcp_connection* tcpconn)
 {
 	if (unlikely(atomic_dec_and_test(&tcpconn->refcnt))){
d22b82a0
 		DBG("tcpconn_chld_put: destroying connection %p (%d, %d) "
19782e1c
 				"flags %04x\n", tcpconn, tcpconn->id,
 				tcpconn->s, tcpconn->flags);
 		/* sanity checks */
23dc5abf
 		membar_read_atomic_op(); /* make sure we see the current flags */
efc23dce
 		if (unlikely(!(tcpconn->flags & F_CONN_FD_CLOSED) ||
 			(tcpconn->flags &
 				(F_CONN_HASHED|F_CONN_MAIN_TIMER|
 				 F_CONN_READ_W|F_CONN_WRITE_W)) )){
d22b82a0
 			LOG(L_CRIT, "BUG: tcpconn_chld_put: %p bad flags = %0x\n",
19782e1c
 					tcpconn, tcpconn->flags);
 			abort();
 		}
 		_tcpconn_free(tcpconn); /* destroys also the wbuf_q if still present*/
 		return 1;
 	}
 	return 0;
 }
 
 
 
 /* simple destroy function (the connection should be already removed
ce51fbb8
  * from the hashes. refcnt 0 and the fds should not be watched anymore for IO)
19782e1c
  */
 inline static void tcpconn_destroy(struct tcp_connection* tcpconn)
 {
 		DBG("tcpconn_destroy: destroying connection %p (%d, %d) "
 				"flags %04x\n", tcpconn, tcpconn->id,
 				tcpconn->s, tcpconn->flags);
 		if (unlikely(tcpconn->flags & F_CONN_HASHED)){
 			LOG(L_CRIT, "BUG: tcpconn_destroy: called with hashed"
 						" connection (%p)\n", tcpconn);
 			/* try to continue */
d22b82a0
 			if (likely(tcpconn->flags & F_CONN_MAIN_TIMER))
 				local_timer_del(&tcp_main_ltimer, &tcpconn->timer);
19782e1c
 			TCPCONN_LOCK;
 				_tcpconn_detach(tcpconn);
42d1a155
 				tcpconn->flags &= ~(F_CONN_HASHED|F_CONN_MAIN_TIMER);
19782e1c
 			TCPCONN_UNLOCK;
 		}
 		if (likely(!(tcpconn->flags & F_CONN_FD_CLOSED))){
 			tcpconn_close_main_fd(tcpconn);
5f653089
 			tcpconn->flags|=F_CONN_FD_CLOSED;
19782e1c
 			(*tcp_connections_no)--;
3d4a77d8
 			if (unlikely(tcpconn->type==PROTO_TLS || tcpconn->type==PROTO_WSS))
61f8b970
 				(*tls_connections_no)--;
19782e1c
 		}
 		_tcpconn_free(tcpconn); /* destroys also the wbuf_q if still present*/
 }
 
 
 
 /* tries to destroy the connection: dec. refcnt and if 0 destroys the
  *  connection, else it will mark it as BAD and close the main fds
  *
  * returns 1 if the connection was destroyed, 0 otherwise
  *
  * WARNING: - the connection _has_ to be removed from the hash and timer
  *  first (use tcpconn_try_unhash() for this )
  *         - the fd should not be watched anymore (io_watch_del()...)
  *         - must be called _only_ from the tcp_main process context
  *          (or else the fd will remain open)
  */
 inline static int tcpconn_put_destroy(struct tcp_connection* tcpconn)
 {
0c7e84ff
 	if (unlikely((tcpconn->flags &
fc665070
 			(F_CONN_WRITE_W|F_CONN_HASHED|F_CONN_MAIN_TIMER|F_CONN_READ_W)) )){
19782e1c
 		/* sanity check */
23dc5abf
 		if (unlikely(tcpconn->flags & F_CONN_HASHED)){
 			LOG(L_CRIT, "BUG: tcpconn_destroy: called with hashed and/or"
 						"on timer connection (%p), flags = %0x\n",
 						tcpconn, tcpconn->flags);
 			/* try to continue */
 			if (likely(tcpconn->flags & F_CONN_MAIN_TIMER))
 				local_timer_del(&tcp_main_ltimer, &tcpconn->timer);
 			TCPCONN_LOCK;
 				_tcpconn_detach(tcpconn);
42d1a155
 				tcpconn->flags &= ~(F_CONN_HASHED|F_CONN_MAIN_TIMER);
23dc5abf
 			TCPCONN_UNLOCK;
 		}else{
 			LOG(L_CRIT, "BUG: tcpconn_put_destroy: %p flags = %0x\n",
19782e1c
 					tcpconn, tcpconn->flags);
23dc5abf
 		}
19782e1c
 	}
23dc5abf
 	tcpconn->state=S_CONN_BAD;
 	/* in case it's still in a reader timer */
 	tcpconn->timeout=get_ticks_raw();
 	/* fast close: close fds now */
 	if (likely(!(tcpconn->flags & F_CONN_FD_CLOSED))){
 		tcpconn_close_main_fd(tcpconn);
 		tcpconn->flags|=F_CONN_FD_CLOSED;
 		(*tcp_connections_no)--;
3d4a77d8
 		if (unlikely(tcpconn->type==PROTO_TLS || tcpconn->type==PROTO_WSS))
61f8b970
 				(*tls_connections_no)--;
23dc5abf
 	}
 	/* all the flags / ops on the tcpconn must be done prior to decrementing
 	 * the refcnt. and at least a membar_write_atomic_op() mem. barrier or
 	 *  a mb_atomic_* op must * be used to make sure all the changed flags are
 	 *  written into memory prior to the new refcnt value */
 	if (unlikely(mb_atomic_dec_and_test(&tcpconn->refcnt))){
 		_tcpconn_free(tcpconn);
19782e1c
 		return 1;
 	}
 	return 0;
 }
 
 
 
 /* try to remove a connection from the hashes and timer.
  * returns 1 if the connection was removed, 0 if not (connection not in
  *  hash)
  *
  * WARNING: call it only in the  tcp_main process context or else the
  *  timer removal won't work.
  */
 inline static int tcpconn_try_unhash(struct tcp_connection* tcpconn)
 {
 	if (likely(tcpconn->flags & F_CONN_HASHED)){
 		tcpconn->state=S_CONN_BAD;
d22b82a0
 		if (likely(tcpconn->flags & F_CONN_MAIN_TIMER)){
19782e1c
 			local_timer_del(&tcp_main_ltimer, &tcpconn->timer);
d22b82a0
 			tcpconn->flags&=~F_CONN_MAIN_TIMER;
 		}else
19782e1c
 			/* in case it's still in a reader timer */
 			tcpconn->timeout=get_ticks_raw();
 		TCPCONN_LOCK;
d22b82a0
 			if (tcpconn->flags & F_CONN_HASHED){
 				tcpconn->flags&=~F_CONN_HASHED;
 				_tcpconn_detach(tcpconn);
 				TCPCONN_UNLOCK;
 			}else{
 				/* tcp_send was faster and did unhash it itself */
 				TCPCONN_UNLOCK;
 				return 0;
 			}
76cb799e
 #ifdef TCP_ASYNC
19782e1c
 		/* empty possible write buffers (optional) */
 		if (unlikely(_wbufq_non_empty(tcpconn))){
 			lock_get(&tcpconn->write_lock);
 				/* check again, while holding the lock */
 				if (likely(_wbufq_non_empty(tcpconn)))
 					_wbufq_destroy(&tcpconn->wbuf_q);
 			lock_release(&tcpconn->write_lock);
 		}
76cb799e
 #endif /* TCP_ASYNC */
19782e1c
 		return 1;
 	}
 	return 0;
 }
746f7674
 
 
 
98f3d5e2
 #ifdef SEND_FD_QUEUE
 /* one queued "send the fd of this connection over this unix socket"
  * request (see send_fd_queue_add() / send_fd_queue_run()) */
 struct send_fd_info{
 	struct tcp_connection* tcp_conn; /* connection whose fd (tcp_conn->s)
 										and pointer are sent */
746f7674
 	ticks_t expire; /* raw ticks value after which a failed send is no
 					   longer retried */
98f3d5e2
 	int unix_sock; /* socket on which send_fd() is called */
746f7674
 	unsigned int retries; /* debugging: incremented each time the entry is
 							 left in the queue for another try */
98f3d5e2
 };
 
 /* array based queue of send_fd_info entries; the used entries are
  * data[0] .. crt-1 */
 struct tcp_send_fd_q{
 	struct send_fd_info* data; /* buffer */
 	struct send_fd_info* crt;  /* pointer inside the buffer: the slot where
 								  the next entry will be stored */
 	struct send_fd_info* end;  /* points after the last valid position */
 };
 
 
 /* the queue used by init_send_fd_queues() / destroy_send_fd_queues() */
 static struct tcp_send_fd_q send2child_q;
 
 
 
 static int send_fd_queue_init(struct tcp_send_fd_q *q, unsigned int size)
5b532c7f
 {
98f3d5e2
 	q->data=pkg_malloc(size*sizeof(struct send_fd_info));
 	if (q->data==0){
 		LOG(L_ERR, "ERROR: send_fd_queue_init: out of memory\n");
06aaa54f
 		return -1;
 	}
98f3d5e2
 	q->crt=&q->data[0];
 	q->end=&q->data[size];
06aaa54f
 	return 0;
5b532c7f
 }
 
98f3d5e2
 /* Frees the queue buffer (if any) and resets all the queue pointers to 0.
  * Does nothing if the queue has no buffer. */
 static void send_fd_queue_destroy(struct tcp_send_fd_q *q)
 {
 	if (!q->data)
 		return;
 	pkg_free(q->data);
 	q->data=0;
 	q->end=0;
 	q->crt=0;
 }
5b532c7f
 
98f3d5e2
 
 
b264d2c6
 static int init_send_fd_queues(void)
f6e50f08
 {
98f3d5e2
 	if (send_fd_queue_init(&send2child_q, SEND_FD_QUEUE_SIZE)!=0)
 		goto error;
 	return 0;
 error:
 	LOG(L_ERR, "ERROR: init_send_fd_queues: init failed\n");
 	return -1;
 }
 
 
 
b264d2c6
 /* releases the buffer of send2child_q (counterpart of
  * init_send_fd_queues()) */
 static void destroy_send_fd_queues(void)
98f3d5e2
 {
 	send_fd_queue_destroy(&send2child_q);
 }
 
 
 
 
 inline static int send_fd_queue_add(	struct tcp_send_fd_q* q, 
 										int unix_sock,
 										struct tcp_connection *t)
 {
 	struct send_fd_info* tmp;
 	unsigned long new_size;
f6e50f08
 	
98f3d5e2
 	if (q->crt>=q->end){
 		new_size=q->end-&q->data[0];
 		if (new_size< MAX_SEND_FD_QUEUE_SIZE/2){
 			new_size*=2;
 		}else new_size=MAX_SEND_FD_QUEUE_SIZE;
ccb7fda2
 		if (unlikely(q->crt>=&q->data[new_size])){
98f3d5e2
 			LOG(L_ERR, "ERROR: send_fd_queue_add: queue full: %ld/%ld\n",
d9515405
 					(long)(q->crt-&q->data[0]-1), new_size);
98f3d5e2
 			goto error;
 		}
 		LOG(L_CRIT, "INFO: send_fd_queue: queue full: %ld, extending to %ld\n",
d9515405
 				(long)(q->end-&q->data[0]), new_size);
98f3d5e2
 		tmp=pkg_realloc(q->data, new_size*sizeof(struct send_fd_info));
ccb7fda2
 		if (unlikely(tmp==0)){
98f3d5e2
 			LOG(L_ERR, "ERROR: send_fd_queue_add: out of memory\n");
 			goto error;
 		}
 		q->crt=(q->crt-&q->data[0])+tmp;
 		q->data=tmp;
 		q->end=&q->data[new_size];
0ba367ec
 	}
98f3d5e2
 	q->crt->tcp_conn=t;
 	q->crt->unix_sock=unix_sock;
746f7674
 	q->crt->expire=get_ticks_raw()+SEND_FD_QUEUE_TIMEOUT;
98f3d5e2
 	q->crt->retries=0;
 	q->crt++;
 	return 0;
 error:
 	return -1;
 }
 
 
 
 inline static void send_fd_queue_run(struct tcp_send_fd_q* q)
 {
 	struct send_fd_info* p;
 	struct send_fd_info* t;
0ba367ec
 	
98f3d5e2
 	for (p=t=&q->data[0]; p<q->crt; p++){
9da6fae7
 		if (unlikely(p->tcp_conn->state == S_CONN_BAD ||
 					 p->tcp_conn->flags & F_CONN_FD_CLOSED ||
 					 p->tcp_conn->s ==-1)) {
 			/* bad and/or already closed connection => remove */
 			goto rm_con;
 		}
ccb7fda2
 		if (unlikely(send_fd(p->unix_sock, &(p->tcp_conn),
 					sizeof(struct tcp_connection*), p->tcp_conn->s)<=0)){
28260509
 			if ( ((errno==EAGAIN)||(errno==EWOULDBLOCK)) && 
746f7674
 							((s_ticks_t)(p->expire-get_ticks_raw())>0)){
98f3d5e2
 				/* leave in queue for a future try */
 				*t=*p;
 				t->retries++;
 				t++;
 			}else{
28260509
 				LOG(L_ERR, "ERROR: run_send_fd_queue: send_fd failed"
746f7674
 						   " on socket %d , queue entry %ld, retries %d,"
 						   " connection %p, tcp socket %d, errno=%d (%s) \n",
d9515405
 						   p->unix_sock, (long)(p-&q->data[0]), p->retries,
746f7674
 						   p->tcp_conn, p->tcp_conn->s, errno,
 						   strerror(errno));
9da6fae7
 rm_con:
76cb799e
 #ifdef TCP_ASYNC
9da6fae7
 				/* if a connection is on the send_fd queue it means it's
 				   not watched for read anymore => could be watched only for
 				   write */
885b9f62
 				if (p->tcp_conn->flags & F_CONN_WRITE_W){
 					io_watch_del(&io_h, p->tcp_conn->s, -1, IO_FD_CLOSING);
 					p->tcp_conn->flags &=~F_CONN_WRITE_W;
 				}
 #endif
 				p->tcp_conn->flags &= ~F_CONN_READER;
19782e1c
 				if (likely(tcpconn_try_unhash(p->tcp_conn)))
 					tcpconn_put(p->tcp_conn);
 				tcpconn_put_destroy(p->tcp_conn); /* dec refcnt & destroy */
98f3d5e2
 			}
bc977837
 		}
f6e50f08
 	}
98f3d5e2
 	q->crt=t;
f6e50f08
 }
98f3d5e2
 #else
 #define send_fd_queue_run(q)
 #endif
f6e50f08
 
 
885b9f62
 /* Unsafe (no locking: the caller should hold c->write_lock) non-blocking
  * send() on the fd of a tcp connection. The fd should be non-blocking.
  * The send is restarted if it was interrupted by a signal (EINTR);
  * MSG_NOSIGNAL is used where available.
  *  returns the number of bytes written on success, -1 on error (errno is
  *  set by send())
  */
 int _tcpconn_write_nb(int fd, struct tcp_connection* c,
 									const char* buf, int len)
 {
 	int sent;
 #ifdef HAVE_MSG_NOSIGNAL
 	const int snd_flags=MSG_NOSIGNAL;
 #else
 	const int snd_flags=0;
 #endif /* HAVE_MSG_NOSIGNAL */
 	
 	do{
 		sent=send(fd, buf, len, snd_flags);
 	}while(unlikely(sent<0) && errno==EINTR);
 	return sent;
 }
 
 
0ba367ec
 
 /* handles io from a tcp child process
  * Reads one response (2 longs: connection pointer, command) from the
  * child's unix socket and executes the command:
  *   CONN_RELEASE - the child is done with the connection: drop its
  *                  reference and, unless the connection is gone, bad or
  *                  timed out on write, re-arm the main timer and watch the
  *                  fd for read again in tcp_main
  *   CONN_ERROR, CONN_DESTROY, CONN_EOF - unhash the connection and
  *                  put/destroy it
  * params: tcp_c - pointer in the tcp_children array, to the entry for
  *                 which an io event was detected
  *         fd_i  - fd index in the fd_array (useful for optimizing
  *                 io_watch_deletes)
  * returns:  handle_* return convention: -1 on error, 0 on EAGAIN (no more
  *           io events queued), >0 on success. success/error refer only to
  *           the reads from the fd.
  */
 inline static int handle_tcp_child(struct tcp_child* tcp_c, int fd_i)
5b532c7f
 {
 	struct tcp_connection* tcpconn;
 	long response[2]; /* [0] = connection pointer, [1] = command */
0c5da34b
 	int cmd;
6a157851
 	int bytes;
885b9f62
 	int n;
ccb7fda2
 	ticks_t t;
20863813
 	ticks_t crt_timeout;
3dc4f620
 	ticks_t con_lifetime;
0ba367ec
 	
ccb7fda2
 	if (unlikely(tcp_c->unix_sock<=0)){
0ba367ec
 		/* (we can't have a fd==0, 0 is never closed )*/
 		LOG(L_CRIT, "BUG: handle_tcp_child: fd %d for %d "
ca351abe
 				"(pid %ld, ser no %d)\n", tcp_c->unix_sock,
 				(int)(tcp_c-&tcp_children[0]), (long)tcp_c->pid,
 				 tcp_c->proc_no);
0ba367ec
 		goto error;
 	}
 	/* read until sizeof(response)
 	 * (this is a SOCK_STREAM so read is not atomic) */
 	bytes=recv_all(tcp_c->unix_sock, response, sizeof(response), MSG_DONTWAIT);
ccb7fda2
 	if (unlikely(bytes<(int)sizeof(response))){
0ba367ec
 		if (bytes==0){
 			/* EOF -> bad, child has died */
ca351abe
 			DBG("DBG: handle_tcp_child: dead tcp child %d (pid %ld, no %d)"
0ba367ec
 					" (shutting down?)\n", (int)(tcp_c-&tcp_children[0]), 
ca351abe
 					(long)tcp_c->pid, tcp_c->proc_no );
741a9937
 			/* don't listen on it any more */
 			io_watch_del(&io_h, tcp_c->unix_sock, fd_i, 0); 
0ba367ec
 			goto error; /* eof. so no more io here, it's ok to return error */
 		}else if (bytes<0){
 			/* EAGAIN is ok if we try to empty the buffer
 			 * e.g.: SIGIO_RT overflow mode or EPOLL ET */
 			if ((errno!=EAGAIN) && (errno!=EWOULDBLOCK)){
2b04f2a8
 				LOG(L_CRIT, "ERROR: handle_tcp_child: read from tcp child %ld "
ca351abe
 						" (pid %ld, no %d) %s [%d]\n",
 						(long)(tcp_c-&tcp_children[0]), (long)tcp_c->pid,
2b04f2a8
 						tcp_c->proc_no, strerror(errno), errno );
0ba367ec
 			}else{
 				bytes=0; /* EAGAIN => report "no more io queued" */
 			}
 			/* try to ignore ? */
 			goto end;
 		}else{
 			/* should never happen */
 			LOG(L_CRIT, "BUG: handle_tcp_child: too few bytes received (%d)\n",
 					bytes );
 			bytes=0; /* something was read so there is no error; otoh if
 					  receive_fd returned less then requested => the receive
 					  buffer is empty => no more io queued on this fd */
 			goto end;
 		}
 	}
 	
 	DBG("handle_tcp_child: reader response= %lx, %ld from %d \n",
 					response[0], response[1], (int)(tcp_c-&tcp_children[0]));
 	/* unpack the response: command + connection it refers to */
 	cmd=response[1];
 	tcpconn=(struct tcp_connection*)response[0];
ccb7fda2
 	if (unlikely(tcpconn==0)){
0ba367ec
 		/* should never happen */
 		LOG(L_CRIT, "BUG: handle_tcp_child: null tcpconn pointer received"
ca351abe
 				 " from tcp child %d (pid %ld): %lx, %lx\n",
 				 	(int)(tcp_c-&tcp_children[0]), (long)tcp_c->pid,
0ba367ec
 					response[0], response[1]) ;
 		goto end;
 	}
 	switch(cmd){
 		case CONN_RELEASE:
 			tcp_c->busy--;
19782e1c
 			if (unlikely(tcpconn_put(tcpconn))){
504ef98e
 				/* if refcnt was 1 => it was used only in the
 				   tcp reader => it's not hashed or watched for IO
 				   anymore => no need to io_watch_del() */
19782e1c
 				tcpconn_destroy(tcpconn);
 				break;
 			}
ccb7fda2
 			if (unlikely(tcpconn->state==S_CONN_BAD)){ 
 				/* connection marked bad: unhash it, stop a possible write
 				   watch and drop it instead of watching it again */
504ef98e
 				if (tcpconn_try_unhash(tcpconn)) {
76cb799e
 #ifdef TCP_ASYNC
504ef98e
 					if (unlikely(tcpconn->flags & F_CONN_WRITE_W)){
 						io_watch_del(&io_h, tcpconn->s, -1, IO_FD_CLOSING);
 						tcpconn->flags &= ~F_CONN_WRITE_W;
 					}
 #endif /* TCP_ASYNC */
 					tcpconn_put_destroy(tcpconn);
 				}
 #ifdef TCP_ASYNC
 				 else if (unlikely(tcpconn->flags & F_CONN_WRITE_W)){
 					/* should never happen: if it's already unhashed, it
 					   should not be watched for IO */
 					BUG("unhashed connection watched for write\n");
 					io_watch_del(&io_h, tcpconn->s, -1, 0);
885b9f62
 					tcpconn->flags &= ~F_CONN_WRITE_W;
 				}
76cb799e
 #endif /* TCP_ASYNC */
0ba367ec
 				break;
 			}
 			/* update the timeout*/
ccb7fda2
 			t=get_ticks_raw();
3dc4f620
 			con_lifetime=cfg_get(tcp, tcp_cfg, con_lifetime);
 			tcpconn->timeout=t+con_lifetime;
 			crt_timeout=con_lifetime;
76cb799e
 #ifdef TCP_ASYNC
 			/* async mode with queued data: the write timeout might
 			   already be expired or might come before con_lifetime */
 			if (unlikely(cfg_get(tcp, tcp_cfg, async) && 
20863813
 							_wbufq_non_empty(tcpconn) )){
dda6ba60
 				if (unlikely(TICKS_GE(t, tcpconn->wbuf_q.wr_timeout))){
20863813
 					DBG("handle_tcp_child: wr. timeout on CONN_RELEASE for %p "
 							"refcnt= %d\n", tcpconn,
 							atomic_get(&tcpconn->refcnt));
 					/* timeout */
7bb2b4ca
 					if (unlikely(tcpconn->state==S_CONN_CONNECT)){
 						/* still connecting => connect timeout */
38429f23
 #ifdef USE_DST_BLACKLIST
a6c250c0
 						(void)dst_blacklist_su( BLST_ERR_CONNECT,
5d6752dc
 											tcpconn->rcv.proto,
 											&tcpconn->rcv.src_su,
 											&tcpconn->send_flags, 0);
7bb2b4ca
 #endif /* USE_DST_BLACKLIST */
 						TCP_EV_CONNECT_TIMEOUT(0, TCP_LADDR(tcpconn),
 										TCP_LPORT(tcpconn), TCP_PSU(tcpconn),
 										TCP_PROTO(tcpconn));
 						TCP_STATS_CONNECT_FAILED();
 					}else{
 						/* any other state => send timeout */
 #ifdef USE_DST_BLACKLIST
a6c250c0
 						(void)dst_blacklist_su( BLST_ERR_SEND,
5d6752dc
 											tcpconn->rcv.proto,
 											&tcpconn->rcv.src_su,
 											&tcpconn->send_flags, 0);
38429f23
 #endif /* USE_DST_BLACKLIST */
7bb2b4ca
 						TCP_EV_SEND_TIMEOUT(0, &tcpconn->rcv);
 						TCP_STATS_SEND_TIMEOUT();
 					}
504ef98e
 					if (tcpconn_try_unhash(tcpconn)) {
 						if (unlikely(tcpconn->flags & F_CONN_WRITE_W)){
 							io_watch_del(&io_h, tcpconn->s, -1, IO_FD_CLOSING);
 							tcpconn->flags&=~F_CONN_WRITE_W;
 						}
 						tcpconn_put_destroy(tcpconn);
 					} else if (unlikely(tcpconn->flags & F_CONN_WRITE_W)){
 						BUG("unhashed connection watched for write\n");
 						io_watch_del(&io_h, tcpconn->s, -1, 0);
20863813
 						tcpconn->flags&=~F_CONN_WRITE_W;
 					}
 					break;
 				}else{
 					/* not expired yet: wake up at whichever comes first,
 					   the write timeout or the connection lifetime */
3dc4f620
 					crt_timeout=MIN_unsigned(con_lifetime,
20863813
 											tcpconn->wbuf_q.wr_timeout-t);
 				}
 			}
76cb799e
 #endif /* TCP_ASYNC */
ccb7fda2
 			/* re-activate the timer */
 			tcpconn->timer.f=tcpconn_main_timeout;
8b0472d7
 			local_timer_reinit(&tcpconn->timer);
20863813
 			local_timer_add(&tcp_main_ltimer, &tcpconn->timer, crt_timeout, t);
3135b4bb
 			/* must be after the de-ref*/
92c0024c
 			tcpconn->flags|=(F_CONN_MAIN_TIMER|F_CONN_READ_W|F_CONN_WANTS_RD);
fc665070
 			tcpconn->flags&=~(F_CONN_READER|F_CONN_OOB_DATA);
76cb799e
 #ifdef TCP_ASYNC
 			/* already watched for write => only change the watched events,
 			   else add a new read watch */
885b9f62
 			if (unlikely(tcpconn->flags & F_CONN_WRITE_W))
 				n=io_watch_chg(&io_h, tcpconn->s, POLLIN| POLLOUT, -1);
 			else
76cb799e
 #endif /* TCP_ASYNC */
885b9f62
 				n=io_watch_add(&io_h, tcpconn->s, POLLIN, F_TCPCONN, tcpconn);
 			if (unlikely(n<0)){
 				/* cannot watch the fd => give up on this connection */
ccb7fda2
 				LOG(L_CRIT, "ERROR: tcp_main: handle_tcp_child: failed to add"
 						" new socket to the fd list\n");
fc665070
 				tcpconn->flags&=~F_CONN_READ_W;
504ef98e
 				if (tcpconn_try_unhash(tcpconn)) {
76cb799e
 #ifdef TCP_ASYNC
504ef98e
 					if (unlikely(tcpconn->flags & F_CONN_WRITE_W)){
 						io_watch_del(&io_h, tcpconn->s, -1, IO_FD_CLOSING);
 						tcpconn->flags&=~F_CONN_WRITE_W;
 					}
 #endif /* TCP_ASYNC */
 					tcpconn_put_destroy(tcpconn);
 				}
 #ifdef TCP_ASYNC
 				 else if (unlikely(tcpconn->flags & F_CONN_WRITE_W)) {
 					BUG("unhashed connection watched for write\n");
 					io_watch_del(&io_h, tcpconn->s, -1, 0);
885b9f62
 					tcpconn->flags&=~F_CONN_WRITE_W;
 				}
76cb799e
 #endif /* TCP_ASYNC */
19782e1c
 				break;
ccb7fda2
 			}
0ba367ec
 			DBG("handle_tcp_child: CONN_RELEASE  %p refcnt= %d\n", 
be7401cc
 							tcpconn, atomic_get(&tcpconn->refcnt));
0ba367ec
 			break;
 		case CONN_ERROR:
 		case CONN_DESTROY:
 		case CONN_EOF:
 			/* WARNING: this will auto-dec. refcnt! */
 				tcp_c->busy--;
 				/* main doesn't listen on it => we don't have to delete it
 				 if (tcpconn->s!=-1)
741a9937
 					io_watch_del(&io_h, tcpconn->s, -1, IO_FD_CLOSING);
0ba367ec
 				*/
76cb799e
 #ifdef TCP_ASYNC
 				/* it can still be watched for write in tcp_main */
885b9f62
 				if ((tcpconn->flags & F_CONN_WRITE_W) && (tcpconn->s!=-1)){
 					io_watch_del(&io_h, tcpconn->s, -1, IO_FD_CLOSING);
 					tcpconn->flags&=~F_CONN_WRITE_W;
 				}
76cb799e
 #endif /* TCP_ASYNC */
19782e1c
 				if (tcpconn_try_unhash(tcpconn))
 					tcpconn_put(tcpconn);
 				tcpconn_put_destroy(tcpconn); /* deref & delete if refcnt==0 */
0ba367ec
 				break;
 		default:
 				LOG(L_CRIT, "BUG: handle_tcp_child:  unknown cmd %d"
 									" from tcp reader %d\n",
 									cmd, (int)(tcp_c-&tcp_children[0]));
 	}
 end:
 	/* bytes: sizeof(response) after a full read, 0 on EAGAIN or short read,
 	   the negative recv_all() result on other read errors */
 	return bytes;
 error:
 	return -1;
 }
 
 
 
 /* handles io from a "generic" ser process (get fd or new_fd from a tcp_send)
  * 
  * params: p     - pointer in the ser processes array (pt[]), to the entry for
  *                 which an io event was detected
  *         fd_i  - fd index in the fd_array (useful for optimizing
  *                 io_watch_deletes)
  * returns:  handle_* return convention:
  *          -1 on error reading from the fd,
  *           0 on EAGAIN  or when no  more io events are queued 
  *             (receive buffer empty),
  *           >0 on successful reads from the fd (the receive buffer might
  *             be non-empty).
  */
 inline static int handle_ser_child(struct process_table* p, int fd_i)
 {
 	struct tcp_connection* tcpconn;
9da6fae7
 	struct tcp_connection* tmp;
0ba367ec
 	long response[2];
 	int cmd;
 	int bytes;
 	int ret;
744a2341
 	int fd;
885b9f62
 	int flags;
ccb7fda2
 	ticks_t t;
3dc4f620
 	ticks_t con_lifetime;
76cb799e
 #ifdef TCP_ASYNC
dda6ba60
 	ticks_t nxt_timeout;
76cb799e
 #endif /* TCP_ASYNC */
0ba367ec
 	
 	ret=-1;
ccb7fda2
 	if (unlikely(p->unix_sock<=0)){
0ba367ec
 		/* (we can't have a fd==0, 0 is never closed )*/
 		LOG(L_CRIT, "BUG: handle_ser_child: fd %d for %d "
 				"(pid %d)\n", p->unix_sock, (int)(p-&pt[0]), p->pid);
 		goto error;
 	}
 			
 	/* get all bytes and the fd (if transmitted)
 	 * (this is a SOCK_STREAM so read is not atomic) */
 	bytes=receive_fd(p->unix_sock, response, sizeof(response), &fd,
 						MSG_DONTWAIT);
ccb7fda2
 	if (unlikely(bytes<(int)sizeof(response))){
0ba367ec
 		/* too few bytes read */
 		if (bytes==0){
 			/* EOF -> bad, child has died */
 			DBG("DBG: handle_ser_child: dead child %d, pid %d"
 					" (shutting down?)\n", (int)(p-&pt[0]), p->pid);
 			/* don't listen on it any more */
741a9937
 			io_watch_del(&io_h, p->unix_sock, fd_i, 0);
0ba367ec
 			goto error; /* child dead => no further io events from it */
 		}else if (bytes<0){
 			/* EAGAIN is ok if we try to empty the buffer
 			 * e.g: SIGIO_RT overflow mode or EPOLL ET */
 			if ((errno!=EAGAIN) && (errno!=EWOULDBLOCK)){
 				LOG(L_CRIT, "ERROR: handle_ser_child: read from child %d  "
 						"(pid %d):  %s [%d]\n", (int)(p-&pt[0]), p->pid,
 						strerror(errno), errno);
 				ret=-1;
 			}else{
 				ret=0;
 			}
 			/* try to ignore ? */
 			goto end;
 		}else{
 			/* should never happen */
 			LOG(L_CRIT, "BUG: handle_ser_child: too few bytes received (%d)\n",
 					bytes );
 			ret=0; /* something was read so there is no error; otoh if
 					  receive_fd returned less then requested => the receive
 					  buffer is empty => no more io queued on this fd */
 			goto end;
 		}
 	}
 	ret=1; /* something was received, there might be more queued */
 	DBG("handle_ser_child: read response= %lx, %ld, fd %d from %d (%d)\n",
 					response[0], response[1], fd, (int)(p-&pt[0]), p->pid);
 	cmd=response[1];
 	tcpconn=(struct tcp_connection*)response[0];
ccb7fda2
 	if (unlikely(tcpconn==0)){
0ba367ec
 		LOG(L_CRIT, "BUG: handle_ser_child: null tcpconn pointer received"
 				 " from child %d (pid %d): %lx, %lx\n",
 				 	(int)(p-&pt[0]), p->pid, response[0], response[1]) ;
 		goto end;
 	}
 	switch(cmd){
 		case CONN_ERROR:
d22b82a0
 			LOG(L_ERR, "handle_ser_child: ERROR: received CON_ERROR for %p"
bf755a37
 					" (id %d), refcnt %d, flags 0x%0x\n",
 					tcpconn, tcpconn->id, atomic_get(&tcpconn->refcnt),
 					tcpconn->flags);
340ce466
 		case CONN_EOF: /* forced EOF after full send, due to send flags */
d22b82a0
 #ifdef TCP_CONNECT_WAIT
76cb63c6
 			/* if the connection is marked as pending => it might be on
 			 *  the way of reaching tcp_main (e.g. CONN_NEW_COMPLETE or
 			 *  CONN_NEW_PENDING_WRITE) =>  it cannot be destroyed here,
 			 *  it will be destroyed on CONN_NEW_COMPLETE /
 			 *  CONN_NEW_PENDING_WRITE or in the send error case by the
 			 *  sender process */
 			if (unlikely(tcpconn->flags & F_CONN_PENDING)) {
 				if (tcpconn_put(tcpconn))
 					tcpconn_destroy(tcpconn);
 				/* no need for io_watch_del(), if PENDING it should not
 				   be watched for anything in tcp_main */
 				break;
 			}
 #endif /* TCP_CONNECT_WAIT */
d22b82a0
 			if ( tcpconn_try_unhash(tcpconn) )
 				tcpconn_put(tcpconn);
fc665070
 			if ( ((tcpconn->flags & (F_CONN_WRITE_W|F_CONN_READ_W)) ) &&
 					(tcpconn->s!=-1)){
741a9937
 				io_watch_del(&io_h, tcpconn->s, -1, IO_FD_CLOSING);
fc665070
 				tcpconn->flags&=~(F_CONN_WRITE_W|F_CONN_READ_W);
06c04bc2
 			}
19782e1c
 			tcpconn_put_destroy(tcpconn); /* dec refcnt & destroy on 0 */
0ba367ec
 			break;
 		case CONN_GET_FD:
 			/* send the requested FD  */
 			/* WARNING: take care of setting refcnt properly to
19782e1c
 			 * avoid race conditions */
9da6fae7
 			if (unlikely(tcpconn->state == S_CONN_BAD ||
 						(tcpconn->flags & F_CONN_FD_CLOSED) ||
 						tcpconn->s ==-1)) {
 				/* connection is already marked as bad and/or has no
 				   fd => don't try to send the fd (trying to send a
 				   closed fd _will_ fail) */
 				tmp = 0;
 				if (unlikely(send_all(p->unix_sock, &tmp, sizeof(tmp)) <= 0))
 					BUG("handle_ser_child: CONN_GET_FD: send_all failed\n");
 				/* no need to attempt to destroy the connection, it should
 				   be already in the process of being destroyed */
 			} else if (unlikely(send_fd(p->unix_sock, &tcpconn,
 										sizeof(tcpconn), tcpconn->s)<=0)){
 				LOG(L_ERR, "handle_ser_child: CONN_GET_FD:"
 							" send_fd failed\n");
 				/* try sending error (better then not sending anything) */
 				tmp = 0;
 				if (unlikely(send_all(p->unix_sock, &tmp, sizeof(tmp)) <= 0))
 					BUG("handle_ser_child: CONN_GET_FD:"
 							" send_fd send_all fallback failed\n");
0ba367ec
 			}
 			break;
 		case CONN_NEW:
 			/* update the fd in the requested tcpconn*/
 			/* WARNING: take care of setting refcnt properly to
19782e1c
 			 * avoid race conditions */
ccb7fda2
 			if (unlikely(fd==-1)){
0ba367ec
 				LOG(L_CRIT, "BUG: handle_ser_child: CONN_NEW:"
 							" no fd received\n");
19782e1c
 				tcpconn->flags|=F_CONN_FD_CLOSED;
 				tcpconn_put_destroy(tcpconn);
0ba367ec
 				break;
 			}
d9515405
 			(*tcp_connections_no)++;
61f8b970
 			if (unlikely(tcpconn->type==PROTO_TLS))
 				(*tls_connections_no)++;
0ba367ec
 			tcpconn->s=fd;
 			/* add tcpconn to the list*/
 			tcpconn_add(tcpconn);
 			/* update the timeout*/
ccb7fda2
 			t=get_ticks_raw();
3dc4f620
 			con_lifetime=cfg_get(tcp, tcp_cfg, con_lifetime);
 			tcpconn->timeout=t+con_lifetime;
20863813
 			/* activate the timer (already properly init. in tcpconn_new())
19782e1c
 			 * no need for reinit */
ccb7fda2
 			local_timer_add(&tcp_main_ltimer, &tcpconn->timer, 
3dc4f620
 								con_lifetime, t);
92c0024c
 			tcpconn->flags|=(F_CONN_MAIN_TIMER|F_CONN_READ_W|F_CONN_WANTS_RD)
76cb799e
 #ifdef TCP_ASYNC
92c0024c
 					/* not used for now, the connection is sent to tcp_main
 					 * before knowing whether we can write on it or we should 
 					 * wait */
 							| (((int)!(tcpconn->flags & F_CONN_WANTS_WR)-1)& 
 								F_CONN_WRITE_W)
76cb799e
 #endif /* TCP_ASYNC */
92c0024c
 				;
fc665070
 			tcpconn->flags&=~F_CONN_FD_CLOSED;
885b9f62
 			flags=POLLIN 
76cb799e
 #ifdef TCP_ASYNC
885b9f62
 					/* not used for now, the connection is sent to tcp_main
 					 * before knowing if we can write on it or we should 
 					 * wait */
92c0024c
 					| (((int)!(tcpconn->flags & F_CONN_WANTS_WR)-1) & POLLOUT)
76cb799e
 #endif /* TCP_ASYNC */
885b9f62
 					;
ccb7fda2
 			if (unlikely(
885b9f62
 					io_watch_add(&io_h, tcpconn->s, flags,
a0553f4e
 												F_TCPCONN, tcpconn)<0)){
ccb7fda2
 				LOG(L_CRIT, "ERROR: tcp_main: handle_ser_child: failed to add"
 						" new socket to the fd list\n");
fc665070
 				tcpconn->flags&=~(F_CONN_WRITE_W|F_CONN_READ_W);
19782e1c
 				tcpconn_try_unhash(tcpconn); /*  unhash & dec refcnt */
 				tcpconn_put_destroy(tcpconn);
ccb7fda2
 			}
0ba367ec
 			break;
76cb799e
 #ifdef TCP_ASYNC
885b9f62
 		case CONN_QUEUED_WRITE:
d22b82a0
 			/* received only if the wr. queue is empty and a write finishes
 			 * with EAGAIN (common after connect())
 			 * it should only enable write watching on the fd. The connection
0c7e84ff
 			 * should be  already in the hash. The refcnt is automatically
 			 * decremented.
d22b82a0
 			 */
0c7e84ff
 			/* auto-dec refcnt */
 			if (unlikely(tcpconn_put(tcpconn))){
 				tcpconn_destroy(tcpconn);
 				break;
 			}
 			if (unlikely((tcpconn->state==S_CONN_BAD) ||
19782e1c
 							!(tcpconn->flags & F_CONN_HASHED) ))
0c7e84ff
 				/* in the process of being destroyed => do nothing */
19782e1c
 				break;
92c0024c
 			if (!(tcpconn->flags & F_CONN_WANTS_WR)){
 				tcpconn->flags|=F_CONN_WANTS_WR;
2a45890d
 				t=get_ticks_raw();
 				if (likely((tcpconn->flags & F_CONN_MAIN_TIMER) && 
 					(TICKS_LT(tcpconn->wbuf_q.wr_timeout, tcpconn->timeout)) &&
 						TICKS_LT(t, tcpconn->wbuf_q.wr_timeout) )){
 					/* _wbufq_nonempty() is guaranteed here */
 					/* update the timer */
 					local_timer_del(&tcp_main_ltimer, &tcpconn->timer);
 					local_timer_reinit(&tcpconn->timer);
 					local_timer_add(&tcp_main_ltimer, &tcpconn->timer,
 										tcpconn->wbuf_q.wr_timeout-t, t);
 					DBG("tcp_main: handle_ser_child: CONN_QUEUED_WRITE; %p "
 							"timeout adjusted to %d s\n", tcpconn, 
 							TICKS_TO_S(tcpconn->wbuf_q.wr_timeout-t));
 				}
92c0024c
 				if (!(tcpconn->flags & F_CONN_WRITE_W)){
 					tcpconn->flags|=F_CONN_WRITE_W;
 					if (!(tcpconn->flags & F_CONN_READ_W)){
 						if (unlikely(io_watch_add(&io_h, tcpconn->s, POLLOUT,
885b9f62
 												F_TCPCONN, tcpconn)<0)){
92c0024c
 							LOG(L_CRIT, "ERROR: tcp_main: handle_ser_child:"
 										" failed to enable write watch on"
 										" socket\n");
 							if (tcpconn_try_unhash(tcpconn))
 								tcpconn_put_destroy(tcpconn);
 							break;
 						}
 					}else{
 						if (unlikely(io_watch_chg(&io_h, tcpconn->s,
 													POLLIN|POLLOUT, -1)<0)){
 							LOG(L_CRIT, "ERROR: tcp_main: handle_ser_child:"
 									" failed to change socket watch events\n");
504ef98e
 							if (tcpconn_try_unhash(tcpconn)) {
 								io_watch_del(&io_h, tcpconn->s, -1,
 												IO_FD_CLOSING);
 								tcpconn->flags&=~F_CONN_READ_W;
92c0024c
 								tcpconn_put_destroy(tcpconn);
504ef98e
 							} else {
 								BUG("unhashed connection watched for IO\n");
 								io_watch_del(&io_h, tcpconn->s, -1, 0);
 								tcpconn->flags&=~F_CONN_READ_W;
 							}
92c0024c
 							break;
 						}
885b9f62
 					}
 				}
 			}else{
340ce466
 				LOG(L_WARN, "tcp_main: handler_ser_child: connection %p"
885b9f62
 							" already watched for write\n", tcpconn);
 			}
 			break;
d22b82a0
 #ifdef TCP_CONNECT_WAIT
 		case CONN_NEW_COMPLETE:
 		case CONN_NEW_PENDING_WRITE:
 				/* received when a pending connect completes in the same
 				 * tcp_send() that initiated it
efc23dce
 				 * the connection is already in the hash with F_CONN_PENDING
 				 * flag (added by tcp_send()) and refcnt at least 1 (for the
d22b82a0
 				 *  hash)*/
fc665070
 			tcpconn->flags&=~(F_CONN_PENDING|F_CONN_FD_CLOSED);
d22b82a0
 			if (unlikely((tcpconn->state==S_CONN_BAD) || (fd==-1))){
 				if (unlikely(fd==-1))
 					LOG(L_CRIT, "BUG: handle_ser_child: CONN_NEW_COMPLETE:"
 								" no fd received\n");
 				else
 					LOG(L_WARN, "WARNING: handle_ser_child: CONN_NEW_COMPLETE:"
 							" received connection with error\n");
 				tcpconn->flags|=F_CONN_FD_CLOSED;
 				tcpconn->state=S_CONN_BAD;
 				tcpconn_try_unhash(tcpconn);
 				tcpconn_put_destroy(tcpconn);
 				break;
 			}
 			(*tcp_connections_no)++;
61f8b970
 			if (unlikely(tcpconn->type==PROTO_TLS))
 				(*tls_connections_no)++;
d22b82a0
 			tcpconn->s=fd;
 			/* update the timeout*/
 			t=get_ticks_raw();
3dc4f620
 			con_lifetime=cfg_get(tcp, tcp_cfg, con_lifetime);
 			tcpconn->timeout=t+con_lifetime;
 			nxt_timeout=con_lifetime;
d22b82a0
 			if (unlikely(cmd==CONN_NEW_COMPLETE)){
 				/* check if needs to be watched for write */
 				lock_get(&tcpconn->write_lock);
 					/* if queue non empty watch it for write */
 					flags=(_wbufq_empty(tcpconn)-1)&POLLOUT;
 				lock_release(&tcpconn->write_lock);
dda6ba60
 				if (flags){
 					if (TICKS_LT(tcpconn->wbuf_q.wr_timeout, tcpconn->timeout)
 							&& TICKS_LT(t, tcpconn->wbuf_q.wr_timeout))
 						nxt_timeout=tcpconn->wbuf_q.wr_timeout-t;
 					tcpconn->flags|=F_CONN_WRITE_W|F_CONN_WANTS_WR;
 				}
 				/* activate the timer (already properly init. in 
 				   tcpconn_new())  no need for reinit */
 				local_timer_add(&tcp_main_ltimer, &tcpconn->timer, nxt_timeout,
 									t);
 				tcpconn->flags|=F_CONN_MAIN_TIMER|F_CONN_READ_W| 
 								F_CONN_WANTS_RD;
d22b82a0
 			}else{
 				/* CONN_NEW_PENDING_WRITE */
 				/* no need to check, we have something queued for write */
 				flags=POLLOUT;
dda6ba60
 				if (TICKS_LT(tcpconn->wbuf_q.wr_timeout, tcpconn->timeout)
 						&& TICKS_LT(t, tcpconn->wbuf_q.wr_timeout))
 					nxt_timeout=tcpconn->wbuf_q.wr_timeout-t;
 				/* activate the timer (already properly init. in 
 				   tcpconn_new())  no need for reinit */
 				local_timer_add(&tcp_main_ltimer, &tcpconn->timer, nxt_timeout,
 									t);
 				tcpconn->flags|=F_CONN_MAIN_TIMER|F_CONN_READ_W| 
 								F_CONN_WANTS_RD |
 								F_CONN_WRITE_W|F_CONN_WANTS_WR;
d22b82a0
 			}
 			flags|=POLLIN;
 			if (unlikely(
 					io_watch_add(&io_h, tcpconn->s, flags,
 												F_TCPCONN, tcpconn)<0)){
 				LOG(L_CRIT, "ERROR: tcp_main: handle_ser_child: failed to add"
 						" new socket to the fd list\n");
fc665070
 				tcpconn->flags&=~(F_CONN_WRITE_W|F_CONN_READ_W);
d22b82a0
 				tcpconn_try_unhash(tcpconn); /*  unhash & dec refcnt */
 				tcpconn_put_destroy(tcpconn);
 			}
 			break;
 #endif /* TCP_CONNECT_WAIT */
76cb799e
 #endif /* TCP_ASYNC */
0ba367ec
 		default:
 			LOG(L_CRIT, "BUG: handle_ser_child: unknown cmd %d\n", cmd);
 	}
 end:
 	return ret;
 error:
 	return -1;
 }
 
 
 
98f3d5e2
 /* Hands a tcp connection (the tcpconn pointer + its fd) to one of the tcp
  * reader children (tcp_children[]).
  *
  * Child selection:
  *  - tcp_sockets_gworkers==0: all tcp_children_no children are scanned,
  *    starting at the static cursor crt (wrapping via crt%tcp_children_no);
  *    the first child with busy==0 wins, otherwise the one with the
  *    smallest busy counter is used.
  *  - otherwise: the same "first idle, else least busy" rule, restricted to
  *    the index range [wfirst, wlast) computed from
  *    tcpconn->rcv.bind_address (its own workers if bind_address->workers>0,
  *    else the range 0 .. tcp_sockets_gworkers-2).
  *
  * Before the fd is sent, all the requests already pending from the selected
  * child are processed (see the deadlock note in the body).
  *
  * NOTE(review): busy and n_reqs of the selected child are incremented
  * before the send and are not decremented again on the -1 returns of this
  * function -- confirm that this is compensated elsewhere.
  *
  * params:  tcpconn - connection to pass; tcpconn->s is the fd that is sent.
  * returns: 0 on success (or, with SEND_FD_QUEUE, if the send was queued
  *          after EAGAIN/EWOULDBLOCK),
  *          -1 if the connection is S_CONN_BAD or has F_CONN_FD_CLOSED set
  *          after the pending requests were processed, if send_fd() failed
  *          or if the send could not be queued.
  */
 inline static int send2child(struct tcp_connection* tcpconn)
 {
 	int i;
 	int min_busy;
 	int idx;
edf5e385
 	int wfirst;
 	int wlast;
746f7674
 	static int crt=0; /* current child */
 	int last;
98f3d5e2
 	
edf5e385
 	if(likely(tcp_sockets_gworkers==0)) {
 		/* no child selection based on received socket
 		 * - use least loaded over all */
 		min_busy=tcp_children[0].busy;
 		idx=0;
 		last=crt+tcp_children_no;
 		/* one full pass over the children, starting where the previous
 		 * call left off */
 		for (; crt<last; crt++){
 			i=crt%tcp_children_no;
 			if (!tcp_children[i].busy){
 				/* idle child found => stop searching */
 				idx=i;
 				min_busy=0;
 				break;
 			}else if (min_busy>tcp_children[i].busy){
 				min_busy=tcp_children[i].busy;
 				idx=i;
 			}
 		}
 		crt=idx+1; /* next time we start with crt%tcp_children_no */
 	} else {
 		/* child selection based on received socket
 		 * - use least loaded per received socket, starting with the first
 		 *   in its group */
 		if(tcpconn->rcv.bind_address->workers>0) {
 			/* the listen socket has its own group of workers:
 			 * [workers_tcpidx, workers_tcpidx+workers) */
 			wfirst = tcpconn->rcv.bind_address->workers_tcpidx;
 			wlast = wfirst + tcpconn->rcv.bind_address->workers;
 			LM_DBG("===== checking per-socket specific workers (%d/%d..%d/%d) [%s]\n",
 					tcp_children[wfirst].pid, tcp_children[wfirst].proc_no,
 					tcp_children[wlast-1].pid, tcp_children[wlast-1].proc_no,
 					tcpconn->rcv.bind_address->sock_str.s);
 		} else {
 			/* no socket specific workers => use the generic ones;
 			 * wlast is exclusive (see the loop below) */
 			wfirst = 0;
 			wlast = tcp_sockets_gworkers - 1;
 			LM_DBG("+++++ checking per-socket generic workers (%d/%d..%d/%d) [%s]\n",
 					tcp_children[wfirst].pid, tcp_children[wfirst].proc_no,
 					tcp_children[wlast-1].pid, tcp_children[wlast-1].proc_no,
 					tcpconn->rcv.bind_address->sock_str.s);
 		}
 		idx = wfirst;
 		min_busy = tcp_children[idx].busy;
 		for(i=wfirst; i<wlast; i++) {
 			if (!tcp_children[i].busy){
 				idx=i;
 				min_busy=0;
 				break;
 			} else {
 				if (min_busy>tcp_children[i].busy) {
 					min_busy=tcp_children[i].busy;
 					idx=i;
 				}
 			}
98f3d5e2
 		}
 	}
 	
 	/* account for the new request on the selected child */
 	tcp_children[idx].busy++;
 	tcp_children[idx].n_reqs++;
ccb7fda2
 	if (unlikely(min_busy)){
98f3d5e2
 		DBG("WARNING: send2child: no free tcp receiver, "
 				" connection passed to the least busy one (%d)\n",
 				min_busy);
 	}
edf5e385
 	LM_DBG("selected tcp worker %d %d(%ld) for activity on [%s], %p\n",
 			idx, tcp_children[idx].proc_no, (long)tcp_children[idx].pid,
 			tcpconn->rcv.bind_address->sock_str.s, tcpconn);
98f3d5e2
 	/* first make sure this child doesn't have pending request for
 	 * tcp_main (to avoid a possible deadlock: e.g. child wants to
 	 * send a release command, but the master fills its socket buffer
 	 * with new connection commands => deadlock) */
 	/* answer tcp_send requests first */
d89437a3
 	/* (empty loop body: repeat while the handler returns >0 and the
 	 *  connection was not marked bad in the meantime) */
 	while(unlikely((tcpconn->state != S_CONN_BAD) &&
 					(handle_ser_child(&pt[tcp_children[idx].proc_no], -1)>0)));
98f3d5e2
 	/* process tcp readers requests */
d89437a3
 	while(unlikely((tcpconn->state != S_CONN_BAD &&
 					(handle_tcp_child(&tcp_children[idx], -1)>0))));
 	
 	/* the above possible pending requests might have included a
 	   command to close this tcpconn (e.g. CONN_ERROR, CONN_EOF).
 	   In this case the fd is already closed here (and possible
 	   even replaced by another one with the same number) so it
 	   must not be sent to a reader anymore */
 	if (unlikely(tcpconn->state == S_CONN_BAD ||
 					(tcpconn->flags & F_CONN_FD_CLOSED)))
 		return -1;
98f3d5e2
 #ifdef SEND_FD_QUEUE
28260509
 	/* if queue full, try to queue the io */
ccb7fda2
 	if (unlikely(send_fd(tcp_children[idx].unix_sock, &tcpconn,
 							sizeof(tcpconn), tcpconn->s)<=0)){
28260509
 		if ((errno==EAGAIN)||(errno==EWOULDBLOCK)){
 			/* FIXME: remove after debugging */
746f7674
 			 LOG(L_CRIT, "INFO: tcp child %d, socket %d: queue full,"
 					 	" %d requests queued (total handled %d)\n",
 					idx, tcp_children[idx].unix_sock, min_busy,
 					tcp_children[idx].n_reqs-1);
28260509
 			/* the send would block => remember it in send2child_q */
 			if (send_fd_queue_add(&send2child_q, tcp_children[idx].unix_sock, 
98f3d5e2
 						tcpconn)!=0){
28260509
 				LOG(L_ERR, "ERROR: send2child: queue send op. failed\n");
 				return -1;
 			}
 		}else{
bf755a37
 			LOG(L_ERR, "ERROR: send2child: send_fd failed for %p (flags 0x%0x)"
 						", fd %d\n", tcpconn, tcpconn->flags, tcpconn->s);
746f7674
 			return -1;
28260509
 		}
98f3d5e2
 	}
 #else
ccb7fda2
 	/* no send queue support: any send_fd() failure is an error */
 	if (unlikely(send_fd(tcp_children[idx].unix_sock, &tcpconn,
 						sizeof(tcpconn), tcpconn->s)<=0)){
bf755a37
 		LOG(L_ERR, "ERROR: send2child: send_fd failed for %p (flags 0x%0x)"
 					", fd %d\n", tcpconn, tcpconn->flags, tcpconn->s);
98f3d5e2
 		return -1;
 	}
 #endif
 	
 	return 0;
 }
 
 
 
 /* handles a new connection, called internally by tcp_main_loop/handle_io.
  * params: si - pointer to one of the tcp socket_info structures on which
  *              an io event was detected (connection attempt)
  * returns:  handle_* return convention: -1 on error, 0 on EAGAIN (no more
  *           io events queued), >0 on success. success/error refer only to
  *           the accept.
  */
 static inline int handle_new_connect(struct socket_info* si)
 {
 	union sockaddr_union su;
a6357e25
 	union sockaddr_union sock_name;
 	unsigned sock_name_len;
 	union sockaddr_union* dst_su;
98f3d5e2
 	struct tcp_connection* tcpconn;
 	socklen_t su_len;
 	int new_sock;
 	
 	/* got a connection on r */
 	su_len=sizeof(su);
 	new_sock=accept(si->socket, &(su.s), &su_len);
ccb7fda2
 	if (unlikely(new_sock==-1)){
98f3d5e2
 		if ((errno==EAGAIN)||(errno==EWOULDBLOCK))
 			return 0;
 		LOG(L_ERR,  "WARNING: handle_new_connect: error while accepting"
 				" connection(%d): %s\n", errno, strerror(errno));
 		return -1;
 	}
3dc4f620
 	if (unlikely(*tcp_connections_no>=cfg_get(tcp, tcp_cfg, max_connections))){
98f3d5e2
 		LOG(L_ERR, "ERROR: maximum number of connections exceeded: %d/%d\n",
3dc4f620
 					*tcp_connections_no,
 					cfg_get(tcp, tcp_cfg, max_connections));
ab88df95
 		tcp_safe_close(new_sock);
7bb2b4ca
 		TCP_STATS_LOCAL_REJECT();
98f3d5e2
 		return 1; /* success, because the accept was succesfull */
 	}
61f8b970
 	if (unlikely(si->proto==PROTO_TLS)) {
 		if (unlikely(*tls_connections_no>=cfg_get(tcp, tcp_cfg, max_tls_connections))){
 			LM_ERR("maximum number of tls connections exceeded: %d/%d\n",
 					*tls_connections_no,
 					cfg_get(tcp, tcp_cfg, max_tls_connections));
 			tcp_safe_close(new_sock);
 			TCP_STATS_LOCAL_REJECT();
 			return 1; /* success, because the accept was succesfull */
 		}
 	}
ccb7fda2
 	if (unlikely(init_sock_opt_accept(new_sock)<0)){
98f3d5e2
 		LOG(L_ERR, "ERROR: handle_new_connect: init_sock_opt failed\n");
ab88df95
 		tcp_safe_close(new_sock);
98f3d5e2
 		return 1; /* success, because the accept was succesfull */
 	}
8f0e1af3
 	(*tcp_connections_no)++;
61f8b970
 	if (unlikely(si->proto==PROTO_TLS))
 		(*tls_connections_no)++;
66cda7bc
 	/* stats for established connections are incremented after
 	   the first received or sent packet.
 	   Alternatively they could be incremented here for accepted
 	   connections, but then the connection state must be changed to
 	  S_CONN_OK:
 	  TCP_STATS_ESTABLISHED(S_CONN_ACCEPT);
 	  ...
 	  tcpconn=tcpconn_new(new_sock, &su, dst_su, si, si->proto, S_CONN_OK);
 	*/
98f3d5e2
 	
a6357e25
 	dst_su=&si->su;
ccb7fda2
 	if (unlikely(si->flags & SI_IS_ANY)){
a6357e25
 		/* INADDR_ANY => get local dst */
 		sock_name_len=sizeof(sock_name);
 		if (getsockname(new_sock, &sock_name.s, &sock_name_len)!=0){
 			LOG(L_ERR, "ERROR: handle_new_connect:"
 						" getsockname failed: %s(%d)\n",
 						strerror(errno), errno);
 			/* go on with the 0.0.0.0 dst from the sock_info */
 		}else{
 			dst_su=&sock_name;
 		}
 	}
98f3d5e2
 	/* add socket to list */
a6357e25
 	tcpconn=tcpconn_new(new_sock, &su, dst_su, si, si->proto, S_CONN_ACCEPT);
ccb7fda2
 	if (likely(tcpconn)){
6d263c0c
 		tcpconn->flags|=F_CONN_PASSIVE;
746f7674
 #ifdef TCP_PASS_NEW_CONNECTION_ON_DATA
19782e1c
 		atomic_set(&tcpconn->refcnt, 1); /* safe, not yet available to the
 											outside world */
746f7674
 		tcpconn_add(tcpconn);
ccb7fda2
 		/* activate the timer */
 		local_timer_add(&tcp_main_ltimer, &tcpconn->timer, 
3dc4f620
 								cfg_get(tcp, tcp_cfg, con_lifetime),
 								get_ticks_raw());
92c0024c
 		tcpconn->flags|=(F_CONN_MAIN_TIMER|F_CONN_READ_W|F_CONN_WANTS_RD);
a0553f4e
 		if (unlikely(io_watch_add(&io_h, tcpconn->s, POLLIN, 
 													F_TCPCONN, tcpconn)<0)){
ccb7fda2
 			LOG(L_CRIT, "ERROR: tcp_main: handle_new_connect: failed to add"
 						" new socket to the fd list\n");
fc665070
 			tcpconn->flags&=~F_CONN_READ_W;
19782e1c
 			if (tcpconn_try_unhash(tcpconn))
 				tcpconn_put_destroy(tcpconn);
ccb7fda2
 		}
746f7674
 #else
19782e1c
 		atomic_set(&tcpconn->refcnt, 2); /* safe, not yet available to the
be7401cc
 											outside world */
19782e1c
 		/* prepare it for passing to a child */
 		tcpconn->flags|=F_CONN_READER;
98f3d5e2
 		tcpconn_add(tcpconn);
edcdea00
 		DBG("handle_new_connect: new connection from %s: %p %d flags: %04x\n",
 			su2a(&su, sizeof(su)), tcpconn, tcpconn->s, tcpconn->flags);
ccb7fda2
 		if(unlikely(send2child(tcpconn)<0)){
885b9f62
 			tcpconn->flags&=~F_CONN_READER;
a8859f52
 			if (tcpconn_try_unhash(tcpconn))
 				tcpconn_put(tcpconn);
19782e1c
 			tcpconn_put_destroy(tcpconn);
98f3d5e2
 		}
746f7674
 #endif
98f3d5e2
 	}else{ /*tcpconn==0 */
 		LOG(L_ERR, "ERROR: handle_new_connect: tcpconn_new failed, "
 				"closing socket\n");
ab88df95
 		tcp_safe_close(new_sock);
8f0e1af3
 		(*tcp_connections_no)--;
61f8b970
 		if (unlikely(si->proto==PROTO_TLS))
 			(*tls_connections_no)--;
98f3d5e2
 	}
 	return 1; /* accept() was succesfull */
 }
 
 
 
 /* handles an io event on one of the watched tcp connections
  * 
  * With TCP_ASYNC, write related events (POLLOUT/POLLERR/POLLHUP on a
  * connection watched for write) are handled first: the write queue is run
  * and, on error, the connection is destroyed or (if unread data is still
  * available) sent to a reader with F_CONN_FORCE_EOF|F_CONN_WR_ERROR set.
  * Any remaining event on a connection watched for read causes the
  * connection to be passed to a tcp reader child via send2child().
  *
  * params: tcpconn - pointer to the tcp_connection for which we have an io ev.
  *         ev      - the event mask; the POLLIN, POLLOUT, POLLERR, POLLHUP,
  *                   POLLPRI and (if defined) POLLRDHUP bits are looked at
  *         fd_i    - index in the fd_array table (needed for delete)
  * returns:  handle_* return convention, but on success it always returns 0
885b9f62
  *           (because it's one-shot, after a succesful execution the fd is
98f3d5e2
  *            removed from tcp_main's watch fd list and passed to a child =>
  *            tcp_main is not interested in further io events that might be
  *            queued for this fd)
  *           -1 is returned on the error paths (goto error).
  */
bf755a37
 inline static int handle_tcpconn_ev(struct tcp_connection* tcpconn, short ev,
885b9f62
 										int fd_i)
98f3d5e2
 {
76cb799e
 #ifdef TCP_ASYNC
885b9f62
 	int empty_q;
7498b4dc
 	int bytes;
76cb799e
 #endif /* TCP_ASYNC */
98f3d5e2
 	/*  is refcnt!=0 really necessary? 
 	 *  No, in fact it's a bug: I can have the following situation: a send only
 	 *   tcp connection used by n processes simultaneously => refcnt = n. In 
 	 *   the same time I can have a read event and this situation is perfectly
 	 *   valid. -- andrei
 	 */
 #if 0
 	if ((tcpconn->refcnt!=0)){
 		/* FIXME: might be valid for sigio_rt iff fd flags are not cleared
 		 *        (there is a short window in which it could generate a sig
 		 *         that would be catched by tcp_main) */
 		LOG(L_CRIT, "BUG: handle_tcpconn_ev: io event on referenced"
 					" tcpconn (%p), refcnt=%d, fd=%d\n",
 					tcpconn, tcpconn->refcnt, tcpconn->s);
 		return -1;
 	}
 #endif
ccb7fda2
 	/* pass it to child, so remove it from the io watch list  and the local
 	 *  timer */
76cb799e
 #ifdef TCP_ASYNC
fcc935bb
 	empty_q=0; /* warning fix */
7498b4dc
 	/* write side: only for connections watched for write in tcp_main */
 	if (unlikely((ev & (POLLOUT|POLLERR|POLLHUP)) &&
 					(tcpconn->flags & F_CONN_WRITE_W))){
340ce466
 		/* failure cases: error/hangup event, wbufq_run() returned <0, or
 		 * the queue became empty and tcpconn_close_after_send() is true */
 		if (unlikely((ev & (POLLERR|POLLHUP)) ||
 					(wbufq_run(tcpconn->s, tcpconn, &empty_q)<0) ||
 					(empty_q && tcpconn_close_after_send(tcpconn))
 			)){
fc665070
 			if ((tcpconn->flags & F_CONN_READ_W) && (ev & POLLIN)){
7498b4dc
 				/* connection is watched for read and there is a read event
 				 * (unfortunately if we have POLLIN here we don't know if 
 				 * there's really any data in the read buffer or the POLLIN
 				 * was generated by the error or EOF => to avoid loosing
 				 *  data it's safer to either directly check the read buffer 
92c0024c
 				 *  or try a read)*/
7498b4dc
 				/* in most cases the read buffer will be empty, so in general
 				 * is cheaper to check it here and then send the 
 				 * conn.  to a a child only if needed (another syscall + at 
 				 * least 2 * syscalls in the reader + ...) */
 				if ((ioctl(tcpconn->s, FIONREAD, &bytes)>=0) && (bytes>0)){
504ef98e
 					/* unread data is available: stop watching the fd (it
 					 * is not closed here, flags 0) and let a reader
 					 * consume the data */
 					if (unlikely(io_watch_del(&io_h, tcpconn->s, fd_i, 0)<0)){
 						LOG(L_ERR, "ERROR: handle_tcpconn_ev: io_watch_del(1)"
 								" failed: for %p, fd %d\n",
 								tcpconn, tcpconn->s);
 					}
92c0024c
 					tcpconn->flags&=~(F_CONN_WRITE_W|F_CONN_READ_W|
 										F_CONN_WANTS_RD|F_CONN_WANTS_WR);
7498b4dc
 					tcpconn->flags|=F_CONN_FORCE_EOF|F_CONN_WR_ERROR;
 					goto send_to_child;
 				}
 				/* if bytes==0 or ioctl failed, destroy the connection now */
 			}
504ef98e
 			if (unlikely(io_watch_del(&io_h, tcpconn->s, fd_i,
 											IO_FD_CLOSING) < 0)){
 				LOG(L_ERR, "ERROR: handle_tcpconn_ev: io_watch_del() failed:"
 							" for %p, fd %d\n", tcpconn, tcpconn->s);
 			}
92c0024c
 			tcpconn->flags&=~(F_CONN_WRITE_W|F_CONN_READ_W|
 								F_CONN_WANTS_RD|F_CONN_WANTS_WR);
7bb2b4ca
 			if (unlikely(ev & POLLERR)){
71eae780
 				/* the error is accounted as a connect failure if the
 				 * connection is still in S_CONN_CONNECT, otherwise as a
 				 * send error / reset */
 				if (unlikely(tcpconn->state==S_CONN_CONNECT)){
fb751cbf
 #ifdef USE_DST_BLACKLIST
a6c250c0
 					(void)dst_blacklist_su(BLST_ERR_CONNECT, tcpconn->rcv.proto,
5d6752dc
 										&tcpconn->rcv.src_su,
 										&tcpconn->send_flags, 0);
fb751cbf
 #endif /* USE_DST_BLACKLIST */
7bb2b4ca
 					TCP_EV_CONNECT_ERR(0, TCP_LADDR(tcpconn),
 										TCP_LPORT(tcpconn), TCP_PSU(tcpconn),
 										TCP_PROTO(tcpconn));
 					TCP_STATS_CONNECT_FAILED();
 				}else{
fb751cbf
 #ifdef USE_DST_BLACKLIST
a6c250c0
 					(void)dst_blacklist_su(BLST_ERR_SEND, tcpconn->rcv.proto,
5d6752dc
 										&tcpconn->rcv.src_su,
 										&tcpconn->send_flags, 0);
fb751cbf
 #endif /* USE_DST_BLACKLIST */
7bb2b4ca
 					TCP_STATS_CON_RESET(); /* FIXME: it could != RST */
 				}
 			}
8984eb5e
 			if (unlikely(!tcpconn_try_unhash(tcpconn))){
 				LOG(L_CRIT, "BUG: tcpconn_ev: unhashed connection %p\n",
 							tcpconn);
 			}
19782e1c
 			tcpconn_put_destroy(tcpconn);
885b9f62
 			goto error;
 		}
 		if (empty_q){
92c0024c
 			/* everything queued was written => stop watching for write */
 			tcpconn->flags&=~F_CONN_WANTS_WR;
fc665070
 			if (!(tcpconn->flags & F_CONN_READ_W)){
7498b4dc
 				/* not watched for read either => remove the fd completely */
 				if (unlikely(io_watch_del(&io_h, tcpconn->s, fd_i, 0)==-1)){
 					LOG(L_ERR, "ERROR: handle_tcpconn_ev: io_watch_del(2)"
 								" failed:" " for %p, fd %d\n",
 								tcpconn, tcpconn->s);
885b9f62
 					goto error;
7498b4dc
 				}
885b9f62
 			}else{
 				/* still watched for read => keep only POLLIN */
 				if (unlikely(io_watch_chg(&io_h, tcpconn->s,
7498b4dc
 											POLLIN, fd_i)==-1)){
 					LOG(L_ERR, "ERROR: handle_tcpconn_ev: io_watch_chg(1)"
 								" failed:" " for %p, fd %d\n",
 								tcpconn, tcpconn->s);
885b9f62
 					goto error;
7498b4dc
 				}
885b9f62
 			}
8b0472d7
 			tcpconn->flags&=~F_CONN_WRITE_W;
885b9f62
 		}
503bc281
 		ev&=~POLLOUT; /* clear POLLOUT */
885b9f62
 	}
fc665070
 	if (likely(ev && (tcpconn->flags & F_CONN_READ_W))){
503bc281
 		/* if still some other IO event (POLLIN|POLLHUP|POLLERR) and
 		 * connection is still watched in tcp_main for reads, send it to a
 		 * child and stop watching it for input (but continue watching for
 		 *  writes if needed): */
885b9f62
 		if (unlikely(tcpconn->flags & F_CONN_WRITE_W)){
7498b4dc
 			if (unlikely(io_watch_chg(&io_h, tcpconn->s, POLLOUT, fd_i)==-1)){
 				LOG(L_ERR, "ERROR: handle_tcpconn_ev: io_watch_chg(2)"
 							" failed:" " for %p, fd %d\n",
 							tcpconn, tcpconn->s);
885b9f62
 				goto error;
7498b4dc
 			}
885b9f62
 		}else
 #else
 	/* without TCP_ASYNC this brace opens the block that is closed by the
 	 * same "}" that closes the if(likely(ev && ...)) block in the
 	 * TCP_ASYNC build */
 	{
76cb799e
 #endif /* TCP_ASYNC */
7498b4dc
 			/* (in the TCP_ASYNC build this statement is the else branch of
 			 *  the F_CONN_WRITE_W test above) */
 			if (unlikely(io_watch_del(&io_h, tcpconn->s, fd_i, 0)==-1)){
 				LOG(L_ERR, "ERROR: handle_tcpconn_ev: io_watch_del(3)"
 							" failed:" " for %p, fd %d\n",
 							tcpconn, tcpconn->s);
885b9f62
 				goto error;
7498b4dc
 			}
76cb799e
 #ifdef TCP_ASYNC
7498b4dc
 send_to_child:
 #endif
7f1b8eb8
 		DBG("tcp: DBG: sending to child, events %x\n", ev);
7498b4dc
 		/* branchless flag set: ((int)!(x) - 1) is all ones if x!=0 and 0
 		 * otherwise, so the flag is or-ed in only if one of the tested
 		 * event bits is present */
 #ifdef POLLRDHUP
7f1b8eb8
 		tcpconn->flags|=((int)!(ev & (POLLRDHUP|POLLHUP|POLLERR)) -1) &
7498b4dc
 							F_CONN_EOF_SEEN;
 #else /* POLLRDHUP */
 		tcpconn->flags|=((int)!(ev & (POLLHUP|POLLERR)) -1) & F_CONN_EOF_SEEN;
 #endif /* POLLRDHUP */
 		tcpconn->flags|= ((int)!(ev & POLLPRI) -1)  & F_CONN_OOB_DATA;
fc665070
 		tcpconn->flags|=F_CONN_READER;
885b9f62
 		/* the connection leaves tcp_main: stop its local timer */
 		local_timer_del(&tcp_main_ltimer, &tcpconn->timer);
92c0024c
 		tcpconn->flags&=~(F_CONN_MAIN_TIMER|F_CONN_READ_W|F_CONN_WANTS_RD);
885b9f62
 		tcpconn_ref(tcpconn); /* refcnt ++ */
 		if (unlikely(send2child(tcpconn)<0)){
 			/* could not pass it to a reader => drop the connection */
 			tcpconn->flags&=~F_CONN_READER;
76cb799e
 #ifdef TCP_ASYNC
885b9f62
 			if (tcpconn->flags & F_CONN_WRITE_W){
504ef98e
 				if (unlikely(io_watch_del(&io_h, tcpconn->s, fd_i,
 														IO_FD_CLOSING) < 0)){
7498b4dc
 					LOG(L_ERR, "ERROR: handle_tcpconn_ev: io_watch_del(4)"
 							" failed:" " for %p, fd %d\n",
 							tcpconn, tcpconn->s);
 				}
885b9f62
 				tcpconn->flags&=~F_CONN_WRITE_W;
 			}
76cb799e
 #endif /* TCP_ASYNC */
a8859f52
 			if (tcpconn_try_unhash(tcpconn))
 				tcpconn_put(tcpconn);
19782e1c
 			tcpconn_put_destroy(tcpconn); /* because of the tcpconn_ref() */
885b9f62
 		}
98f3d5e2
 	}
 	return 0; /* we are not interested in possibly queued io events, 
885b9f62
 				 the fd was either passed to a child, closed, or for writes,
 				 everything possible was already written */
98f3d5e2
 error:
 	return -1;
 }
 
 
 
0ba367ec
 /* generic handle io routine, it will call the appropiate
  *  handle_xxx() based on the fd_map type
  *
  * params:  fm  - pointer to a fd hash entry
  *          idx - index in the fd_array (or -1 if not known)
  * return: -1 on error
  *          0 on EAGAIN or when by some other way it is known that no more 
  *            io events are queued on the fd (the receive buffer is empty).
  *            Usefull to detect when there are no more io events queued for
  *            sigio_rt, epoll_et, kqueue.
  *         >0 on successfull read from the fd (when there might be more io
  *            queued -- the receive buffer might still be non-empty)
  */
885b9f62
 inline static int handle_io(struct fd_map* fm, short ev, int idx)
0ba367ec
 {	
 	int ret;
9188021a
 
 	/* update the local config */
 	cfg_update();
0ba367ec
 	
 	switch(fm->type){
 		case F_SOCKINFO:
 			ret=handle_new_connect((struct socket_info*)fm->data);
 			break;
 		case F_TCPCONN:
885b9f62
 			ret=handle_tcpconn_ev((struct tcp_connection*)fm->data, ev, idx);
0ba367ec
 			break;
 		case F_TCPCHILD:
 			ret=handle_tcp_child((struct tcp_child*)fm->data, idx);
 			break;
 		case F_PROC:
 			ret=handle_ser_child((struct process_table*)fm->data, idx);
 			break;
 		case F_NONE:
895cb2af
 			LOG(L_CRIT, "BUG: handle_io: empty fd map: %p {%d, %d, %p},"
 						" idx %d\n", fm, fm->fd, fm->type, fm->data, idx);
0ba367ec
 			goto error;
 		default:
bf755a37
 			LOG(L_CRIT, "BUG: handle_io: unknown fd type %d\n", fm->type); 
0ba367ec
 			goto error;
 	}
 	return ret;
 error:
 	return -1;
 }
 
 
 
ccb7fda2
 /* timer handler for tcpconnection handled by tcp_main
  *
  * If neither the connection lifetime (c->timeout) nor, with TCP_ASYNC and
  * async mode on, the write timeout of a non-empty write queue
  * (c->wbuf_q.wr_timeout) has expired, nothing is destroyed and the number
  * of ticks left until the next deadline is returned.
  * Otherwise the timeout is reported (blacklist/event/stats), the connection
  * is removed from the hash, its fd is removed from the io watch list and
  * the connection reference is released via tcpconn_put_destroy().
  *
  * params: t    - current ticks
  *         tl   - timer handle (not used, the connection comes from data)
  *         data - the struct tcp_connection* the timer belongs to
  * returns: ticks left until the next deadline if nothing expired,
  *          0 after the connection was timed out.
  */
 static ticks_t tcpconn_main_timeout(ticks_t t, struct timer_ln* tl, void* data)
 {
 	struct tcp_connection *c;
 	int fd;
 #ifdef TCP_ASYNC
 	int tcp_async; /* only needed when write queues are compiled in */
 #endif /* TCP_ASYNC */
 	
 	c=(struct tcp_connection*)data; 
 	/* or (struct tcp...*)(tl-offset(c->timer)) */
 	
 #ifdef TCP_ASYNC
 	DBG( "tcp_main: entering timer for %p (ticks=%d, timeout=%d (%d s), "
 			"wr_timeout=%d (%d s)), write queue: %d bytes\n",
 			c, t, c->timeout, TICKS_TO_S(c->timeout-t),
 			c->wbuf_q.wr_timeout, TICKS_TO_S(c->wbuf_q.wr_timeout-t),
 			c->wbuf_q.queued);
 	
 	tcp_async=cfg_get(tcp, tcp_cfg, async);
 	/* not expired: lifetime not reached and (async off, or nothing queued,
 	 * or the write timeout not reached); the inner terms are combined with
 	 * bitwise | so all of them are evaluated */
 	if (likely(TICKS_LT(t, c->timeout) && ( !tcp_async | _wbufq_empty(c) |
 					TICKS_LT(t, c->wbuf_q.wr_timeout)) )){
 		if (unlikely(tcp_async && _wbufq_non_empty(c)))
 			/* something is queued => the nearest of the 2 deadlines */
 			return (ticks_t)MIN_unsigned(c->timeout-t, c->wbuf_q.wr_timeout-t);
 		else
 			return (ticks_t)(c->timeout - t);
 	}
 	/* if time out due to write, add it to the blacklist */
 	if (tcp_async && _wbufq_non_empty(c) && TICKS_GE(t, c->wbuf_q.wr_timeout)){
 		if (unlikely(c->state==S_CONN_CONNECT)){
 #ifdef USE_DST_BLACKLIST
 			(void)dst_blacklist_su(BLST_ERR_CONNECT, c->rcv.proto, &c->rcv.src_su,
 								&c->send_flags, 0);
 #endif /* USE_DST_BLACKLIST */
 			TCP_EV_CONNECT_TIMEOUT(0, TCP_LADDR(c), TCP_LPORT(c), TCP_PSU(c),
 									TCP_PROTO(c));
 			TCP_STATS_CONNECT_FAILED();
 		}else{
 #ifdef USE_DST_BLACKLIST
 			(void)dst_blacklist_su(BLST_ERR_SEND, c->rcv.proto, &c->rcv.src_su,
 								&c->send_flags, 0);
 #endif /* USE_DST_BLACKLIST */
 			TCP_EV_SEND_TIMEOUT(0, &c->rcv);
 			TCP_STATS_SEND_TIMEOUT();
 		}
 	}else{
 		/* idle timeout */
 		TCP_EV_IDLE_CONN_CLOSED(0, &c->rcv);
 		TCP_STATS_CON_TIMEOUT();
 	}
 #else /* ! TCP_ASYNC */
 	if (TICKS_LT(t, c->timeout)){
 		/* timeout extended, exit */
 		return (ticks_t)(c->timeout - t);
 	}
 	/* idle timeout */
 	TCP_EV_IDLE_CONN_CLOSED(0, &c->rcv);
 	TCP_STATS_CON_TIMEOUT();
 #endif /* TCP_ASYNC */
 	DBG("tcp_main: timeout for %p\n", c);
 	if (likely(c->flags & F_CONN_HASHED)){
 		/* remove it from the hash and mark it bad before releasing it */
 		c->flags&=~(F_CONN_HASHED|F_CONN_MAIN_TIMER);
 		c->state=S_CONN_BAD;
 		TCPCONN_LOCK;
 			_tcpconn_detach(c);
 		TCPCONN_UNLOCK;
 	}else{
 		c->flags&=~F_CONN_MAIN_TIMER;
 		LOG(L_CRIT, "BUG: tcp_main: timer: called with unhashed connection %p"
 				"\n", c);
 		tcpconn_ref(c); /* ugly hack to try to go on */
 	}
 	fd=c->s;
 	if (likely(fd>0)){
 		/* stop watching the fd if tcp_main still watches it */
 		if (likely(c->flags & (F_CONN_READ_W|F_CONN_WRITE_W))){
 			io_watch_del(&io_h, fd, -1, IO_FD_CLOSING);
 			c->flags&=~(F_CONN_READ_W|F_CONN_WRITE_W);
 		}
 	}
 	tcpconn_put_destroy(c);
 	return 0;
 }
 
 
 
b264d2c6
 static inline void tcp_timer_run(void)
ccb7fda2
 {
 	ticks_t ticks;
 	
 	ticks=get_ticks_raw();
057063e6
 	if (unlikely((ticks-tcp_main_prev_ticks)<TCPCONN_TIMEOUT_MIN_RUN)) return;
 	tcp_main_prev_ticks=ticks;
ccb7fda2
 	local_timer_run(&tcp_main_ltimer, ticks);
 }
 
 
 
 /* keep in sync with tcpconn_destroy, the "delete" part should be
d8b11bbc
  * the same except for io_watch_del..
ccb7fda2
  * Note: this function is called only on shutdown by the main ser process via
  * cleanup(). However it's also safe to call it from the tcp_main process.
  * => with the ser shutdown exception, it cannot execute in parallel
d8b11bbc
  * with tcpconn_add() or tcpconn_destroy()*/
b264d2c6
 static inline void tcpconn_destroy_all(void)
0ba367ec
 {
 	struct tcp_connection *c, *next;
 	unsigned h;
 	int fd;
 	
 	
ccb7fda2
 	TCPCONN_LOCK; 
0ba367ec
 	for(h=0; h<TCP_ID_HASH_SIZE; h++){
 		c=tcpconn_id_hash[h];
 		while(c){
 			next=c->id_next;
ccb7fda2
 				if (is_tcp_main){
d8b11bbc
 					/* we cannot close or remove the fd if we are not in the
 					 * tcp main proc.*/
d22b82a0
 					if ((c->flags & F_CONN_MAIN_TIMER)){
ccb7fda2
 						local_timer_del(&tcp_main_ltimer, &c->timer);
d22b82a0
 						c->flags&=~F_CONN_MAIN_TIMER;
 					} /* else still in some reader */
d8b11bbc
 					fd=c->s;
fc665070
 					if (fd>0 && (c->flags & (F_CONN_READ_W|F_CONN_WRITE_W))){
d8b11bbc
 						io_watch_del(&io_h, fd, -1, IO_FD_CLOSING);
fc665070
 						c->flags&=~(F_CONN_READ_W|F_CONN_WRITE_W);
d8b11bbc
 					}
 				}else{
 					fd=-1;
 				}
0ba367ec
 #ifdef USE_TLS
3d4a77d8
 				if (fd>0 && (c->type==PROTO_TLS || c->type==PROTO_WSS))
0ba367ec
 					tls_close(c, fd);
 #endif
 				_tcpconn_rm(c);
42d1a155
 				c->flags &= ~F_CONN_HASHED;
d8b11bbc
 				if (fd>0) {
5c5cd736
 #ifdef TCP_FD_CACHE
22db42e4
 					if (likely(cfg_get(tcp, tcp_cfg, fd_cache)))
 						shutdown(fd, SHUT_RDWR);
5c5cd736
 #endif /* TCP_FD_CACHE */
ab88df95
 					tcp_safe_close(fd);
0ba367ec
 				}
d9515405
 				(*tcp_connections_no)--;
3d4a77d8
 				if (unlikely(c->type==PROTO_TLS || c->type==PROTO_WSS))
61f8b970
 					(*tls_connections_no)--;
0ba367ec
 			c=next;
 		}
 	}
 	TCPCONN_UNLOCK;
 }
 
5b532c7f
 
0ba367ec
 
 /* tcp main loop */
 void tcp_main_loop()
 {
 
 	struct socket_info* si;
 	int r;
 	
d8b11bbc
 	is_tcp_main=1; /* mark this process as tcp main */
 	
3e999281
 	tcp_main_max_fd_no=get_max_open_fds();
746f7674
 	/* init send fd queues (here because we want mem. alloc only in the tcp
 	 *  process */
 #ifdef SEND_FD_QUEUE
 	if (init_send_fd_queues()<0){
 		LOG(L_CRIT, "ERROR: init_tcp: could not init send fd queues\n");
 		goto error;
 	}
 #endif
0ba367ec
 	/* init io_wait (here because we want the memory allocated only in
 	 * the tcp_main process) */
3e999281
 	if  (init_io_wait(&io_h, tcp_main_max_fd_no, tcp_poll_method)<0)
0ba367ec
 		goto error;
 	/* init: start watching all the fds*/
 	
5c5cd736
 	/* init local timer */
057063e6
 	tcp_main_prev_ticks=get_ticks_raw();
5c5cd736
 	if (init_local_timer(&tcp_main_ltimer, get_ticks_raw())!=0){
 		LOG(L_ERR, "ERROR: init_tcp: failed to init local timer\n");
 		goto error;
 	}
 #ifdef TCP_FD_CACHE
22db42e4
 	if (cfg_get(tcp, tcp_cfg, fd_cache)) tcp_fd_cache_init();
5c5cd736
 #endif /* TCP_FD_CACHE */
 	
d8b11bbc
 	/* add all the sockets we listen on for connections */
9f4c52ce
 	for (si=tcp_listen; si; si=si->next){
 		if ((si->proto==PROTO_TCP) &&(si->socket!=-1)){
a0553f4e
 			if (io_watch_add(&io_h, si->socket, POLLIN, F_SOCKINFO, si)<0){
0ba367ec
 				LOG(L_CRIT, "ERROR: tcp_main_loop: init: failed to add "
 							"listen socket to the fd list\n");
 				goto error;
 			}
9f4c52ce
 		}else{
 			LOG(L_CRIT, "BUG: tcp_main_loop: non tcp address in tcp_listen\n");
5b532c7f
 		}
9f4c52ce
 	}
f6e50f08
 #ifdef USE_TLS
6c53d41a
 	if (!tls_disable && tls_loaded()){
9f4c52ce
 		for (si=tls_listen; si; si=si->next){
 			if ((si->proto==PROTO_TLS) && (si->socket!=-1)){
a0553f4e
 				if (io_watch_add(&io_h, si->socket, POLLIN, F_SOCKINFO, si)<0){
0ba367ec
 					LOG(L_CRIT, "ERROR: tcp_main_loop: init: failed to add "
 							"tls listen socket to the fd list\n");
 					goto error;
 				}
9f4c52ce
 			}else{
faa66933
 				LOG(L_CRIT, "BUG: tcp_main_loop: non tls address"
 						" in tls_listen\n");
 			}
f6e50f08
 		}
1d9e67a8
 	}
9f4c52ce
 #endif
0ba367ec
 	/* add all the unix sockets used for communcation with other ser processes
 	 *  (get fd, new connection a.s.o) */
f2f969dd
 	for (r=1; r<process_no; r++){
0ba367ec
 		if (pt[r].unix_sock>0) /* we can't have 0, we never close it!*/
a0553f4e
 			if (io_watch_add(&io_h, pt[r].unix_sock, POLLIN,F_PROC, &pt[r])<0){
0ba367ec
 					LOG(L_CRIT, "ERROR: tcp_main_loop: init: failed to add "
 							"process %d unix socket to the fd list\n", r);
 					goto error;
 			}
5b532c7f
 	}
0ba367ec
 	/* add all the unix sokets used for communication with the tcp childs */
06aaa54f
 	for (r=0; r<tcp_children_no; r++){
0ba367ec
 		if (tcp_children[r].unix_sock>0)/*we can't have 0, we never close it!*/
a0553f4e
 			if (io_watch_add(&io_h, tcp_children[r].unix_sock, POLLIN,
 									F_TCPCHILD, &tcp_children[r]) <0){
0ba367ec
 				LOG(L_CRIT, "ERROR: tcp_main_loop: init: failed to add "
 						"tcp child %d unix socket to the fd list\n", r);
 				goto error;
 			}
06aaa54f
 	}
9188021a
 
 
 	/* initialize the cfg framework */
 	if (cfg_child_init()) goto error;
 
0ba367ec
 	/* main loop */
 	switch(io_h.poll_method){
 		case POLL_POLL:
 			while(1){
 				/* wait and process IO */
 				io_wait_loop_poll(&io_h, TCP_MAIN_SELECT_TIMEOUT, 0); 
98f3d5e2
 				send_fd_queue_run(&send2child_q); /* then new io */
0ba367ec
 				/* remove old connections */
ccb7fda2
 				tcp_timer_run();
0ba367ec
 			}
 			break;
 #ifdef HAVE_SELECT
 		case POLL_SELECT:
 			while(1){
 				io_wait_loop_select(&io_h, TCP_MAIN_SELECT_TIMEOUT, 0);
98f3d5e2
 				send_fd_queue_run(&send2child_q); /* then new io */
ccb7fda2
 				tcp_timer_run();
0ba367ec
 			}
 			break;
f6e50f08
 #endif
0ba367ec
 #ifdef HAVE_SIGIO_RT
 		case POLL_SIGIO_RT:
 			while(1){
 				io_wait_loop_sigio_rt(&io_h, TCP_MAIN_SELECT_TIMEOUT);
98f3d5e2
 				send_fd_queue_run(&send2child_q); /* then new io */
ccb7fda2
 				tcp_timer_run();
5b532c7f
 			}
0ba367ec
 			break;
 #endif
 #ifdef HAVE_EPOLL
 		case POLL_EPOLL_LT:
 			while(1){
 				io_wait_loop_epoll(&io_h, TCP_MAIN_SELECT_TIMEOUT, 0);
98f3d5e2
 				send_fd_queue_run(&send2child_q); /* then new io */
ccb7fda2
 				tcp_timer_run();
06aaa54f
 			}
0ba367ec
 			break;
 		case POLL_EPOLL_ET:
 			while(1){
 				io_wait_loop_epoll(&io_h, TCP_MAIN_SELECT_TIMEOUT, 1);
98f3d5e2
 				send_fd_queue_run(&send2child_q); /* then new io */
ccb7fda2
 				tcp_timer_run();
5b532c7f
 			}
0ba367ec
 			break;
 #endif
741a9937
 #ifdef HAVE_KQUEUE
 		case POLL_KQUEUE:
 			while(1){
 				io_wait_loop_kqueue(&io_h, TCP_MAIN_SELECT_TIMEOUT, 0);
98f3d5e2
 				send_fd_queue_run(&send2child_q); /* then new io */
ccb7fda2
 				tcp_timer_run();
741a9937
 			}
 			break;
 #endif
9eda5956
 #ifdef HAVE_DEVPOLL
 		case POLL_DEVPOLL:
 			while(1){
 				io_wait_loop_devpoll(&io_h, TCP_MAIN_SELECT_TIMEOUT, 0);
98f3d5e2
 				send_fd_queue_run(&send2child_q); /* then new io */
ccb7fda2
 				tcp_timer_run();
9eda5956
 			}
 			break;
 #endif
0ba367ec
 		default:
 			LOG(L_CRIT, "BUG: tcp_main_loop: no support for poll method "
 					" %s (%d)\n", 
 					poll_method_name(io_h.poll_method), io_h.poll_method);
 			goto error;
5b532c7f
 	}
0ba367ec
 error:
746f7674
 #ifdef SEND_FD_QUEUE
 	destroy_send_fd_queues();
 #endif
0ba367ec
 	destroy_io_wait(&io_h);
 	LOG(L_CRIT, "ERROR: tcp_main_loop: exiting...");
 	exit(-1);
 }
 
 
 
 /* cleanup before exit */
 void destroy_tcp()
 {
 		if (tcpconn_id_hash){
c2518f54
 			if (tcpconn_lock)
 				TCPCONN_UNLOCK; /* hack: force-unlock the tcp lock in case
 								   some process was terminated while holding 
 								   it; this will allow an almost gracious 
 								   shutdown */
ccb7fda2
 			tcpconn_destroy_all(); 
0ba367ec
 			shm_free(tcpconn_id_hash);
 			tcpconn_id_hash=0;
 		}
0dc023ec
 		DESTROY_TCP_STATS();
d9515405
 		if (tcp_connections_no){
 			shm_free(tcp_connections_no);
 			tcp_connections_no=0;
 		}
61f8b970
 		if (tls_connections_no){
 			shm_free(tls_connections_no);
 			tls_connections_no=0;
 		}
76cb799e
 #ifdef TCP_ASYNC
885b9f62
 		if (tcp_total_wq){
 			shm_free(tcp_total_wq);
 			tcp_total_wq=0;
 		}
76cb799e
 #endif /* TCP_ASYNC */
0ba367ec
 		if (connection_id){
 			shm_free(connection_id);
 			connection_id=0;
 		}
 		if (tcpconn_aliases_hash){
 			shm_free(tcpconn_aliases_hash);
 			tcpconn_aliases_hash=0;
 		}
 		if (tcpconn_lock){
 			lock_destroy(tcpconn_lock);
 			lock_dealloc((void*)tcpconn_lock);
 			tcpconn_lock=0;
 		}
746f7674
 		if (tcp_children){
 			pkg_free(tcp_children);
 			tcp_children=0;
 		}
ccb7fda2
 		destroy_local_timer(&tcp_main_ltimer);
5b532c7f
 }
 
 
 
0c5da34b
 int init_tcp()
 {
55d8155e
 	char* poll_err;
 	
20c64cc6
 	tcp_options_check();
22db42e4
 	if (tcp_cfg==0){
 		BUG("tcp_cfg not initialized\n");
 		goto error;
 	}
8aeb47e2
 	/* init lock */
 	tcpconn_lock=lock_alloc();
 	if (tcpconn_lock==0){
 		LOG(L_CRIT, "ERROR: init_tcp: could not alloc lock\n");
 		goto error;
 	}
 	if (lock_init(tcpconn_lock)==0){
 		LOG(L_CRIT, "ERROR: init_tcp: could not init lock\n");
 		lock_dealloc((void*)tcpconn_lock);
 		tcpconn_lock=0;
 		goto error;
 	}
2719e69b
 	/* init globals */
d9515405
 	tcp_connections_no=shm_malloc(sizeof(int));
 	if (tcp_connections_no==0){
 		LOG(L_CRIT, "ERROR: init_tcp: could not alloc globals\n");
 		goto error;
 	}
 	*tcp_connections_no=0;
61f8b970
 	tls_connections_no=shm_malloc(sizeof(int));
 	if (tls_connections_no==0){
 		LOG(L_CRIT, "ERROR: init_tcp: could not alloc globals\n");
 		goto error;
 	}
 	*tls_connections_no=0;
0dc023ec
 	if (INIT_TCP_STATS()!=0) goto error;
d9515405
 	connection_id=shm_malloc(sizeof(int));
2719e69b
 	if (connection_id==0){
 		LOG(L_CRIT, "ERROR: init_tcp: could not alloc globals\n");
 		goto error;
 	}
 	*connection_id=1;
76cb799e
 #ifdef TCP_ASYNC
885b9f62
 	tcp_total_wq=shm_malloc(sizeof(*tcp_total_wq));
 	if (tcp_total_wq==0){
 		LOG(L_CRIT, "ERROR: init_tcp: could not alloc globals\n");
 		goto error;
 	}
76cb799e
 #endif /* TCP_ASYNC */
8aeb47e2
 	/* alloc hashtables*/
59653eb8
 	tcpconn_aliases_hash=(struct tcp_conn_alias**)
 			shm_malloc(TCP_ALIAS_HASH_SIZE* sizeof(struct tcp_conn_alias*));
 	if (tcpconn_aliases_hash==0){
8aeb47e2
 		LOG(L_CRIT, "ERROR: init_tcp: could not alloc address hashtable\n");
 		goto error;
 	}
 	tcpconn_id_hash=(struct tcp_connection**)shm_malloc(TCP_ID_HASH_SIZE*
 								sizeof(struct tcp_connection*));
 	if (tcpconn_id_hash==0){
 		LOG(L_CRIT, "ERROR: init_tcp: could not alloc id hashtable\n");
0c5da34b
 		goto error;
 	}
8aeb47e2
 	/* init hashtables*/
59653eb8
 	memset((void*)tcpconn_aliases_hash, 0, 
 			TCP_ALIAS_HASH_SIZE * sizeof(struct tcp_conn_alias*));
8aeb47e2
 	memset((void*)tcpconn_id_hash, 0, 
 			TCP_ID_HASH_SIZE * sizeof(struct tcp_connection*));
0ba367ec
 	
55d8155e
 	/* fix config variables */
3dc4f620
 	poll_err=check_poll_method(tcp_poll_method);
55d8155e
 	
d8b11bbc
 	/* set an appropriate poll method */
55d8155e
 	if (poll_err || (tcp_poll_method==0)){
 		tcp_poll_method=choose_poll_method();
 		if (poll_err){
 			LOG(L_ERR, "ERROR: init_tcp: %s, using %s instead\n",
 					poll_err, poll_method_name(tcp_poll_method));
 		}else{
 			LOG(L_INFO, "init_tcp: using %s as the io watch method"
 					" (auto detected)\n", poll_method_name(tcp_poll_method));
 		}
 	}else{
 			LOG(L_INFO, "init_tcp: using %s io watch method (config)\n",
 					poll_method_name(tcp_poll_method));
 	}
0ba367ec
 	
0c5da34b
 	return 0;
 error:
0ba367ec
 	/* clean-up */
 	destroy_tcp();
 	return -1;
8aeb47e2
 }
 
 
37209e14
 #ifdef TCP_CHILD_NON_BLOCKING
 /* Adds O_NONBLOCK to the file status flags of the descriptor s (the other
  * flags are preserved).
  * returns 0 on success, -1 on error (the failed fcntl call is logged) */
 static int set_non_blocking(int s)
 {
 	int cur_flags;
 
 	/* read the current flags, so that only O_NONBLOCK gets added */
 	cur_flags=fcntl(s, F_GETFL);
 	if (cur_flags==-1){
 		LOG(L_ERR, "ERROR: set_non_blocking: fnctl failed: (%d) %s\n",
 				errno, strerror(errno));
 		return -1;
 	}
 	if (fcntl(s, F_SETFL, cur_flags|O_NONBLOCK)==-1){
 		LOG(L_ERR, "ERROR: set_non_blocking: fcntl: set non-blocking failed:"
 				" (%d) %s\n", errno, strerror(errno));
 		return -1;
 	}
 	return 0;
 }
 
 #endif
 
 
 /* If compiled with TCP_CHILD_NON_BLOCKING, sets fd[0] and fd[1] to
  * non-blocking mode (stopping at the first failure); otherwise it does
  * nothing.
  * fd - array with (at least) 2 file descriptors
  * returns -1 on error, 0 on success */
 int tcp_fix_child_sockets(int* fd)
 {
 #ifdef TCP_CHILD_NON_BLOCKING
 	int idx;
 
 	for (idx=0; idx<2; idx++){
 		if (set_non_blocking(fd[idx])<0)
 			return -1;
 	}
 #endif
 	return 0;
 }
 
28260509
 
 
5b532c7f
 /* starts the tcp processes */
6ee62314
 int tcp_init_children()
5b532c7f
 {
edf5e385
 	int r, i;
37209e14
 	int reader_fd_1; /* for comm. with the tcp children read  */
5b532c7f
 	pid_t pid;
edf5e385
 	char si_desc[MAX_PT_DESC];
55d8155e
 	struct socket_info *si;
 	
 	/* estimate max fd. no:
 	 * 1 tcp send unix socket/all_proc, 
 	 *  + 1 udp sock/udp proc + 1 tcp_child sock/tcp child*
 	 *  + no_listen_tcp */
 	for(r=0, si=tcp_listen; si; si=si->next, r++);
 #ifdef USE_TLS
 	if (! tls_disable)
d3ab3df2
 		for (si=tls_listen; si; si=si->next, r++);
55d8155e
 #endif
5b532c7f
 	
3e999281
 	register_fds(r+tcp_max_connections+get_max_procs()-1 /* tcp main */);
 #if 0
37209e14
 	tcp_max_fd_no=get_max_procs()*2 +r-1 /* timer */ +3; /* stdin/out/err*/
 	/* max connections can be temporarily exceeded with estimated_process_count
d9515405
 	 * - tcp_main (tcpconn_connect called simultaneously in all all the 
 	 *  processes) */
37209e14
 	tcp_max_fd_no+=tcp_max_connections+get_max_procs()-1 /* tcp main */;
3e999281
 #endif
746f7674
 	/* alloc the children array */
 	tcp_children=pkg_malloc(sizeof(struct tcp_child)*tcp_children_no);
 	if (tcp_children==0){
 			LOG(L_ERR, "ERROR: tcp_init_children: out of memory\n");
 			goto error;
 	}
edf5e385
 	memset(tcp_children, 0, sizeof(struct tcp_child)*tcp_children_no);
 	/* assign own socket for tcp workers, if it is the case
 	 * - add them from end to start of tcp children array
 	 * - thus, have generic tcp workers at beginning */
 	i = tcp_children_no-1;
 	for(si=tcp_listen; si; si=si->next) {
 		if(si->workers>0) {
 			si->workers_tcpidx = i - si->workers + 1;
 			for(r=0; r<si->workers; r++) {
 				tcp_children[i].mysocket = si;
 				i--;
 			}
 		}
 	}
 	tcp_sockets_gworkers = (i != tcp_children_no-1)?(1 + i + 1):0;
 
5b532c7f
 	/* create the tcp sock_info structures */
f2f969dd
 	/* copy the sockets --moved to main_loop*/
5b532c7f
 	
 	/* fork children & create the socket pairs*/
 	for(r=0; r<tcp_children_no; r++){
3167c744
 		child_rank++;
edf5e385
 		snprintf(si_desc, MAX_PT_DESC, "tcp receiver (%s)",
 				(tcp_children[r].mysocket!=NULL)?
 					tcp_children[r].mysocket->sock_str.s:"generic");
 		pid=fork_tcp_process(child_rank, si_desc, r, &reader_fd_1);
5b532c7f
 		if (pid<0){
 			LOG(L_ERR, "ERROR: tcp_main: fork failed: %s\n",
 					strerror(errno));
 			goto error;
 		}else if (pid>0){
 			/* parent */
 		}else{
 			/* child */
7ec958f3
 			bind_address=0; /* force a SEGFAULT if someone uses a non-init.
 							   bind address on tcp */
37209e14
 			tcp_receive_loop(reader_fd_1);
5b532c7f
 		}
 	}
6ee62314
 	return 0;
5b532c7f
 error:
 	return -1;
 }
 
d9515405
 
 
 void tcp_get_info(struct tcp_gen_info *ti)
 {
 	ti->tcp_readers=tcp_children_no;
 	ti->tcp_max_connections=tcp_max_connections;
61f8b970
 	ti->tls_max_connections=tls_max_connections;
d9515405
 	ti->tcp_connections_no=*tcp_connections_no;
61f8b970
 	ti->tls_connections_no=*tls_connections_no;
76cb799e
 #ifdef TCP_ASYNC
885b9f62
 	ti->tcp_write_queued=*tcp_total_wq;
 #else
 	ti->tcp_write_queued=0;
76cb799e
 #endif /* TCP_ASYNC */
d9515405
 }
 
5b532c7f
 #endif