From 3f7a87d2fa9b42f7aade43914f060df68cc89cc7 Mon Sep 17 00:00:00 2001 From: Frank Filz Date: Mon, 20 Jun 2005 13:14:57 -0700 Subject: [SCTP] sctp_connectx() API support Implements sctp_connectx() as defined in the SCTP sockets API draft by tunneling the request through a setsockopt(). Signed-off-by: Frank Filz Signed-off-by: Sridhar Samudrala Signed-off-by: David S. Miller --- include/net/sctp/command.h | 8 +- include/net/sctp/constants.h | 7 - include/net/sctp/sctp.h | 17 ++ include/net/sctp/sm.h | 8 +- include/net/sctp/structs.h | 41 ++++- include/net/sctp/user.h | 3 + net/sctp/associola.c | 151 ++++++++++++---- net/sctp/endpointola.c | 1 - net/sctp/input.c | 2 +- net/sctp/outqueue.c | 11 +- net/sctp/sm_make_chunk.c | 20 ++- net/sctp/sm_sideeffect.c | 105 ++++++++--- net/sctp/sm_statefuns.c | 148 ++++++++++------ net/sctp/sm_statetable.c | 6 +- net/sctp/socket.c | 405 ++++++++++++++++++++++++++++++------------- net/sctp/transport.c | 4 +- 16 files changed, 677 insertions(+), 260 deletions(-) diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index ebc5282e6d58..dc107ffad483 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -65,9 +65,11 @@ typedef enum { SCTP_CMD_TIMER_START, /* Start a timer. */ SCTP_CMD_TIMER_RESTART, /* Restart a timer. */ SCTP_CMD_TIMER_STOP, /* Stop a timer. */ - SCTP_CMD_COUNTER_RESET, /* Reset a counter. */ - SCTP_CMD_COUNTER_INC, /* Increment a counter. */ + SCTP_CMD_INIT_CHOOSE_TRANSPORT, /* Choose transport for an INIT. */ + SCTP_CMD_INIT_COUNTER_RESET, /* Reset init counter. */ + SCTP_CMD_INIT_COUNTER_INC, /* Increment init counter. */ SCTP_CMD_INIT_RESTART, /* High level, do init timer work. */ + SCTP_CMD_COOKIEECHO_RESTART, /* High level, do cookie-echo timer work. */ SCTP_CMD_INIT_FAILED, /* High level, do init failure work. */ SCTP_CMD_REPORT_DUP, /* Report a duplicate TSN. */ SCTP_CMD_STRIKE, /* Mark a strike against a transport. 
*/ @@ -118,7 +120,6 @@ typedef union { int error; sctp_state_t state; sctp_event_timeout_t to; - sctp_counter_t counter; void *ptr; struct sctp_chunk *chunk; struct sctp_association *asoc; @@ -165,7 +166,6 @@ SCTP_ARG_CONSTRUCTOR(U16, __u16, u16) SCTP_ARG_CONSTRUCTOR(U8, __u8, u8) SCTP_ARG_CONSTRUCTOR(ERROR, int, error) SCTP_ARG_CONSTRUCTOR(STATE, sctp_state_t, state) -SCTP_ARG_CONSTRUCTOR(COUNTER, sctp_counter_t, counter) SCTP_ARG_CONSTRUCTOR(TO, sctp_event_timeout_t, to) SCTP_ARG_CONSTRUCTOR(PTR, void *, ptr) SCTP_ARG_CONSTRUCTOR(CHUNK, struct sctp_chunk *, chunk) diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 2b76c0f4babc..4868c7f7749d 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -263,13 +263,6 @@ enum { SCTP_MIN_PMTU = 576 }; enum { SCTP_MAX_DUP_TSNS = 16 }; enum { SCTP_MAX_GABS = 16 }; -typedef enum { - SCTP_COUNTER_INIT_ERROR, -} sctp_counter_t; - -/* How many counters does an association need? */ -#define SCTP_NUMBER_COUNTERS 5 - /* Here we define the default timers. */ /* cookie timer def = ? seconds */ diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 960abfa48d68..ef2738159ab3 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -223,6 +223,22 @@ DECLARE_SNMP_STAT(struct sctp_mib, sctp_statistics); extern int sctp_debug_flag; #define SCTP_DEBUG_PRINTK(whatever...) \ ((void) (sctp_debug_flag && printk(KERN_DEBUG whatever))) +#define SCTP_DEBUG_PRINTK_IPADDR(lead, trail, leadparm, saddr, otherparms...) 
\ + if (sctp_debug_flag) { \ + if (saddr->sa.sa_family == AF_INET6) { \ + printk(KERN_DEBUG \ + lead "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x" trail, \ + leadparm, \ + NIP6(saddr->v6.sin6_addr), \ + otherparms); \ + } else { \ + printk(KERN_DEBUG \ + lead "%u.%u.%u.%u" trail, \ + leadparm, \ + NIPQUAD(saddr->v4.sin_addr.s_addr), \ + otherparms); \ + } \ + } #define SCTP_ENABLE_DEBUG { sctp_debug_flag = 1; } #define SCTP_DISABLE_DEBUG { sctp_debug_flag = 0; } @@ -236,6 +252,7 @@ extern int sctp_debug_flag; #else /* SCTP_DEBUG */ #define SCTP_DEBUG_PRINTK(whatever...) +#define SCTP_DEBUG_PRINTK_IPADDR(whatever...) #define SCTP_ENABLE_DEBUG #define SCTP_DISABLE_DEBUG #define SCTP_ASSERT(expr, str, func) diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index f4fcee104707..a53e08a45e32 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -116,7 +116,8 @@ sctp_state_fn_t sctp_sf_eat_data_fast_4_4; sctp_state_fn_t sctp_sf_eat_sack_6_2; sctp_state_fn_t sctp_sf_tabort_8_4_8; sctp_state_fn_t sctp_sf_operr_notify; -sctp_state_fn_t sctp_sf_t1_timer_expire; +sctp_state_fn_t sctp_sf_t1_init_timer_expire; +sctp_state_fn_t sctp_sf_t1_cookie_timer_expire; sctp_state_fn_t sctp_sf_t2_timer_expire; sctp_state_fn_t sctp_sf_t4_timer_expire; sctp_state_fn_t sctp_sf_t5_timer_expire; @@ -258,7 +259,10 @@ struct sctp_chunk *sctp_make_fwdtsn(const struct sctp_association *asoc, void sctp_chunk_assign_tsn(struct sctp_chunk *); void sctp_chunk_assign_ssn(struct sctp_chunk *); -void sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands, __u16 error); +sctp_disposition_t sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands, + __u16 error, + const struct sctp_association *asoc, + struct sctp_transport *transport); /* Prototypes for statetable processing. 
*/ diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 6c24d9cd3d66..dfad4d3c581c 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -867,10 +867,13 @@ struct sctp_transport { */ unsigned long last_time_ecne_reduced; - /* active : The current active state of this destination, - * : i.e. DOWN, UP, etc. + /* The number of times INIT has been sent on this transport. */ + int init_sent_count; + + /* state : The current state of this destination, + * : i.e. SCTP_ACTIVE, SCTP_INACTIVE, SCTP_UNKNOWN. */ - int active; + int state; /* hb_allowed : The current heartbeat state of this destination, * : i.e. ALLOW-HB, NO-HEARTBEAT, etc. @@ -1222,9 +1225,6 @@ struct sctp_endpoint { /* sendbuf acct. policy. */ __u32 sndbuf_policy; - - /* Name for debugging output... */ - char *debug_name; }; /* Recover the outter endpoint structure. */ @@ -1314,11 +1314,23 @@ struct sctp_association { * : association. Normally this information is * : hashed or keyed for quick lookup and access * : of the TCB. + * : The list is also initialized with the list + * : of addresses passed with the sctp_connectx() + * : call. * * It is a list of SCTP_transport's. */ struct list_head transport_addr_list; + /* transport_count + * + * Peer : A count of the number of peer addresses + * Transport : in the Peer Transport Address List. + * Address : + * Count : + */ + __u16 transport_count; + /* port * The transport layer port number. */ @@ -1486,6 +1498,9 @@ struct sctp_association { /* Transport to which SHUTDOWN chunk was last sent. */ struct sctp_transport *shutdown_last_sent_to; + /* Transport to which INIT chunk was last sent. */ + struct sctp_transport *init_last_sent_to; + /* Next TSN : The next TSN number to be assigned to a new * : DATA chunk. This is sent in the INIT or INIT * : ACK chunk to the peer and incremented each @@ -1549,8 +1564,11 @@ struct sctp_association { /* The message size at which SCTP fragmentation will occur. 
*/ __u32 frag_point; - /* Currently only one counter is used to count INIT errors. */ - int counters[SCTP_NUMBER_COUNTERS]; + /* Counter used to count INIT errors. */ + int init_err_counter; + + /* Count the number of INIT cycles (for doubling timeout). */ + int init_cycle; /* Default send parameters. */ __u16 default_stream; @@ -1708,6 +1726,8 @@ void sctp_association_free(struct sctp_association *); void sctp_association_put(struct sctp_association *); void sctp_association_hold(struct sctp_association *); +struct sctp_transport *sctp_assoc_choose_init_transport( + struct sctp_association *); struct sctp_transport *sctp_assoc_choose_shutdown_transport( struct sctp_association *); void sctp_assoc_update_retran_path(struct sctp_association *); @@ -1717,9 +1737,12 @@ int sctp_assoc_lookup_laddr(struct sctp_association *asoc, const union sctp_addr *laddr); struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *, const union sctp_addr *address, - const int gfp); + const int gfp, + const int peer_state); void sctp_assoc_del_peer(struct sctp_association *asoc, const union sctp_addr *addr); +void sctp_assoc_rm_peer(struct sctp_association *asoc, + struct sctp_transport *peer); void sctp_assoc_control_transport(struct sctp_association *, struct sctp_transport *, sctp_transport_cmd_t, sctp_sn_error_t); diff --git a/include/net/sctp/user.h b/include/net/sctp/user.h index 2758e8ce4f25..f6328aeddcce 100644 --- a/include/net/sctp/user.h +++ b/include/net/sctp/user.h @@ -111,6 +111,8 @@ enum sctp_optname { #define SCTP_GET_LOCAL_ADDRS_NUM SCTP_GET_LOCAL_ADDRS_NUM SCTP_GET_LOCAL_ADDRS, /* Get all local addresss. */ #define SCTP_GET_LOCAL_ADDRS SCTP_GET_LOCAL_ADDRS + SCTP_SOCKOPT_CONNECTX, /* CONNECTX requests. 
*/ +#define SCTP_SOCKOPT_CONNECTX SCTP_SOCKOPT_CONNECTX }; /* @@ -527,6 +529,7 @@ struct sctp_paddrinfo { enum sctp_spinfo_state { SCTP_INACTIVE, SCTP_ACTIVE, + SCTP_UNKNOWN = 0xffff /* Value used for transport state unknown */ }; /* diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 663843d97a92..7ae6aa772dab 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -191,10 +191,6 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->last_cwr_tsn = asoc->ctsn_ack_point; asoc->unack_data = 0; - SCTP_DEBUG_PRINTK("myctsnap for %s INIT as 0x%x.\n", - asoc->ep->debug_name, - asoc->ctsn_ack_point); - /* ADDIP Section 4.1 Asconf Chunk Procedures * * When an endpoint has an ASCONF signaled change to be sent to the @@ -211,6 +207,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a /* Make an empty list of remote transport addresses. */ INIT_LIST_HEAD(&asoc->peer.transport_addr_list); + asoc->peer.transport_count = 0; /* RFC 2960 5.1 Normal Establishment of an Association * @@ -288,6 +285,7 @@ struct sctp_association *sctp_association_new(const struct sctp_endpoint *ep, asoc->base.malloced = 1; SCTP_DBG_OBJCNT_INC(assoc); + SCTP_DEBUG_PRINTK("Created asoc %p\n", asoc); return asoc; @@ -356,6 +354,8 @@ void sctp_association_free(struct sctp_association *asoc) sctp_transport_free(transport); } + asoc->peer.transport_count = 0; + /* Free any cached ASCONF_ACK chunk. */ if (asoc->addip_last_asconf_ack) sctp_chunk_free(asoc->addip_last_asconf_ack); @@ -400,7 +400,7 @@ void sctp_assoc_set_primary(struct sctp_association *asoc, /* If the primary path is changing, assume that the * user wants to use this new path. 
*/ - if (transport->active) + if (transport->state != SCTP_INACTIVE) asoc->peer.active_path = transport; /* @@ -428,10 +428,58 @@ void sctp_assoc_set_primary(struct sctp_association *asoc, transport->cacc.next_tsn_at_change = asoc->next_tsn; } +/* Remove a transport from an association. */ +void sctp_assoc_rm_peer(struct sctp_association *asoc, + struct sctp_transport *peer) +{ + struct list_head *pos; + struct sctp_transport *transport; + + SCTP_DEBUG_PRINTK_IPADDR("sctp_assoc_rm_peer:association %p addr: ", + " port: %d\n", + asoc, + (&peer->ipaddr), + peer->ipaddr.v4.sin_port); + + /* If we are to remove the current retran_path, update it + * to the next peer before removing this peer from the list. + */ + if (asoc->peer.retran_path == peer) + sctp_assoc_update_retran_path(asoc); + + /* Remove this peer from the list. */ + list_del(&peer->transports); + + /* Get the first transport of asoc. */ + pos = asoc->peer.transport_addr_list.next; + transport = list_entry(pos, struct sctp_transport, transports); + + /* Update any entries that match the peer to be deleted. */ + if (asoc->peer.primary_path == peer) + sctp_assoc_set_primary(asoc, transport); + if (asoc->peer.active_path == peer) + asoc->peer.active_path = transport; + if (asoc->peer.last_data_from == peer) + asoc->peer.last_data_from = transport; + + /* If we remove the transport an INIT was last sent to, set it to + * NULL. Combined with the update of the retran path above, this + * will cause the next INIT to be sent to the next available + * transport, maintaining the cycle. + */ + if (asoc->init_last_sent_to == peer) + asoc->init_last_sent_to = NULL; + + asoc->peer.transport_count--; + + sctp_transport_free(peer); +} + /* Add a transport address to an association. 
*/ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, const union sctp_addr *addr, - int gfp) + const int gfp, + const int peer_state) { struct sctp_transport *peer; struct sctp_sock *sp; @@ -442,14 +490,25 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, /* AF_INET and AF_INET6 share common port field. */ port = addr->v4.sin_port; + SCTP_DEBUG_PRINTK_IPADDR("sctp_assoc_add_peer:association %p addr: ", + " port: %d state:%s\n", + asoc, + addr, + addr->v4.sin_port, + peer_state == SCTP_UNKNOWN?"UNKNOWN":"ACTIVE"); + /* Set the port if it has not been set yet. */ if (0 == asoc->peer.port) asoc->peer.port = port; /* Check to see if this is a duplicate. */ peer = sctp_assoc_lookup_paddr(asoc, addr); - if (peer) + if (peer) { + if (peer_state == SCTP_ACTIVE && + peer->state == SCTP_UNKNOWN) + peer->state = SCTP_ACTIVE; return peer; + } peer = sctp_transport_new(addr, gfp); if (!peer) @@ -516,8 +575,12 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, /* Set the transport's RTO.initial value */ peer->rto = asoc->rto_initial; + /* Set the peer's active state. */ + peer->state = peer_state; + /* Attach the remote transport to our asoc. */ list_add_tail(&peer->transports, &asoc->peer.transport_addr_list); + asoc->peer.transport_count++; /* If we do not yet have a primary path, set one. 
*/ if (!asoc->peer.primary_path) { @@ -525,8 +588,9 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, asoc->peer.retran_path = peer; } - if (asoc->peer.active_path == asoc->peer.retran_path) + if (asoc->peer.active_path == asoc->peer.retran_path) { asoc->peer.retran_path = peer; + } return peer; } @@ -537,37 +601,16 @@ void sctp_assoc_del_peer(struct sctp_association *asoc, { struct list_head *pos; struct list_head *temp; - struct sctp_transport *peer = NULL; struct sctp_transport *transport; list_for_each_safe(pos, temp, &asoc->peer.transport_addr_list) { transport = list_entry(pos, struct sctp_transport, transports); if (sctp_cmp_addr_exact(addr, &transport->ipaddr)) { - peer = transport; - list_del(pos); + /* Do book keeping for removing the peer and free it. */ + sctp_assoc_rm_peer(asoc, transport); break; } } - - /* The address we want delete is not in the association. */ - if (!peer) - return; - - /* Get the first transport of asoc. */ - pos = asoc->peer.transport_addr_list.next; - transport = list_entry(pos, struct sctp_transport, transports); - - /* Update any entries that match the peer to be deleted. */ - if (asoc->peer.primary_path == peer) - sctp_assoc_set_primary(asoc, transport); - if (asoc->peer.active_path == peer) - asoc->peer.active_path = transport; - if (asoc->peer.retran_path == peer) - asoc->peer.retran_path = transport; - if (asoc->peer.last_data_from == peer) - asoc->peer.last_data_from = transport; - - sctp_transport_free(peer); } /* Lookup a transport by address. */ @@ -608,12 +651,12 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, /* Record the transition on the transport. 
*/ switch (command) { case SCTP_TRANSPORT_UP: - transport->active = SCTP_ACTIVE; + transport->state = SCTP_ACTIVE; spc_state = SCTP_ADDR_AVAILABLE; break; case SCTP_TRANSPORT_DOWN: - transport->active = SCTP_INACTIVE; + transport->state = SCTP_INACTIVE; spc_state = SCTP_ADDR_UNREACHABLE; break; @@ -643,7 +686,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, list_for_each(pos, &asoc->peer.transport_addr_list) { t = list_entry(pos, struct sctp_transport, transports); - if (!t->active) + if (t->state == SCTP_INACTIVE) continue; if (!first || t->last_time_heard > first->last_time_heard) { second = first; @@ -663,7 +706,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, * [If the primary is active but not most recent, bump the most * recently used transport.] */ - if (asoc->peer.primary_path->active && + if (asoc->peer.primary_path->state != SCTP_INACTIVE && first != asoc->peer.primary_path) { second = first; first = asoc->peer.primary_path; @@ -958,7 +1001,7 @@ void sctp_assoc_update(struct sctp_association *asoc, transports); if (!sctp_assoc_lookup_paddr(asoc, &trans->ipaddr)) sctp_assoc_add_peer(asoc, &trans->ipaddr, - GFP_ATOMIC); + GFP_ATOMIC, SCTP_ACTIVE); } asoc->ctsn_ack_point = asoc->next_tsn - 1; @@ -998,7 +1041,7 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc) /* Try to find an active transport. */ - if (t->active) { + if (t->state != SCTP_INACTIVE) { break; } else { /* Keep track of the next transport in case @@ -1019,6 +1062,40 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc) } asoc->peer.retran_path = t; + + SCTP_DEBUG_PRINTK_IPADDR("sctp_assoc_update_retran_path:association" + " %p addr: ", + " port: %d\n", + asoc, + (&t->ipaddr), + t->ipaddr.v4.sin_port); +} + +/* Choose the transport for sending a INIT packet. */ +struct sctp_transport *sctp_assoc_choose_init_transport( + struct sctp_association *asoc) +{ + struct sctp_transport *t; + + /* Use the retran path. 
If the last INIT was sent over the + * retran path, update the retran path and use it. + */ + if (!asoc->init_last_sent_to) { + t = asoc->peer.active_path; + } else { + if (asoc->init_last_sent_to == asoc->peer.retran_path) + sctp_assoc_update_retran_path(asoc); + t = asoc->peer.retran_path; + } + + SCTP_DEBUG_PRINTK_IPADDR("sctp_assoc_update_retran_path:association" + " %p addr: ", + " port: %d\n", + asoc, + (&t->ipaddr), + t->ipaddr.v4.sin_port); + + return t; } /* Choose the transport for sending a SHUTDOWN packet. */ diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 334f61773e6d..2ec0320fac3b 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -134,7 +134,6 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, ep->last_key = ep->current_key = 0; ep->key_changed_at = jiffies; - ep->debug_name = "unnamedEndpoint"; return ep; } diff --git a/net/sctp/input.c b/net/sctp/input.c index fffc880a646d..339f7acfdb64 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -353,7 +353,7 @@ void sctp_icmp_proto_unreachable(struct sock *sk, sctp_do_sm(SCTP_EVENT_T_OTHER, SCTP_ST_OTHER(SCTP_EVENT_ICMP_PROTO_UNREACH), - asoc->state, asoc->ep, asoc, NULL, + asoc->state, asoc->ep, asoc, t, GFP_ATOMIC); } diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 1b2d4adc4ddb..4eb81a1407b7 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -682,9 +682,9 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) if (!new_transport) { new_transport = asoc->peer.active_path; - } else if (!new_transport->active) { - /* If the chunk is Heartbeat or Heartbeat Ack, - * send it to chunk->transport, even if it's + } else if (new_transport->state == SCTP_INACTIVE) { + /* If the chunk is Heartbeat or Heartbeat Ack, + * send it to chunk->transport, even if it's * inactive. 
* * 3.3.6 Heartbeat Acknowledgement: @@ -840,7 +840,8 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) * Otherwise, we want to use the active path. */ new_transport = chunk->transport; - if (!new_transport || !new_transport->active) + if (!new_transport || + new_transport->state == SCTP_INACTIVE) new_transport = asoc->peer.active_path; /* Change packets if necessary. */ @@ -1454,7 +1455,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, /* Mark the destination transport address as * active if it is not so marked. */ - if (!transport->active) { + if (transport->state == SCTP_INACTIVE) { sctp_assoc_control_transport( transport->asoc, transport, diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 33ac8bf47b0e..5baed9bb7de5 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1830,7 +1830,7 @@ int sctp_process_init(struct sctp_association *asoc, sctp_cid_t cid, * be a a better choice than any of the embedded addresses. */ if (peer_addr) - if(!sctp_assoc_add_peer(asoc, peer_addr, gfp)) + if(!sctp_assoc_add_peer(asoc, peer_addr, gfp, SCTP_ACTIVE)) goto nomem; /* Process the initialization parameters. */ @@ -1841,6 +1841,14 @@ int sctp_process_init(struct sctp_association *asoc, sctp_cid_t cid, goto clean_up; } + /* Walk list of transports, removing transports in the UNKNOWN state. */ + list_for_each_safe(pos, temp, &asoc->peer.transport_addr_list) { + transport = list_entry(pos, struct sctp_transport, transports); + if (transport->state == SCTP_UNKNOWN) { + sctp_assoc_rm_peer(asoc, transport); + } + } + /* The fixed INIT headers are always in network byte * order. */ @@ -1906,7 +1914,8 @@ int sctp_process_init(struct sctp_association *asoc, sctp_cid_t cid, * stream sequence number shall be set to 0. */ - /* Allocate storage for the negotiated streams if it is not a temporary * association. + /* Allocate storage for the negotiated streams if it is not a temporary + * association. 
*/ if (!asoc->temp) { int assoc_id; @@ -1952,6 +1961,9 @@ clean_up: list_del_init(pos); sctp_transport_free(transport); } + + asoc->peer.transport_count = 0; + nomem: return 0; } @@ -1995,7 +2007,7 @@ static int sctp_process_param(struct sctp_association *asoc, af->from_addr_param(&addr, param.addr, asoc->peer.port, 0); scope = sctp_scope(peer_addr); if (sctp_in_scope(&addr, scope)) - if (!sctp_assoc_add_peer(asoc, &addr, gfp)) + if (!sctp_assoc_add_peer(asoc, &addr, gfp, SCTP_ACTIVE)) return 0; break; @@ -2396,7 +2408,7 @@ static __u16 sctp_process_asconf_param(struct sctp_association *asoc, * Due to Resource Shortage'. */ - peer = sctp_assoc_add_peer(asoc, &addr, GFP_ATOMIC); + peer = sctp_assoc_add_peer(asoc, &addr, GFP_ATOMIC, SCTP_ACTIVE); if (!peer) return SCTP_ERROR_RSRC_LOW; diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index f65fa441952f..778639db125a 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -414,11 +414,13 @@ static void sctp_do_8_2_transport_strike(struct sctp_association *asoc, */ asoc->overall_error_count++; - if (transport->active && + if (transport->state != SCTP_INACTIVE && (transport->error_count++ >= transport->max_retrans)) { - SCTP_DEBUG_PRINTK("transport_strike: transport " - "IP:%d.%d.%d.%d failed.\n", - NIPQUAD(transport->ipaddr.v4.sin_addr)); + SCTP_DEBUG_PRINTK_IPADDR("transport_strike:association %p", + " transport IP: port:%d failed.\n", + asoc, + (&transport->ipaddr), + transport->ipaddr.v4.sin_port); sctp_assoc_control_transport(asoc, transport, SCTP_TRANSPORT_DOWN, SCTP_FAILED_THRESHOLD); @@ -593,7 +595,7 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds, /* Mark the destination transport address as active if it is not so * marked. 
*/ - if (!t->active) + if (t->state == SCTP_INACTIVE) sctp_assoc_control_transport(asoc, t, SCTP_TRANSPORT_UP, SCTP_HEARTBEAT_SUCCESS); @@ -665,8 +667,11 @@ static void sctp_cmd_new_state(sctp_cmd_seq_t *cmds, asoc->state = state; + SCTP_DEBUG_PRINTK("sctp_cmd_new_state: asoc %p[%s]\n", + asoc, sctp_state_tbl[state]); + if (sctp_style(sk, TCP)) { - /* Change the sk->sk_state of a TCP-style socket that has + /* Change the sk->sk_state of a TCP-style socket that has * sucessfully completed a connect() call. */ if (sctp_state(asoc, ESTABLISHED) && sctp_sstate(sk, CLOSED)) @@ -678,6 +683,16 @@ static void sctp_cmd_new_state(sctp_cmd_seq_t *cmds, sk->sk_shutdown |= RCV_SHUTDOWN; } + if (sctp_state(asoc, COOKIE_WAIT)) { + /* Reset init timeouts since they may have been + * increased due to timer expirations. + */ + asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_INIT] = + asoc->ep->timeouts[SCTP_EVENT_TIMEOUT_T1_INIT]; + asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_COOKIE] = + asoc->ep->timeouts[SCTP_EVENT_TIMEOUT_T1_COOKIE]; + } + if (sctp_state(asoc, ESTABLISHED) || sctp_state(asoc, CLOSED) || sctp_state(asoc, SHUTDOWN_RECEIVED)) { @@ -1120,10 +1135,10 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, * to be executed only during failed attempts of * association establishment. 
*/ - if ((asoc->peer.retran_path != - asoc->peer.primary_path) && - (asoc->counters[SCTP_COUNTER_INIT_ERROR] > 0)) { - sctp_add_cmd_sf(commands, + if ((asoc->peer.retran_path != + asoc->peer.primary_path) && + (asoc->init_err_counter > 0)) { + sctp_add_cmd_sf(commands, SCTP_CMD_FORCE_PRIM_RETRAN, SCTP_NULL()); } @@ -1237,18 +1252,67 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, sctp_association_put(asoc); break; + case SCTP_CMD_INIT_CHOOSE_TRANSPORT: + chunk = cmd->obj.ptr; + t = sctp_assoc_choose_init_transport(asoc); + asoc->init_last_sent_to = t; + chunk->transport = t; + t->init_sent_count++; + break; + case SCTP_CMD_INIT_RESTART: /* Do the needed accounting and updates * associated with restarting an initialization - * timer. + * timer. Only multiply the timeout by two if + * all transports have been tried at the current + * timeout. + */ + t = asoc->init_last_sent_to; + asoc->init_err_counter++; + + if (t->init_sent_count > (asoc->init_cycle + 1)) { + asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_INIT] *= 2; + if (asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_INIT] > + asoc->max_init_timeo) { + asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_INIT] = + asoc->max_init_timeo; + } + asoc->init_cycle++; + SCTP_DEBUG_PRINTK( + "T1 INIT Timeout adjustment" + " init_err_counter: %d" + " cycle: %d" + " timeout: %d\n", + asoc->init_err_counter, + asoc->init_cycle, + asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_INIT]); + } + + sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, + SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT)); + break; + + case SCTP_CMD_COOKIEECHO_RESTART: + /* Do the needed accounting and updates + * associated with restarting an initialization + * timer. Only multiply the timeout by two if + * all transports have been tried at the current + * timeout. 
*/ - asoc->counters[SCTP_COUNTER_INIT_ERROR]++; - asoc->timeouts[cmd->obj.to] *= 2; - if (asoc->timeouts[cmd->obj.to] > + asoc->init_err_counter++; + + asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_COOKIE] *= 2; + if (asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_COOKIE] > asoc->max_init_timeo) { - asoc->timeouts[cmd->obj.to] = + asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_COOKIE] = asoc->max_init_timeo; } + SCTP_DEBUG_PRINTK( + "T1 COOKIE Timeout adjustment" + " init_err_counter: %d" + " timeout: %d\n", + asoc->init_err_counter, + asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_COOKIE]); /* If we've sent any data bundled with * COOKIE-ECHO we need to resend. @@ -1261,7 +1325,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, - SCTP_TO(cmd->obj.to)); + SCTP_TO(SCTP_EVENT_TIMEOUT_T1_COOKIE)); break; case SCTP_CMD_INIT_FAILED: @@ -1273,12 +1337,13 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, subtype, chunk, cmd->obj.u32); break; - case SCTP_CMD_COUNTER_INC: - asoc->counters[cmd->obj.counter]++; + case SCTP_CMD_INIT_COUNTER_INC: + asoc->init_err_counter++; break; - case SCTP_CMD_COUNTER_RESET: - asoc->counters[cmd->obj.counter] = 0; + case SCTP_CMD_INIT_COUNTER_RESET: + asoc->init_err_counter = 0; + asoc->init_cycle = 0; break; case SCTP_CMD_REPORT_DUP: diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 8e01b8f09ac2..058189684c7c 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -533,6 +533,9 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_PEER_INIT, SCTP_PEER_INIT(initchunk)); + /* Reset init error count upon receipt of INIT-ACK. */ + sctp_add_cmd_sf(commands, SCTP_CMD_INIT_COUNTER_RESET, SCTP_NULL()); + /* 5.1 C) "A" shall stop the T1-init timer and leave * COOKIE-WAIT state. "A" shall then ... start the T1-cookie * timer, and enter the COOKIE-ECHOED state. 
@@ -775,8 +778,7 @@ sctp_disposition_t sctp_sf_do_5_1E_ca(const struct sctp_endpoint *ep, * from the COOKIE-ECHOED state to the COOKIE-WAIT * state is performed. */ - sctp_add_cmd_sf(commands, SCTP_CMD_COUNTER_RESET, - SCTP_COUNTER(SCTP_COUNTER_INIT_ERROR)); + sctp_add_cmd_sf(commands, SCTP_CMD_INIT_COUNTER_RESET, SCTP_NULL()); /* RFC 2960 5.1 Normal Establishment of an Association * @@ -1019,10 +1021,22 @@ sctp_disposition_t sctp_sf_backbeat_8_3(const struct sctp_endpoint *ep, link = sctp_assoc_lookup_paddr(asoc, &from_addr); /* This should never happen, but lets log it if so. */ - if (!link) { - printk(KERN_WARNING - "%s: Could not find address %d.%d.%d.%d\n", - __FUNCTION__, NIPQUAD(from_addr.v4.sin_addr)); + if (unlikely(!link)) { + if (from_addr.sa.sa_family == AF_INET6) { + printk(KERN_WARNING + "%s association %p could not find address " + "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", + __FUNCTION__, + asoc, + NIP6(from_addr.v6.sin6_addr)); + } else { + printk(KERN_WARNING + "%s association %p could not find address " + "%u.%u.%u.%u\n", + __FUNCTION__, + asoc, + NIPQUAD(from_addr.v4.sin_addr.s_addr)); + } return SCTP_DISPOSITION_DISCARD; } @@ -2095,9 +2109,7 @@ static sctp_disposition_t sctp_sf_do_5_2_6_stale(const struct sctp_endpoint *ep, sctp_errhdr_t *err; struct sctp_chunk *reply; struct sctp_bind_addr *bp; - int attempts; - - attempts = asoc->counters[SCTP_COUNTER_INIT_ERROR] + 1; + int attempts = asoc->init_err_counter + 1; if (attempts >= asoc->max_init_attempts) { sctp_add_cmd_sf(commands, SCTP_CMD_INIT_FAILED, @@ -2157,8 +2169,7 @@ static sctp_disposition_t sctp_sf_do_5_2_6_stale(const struct sctp_endpoint *ep, /* Cast away the const modifier, as we want to just * rerun it through as a sideffect. 
*/ - sctp_add_cmd_sf(commands, SCTP_CMD_COUNTER_INC, - SCTP_COUNTER(SCTP_COUNTER_INIT_ERROR)); + sctp_add_cmd_sf(commands, SCTP_CMD_INIT_COUNTER_INC, SCTP_NULL()); sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_T1_COOKIE)); @@ -2281,8 +2292,7 @@ sctp_disposition_t sctp_sf_cookie_wait_abort(const struct sctp_endpoint *ep, if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr)) error = ((sctp_errhdr_t *)chunk->skb->data)->cause; - sctp_stop_t1_and_abort(commands, error); - return SCTP_DISPOSITION_ABORT; + return sctp_stop_t1_and_abort(commands, error, asoc, chunk->transport); } /* @@ -2294,8 +2304,8 @@ sctp_disposition_t sctp_sf_cookie_wait_icmp_abort(const struct sctp_endpoint *ep void *arg, sctp_cmd_seq_t *commands) { - sctp_stop_t1_and_abort(commands, SCTP_ERROR_NO_ERROR); - return SCTP_DISPOSITION_ABORT; + return sctp_stop_t1_and_abort(commands, SCTP_ERROR_NO_ERROR, asoc, + (struct sctp_transport *)arg); } /* @@ -2318,8 +2328,12 @@ sctp_disposition_t sctp_sf_cookie_echoed_abort(const struct sctp_endpoint *ep, * * This is common code called by several sctp_sf_*_abort() functions above. */ -void sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands, __u16 error) +sctp_disposition_t sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands, + __u16 error, + const struct sctp_association *asoc, + struct sctp_transport *transport) { + SCTP_DEBUG_PRINTK("ABORT received (INIT).\n"); sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_CLOSED)); SCTP_INC_STATS(SCTP_MIB_ABORTEDS); @@ -2328,6 +2342,7 @@ void sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands, __u16 error) /* CMD_INIT_FAILED will DELETE_TCB. */ sctp_add_cmd_sf(commands, SCTP_CMD_INIT_FAILED, SCTP_U32(error)); + return SCTP_DISPOSITION_ABORT; } /* @@ -3805,6 +3820,10 @@ sctp_disposition_t sctp_sf_do_prm_asoc(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC((struct sctp_association *) asoc)); + /* Choose transport for INIT. 
*/ + sctp_add_cmd_sf(commands, SCTP_CMD_INIT_CHOOSE_TRANSPORT, + SCTP_CHUNK(repl)); + /* After sending the INIT, "A" starts the T1-init timer and * enters the COOKIE-WAIT state. */ @@ -4589,7 +4608,7 @@ sctp_disposition_t sctp_sf_do_6_2_sack(const struct sctp_endpoint *ep, } /* - * sctp_sf_t1_timer_expire + * sctp_sf_t1_init_timer_expire * * Section: 4 Note: 2 * Verification Tag: @@ -4603,7 +4622,59 @@ sctp_disposition_t sctp_sf_do_6_2_sack(const struct sctp_endpoint *ep, * endpoint MUST abort the initialization process and report the * error to SCTP user. * - * 3) If the T1-cookie timer expires, the endpoint MUST retransmit + * Outputs + * (timers, events) + * + */ +sctp_disposition_t sctp_sf_t1_init_timer_expire(const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const sctp_subtype_t type, + void *arg, + sctp_cmd_seq_t *commands) +{ + struct sctp_chunk *repl = NULL; + struct sctp_bind_addr *bp; + int attempts = asoc->init_err_counter + 1; + + SCTP_DEBUG_PRINTK("Timer T1 expired (INIT).\n"); + + if (attempts < asoc->max_init_attempts) { + bp = (struct sctp_bind_addr *) &asoc->base.bind_addr; + repl = sctp_make_init(asoc, bp, GFP_ATOMIC, 0); + if (!repl) + return SCTP_DISPOSITION_NOMEM; + + /* Choose transport for INIT. */ + sctp_add_cmd_sf(commands, SCTP_CMD_INIT_CHOOSE_TRANSPORT, + SCTP_CHUNK(repl)); + + /* Issue a sideeffect to do the needed accounting. 
*/ + sctp_add_cmd_sf(commands, SCTP_CMD_INIT_RESTART, + SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT)); + + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); + } else { + SCTP_DEBUG_PRINTK("Giving up on INIT, attempts: %d" + " max_init_attempts: %d\n", + attempts, asoc->max_init_attempts); + sctp_add_cmd_sf(commands, SCTP_CMD_INIT_FAILED, + SCTP_U32(SCTP_ERROR_NO_ERROR)); + return SCTP_DISPOSITION_DELETE_TCB; + } + + return SCTP_DISPOSITION_CONSUME; +} + +/* + * sctp_sf_t1_cookie_timer_expire + * + * Section: 4 Note: 2 + * Verification Tag: + * Inputs + * (endpoint, asoc) + * + * RFC 2960 Section 4 Notes + * 3) If the T1-cookie timer expires, the endpoint MUST retransmit * COOKIE ECHO and re-start the T1-cookie timer without changing * state. This MUST be repeated up to 'Max.Init.Retransmits' times. * After that, the endpoint MUST abort the initialization process and @@ -4613,46 +4684,26 @@ sctp_disposition_t sctp_sf_do_6_2_sack(const struct sctp_endpoint *ep, * (timers, events) * */ -sctp_disposition_t sctp_sf_t1_timer_expire(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_t1_cookie_timer_expire(const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, sctp_cmd_seq_t *commands) { - struct sctp_chunk *repl; - struct sctp_bind_addr *bp; - sctp_event_timeout_t timer = (sctp_event_timeout_t) arg; - int timeout; - int attempts; - - timeout = asoc->timeouts[timer]; - attempts = asoc->counters[SCTP_COUNTER_INIT_ERROR] + 1; - repl = NULL; + struct sctp_chunk *repl = NULL; + int attempts = asoc->init_err_counter + 1; - SCTP_DEBUG_PRINTK("Timer T1 expired.\n"); + SCTP_DEBUG_PRINTK("Timer T1 expired (COOKIE-ECHO).\n"); if (attempts < asoc->max_init_attempts) { - switch (timer) { - case SCTP_EVENT_TIMEOUT_T1_INIT: - bp = (struct sctp_bind_addr *) &asoc->base.bind_addr; - repl = sctp_make_init(asoc, bp, GFP_ATOMIC, 0); - break; - - case SCTP_EVENT_TIMEOUT_T1_COOKIE: - repl = sctp_make_cookie_echo(asoc, NULL); - 
break; - - default: - BUG(); - break; - }; - + repl = sctp_make_cookie_echo(asoc, NULL); if (!repl) - goto nomem; + return SCTP_DISPOSITION_NOMEM; /* Issue a sideeffect to do the needed accounting. */ - sctp_add_cmd_sf(commands, SCTP_CMD_INIT_RESTART, - SCTP_TO(timer)); + sctp_add_cmd_sf(commands, SCTP_CMD_COOKIEECHO_RESTART, + SCTP_TO(SCTP_EVENT_TIMEOUT_T1_COOKIE)); + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); } else { sctp_add_cmd_sf(commands, SCTP_CMD_INIT_FAILED, @@ -4661,9 +4712,6 @@ sctp_disposition_t sctp_sf_t1_timer_expire(const struct sctp_endpoint *ep, } return SCTP_DISPOSITION_CONSUME; - -nomem: - return SCTP_DISPOSITION_NOMEM; } /* RFC2960 9.2 If the timer expires, the endpoint must re-send the SHUTDOWN diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c index 8967846f69e8..75ef10408764 100644 --- a/net/sctp/sm_statetable.c +++ b/net/sctp/sm_statetable.c @@ -783,7 +783,8 @@ static const sctp_sm_table_entry_t other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_ /* SCTP_STATE_COOKIE_WAIT */ \ {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \ /* SCTP_STATE_COOKIE_ECHOED */ \ - {.fn = sctp_sf_t1_timer_expire, .name = "sctp_sf_t1_timer_expire"}, \ + {.fn = sctp_sf_t1_cookie_timer_expire, \ + .name = "sctp_sf_t1_cookie_timer_expire"}, \ /* SCTP_STATE_ESTABLISHED */ \ {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ /* SCTP_STATE_SHUTDOWN_PENDING */ \ @@ -802,7 +803,8 @@ static const sctp_sm_table_entry_t other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_ /* SCTP_STATE_CLOSED */ \ {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ /* SCTP_STATE_COOKIE_WAIT */ \ - {.fn = sctp_sf_t1_timer_expire, .name = "sctp_sf_t1_timer_expire"}, \ + {.fn = sctp_sf_t1_init_timer_expire, \ + .name = "sctp_sf_t1_init_timer_expire"}, \ /* SCTP_STATE_COOKIE_ECHOED */ \ {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ /* SCTP_STATE_ESTABLISHED */ \ diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 
e6926cb19420..aad55dc3792b 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -262,18 +262,18 @@ static struct sctp_transport *sctp_addr_id2transport(struct sock *sk, * sockaddr_in6 [RFC 2553]), * addr_len - the size of the address structure. */ -SCTP_STATIC int sctp_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) +SCTP_STATIC int sctp_bind(struct sock *sk, struct sockaddr *addr, int addr_len) { int retval = 0; sctp_lock_sock(sk); - SCTP_DEBUG_PRINTK("sctp_bind(sk: %p, uaddr: %p, addr_len: %d)\n", - sk, uaddr, addr_len); + SCTP_DEBUG_PRINTK("sctp_bind(sk: %p, addr: %p, addr_len: %d)\n", + sk, addr, addr_len); /* Disallow binding twice. */ if (!sctp_sk(sk)->ep->base.bind_addr.port) - retval = sctp_do_bind(sk, (union sctp_addr *)uaddr, + retval = sctp_do_bind(sk, (union sctp_addr *)addr, addr_len); else retval = -EINVAL; @@ -318,23 +318,27 @@ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) unsigned short snum; int ret = 0; - SCTP_DEBUG_PRINTK("sctp_do_bind(sk: %p, newaddr: %p, len: %d)\n", - sk, addr, len); - /* Common sockaddr verification. */ af = sctp_sockaddr_af(sp, addr, len); - if (!af) + if (!af) { + SCTP_DEBUG_PRINTK("sctp_do_bind(sk: %p, newaddr: %p, len: %d) EINVAL\n", + sk, addr, len); return -EINVAL; + } + + snum = ntohs(addr->v4.sin_port); + + SCTP_DEBUG_PRINTK_IPADDR("sctp_do_bind(sk: %p, new addr: ", + ", port: %d, new port: %d, len: %d)\n", + sk, + addr, + bp->port, snum, + len); /* PF specific bind() address verification. */ if (!sp->pf->bind_verify(sp, addr)) return -EADDRNOTAVAIL; - snum= ntohs(addr->v4.sin_port); - - SCTP_DEBUG_PRINTK("sctp_do_bind: port: %d, new port: %d\n", - bp->port, snum); - /* We must either be unbound, or bind to the same port. */ if (bp->port && (snum != bp->port)) { SCTP_DEBUG_PRINTK("sctp_do_bind:" @@ -816,7 +820,8 @@ out: * * Basically do nothing but copying the addresses from user to kernel * land and invoking either sctp_bindx_add() or sctp_bindx_rem() on the sk. 
- * This is used for tunneling the sctp_bindx() request through sctp_setsockopt() * from userspace. + * This is used for tunneling the sctp_bindx() request through sctp_setsockopt() + * from userspace. * * We don't use copy_from_user() for optimization: we first do the * sanity checks (buffer size -fast- and access check-healthy @@ -913,6 +918,243 @@ out: return err; } +/* __sctp_connect(struct sock* sk, struct sockaddr *kaddrs, int addrs_size) + * + * Common routine for handling connect() and sctp_connectx(). + * Connect will come in with just a single address. + */ +static int __sctp_connect(struct sock* sk, + struct sockaddr *kaddrs, + int addrs_size) +{ + struct sctp_sock *sp; + struct sctp_endpoint *ep; + struct sctp_association *asoc = NULL; + struct sctp_association *asoc2; + struct sctp_transport *transport; + union sctp_addr to; + struct sctp_af *af; + sctp_scope_t scope; + long timeo; + int err = 0; + int addrcnt = 0; + int walk_size = 0; + struct sockaddr *sa_addr; + void *addr_buf; + + sp = sctp_sk(sk); + ep = sp->ep; + + /* connect() cannot be done on a socket that is already in ESTABLISHED + * state - UDP-style peeled off socket or a TCP-style socket that + * is already connected. + * It cannot be done even on a TCP-style listening socket. + */ + if (sctp_sstate(sk, ESTABLISHED) || + (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING))) { + err = -EISCONN; + goto out_free; + } + + /* Walk through the addrs buffer and count the number of addresses. */ + addr_buf = kaddrs; + while (walk_size < addrs_size) { + sa_addr = (struct sockaddr *)addr_buf; + af = sctp_get_af_specific(sa_addr->sa_family); + + /* If the address family is not supported or if this address + * causes the address buffer to overflow return EINVAL. 
+ */ + if (!af || (walk_size + af->sockaddr_len) > addrs_size) { + err = -EINVAL; + goto out_free; + } + + err = sctp_verify_addr(sk, (union sctp_addr *)sa_addr, + af->sockaddr_len); + if (err) + goto out_free; + + memcpy(&to, sa_addr, af->sockaddr_len); + to.v4.sin_port = ntohs(to.v4.sin_port); + + /* Check if there already is a matching association on the + * endpoint (other than the one created here). + */ + asoc2 = sctp_endpoint_lookup_assoc(ep, &to, &transport); + if (asoc2 && asoc2 != asoc) { + if (asoc2->state >= SCTP_STATE_ESTABLISHED) + err = -EISCONN; + else + err = -EALREADY; + goto out_free; + } + + /* If we could not find a matching association on the endpoint, + * make sure that there is no peeled-off association matching + * the peer address even on another socket. + */ + if (sctp_endpoint_is_peeled_off(ep, &to)) { + err = -EADDRNOTAVAIL; + goto out_free; + } + + if (!asoc) { + /* If a bind() or sctp_bindx() is not called prior to + * an sctp_connectx() call, the system picks an + * ephemeral port and will choose an address set + * equivalent to binding with a wildcard address. + */ + if (!ep->base.bind_addr.port) { + if (sctp_autobind(sk)) { + err = -EAGAIN; + goto out_free; + } + } + + scope = sctp_scope(&to); + asoc = sctp_association_new(ep, sk, scope, GFP_KERNEL); + if (!asoc) { + err = -ENOMEM; + goto out_free; + } + } + + /* Prime the peer's transport structures. 
*/ + transport = sctp_assoc_add_peer(asoc, &to, GFP_KERNEL, + SCTP_UNKNOWN); + if (!transport) { + err = -ENOMEM; + goto out_free; + } + + addrcnt++; + addr_buf += af->sockaddr_len; + walk_size += af->sockaddr_len; + } + + err = sctp_assoc_set_bind_addr_from_ep(asoc, GFP_KERNEL); + if (err < 0) { + goto out_free; + } + + err = sctp_primitive_ASSOCIATE(asoc, NULL); + if (err < 0) { + goto out_free; + } + + /* Initialize sk's dport and daddr for getpeername() */ + inet_sk(sk)->dport = htons(asoc->peer.port); + af = sctp_get_af_specific(to.sa.sa_family); + af->to_sk_daddr(&to, sk); + + timeo = sock_sndtimeo(sk, sk->sk_socket->file->f_flags & O_NONBLOCK); + err = sctp_wait_for_connect(asoc, &timeo); + + /* Don't free association on exit. */ + asoc = NULL; + +out_free: + + SCTP_DEBUG_PRINTK("About to exit __sctp_connect() free asoc: %p" + " kaddrs: %p err: %d\n", + asoc, kaddrs, err); + if (asoc) + sctp_association_free(asoc); + return err; +} + +/* Helper for tunneling sctp_connectx() requests through sctp_setsockopt() + * + * API 8.9 + * int sctp_connectx(int sd, struct sockaddr *addrs, int addrcnt); + * + * If sd is an IPv4 socket, the addresses passed must be IPv4 addresses. + * If the sd is an IPv6 socket, the addresses passed can either be IPv4 + * or IPv6 addresses. + * + * A single address may be specified as INADDR_ANY or IN6ADDR_ANY, see + * Section 3.1.2 for this usage. + * + * addrs is a pointer to an array of one or more socket addresses. Each + * address is contained in its appropriate structure (i.e. struct + * sockaddr_in or struct sockaddr_in6) the family of the address type + * must be used to distengish the address length (note that this + * representation is termed a "packed array" of addresses). The caller + * specifies the number of addresses in the array with addrcnt. + * + * On success, sctp_connectx() returns 0. On failure, sctp_connectx() returns + * -1, and sets errno to the appropriate error code. 
+ * + * For SCTP, the port given in each socket address must be the same, or + * sctp_connectx() will fail, setting errno to EINVAL. + * + * An application can use sctp_connectx to initiate an association with + * an endpoint that is multi-homed. Much like sctp_bindx() this call + * allows a caller to specify multiple addresses at which a peer can be + * reached. The way the SCTP stack uses the list of addresses to set up + * the association is implementation dependent. This function only + * specifies that the stack will try to make use of all the addresses in + * the list when needed. + * + * Note that the list of addresses passed in is only used for setting up + * the association. It does not necessarily equal the set of addresses + * the peer uses for the resulting association. If the caller wants to + * find out the set of peer addresses, it must use sctp_getpaddrs() to + * retrieve them after the association has been set up. + * + * Basically do nothing but copy the addresses from user to kernel + * land and invoke __sctp_connect(). This is used for tunneling + * the sctp_connectx() request through sctp_setsockopt() from userspace. + * + * We don't use copy_from_user() for optimization: we first do the + * sanity checks (buffer size -fast- and access check-healthy + * pointer); if all of those succeed, then we can alloc the memory + * (expensive operation) needed to copy the data to kernel. Then we do + * the copying without checking the user space area + * (__copy_from_user()). + * + * On exit there is no need to do sockfd_put(), sys_setsockopt() does + * it. + * + * sk The sk of the socket + * addrs The pointer to the addresses in user land + * addrs_size Size of the addrs buffer + * + * Returns 0 if ok, <0 errno code on error.
+ */ +SCTP_STATIC int sctp_setsockopt_connectx(struct sock* sk, + struct sockaddr __user *addrs, + int addrs_size) +{ + int err = 0; + struct sockaddr *kaddrs; + + SCTP_DEBUG_PRINTK("%s - sk %p addrs %p addrs_size %d\n", + __FUNCTION__, sk, addrs, addrs_size); + + if (unlikely(addrs_size <= 0)) + return -EINVAL; + + /* Check the user passed a healthy pointer. */ + if (unlikely(!access_ok(VERIFY_READ, addrs, addrs_size))) + return -EFAULT; + + /* Alloc space for the address array in kernel memory. */ + kaddrs = (struct sockaddr *)kmalloc(addrs_size, GFP_KERNEL); + if (unlikely(!kaddrs)) + return -ENOMEM; + + if (__copy_from_user(kaddrs, addrs, addrs_size)) { + err = -EFAULT; + } else { + err = __sctp_connect(sk, kaddrs, addrs_size); + } + + kfree(kaddrs); + return err; +} + /* API 3.1.4 close() - UDP Style Syntax * Applications use close() to perform graceful shutdown (as described in * Section 10.1 of [SCTP]) on ALL the associations currently represented @@ -1095,7 +1337,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, sp = sctp_sk(sk); ep = sp->ep; - SCTP_DEBUG_PRINTK("Using endpoint: %s.\n", ep->debug_name); + SCTP_DEBUG_PRINTK("Using endpoint: %p.\n", ep); /* We cannot send a message over a TCP-style listening socket. */ if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING)) { @@ -1306,7 +1548,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, } /* Prime the peer's transport structures. */ - transport = sctp_assoc_add_peer(asoc, &to, GFP_KERNEL); + transport = sctp_assoc_add_peer(asoc, &to, GFP_KERNEL, SCTP_UNKNOWN); if (!transport) { err = -ENOMEM; goto out_free; @@ -2208,6 +2450,12 @@ SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname, optlen, SCTP_BINDX_REM_ADDR); break; + case SCTP_SOCKOPT_CONNECTX: + /* 'optlen' is the size of the addresses buffer. 
*/ + retval = sctp_setsockopt_connectx(sk, (struct sockaddr __user *)optval, + optlen); + break; + case SCTP_DISABLE_FRAGMENTS: retval = sctp_setsockopt_disable_fragments(sk, optval, optlen); break; @@ -2283,112 +2531,29 @@ out_nounlock: * * len: the size of the address. */ -SCTP_STATIC int sctp_connect(struct sock *sk, struct sockaddr *uaddr, +SCTP_STATIC int sctp_connect(struct sock *sk, struct sockaddr *addr, int addr_len) { - struct sctp_sock *sp; - struct sctp_endpoint *ep; - struct sctp_association *asoc; - struct sctp_transport *transport; - union sctp_addr to; - struct sctp_af *af; - sctp_scope_t scope; - long timeo; int err = 0; + struct sctp_af *af; sctp_lock_sock(sk); - SCTP_DEBUG_PRINTK("%s - sk: %p, sockaddr: %p, addr_len: %d)\n", - __FUNCTION__, sk, uaddr, addr_len); - - sp = sctp_sk(sk); - ep = sp->ep; - - /* connect() cannot be done on a socket that is already in ESTABLISHED - * state - UDP-style peeled off socket or a TCP-style socket that - * is already connected. - * It cannot be done even on a TCP-style listening socket. - */ - if (sctp_sstate(sk, ESTABLISHED) || - (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING))) { - err = -EISCONN; - goto out_unlock; - } - - err = sctp_verify_addr(sk, (union sctp_addr *)uaddr, addr_len); - if (err) - goto out_unlock; - - if (addr_len > sizeof(to)) - addr_len = sizeof(to); - memcpy(&to, uaddr, addr_len); - to.v4.sin_port = ntohs(to.v4.sin_port); - - asoc = sctp_endpoint_lookup_assoc(ep, &to, &transport); - if (asoc) { - if (asoc->state >= SCTP_STATE_ESTABLISHED) - err = -EISCONN; - else - err = -EALREADY; - goto out_unlock; - } - - /* If we could not find a matching association on the endpoint, - * make sure that there is no peeled-off association matching the - * peer address even on another socket. 
- */ - if (sctp_endpoint_is_peeled_off(ep, &to)) { - err = -EADDRNOTAVAIL; - goto out_unlock; - } - - /* If a bind() or sctp_bindx() is not called prior to a connect() - * call, the system picks an ephemeral port and will choose an address - * set equivalent to binding with a wildcard address. - */ - if (!ep->base.bind_addr.port) { - if (sctp_autobind(sk)) { - err = -EAGAIN; - goto out_unlock; - } - } - - scope = sctp_scope(&to); - asoc = sctp_association_new(ep, sk, scope, GFP_KERNEL); - if (!asoc) { - err = -ENOMEM; - goto out_unlock; - } + SCTP_DEBUG_PRINTK("%s - sk: %p, sockaddr: %p, addr_len: %d\n", + __FUNCTION__, sk, addr, addr_len); - /* Prime the peer's transport structures. */ - transport = sctp_assoc_add_peer(asoc, &to, GFP_KERNEL); - if (!transport) { - sctp_association_free(asoc); - goto out_unlock; - } - err = sctp_assoc_set_bind_addr_from_ep(asoc, GFP_KERNEL); - if (err < 0) { - sctp_association_free(asoc); - goto out_unlock; - } - - err = sctp_primitive_ASSOCIATE(asoc, NULL); - if (err < 0) { - sctp_association_free(asoc); - goto out_unlock; + /* Validate addr_len before calling common connect/connectx routine. */ + af = sctp_get_af_specific(addr->sa_family); + if (!af || addr_len < af->sockaddr_len) { + err = -EINVAL; + } else { + /* Pass correct addr len to common routine (so it knows there + * is only one address being passed. + */ + err = __sctp_connect(sk, addr, af->sockaddr_len); } - /* Initialize sk's dport and daddr for getpeername() */ - inet_sk(sk)->dport = htons(asoc->peer.port); - af = sctp_get_af_specific(to.sa.sa_family); - af->to_sk_daddr(&to, sk); - - timeo = sock_sndtimeo(sk, sk->sk_socket->file->f_flags & O_NONBLOCK); - err = sctp_wait_for_connect(asoc, &timeo); - -out_unlock: sctp_release_sock(sk); - return err; } @@ -2677,12 +2842,15 @@ static int sctp_getsockopt_sctp_status(struct sock *sk, int len, /* Map ipv4 address into v4-mapped-on-v6 address. 
*/ sctp_get_pf_specific(sk->sk_family)->addr_v4map(sctp_sk(sk), (union sctp_addr *)&status.sstat_primary.spinfo_address); - status.sstat_primary.spinfo_state = transport->active; + status.sstat_primary.spinfo_state = transport->state; status.sstat_primary.spinfo_cwnd = transport->cwnd; status.sstat_primary.spinfo_srtt = transport->srtt; status.sstat_primary.spinfo_rto = jiffies_to_msecs(transport->rto); status.sstat_primary.spinfo_mtu = transport->pmtu; + if (status.sstat_primary.spinfo_state == SCTP_UNKNOWN) + status.sstat_primary.spinfo_state = SCTP_ACTIVE; + if (put_user(len, optlen)) { retval = -EFAULT; goto out; @@ -2733,12 +2901,15 @@ static int sctp_getsockopt_peer_addr_info(struct sock *sk, int len, return -EINVAL; pinfo.spinfo_assoc_id = sctp_assoc2id(transport->asoc); - pinfo.spinfo_state = transport->active; + pinfo.spinfo_state = transport->state; pinfo.spinfo_cwnd = transport->cwnd; pinfo.spinfo_srtt = transport->srtt; pinfo.spinfo_rto = jiffies_to_msecs(transport->rto); pinfo.spinfo_mtu = transport->pmtu; + if (pinfo.spinfo_state == SCTP_UNKNOWN) + pinfo.spinfo_state = SCTP_ACTIVE; + if (put_user(len, optlen)) { retval = -EFAULT; goto out; @@ -3591,7 +3762,8 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname, int retval = 0; int len; - SCTP_DEBUG_PRINTK("sctp_getsockopt(sk: %p, ...)\n", sk); + SCTP_DEBUG_PRINTK("sctp_getsockopt(sk: %p... optname: %d)\n", + sk, optname); /* I can hardly begin to describe how wrong this is. This is * so broken as to be worse than useless. 
The API draft @@ -4596,8 +4768,7 @@ out: return err; do_error: - if (asoc->counters[SCTP_COUNTER_INIT_ERROR] + 1 >= - asoc->max_init_attempts) + if (asoc->init_err_counter + 1 >= asoc->max_init_attempts) err = -ETIMEDOUT; else err = -ECONNREFUSED; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index f30882e1e96a..0ec0fde6e6c5 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -83,7 +83,9 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, peer->last_time_used = jiffies; peer->last_time_ecne_reduced = jiffies; - peer->active = SCTP_ACTIVE; + peer->init_sent_count = 0; + + peer->state = SCTP_ACTIVE; peer->hb_allowed = 0; /* Initialize the default path max_retrans. */ -- cgit v1.2.3-55-g7522 From 72cb6962a91f2af9eef69a06198e1949c10259ae Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 20 Jun 2005 13:18:08 -0700 Subject: [IPSEC]: Add xfrm_init_state This patch adds xfrm_init_state which is simply a wrapper that calls xfrm_get_type and subsequently x->type->init_state. It also gets rid of the unused args argument. Abstracting it out allows us to add common initialisation code, e.g., to set family-specific flags. The add_time setting in xfrm_user.c was deleted because it's already set by xfrm_state_alloc. Signed-off-by: Herbert Xu Acked-by: James Morris Signed-off-by: David S. 
Miller --- include/net/xfrm.h | 3 ++- net/ipv4/ah4.c | 2 +- net/ipv4/esp4.c | 2 +- net/ipv4/ipcomp.c | 11 +++-------- net/ipv4/xfrm4_tunnel.c | 2 +- net/ipv6/ah6.c | 2 +- net/ipv6/esp6.c | 2 +- net/ipv6/ipcomp6.c | 9 ++------- net/ipv6/xfrm6_tunnel.c | 2 +- net/key/af_key.c | 12 +++--------- net/xfrm/xfrm_policy.c | 1 - net/xfrm/xfrm_state.c | 21 +++++++++++++++++++++ net/xfrm/xfrm_user.c | 9 +-------- 13 files changed, 38 insertions(+), 40 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 0e65e02b7a1d..77bfdde440f8 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -225,7 +225,7 @@ struct xfrm_type struct module *owner; __u8 proto; - int (*init_state)(struct xfrm_state *x, void *args); + int (*init_state)(struct xfrm_state *x); void (*destructor)(struct xfrm_state *); int (*input)(struct xfrm_state *, struct xfrm_decap_state *, struct sk_buff *skb); int (*post_input)(struct xfrm_state *, struct xfrm_decap_state *, struct sk_buff *skb); @@ -839,6 +839,7 @@ extern int xfrm_replay_check(struct xfrm_state *x, u32 seq); extern void xfrm_replay_advance(struct xfrm_state *x, u32 seq); extern int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb); extern int xfrm_state_mtu(struct xfrm_state *x, int mtu); +extern int xfrm_init_state(struct xfrm_state *x); extern int xfrm4_rcv(struct sk_buff *skb); extern int xfrm4_output(struct sk_buff *skb); extern int xfrm4_tunnel_register(struct xfrm_tunnel *handler); diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 0e98f2235b6e..514c85b2631a 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -200,7 +200,7 @@ static void ah4_err(struct sk_buff *skb, u32 info) xfrm_state_put(x); } -static int ah_init_state(struct xfrm_state *x, void *args) +static int ah_init_state(struct xfrm_state *x) { struct ah_data *ahp = NULL; struct xfrm_algo_desc *aalg_desc; diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index eae84cc39d3f..ba57446d5d1f 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -362,7 +362,7 
@@ static void esp_destroy(struct xfrm_state *x) kfree(esp); } -static int esp_init_state(struct xfrm_state *x, void *args) +static int esp_init_state(struct xfrm_state *x) { struct esp_data *esp = NULL; diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 1a23c5263b99..2065944fd9e5 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -236,15 +236,10 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x) t->props.mode = 1; t->props.saddr.a4 = x->props.saddr.a4; t->props.flags = x->props.flags; - - t->type = xfrm_get_type(IPPROTO_IPIP, t->props.family); - if (t->type == NULL) - goto error; - - if (t->type->init_state(t, NULL)) + + if (xfrm_init_state(t)) goto error; - t->km.state = XFRM_STATE_VALID; atomic_set(&t->tunnel_users, 1); out: return t; @@ -422,7 +417,7 @@ static void ipcomp_destroy(struct xfrm_state *x) kfree(ipcd); } -static int ipcomp_init_state(struct xfrm_state *x, void *args) +static int ipcomp_init_state(struct xfrm_state *x) { int err; struct ipcomp_data *ipcd; diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c index 413191f585f6..e1fe360ed27a 100644 --- a/net/ipv4/xfrm4_tunnel.c +++ b/net/ipv4/xfrm4_tunnel.c @@ -84,7 +84,7 @@ static void ipip_err(struct sk_buff *skb, u32 info) handler->err_handler(skb, &arg); } -static int ipip_init_state(struct xfrm_state *x, void *args) +static int ipip_init_state(struct xfrm_state *x) { if (!x->props.mode) return -EINVAL; diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index e3ecf626cbf7..986fdfdccbcd 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -339,7 +339,7 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, xfrm_state_put(x); } -static int ah6_init_state(struct xfrm_state *x, void *args) +static int ah6_init_state(struct xfrm_state *x) { struct ah_data *ahp = NULL; struct xfrm_algo_desc *aalg_desc; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index be7095d6babe..324db62515a2 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -296,7 +296,7 @@ 
static void esp6_destroy(struct xfrm_state *x) kfree(esp); } -static int esp6_init_state(struct xfrm_state *x, void *args) +static int esp6_init_state(struct xfrm_state *x) { struct esp_data *esp = NULL; diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 6cde5310cd76..423feb46ccc0 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -234,14 +234,9 @@ static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x) t->props.mode = 1; memcpy(t->props.saddr.a6, x->props.saddr.a6, sizeof(struct in6_addr)); - t->type = xfrm_get_type(IPPROTO_IPV6, t->props.family); - if (t->type == NULL) + if (xfrm_init_state(t)) goto error; - if (t->type->init_state(t, NULL)) - goto error; - - t->km.state = XFRM_STATE_VALID; atomic_set(&t->tunnel_users, 1); out: @@ -420,7 +415,7 @@ static void ipcomp6_destroy(struct xfrm_state *x) xfrm6_tunnel_free_spi((xfrm_address_t *)&x->props.saddr); } -static int ipcomp6_init_state(struct xfrm_state *x, void *args) +static int ipcomp6_init_state(struct xfrm_state *x) { int err; struct ipcomp_data *ipcd; diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index ffcadd68b951..60c26c87277e 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -466,7 +466,7 @@ static void xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return; } -static int xfrm6_tunnel_init_state(struct xfrm_state *x, void *args) +static int xfrm6_tunnel_init_state(struct xfrm_state *x) { if (!x->props.mode) return -EINVAL; diff --git a/net/key/af_key.c b/net/key/af_key.c index 98b72f2024ff..652dd09ccd3a 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1096,17 +1096,11 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct sadb_msg *hdr, } } - x->type = xfrm_get_type(proto, x->props.family); - if (x->type == NULL) { - err = -ENOPROTOOPT; - goto out; - } - if (x->type->init_state(x, NULL)) { - err = -EINVAL; + err = xfrm_init_state(x); + if (err) goto out; - } + x->km.seq = hdr->sadb_msg_seq; - x->km.state = 
XFRM_STATE_VALID; return x; out: diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 0a4260719a12..d65ed8684fc1 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -118,7 +118,6 @@ retry: xfrm_policy_put_afinfo(afinfo); return type; } -EXPORT_SYMBOL(xfrm_get_type); int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, unsigned short family) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 2537f26f097c..1845b73d69f9 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1055,6 +1055,27 @@ int xfrm_state_mtu(struct xfrm_state *x, int mtu) } EXPORT_SYMBOL(xfrm_state_mtu); + +int xfrm_init_state(struct xfrm_state *x) +{ + int err; + + err = -ENOENT; + x->type = xfrm_get_type(x->id.proto, x->props.family); + if (x->type == NULL) + goto error; + + err = x->type->init_state(x); + if (err) + goto error; + + x->km.state = XFRM_STATE_VALID; + +error: + return err; +} + +EXPORT_SYMBOL(xfrm_init_state); void __init xfrm_state_init(void) { diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 5ce8558eac91..ecade4893a13 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -249,17 +249,10 @@ static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p, if ((err = attach_encap_tmpl(&x->encap, xfrma[XFRMA_ENCAP-1]))) goto error; - err = -ENOENT; - x->type = xfrm_get_type(x->id.proto, x->props.family); - if (x->type == NULL) - goto error; - - err = x->type->init_state(x, NULL); + err = xfrm_init_state(x); if (err) goto error; - x->curlft.add_time = (unsigned long) xtime.tv_sec; - x->km.state = XFRM_STATE_VALID; x->km.seq = p->seq; return x; -- cgit v1.2.3-55-g7522 From d094cd83c06e06e01d8edb540555f3f64e4081c2 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 20 Jun 2005 13:19:41 -0700 Subject: [IPSEC]: Add xfrm_state_afinfo->init_flags This patch adds the xfrm_state_afinfo->init_flags hook which allows each address family to perform any common initialisation that does not require a 
corresponding destructor call. It will be used subsequently to set the XFRM_STATE_NOPMTUDISC flag in IPv4. It also fixes up the error codes returned by xfrm_init_state. Signed-off-by: Herbert Xu Acked-by: James Morris Signed-off-by: David S. Miller --- include/net/xfrm.h | 1 + net/xfrm/xfrm_state.c | 20 ++++++++++++++++++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 77bfdde440f8..029522a4ceda 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -204,6 +204,7 @@ struct xfrm_state_afinfo { rwlock_t lock; struct list_head *state_bydst; struct list_head *state_byspi; + int (*init_flags)(struct xfrm_state *x); void (*init_tempsel)(struct xfrm_state *x, struct flowi *fl, struct xfrm_tmpl *tmpl, xfrm_address_t *daddr, xfrm_address_t *saddr); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 1845b73d69f9..9d206c282cf1 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1058,10 +1058,26 @@ EXPORT_SYMBOL(xfrm_state_mtu); int xfrm_init_state(struct xfrm_state *x) { + struct xfrm_state_afinfo *afinfo; + int family = x->props.family; int err; - err = -ENOENT; - x->type = xfrm_get_type(x->id.proto, x->props.family); + err = -EAFNOSUPPORT; + afinfo = xfrm_state_get_afinfo(family); + if (!afinfo) + goto error; + + err = 0; + if (afinfo->init_flags) + err = afinfo->init_flags(x); + + xfrm_state_put_afinfo(afinfo); + + if (err) + goto error; + + err = -EPROTONOSUPPORT; + x->type = xfrm_get_type(x->id.proto, family); if (x->type == NULL) goto error; -- cgit v1.2.3-55-g7522 From dd87147eed934eaff92869f3d158697c7239d1d2 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 20 Jun 2005 13:21:43 -0700 Subject: [IPSEC]: Add XFRM_STATE_NOPMTUDISC flag This patch adds the flag XFRM_STATE_NOPMTUDISC for xfrm states. It is similar to the nopmtudisc on IPIP/GRE tunnels. It only has an effect on IPv4 tunnel mode states. For these states, it will ensure that the DF flag is always cleared. 
This is primarily useful to work around ICMP blackholes. In future this flag could also allow a larger MTU to be set within the tunnel just like IPIP/GRE tunnels. This could be useful for short haul tunnels where temporary fragmentation outside the tunnel is desired over smaller fragments inside the tunnel. Signed-off-by: Herbert Xu Acked-by: James Morris Signed-off-by: David S. Miller --- include/linux/pfkeyv2.h | 1 + include/linux/xfrm.h | 1 + net/ipv4/xfrm4_output.c | 8 ++++++-- net/ipv4/xfrm4_state.c | 9 +++++++++ net/key/af_key.c | 4 ++++ 5 files changed, 21 insertions(+), 2 deletions(-) diff --git a/include/linux/pfkeyv2.h b/include/linux/pfkeyv2.h index e6b519220245..724066778aff 100644 --- a/include/linux/pfkeyv2.h +++ b/include/linux/pfkeyv2.h @@ -245,6 +245,7 @@ struct sadb_x_nat_t_port { /* Security Association flags */ #define SADB_SAFLAGS_PFS 1 +#define SADB_SAFLAGS_NOPMTUDISC 0x20000000 #define SADB_SAFLAGS_DECAP_DSCP 0x40000000 #define SADB_SAFLAGS_NOECN 0x80000000 diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index d68391a9b9f3..f0d423300d84 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -196,6 +196,7 @@ struct xfrm_usersa_info { __u8 flags; #define XFRM_STATE_NOECN 1 #define XFRM_STATE_DECAP_DSCP 2 +#define XFRM_STATE_NOPMTUDISC 4 }; struct xfrm_usersa_id { diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index af2392ae5769..66620a95942a 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -33,6 +33,7 @@ static void xfrm4_encap(struct sk_buff *skb) struct dst_entry *dst = skb->dst; struct xfrm_state *x = dst->xfrm; struct iphdr *iph, *top_iph; + int flags; iph = skb->nh.iph; skb->h.ipiph = iph; @@ -51,10 +52,13 @@ static void xfrm4_encap(struct sk_buff *skb) /* DS disclosed */ top_iph->tos = INET_ECN_encapsulate(iph->tos, iph->tos); - if (x->props.flags & XFRM_STATE_NOECN) + + flags = x->props.flags; + if (flags & XFRM_STATE_NOECN) IP_ECN_clear(top_iph); - top_iph->frag_off = iph->frag_off 
& htons(IP_DF); + top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ? + 0 : (iph->frag_off & htons(IP_DF)); if (!top_iph->frag_off) __ip_select_ident(top_iph, dst, 0); diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 223a2e83853f..050611d7a967 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -7,12 +7,20 @@ * */ +#include #include #include #include static struct xfrm_state_afinfo xfrm4_state_afinfo; +static int xfrm4_init_flags(struct xfrm_state *x) +{ + if (ipv4_config.no_pmtu_disc) + x->props.flags |= XFRM_STATE_NOPMTUDISC; + return 0; +} + static void __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl, struct xfrm_tmpl *tmpl, @@ -109,6 +117,7 @@ __xfrm4_find_acq(u8 mode, u32 reqid, u8 proto, static struct xfrm_state_afinfo xfrm4_state_afinfo = { .family = AF_INET, .lock = RW_LOCK_UNLOCKED, + .init_flags = xfrm4_init_flags, .init_tempsel = __xfrm4_init_tempsel, .state_lookup = __xfrm4_state_lookup, .find_acq = __xfrm4_find_acq, diff --git a/net/key/af_key.c b/net/key/af_key.c index 652dd09ccd3a..4879743b945a 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -690,6 +690,8 @@ static struct sk_buff * pfkey_xfrm_state2msg(struct xfrm_state *x, int add_keys, sa->sadb_sa_flags |= SADB_SAFLAGS_NOECN; if (x->props.flags & XFRM_STATE_DECAP_DSCP) sa->sadb_sa_flags |= SADB_SAFLAGS_DECAP_DSCP; + if (x->props.flags & XFRM_STATE_NOPMTUDISC) + sa->sadb_sa_flags |= SADB_SAFLAGS_NOPMTUDISC; /* hard time */ if (hsc & 2) { @@ -974,6 +976,8 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct sadb_msg *hdr, x->props.flags |= XFRM_STATE_NOECN; if (sa->sadb_sa_flags & SADB_SAFLAGS_DECAP_DSCP) x->props.flags |= XFRM_STATE_DECAP_DSCP; + if (sa->sadb_sa_flags & SADB_SAFLAGS_NOPMTUDISC) + x->props.flags |= XFRM_STATE_NOPMTUDISC; lifetime = (struct sadb_lifetime*) ext_hdrs[SADB_EXT_LIFETIME_HARD-1]; if (lifetime != NULL) { -- cgit v1.2.3-55-g7522 From f852640e74f71e6dd38146e1149ec1fe6da2fb07 Mon Sep 17 00:00:00 2001 From: Alexey 
Dobriyan Date: Mon, 20 Jun 2005 13:31:11 -0700 Subject: [AX25]: endian-annotate ax25_type_trans() Signed-off-by: Alexey Dobriyan Acked-by: Ralf Baechle Signed-off-by: David S. Miller --- include/net/ax25.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/ax25.h b/include/net/ax25.h index 9e6368a54547..828a3a93dda1 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -220,7 +220,7 @@ static __inline__ void ax25_cb_put(ax25_cb *ax25) } } -static inline unsigned short ax25_type_trans(struct sk_buff *skb, struct net_device *dev) +static inline __be16 ax25_type_trans(struct sk_buff *skb, struct net_device *dev) { skb->dev = dev; skb->pkt_type = PACKET_HOST; -- cgit v1.2.3-55-g7522 From f6e276ee67c0ac9efafd24bc6f7a84aa359656df Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 20 Jun 2005 13:32:05 -0700 Subject: [ATALK]: endian annotations Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/linux/atalk.h | 26 +++++++++++++------------- net/appletalk/aarp.c | 2 +- net/appletalk/ddp.c | 2 +- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/include/linux/atalk.h b/include/linux/atalk.h index 31d3fc25ccbd..09a1451c1159 100644 --- a/include/linux/atalk.h +++ b/include/linux/atalk.h @@ -20,7 +20,7 @@ #define SIOCATALKDIFADDR (SIOCPROTOPRIVATE + 0) struct atalk_addr { - __u16 s_net; + __be16 s_net; __u8 s_node; }; @@ -33,8 +33,8 @@ struct sockaddr_at { struct atalk_netrange { __u8 nr_phase; - __u16 nr_firstnet; - __u16 nr_lastnet; + __be16 nr_firstnet; + __be16 nr_lastnet; }; #ifdef __KERNEL__ @@ -70,8 +70,8 @@ struct atalk_iface { struct atalk_sock { /* struct sock has to be the first member of atalk_sock */ struct sock sk; - unsigned short dest_net; - unsigned short src_net; + __be16 dest_net; + __be16 src_net; unsigned char dest_node; unsigned char src_node; unsigned char dest_port; @@ -95,9 +95,9 @@ struct ddpehdr { deh_hops:4, deh_len:10; #endif - __u16 deh_sum; - __u16 deh_dnet; - __u16 
deh_snet; + __be16 deh_sum; + __be16 deh_dnet; + __be16 deh_snet; __u8 deh_dnode; __u8 deh_snode; __u8 deh_dport; @@ -142,24 +142,24 @@ struct ddpshdr { /* AppleTalk AARP headers */ struct elapaarp { - __u16 hw_type; + __be16 hw_type; #define AARP_HW_TYPE_ETHERNET 1 #define AARP_HW_TYPE_TOKENRING 2 - __u16 pa_type; + __be16 pa_type; __u8 hw_len; __u8 pa_len; #define AARP_PA_ALEN 4 - __u16 function; + __be16 function; #define AARP_REQUEST 1 #define AARP_REPLY 2 #define AARP_PROBE 3 __u8 hw_src[ETH_ALEN] __attribute__ ((packed)); __u8 pa_src_zero __attribute__ ((packed)); - __u16 pa_src_net __attribute__ ((packed)); + __be16 pa_src_net __attribute__ ((packed)); __u8 pa_src_node __attribute__ ((packed)); __u8 hw_dst[ETH_ALEN] __attribute__ ((packed)); __u8 pa_dst_zero __attribute__ ((packed)); - __u16 pa_dst_net __attribute__ ((packed)); + __be16 pa_dst_net __attribute__ ((packed)); __u8 pa_dst_node __attribute__ ((packed)); }; diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index 54640c01b50c..10d040461021 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -565,7 +565,7 @@ int aarp_send_ddp(struct net_device *dev, struct sk_buff *skb, * numbers we just happen to need. Now put the * length in the lower two. 
*/ - *((__u16 *)skb->data) = htons(skb->len); + *((__be16 *)skb->data) = htons(skb->len); ft = 1; } /* diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 876dbac71060..192b529f86a4 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -401,7 +401,7 @@ out_err: } /* Find a match for a specific network:node pair */ -static struct atalk_iface *atalk_find_interface(int net, int node) +static struct atalk_iface *atalk_find_interface(__be16 net, int node) { struct atalk_iface *iface; -- cgit v1.2.3-55-g7522 From 246955fe4c38bd706ae30e37c64892c94213775d Mon Sep 17 00:00:00 2001 From: Robert Olsson Date: Mon, 20 Jun 2005 13:36:39 -0700 Subject: [NETLINK]: fib_lookup() via netlink Below is a more generic patch to do fib_lookup via netlink. For others we should say that we discussed this as a way to verify route selection. It's also possible there are other uses for this. In short the first half of struct fib_result_nl is filled in by the caller, and the netlink call fills in the other half and returns it. In case anyone is interested there is a corresponding user app to compare the full routing table; this was used to test the implementation of the LC-trie. Signed-off-by: David S. 
Miller --- include/linux/netlink.h | 1 + include/net/ip_fib.h | 14 +++++++++++++ net/ipv4/fib_frontend.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 70 insertions(+) diff --git a/include/linux/netlink.h b/include/linux/netlink.h index e38407a23d04..561d4dc75836 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -14,6 +14,7 @@ #define NETLINK_SELINUX 7 /* SELinux event notifications */ #define NETLINK_ARPD 8 #define NETLINK_AUDIT 9 /* auditing */ +#define NETLINK_FIB_LOOKUP 10 #define NETLINK_ROUTE6 11 /* af_inet6 route comm channel */ #define NETLINK_IP6_FW 13 #define NETLINK_DNRTMSG 14 /* DECnet routing messages */ diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index e5a5f6b62f88..a4208a336ac0 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -109,6 +109,20 @@ struct fib_result { #endif }; +struct fib_result_nl { + u32 fl_addr; /* To be looked up*/ + u32 fl_fwmark; + unsigned char fl_tos; + unsigned char fl_scope; + unsigned char tb_id_in; + + unsigned char tb_id; /* Results */ + unsigned char prefixlen; + unsigned char nh_sel; + unsigned char type; + unsigned char scope; + int err; +}; #ifdef CONFIG_IP_ROUTE_MULTIPATH diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 563e7d612706..cd8e45ab9580 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -516,6 +516,60 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa) #undef BRD1_OK } +static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb ) +{ + + struct fib_result res; + struct flowi fl = { .nl_u = { .ip4_u = { .daddr = frn->fl_addr, + .fwmark = frn->fl_fwmark, + .tos = frn->fl_tos, + .scope = frn->fl_scope } } }; + if (tb) { + local_bh_disable(); + + frn->tb_id = tb->tb_id; + frn->err = tb->tb_lookup(tb, &fl, &res); + + if (!frn->err) { + frn->prefixlen = res.prefixlen; + frn->nh_sel = res.nh_sel; + frn->type = res.type; + frn->scope = res.scope; + } + local_bh_enable(); + } +} + +static void 
nl_fib_input(struct sock *sk, int len) +{ + struct sk_buff *skb = NULL; + struct nlmsghdr *nlh = NULL; + struct fib_result_nl *frn; + int err; + u32 pid; + struct fib_table *tb; + + skb = skb_recv_datagram(sk, 0, 0, &err); + nlh = (struct nlmsghdr *)skb->data; + + frn = (struct fib_result_nl *) NLMSG_DATA(nlh); + tb = fib_get_table(frn->tb_id_in); + + nl_fib_lookup(frn, tb); + + pid = nlh->nlmsg_pid; /*pid of sending process */ + NETLINK_CB(skb).groups = 0; /* not in mcast group */ + NETLINK_CB(skb).pid = 0; /* from kernel */ + NETLINK_CB(skb).dst_pid = pid; + NETLINK_CB(skb).dst_groups = 0; /* unicast */ + netlink_unicast(sk, skb, pid, MSG_DONTWAIT); +} + +static void nl_fib_lookup_init(void) +{ + netlink_kernel_create(NETLINK_FIB_LOOKUP, nl_fib_input); +} + static void fib_disable_ip(struct net_device *dev, int force) { if (fib_sync_down(0, dev, force)) @@ -604,6 +658,7 @@ void __init ip_fib_init(void) register_netdevice_notifier(&fib_netdev_notifier); register_inetaddr_notifier(&fib_inetaddr_notifier); + nl_fib_lookup_init(); } EXPORT_SYMBOL(inet_addr_type); -- cgit v1.2.3-55-g7522