summaryrefslogtreecommitdiffstats
path: root/fs/cifs/connect.c
diff options
context:
space:
mode:
authorSteve French2008-10-29 01:47:57 +0100
committerSteve French2008-10-29 01:47:57 +0100
commitedf1ae403896cb7750800508b14996ba6be39a53 (patch)
treeff792ea77e558d473a9f6515397728d31e73fd09 /fs/cifs/connect.c
parentMerge branch 'for-linus' of git://git.kernel.dk/linux-2.6-block (diff)
downloadkernel-qcow2-linux-edf1ae403896cb7750800508b14996ba6be39a53.tar.gz
kernel-qcow2-linux-edf1ae403896cb7750800508b14996ba6be39a53.tar.xz
kernel-qcow2-linux-edf1ae403896cb7750800508b14996ba6be39a53.zip
[CIFS] Reduce number of socket retries in large write path
CIFS in some heavy stress conditions cifs could get EAGAIN repeatedly in smb_send2 which led to repeated retries and eventually failure of large writes which could lead to data corruption. There are three changes that were suggested by various network developers: 1) convert cifs from non-blocking to blocking tcp sendmsg (we left in the retry on failure) 2) change cifs to not set sendbuf and rcvbuf size for the socket (let tcp autotune the buffer sizes since that works much better in the TCP stack now) 3) if we have a partial frame sent in smb_send2, mark the tcp session as invalid (close the socket and reconnect) so we do not corrupt the remaining part of the SMB with the beginning of the next SMB. This does not appear to hurt performance measurably and has been run in various scenarios, but it definately removes a corruption that we were seeing in some high stress test cases. Acked-by: Shirish Pargaonkar <shirishp@us.ibm.com> Signed-off-by: Steve French <sfrench@us.ibm.com>
Diffstat (limited to 'fs/cifs/connect.c')
-rw-r--r--fs/cifs/connect.c50
1 files changed, 37 insertions, 13 deletions
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 71b7661e2260..e9f9248cb3fe 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -92,6 +92,8 @@ struct smb_vol {
bool seal:1; /* request transport encryption on share */
bool nodfs:1; /* Do not request DFS, even if available */
bool local_lease:1; /* check leases only on local system, not remote */
+ bool noblocksnd:1;
+ bool noautotune:1;
unsigned int rsize;
unsigned int wsize;
unsigned int sockopt;
@@ -102,9 +104,11 @@ struct smb_vol {
static int ipv4_connect(struct sockaddr_in *psin_server,
struct socket **csocket,
char *netb_name,
- char *server_netb_name);
+ char *server_netb_name,
+ bool noblocksnd,
+ bool nosndbuf); /* ipv6 never set sndbuf size */
static int ipv6_connect(struct sockaddr_in6 *psin_server,
- struct socket **csocket);
+ struct socket **csocket, bool noblocksnd);
/*
@@ -191,12 +195,13 @@ cifs_reconnect(struct TCP_Server_Info *server)
try_to_freeze();
if (server->protocolType == IPV6) {
rc = ipv6_connect(&server->addr.sockAddr6,
- &server->ssocket);
+ &server->ssocket, server->noautotune);
} else {
rc = ipv4_connect(&server->addr.sockAddr,
&server->ssocket,
server->workstation_RFC1001_name,
- server->server_RFC1001_name);
+ server->server_RFC1001_name,
+ server->noblocksnd, server->noautotune);
}
if (rc) {
cFYI(1, ("reconnect error %d", rc));
@@ -1192,6 +1197,10 @@ cifs_parse_mount_options(char *options, const char *devname,
/* ignore */
} else if (strnicmp(data, "rw", 2) == 0) {
vol->rw = true;
+ } else if (strnicmp(data, "noblocksend", 11) == 0) {
+ vol->noblocksnd = 1;
+ } else if (strnicmp(data, "noautotune", 10) == 0) {
+ vol->noautotune = 1;
} else if ((strnicmp(data, "suid", 4) == 0) ||
(strnicmp(data, "nosuid", 6) == 0) ||
(strnicmp(data, "exec", 4) == 0) ||
@@ -1518,7 +1527,8 @@ static void rfc1002mangle(char *target, char *source, unsigned int length)
static int
ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket,
- char *netbios_name, char *target_name)
+ char *netbios_name, char *target_name,
+ bool noblocksnd, bool noautotune)
{
int rc = 0;
int connected = 0;
@@ -1590,11 +1600,16 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket,
(*csocket)->sk->sk_sndbuf,
(*csocket)->sk->sk_rcvbuf, (*csocket)->sk->sk_rcvtimeo));
(*csocket)->sk->sk_rcvtimeo = 7 * HZ;
+ if (!noblocksnd)
+ (*csocket)->sk->sk_sndtimeo = 3 * HZ;
+
/* make the bufsizes depend on wsize/rsize and max requests */
- if ((*csocket)->sk->sk_sndbuf < (200 * 1024))
- (*csocket)->sk->sk_sndbuf = 200 * 1024;
- if ((*csocket)->sk->sk_rcvbuf < (140 * 1024))
- (*csocket)->sk->sk_rcvbuf = 140 * 1024;
+ if (noautotune) {
+ if ((*csocket)->sk->sk_sndbuf < (200 * 1024))
+ (*csocket)->sk->sk_sndbuf = 200 * 1024;
+ if ((*csocket)->sk->sk_rcvbuf < (140 * 1024))
+ (*csocket)->sk->sk_rcvbuf = 140 * 1024;
+ }
/* send RFC1001 sessinit */
if (psin_server->sin_port == htons(RFC1001_PORT)) {
@@ -1631,7 +1646,7 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket,
/* sizeof RFC1002_SESSION_REQUEST with no scope */
smb_buf->smb_buf_length = 0x81000044;
rc = smb_send(*csocket, smb_buf, 0x44,
- (struct sockaddr *)psin_server);
+ (struct sockaddr *)psin_server, noblocksnd);
kfree(ses_init_buf);
msleep(1); /* RFC1001 layer in at least one server
requires very short break before negprot
@@ -1651,7 +1666,8 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket,
}
static int
-ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket)
+ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket,
+ bool noblocksnd)
{
int rc = 0;
int connected = 0;
@@ -1720,6 +1736,9 @@ ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket)
the default. sock_setsockopt not used because it expects
user space buffer */
(*csocket)->sk->sk_rcvtimeo = 7 * HZ;
+ if (!noblocksnd)
+ (*csocket)->sk->sk_sndtimeo = 3 * HZ;
+
return rc;
}
@@ -1983,11 +2002,14 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
cFYI(1, ("attempting ipv6 connect"));
/* BB should we allow ipv6 on port 139? */
/* other OS never observed in Wild doing 139 with v6 */
- rc = ipv6_connect(&sin_server6, &csocket);
+ rc = ipv6_connect(&sin_server6, &csocket,
+ volume_info.noblocksnd);
} else
rc = ipv4_connect(&sin_server, &csocket,
volume_info.source_rfc1001_name,
- volume_info.target_rfc1001_name);
+ volume_info.target_rfc1001_name,
+ volume_info.noblocksnd,
+ volume_info.noautotune);
if (rc < 0) {
cERROR(1, ("Error connecting to IPv4 socket. "
"Aborting operation"));
@@ -2002,6 +2024,8 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
sock_release(csocket);
goto out;
} else {
+ srvTcp->noblocksnd = volume_info.noblocksnd;
+ srvTcp->noautotune = volume_info.noautotune;
memcpy(&srvTcp->addr.sockAddr, &sin_server,
sizeof(struct sockaddr_in));
atomic_set(&srvTcp->inFlight, 0);