osdl.org!shemminger | aba5acd | 2004-04-15 20:56:59 +0000 | [diff] [blame] | 1 | diff -ur ../vger3-011229/linux/net/unix/af_unix.c linux/net/unix/af_unix.c |
| 2 | --- ../vger3-011229/linux/net/unix/af_unix.c Mon Dec 3 20:24:03 2001 |
| 3 | +++ linux/net/unix/af_unix.c Sat Jan 5 04:30:19 2002 |
| 4 | @@ -112,6 +112,7 @@ |
| 5 | #include <asm/checksum.h> |
| 6 | |
| 7 | int sysctl_unix_max_dgram_qlen = 10; |
| 8 | +int sysctl_unix_stream_pages = MAX_SKB_FRAGS; |
| 9 | |
| 10 | unix_socket *unix_socket_table[UNIX_HASH_SIZE+1]; |
| 11 | rwlock_t unix_table_lock = RW_LOCK_UNLOCKED; |
| 12 | @@ -1123,9 +1124,6 @@ |
| 13 | struct scm_cookie scm; |
| 14 | memset(&scm, 0, sizeof(scm)); |
| 15 | unix_detach_fds(&scm, skb); |
| 16 | - |
| 17 | - /* Alas, it calls VFS */ |
| 18 | - /* So fscking what? fput() had been SMP-safe since the last Summer */ |
| 19 | scm_destroy(&scm); |
| 20 | sock_wfree(skb); |
| 21 | } |
| 22 | @@ -1140,6 +1138,67 @@ |
| 23 | scm->fp = NULL; |
| 24 | } |
| 25 | |
| 26 | +int datagram_copy_fromiovec(struct iovec *iov, struct sk_buff *skb, int size) |
| 27 | +{ |
| 28 | + struct sock *sk; |
| 29 | + struct sk_buff **tail, *skb1; |
| 30 | + int copy = min_t(int, size, skb_tailroom(skb)); |
| 31 | + |
| 32 | + if (memcpy_fromiovec(skb_put(skb, copy), iov, copy)) |
| 33 | + goto do_fault; |
| 34 | + |
| 35 | + if ((size -= copy) == 0) |
| 36 | + return 0; |
| 37 | + |
| 38 | + sk = skb->sk; |
| 39 | + skb1 = skb; |
| 40 | + tail = &skb_shinfo(skb)->frag_list; |
| 41 | + |
| 42 | + do { |
| 43 | + struct page *page; |
| 44 | + int i = skb_shinfo(skb1)->nr_frags; |
| 45 | + |
| 46 | + if (i == MAX_SKB_FRAGS) { |
| 47 | + skb1 = alloc_skb(0, sk->allocation); |
| 48 | + if (skb1 == NULL) |
| 49 | + goto do_oom; |
| 50 | + *tail = skb1; |
| 51 | + tail = &skb1->next; |
| 52 | + i = 0; |
| 53 | + skb->truesize += skb1->truesize; |
| 54 | + atomic_add(skb1->truesize, &sk->wmem_alloc); |
| 55 | + } |
| 56 | + |
| 57 | + page = alloc_pages(sk->allocation, 0); |
| 58 | + if (page == NULL) |
| 59 | + goto do_oom; |
| 60 | + |
| 61 | + copy = min_t(int, size, PAGE_SIZE); |
| 62 | + skb_shinfo(skb1)->nr_frags=i+1; |
| 63 | + skb_shinfo(skb1)->frags[i].page = page; |
| 64 | + skb_shinfo(skb1)->frags[i].page_offset = 0; |
| 65 | + skb_shinfo(skb1)->frags[i].size = copy; |
| 66 | + |
| 67 | + skb1->len += copy; |
| 68 | + skb1->data_len += copy; |
| 69 | + if (skb != skb1) { |
| 70 | + skb->len += copy; |
| 71 | + skb->data_len += copy; |
| 72 | + } |
| 73 | + skb->truesize += PAGE_SIZE; |
| 74 | + atomic_add(PAGE_SIZE, &sk->wmem_alloc); |
| 75 | + if (memcpy_fromiovec(page_address(page), iov, copy)) |
| 76 | + goto do_fault; |
| 77 | + } while ((size -= copy) > 0); |
| 78 | + return 0; |
| 79 | + |
| 80 | +do_oom: |
| 81 | + return -ENOMEM; |
| 82 | + |
| 83 | +do_fault: |
| 84 | + return -EFAULT; |
| 85 | +} |
| 86 | + |
| 87 | /* |
| 88 | * Send AF_UNIX data. |
| 89 | */ |
| 90 | @@ -1155,6 +1214,7 @@ |
| 91 | unsigned hash; |
| 92 | struct sk_buff *skb; |
| 93 | long timeo; |
| 94 | + int alloc; |
| 95 | |
| 96 | err = -EOPNOTSUPP; |
| 97 | if (msg->msg_flags&MSG_OOB) |
| 98 | @@ -1178,10 +1238,14 @@ |
| 99 | goto out; |
| 100 | |
| 101 | err = -EMSGSIZE; |
| 102 | - if ((unsigned)len > sk->sndbuf - 32) |
| 103 | + if ((unsigned)len > sk->sndbuf) |
| 104 | goto out; |
| 105 | |
| 106 | - skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err); |
| 107 | + alloc = len; |
| 108 | + if (alloc > SKB_MAX_HEAD(0)) |
| 109 | + alloc = SKB_MAX_HEAD(0); |
| 110 | + |
| 111 | + skb = sock_alloc_send_skb(sk, alloc, msg->msg_flags&MSG_DONTWAIT, &err); |
| 112 | if (skb==NULL) |
| 113 | goto out; |
| 114 | |
| 115 | @@ -1190,7 +1254,7 @@ |
| 116 | unix_attach_fds(scm, skb); |
| 117 | |
| 118 | skb->h.raw = skb->data; |
| 119 | - err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len); |
| 120 | + err = datagram_copy_fromiovec(msg->msg_iov, skb, len); |
| 121 | if (err) |
| 122 | goto out_free; |
| 123 | |
| 124 | @@ -1275,74 +1339,57 @@ |
| 125 | return err; |
| 126 | } |
| 127 | |
| 128 | - |
| 129 | static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, int len, |
| 130 | struct scm_cookie *scm) |
| 131 | { |
| 132 | struct sock *sk = sock->sk; |
| 133 | unix_socket *other = NULL; |
| 134 | - struct sockaddr_un *sunaddr=msg->msg_name; |
| 135 | - int err,size; |
| 136 | struct sk_buff *skb; |
| 137 | + int err; |
| 138 | int sent=0; |
| 139 | |
| 140 | err = -EOPNOTSUPP; |
| 141 | if (msg->msg_flags&MSG_OOB) |
| 142 | goto out_err; |
| 143 | |
| 144 | - if (msg->msg_namelen) { |
| 145 | - err = (sk->state==TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP); |
| 146 | + err = -ENOTCONN; |
| 147 | + other = unix_peer_get(sk); |
| 148 | + if (!other) |
| 149 | goto out_err; |
| 150 | - } else { |
| 151 | - sunaddr = NULL; |
| 152 | - err = -ENOTCONN; |
| 153 | - other = unix_peer_get(sk); |
| 154 | - if (!other) |
| 155 | - goto out_err; |
| 156 | - } |
| 157 | |
| 158 | if (sk->shutdown&SEND_SHUTDOWN) |
| 159 | goto pipe_err; |
| 160 | |
| 161 | - while(sent < len) |
| 162 | - { |
| 163 | - /* |
| 164 | - * Optimisation for the fact that under 0.01% of X messages typically |
| 165 | - * need breaking up. |
| 166 | - */ |
| 167 | + while(sent < len) { |
| 168 | + int size, alloc; |
| 169 | |
| 170 | - size=len-sent; |
| 171 | + size = len-sent; |
| 172 | |
| 173 | /* Keep two messages in the pipe so it schedules better */ |
| 174 | - if (size > sk->sndbuf/2 - 64) |
| 175 | - size = sk->sndbuf/2 - 64; |
| 176 | + if (size > sk->sndbuf/2) |
| 177 | + size = sk->sndbuf/2; |
| 178 | |
| 179 | - if (size > SKB_MAX_ALLOC) |
| 180 | - size = SKB_MAX_ALLOC; |
| 181 | - |
| 182 | /* |
| 183 | * Grab a buffer |
| 184 | */ |
| 185 | - |
| 186 | - skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err); |
| 187 | + alloc = size; |
| 188 | + |
| 189 | + if (size > SKB_MAX_HEAD(0)) { |
| 190 | + alloc = SKB_MAX_HEAD(0); |
| 191 | + if (size > alloc + sysctl_unix_stream_pages*PAGE_SIZE) |
| 192 | + size = alloc + sysctl_unix_stream_pages*PAGE_SIZE; |
| 193 | + } |
| 194 | + |
| 195 | + skb=sock_alloc_send_skb(sk,alloc,msg->msg_flags&MSG_DONTWAIT, &err); |
| 196 | |
| 197 | if (skb==NULL) |
| 198 | goto out_err; |
| 199 | |
| 200 | - /* |
| 201 | - * If you pass two values to the sock_alloc_send_skb |
| 202 | - * it tries to grab the large buffer with GFP_NOFS |
| 203 | - * (which can fail easily), and if it fails grab the |
| 204 | - * fallback size buffer which is under a page and will |
| 205 | - * succeed. [Alan] |
| 206 | - */ |
| 207 | - size = min_t(int, size, skb_tailroom(skb)); |
| 208 | - |
| 209 | memcpy(UNIXCREDS(skb), &scm->creds, sizeof(struct ucred)); |
| 210 | if (scm->fp) |
| 211 | unix_attach_fds(scm, skb); |
| 212 | |
| 213 | - if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) { |
| 214 | + if ((err = datagram_copy_fromiovec(msg->msg_iov, skb, size)) != 0) { |
| 215 | kfree_skb(skb); |
| 216 | goto out_err; |
| 217 | } |
| 218 | @@ -1418,13 +1465,10 @@ |
| 219 | |
| 220 | scm->creds = *UNIXCREDS(skb); |
| 221 | |
| 222 | - if (!(flags & MSG_PEEK)) |
| 223 | - { |
| 224 | + if (!(flags & MSG_PEEK)) { |
| 225 | if (UNIXCB(skb).fp) |
| 226 | unix_detach_fds(scm, skb); |
| 227 | - } |
| 228 | - else |
| 229 | - { |
| 230 | + } else { |
| 231 | /* It is questionable: on PEEK we could: |
| 232 | - do not return fds - good, but too simple 8) |
| 233 | - return fds, and do not return them on read (old strategy, |
| 234 | @@ -1483,13 +1527,10 @@ |
| 235 | return timeo; |
| 236 | } |
| 237 | |
| 238 | - |
| 239 | - |
| 240 | static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, int size, |
| 241 | int flags, struct scm_cookie *scm) |
| 242 | { |
| 243 | struct sock *sk = sock->sk; |
| 244 | - struct sockaddr_un *sunaddr=msg->msg_name; |
| 245 | int copied = 0; |
| 246 | int check_creds = 0; |
| 247 | int target; |
| 248 | @@ -1515,21 +1556,18 @@ |
| 249 | |
| 250 | down(&sk->protinfo.af_unix.readsem); |
| 251 | |
| 252 | - do |
| 253 | - { |
| 254 | + do { |
| 255 | int chunk; |
| 256 | struct sk_buff *skb; |
| 257 | |
| 258 | skb=skb_dequeue(&sk->receive_queue); |
| 259 | - if (skb==NULL) |
| 260 | - { |
| 261 | + if (skb==NULL) { |
| 262 | if (copied >= target) |
| 263 | break; |
| 264 | |
| 265 | /* |
| 266 | * POSIX 1003.1g mandates this order. |
| 267 | */ |
| 268 | - |
| 269 | if ((err = sock_error(sk)) != 0) |
| 270 | break; |
| 271 | if (sk->shutdown & RCV_SHUTDOWN) |
| 272 | @@ -1551,60 +1589,44 @@ |
| 273 | |
| 274 | if (check_creds) { |
| 275 | /* Never glue messages from different writers */ |
| 276 | - if (memcmp(UNIXCREDS(skb), &scm->creds, sizeof(scm->creds)) != 0) { |
| 277 | - skb_queue_head(&sk->receive_queue, skb); |
| 278 | - break; |
| 279 | - } |
| 280 | + if (memcmp(UNIXCREDS(skb), &scm->creds, sizeof(scm->creds)) != 0) |
| 281 | + goto out_put_back; |
| 282 | } else { |
| 283 | /* Copy credentials */ |
| 284 | scm->creds = *UNIXCREDS(skb); |
| 285 | check_creds = 1; |
| 286 | } |
| 287 | |
| 288 | - /* Copy address just once */ |
| 289 | - if (sunaddr) |
| 290 | - { |
| 291 | - unix_copy_addr(msg, skb->sk); |
| 292 | - sunaddr = NULL; |
| 293 | - } |
| 294 | + chunk = min_t(int, skb->len - sk->protinfo.af_unix.copied, size); |
| 295 | + err = skb_copy_datagram_iovec(skb, sk->protinfo.af_unix.copied, msg->msg_iov, chunk); |
| 296 | + if (err) |
| 297 | + goto out_put_back; |
| 298 | |
| 299 | - chunk = min_t(unsigned int, skb->len, size); |
| 300 | - if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) { |
| 301 | - skb_queue_head(&sk->receive_queue, skb); |
| 302 | - if (copied == 0) |
| 303 | - copied = -EFAULT; |
| 304 | - break; |
| 305 | - } |
| 306 | copied += chunk; |
| 307 | size -= chunk; |
| 308 | |
| 309 | /* Mark read part of skb as used */ |
| 310 | - if (!(flags & MSG_PEEK)) |
| 311 | - { |
| 312 | - skb_pull(skb, chunk); |
| 313 | - |
| 314 | + if (!(flags & MSG_PEEK)) { |
| 315 | if (UNIXCB(skb).fp) |
| 316 | unix_detach_fds(scm, skb); |
| 317 | |
| 318 | /* put the skb back if we didn't use it up.. */ |
| 319 | - if (skb->len) |
| 320 | - { |
| 321 | - skb_queue_head(&sk->receive_queue, skb); |
| 322 | - break; |
| 323 | - } |
| 324 | + if ((sk->protinfo.af_unix.copied += chunk) < skb->len) |
| 325 | + goto out_put_back; |
| 326 | + |
| 327 | + sk->protinfo.af_unix.copied = 0; |
| 328 | |
| 329 | kfree_skb(skb); |
| 330 | |
| 331 | if (scm->fp) |
| 332 | break; |
| 333 | - } |
| 334 | - else |
| 335 | - { |
| 336 | + } else { |
| 337 | /* It is questionable, see note in unix_dgram_recvmsg. |
| 338 | */ |
| 339 | if (UNIXCB(skb).fp) |
| 340 | scm->fp = scm_fp_dup(UNIXCB(skb).fp); |
| 341 | |
| 342 | +out_put_back: |
| 343 | /* put message back and return */ |
| 344 | skb_queue_head(&sk->receive_queue, skb); |
| 345 | break; |
| 346 | @@ -1676,10 +1698,12 @@ |
| 347 | break; |
| 348 | } |
| 349 | |
| 350 | + down(&sk->protinfo.af_unix.readsem); |
| 351 | spin_lock(&sk->receive_queue.lock); |
| 352 | if((skb=skb_peek(&sk->receive_queue))!=NULL) |
| 353 | - amount=skb->len; |
| 354 | + amount=skb->len - sk->protinfo.af_unix.copied; |
| 355 | spin_unlock(&sk->receive_queue.lock); |
| 356 | + up(&sk->protinfo.af_unix.readsem); |
| 357 | err = put_user(amount, (int *)arg); |
| 358 | break; |
| 359 | } |
| 360 | @@ -1734,7 +1758,7 @@ |
| 361 | int i; |
| 362 | unix_socket *s; |
| 363 | |
| 364 | - len+= sprintf(buffer,"Num RefCount Protocol Flags Type St " |
| 365 | + len+= sprintf(buffer,"Peer RcvQueue WMem Flags Type St " |
| 366 | "Inode Path\n"); |
| 367 | |
| 368 | read_lock(&unix_table_lock); |
| 369 | @@ -1742,10 +1766,10 @@ |
| 370 | { |
| 371 | unix_state_rlock(s); |
| 372 | |
| 373 | - len+=sprintf(buffer+len,"%p: %08X %08X %08X %04X %02X %5ld", |
| 374 | - s, |
| 375 | - atomic_read(&s->refcnt), |
| 376 | - 0, |
| 377 | + len+=sprintf(buffer+len,"%08lX: %08X %08X %08X %04X %02X %5ld", |
| 378 | + unix_peer(s) ? sock_i_ino(unix_peer(s)) : 0, |
| 379 | + skb_queue_len(&s->receive_queue), |
| 380 | + atomic_read(&s->wmem_alloc), |
| 381 | s->state == TCP_LISTEN ? __SO_ACCEPTCON : 0, |
| 382 | s->type, |
| 383 | s->socket ? |
| 384 | diff -ur ../vger3-011229/linux/net/unix/sysctl_net_unix.c linux/net/unix/sysctl_net_unix.c |
| 385 | --- ../vger3-011229/linux/net/unix/sysctl_net_unix.c Tue Jan 30 21:20:16 2001 |
| 386 | +++ linux/net/unix/sysctl_net_unix.c Sat Jan 5 04:10:58 2002 |
| 387 | @@ -13,10 +13,14 @@ |
| 388 | #include <linux/sysctl.h> |
| 389 | |
| 390 | extern int sysctl_unix_max_dgram_qlen; |
| 391 | +extern int sysctl_unix_stream_pages; |
| 392 | |
| 393 | ctl_table unix_table[] = { |
| 394 | {NET_UNIX_MAX_DGRAM_QLEN, "max_dgram_qlen", |
| 395 | &sysctl_unix_max_dgram_qlen, sizeof(int), 0600, NULL, |
| 396 | + &proc_dointvec }, |
| 397 | + {NET_UNIX_STREAM_PAGES, "stream_pages", |
| 398 | + &sysctl_unix_stream_pages, sizeof(int), 0600, NULL, |
| 399 | &proc_dointvec }, |
| 400 | {0} |
| 401 | }; |