/*	$OpenBSD: nfs_bio.c,v 1.46 2007/06/01 23:47:57 deraadt Exp $	*/
/*	$NetBSD: nfs_bio.c,v 1.25.4.2 1996/07/08 20:47:04 jtc Exp $	*/

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_bio.c	8.9 (Berkeley) 3/30/95
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/namei.h>

#include <uvm/uvm_extern.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/nfsmount.h>
#include <nfs/nfsnode.h>
#include <nfs/nfs_var.h>

extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
extern int nfs_numasync;
extern struct nfsstats nfsstats;
struct nfs_bufqhead nfs_bufq;

/*
 * Vnode op for read using bio
 * Any similarity to readip() is purely coincidental
 */
int
nfs_bioread(vp, uio, ioflag, cred)
	struct vnode *vp;
	struct uio *uio;
	int ioflag;
	struct ucred *cred;
{
	struct nfsnode *np = VTONFS(vp);
	int biosize, diff;
	struct buf *bp = NULL, *rabp;
	struct vattr vattr;
	struct proc *p;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	daddr64_t lbn, bn, rabn;
	caddr_t baddr;
	int got_buf = 0, nra, error = 0, n = 0, on = 0, not_readin;
	off_t offdiff;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("nfs_read mode");
#endif
	if (uio->uio_resid == 0)
		return (0);
	if (uio->uio_offset < 0)
		return (EINVAL);
	p = uio->uio_procp;
	if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
		(void)nfs_fsinfo(nmp, vp, cred, p);
	biosize = nmp->nm_rsize;
	/*
	 * For nfs, cache consistency can only be maintained approximately.
	 * Although RFC 1094 does not specify the criteria, the following is
	 * believed to be compatible with the reference port.
	 * For nfs:
	 * If the file's modify time on the server has changed since the
	 * last read rpc, or you have written to the file, you may have
	 * lost data cache consistency with the server, so flush all of
	 * the file's data out of the cache.  Then force a getattr rpc to
	 * ensure that you have up-to-date attributes.
	 * NB: This implies that cache data can be read when up to
	 * NFS_ATTRTIMEO seconds out of date.  If you need current
	 * attributes, this can be forced by setting n_attrstamp to 0
	 * before the VOP_GETATTR() call.
	 */
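	/*
	 * Illustrative scenario (not part of the code below): another
	 * client writes the file, so its modify time advances on the
	 * server.  The VOP_GETATTR() issued here sees n_mtime !=
	 * va_mtime and calls nfs_vinvalbuf() to discard the stale
	 * cached blocks before any of them can be copied out.
	 */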
	if (np->n_flag & NMODIFIED) {
		np->n_attrstamp = 0;
		error = VOP_GETATTR(vp, &vattr, cred, p);
		if (error)
			return (error);
		np->n_mtime = vattr.va_mtime.tv_sec;
	} else {
		error = VOP_GETATTR(vp, &vattr, cred, p);
		if (error)
			return (error);
		if (np->n_mtime != vattr.va_mtime.tv_sec) {
			error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
			if (error)
				return (error);
			np->n_mtime = vattr.va_mtime.tv_sec;
		}
	}

	/*
	 * update the cache read creds for this vnode
	 */
	if (np->n_rcred)
		crfree(np->n_rcred);
	np->n_rcred = cred;
	crhold(cred);

	do {
		if ((vp->v_flag & VROOT) && vp->v_type == VLNK) {
			return (nfs_readlinkrpc(vp, uio, cred));
		}
		baddr = (caddr_t)0;
		switch (vp->v_type) {
		case VREG:
			nfsstats.biocache_reads++;
			lbn = uio->uio_offset / biosize;
			on = uio->uio_offset & (biosize - 1);
			bn = lbn * (biosize / DEV_BSIZE);
			not_readin = 1;
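			/*
			 * Illustrative arithmetic (assuming biosize = 8192
			 * and DEV_BSIZE = 512): a read at uio_offset 20000
			 * gives lbn = 2, on = 3616 and bn = 2 * 16 = 32,
			 * i.e. the third 8K cache block, 3616 bytes in.
			 */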

			/*
			 * Start the read ahead(s), as required.
			 */
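			/*
			 * E.g. with nm_readahead = 4, blocks lbn + 1
			 * through lbn + 4 (while they fall inside n_size)
			 * are handed to the nfsiods below, so a sequential
			 * reader finds them resident by the time it asks
			 * for them.
			 */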
			if (nfs_numasync > 0 && nmp->nm_readahead > 0) {
				for (nra = 0; nra < nmp->nm_readahead &&
				    (lbn + 1 + nra) * biosize < np->n_size;
				    nra++) {
					rabn = (lbn + 1 + nra) *
					    (biosize / DEV_BSIZE);
					if (!incore(vp, rabn)) {
						rabp = nfs_getcacheblk(vp,
						    rabn, biosize, p);
						if (!rabp)
							return (EINTR);
						if ((rabp->b_flags &
						    (B_DELWRI | B_DONE)) == 0) {
							rabp->b_flags |=
							    (B_READ | B_ASYNC);
							if (nfs_asyncio(rabp)) {
								rabp->b_flags |= B_INVAL;
								brelse(rabp);
							}
						} else
							brelse(rabp);
					}
				}
			}

			/*
			 * If the block is in the cache and has the required
			 * data in a valid region, just copy it out.
			 * Otherwise, get the block and write back/read in,
			 * as required.
			 */
			if ((bp = incore(vp, bn)) &&
			    (bp->b_flags & (B_BUSY | B_WRITEINPROG)) ==
			    (B_BUSY | B_WRITEINPROG))
				got_buf = 0;
			else {
again:
				bp = nfs_getcacheblk(vp, bn, biosize, p);
				if (!bp)
					return (EINTR);
				got_buf = 1;
				if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
					bp->b_flags |= B_READ;
					not_readin = 0;
					error = nfs_doio(bp, p);
					if (error) {
						brelse(bp);
						return (error);
					}
				}
			}
			n = min((unsigned)(biosize - on), uio->uio_resid);
			offdiff = np->n_size - uio->uio_offset;
			if (offdiff < (off_t)n)
				n = (int)offdiff;
			if (not_readin && n > 0) {
				if (on < bp->b_validoff ||
				    (on + n) > bp->b_validend) {
					if (!got_buf) {
						bp = nfs_getcacheblk(vp, bn,
						    biosize, p);
						if (!bp)
							return (EINTR);
						got_buf = 1;
					}
					bp->b_flags |= B_INVAFTERWRITE;
					if (bp->b_dirtyend > 0) {
						if ((bp->b_flags & B_DELWRI) == 0)
							panic("nfsbioread");
						if (VOP_BWRITE(bp) == EINTR)
							return (EINTR);
					} else
						brelse(bp);
					goto again;
				}
			}
			diff = (on >= bp->b_validend) ? 0 :
			    (bp->b_validend - on);
			if (diff < n)
				n = diff;
			break;
		case VLNK:
			nfsstats.biocache_readlinks++;
			bp = nfs_getcacheblk(vp, 0, NFS_MAXPATHLEN, p);
			if (!bp)
				return (EINTR);
			if ((bp->b_flags & B_DONE) == 0) {
				bp->b_flags |= B_READ;
				error = nfs_doio(bp, p);
				if (error) {
					brelse(bp);
					return (error);
				}
			}
			n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
			got_buf = 1;
			on = 0;
			break;
		default:
			printf(" nfsbioread: type %x unexpected\n", vp->v_type);
			break;
		}

		if (n > 0) {
			if (!baddr)
				baddr = bp->b_data;
			error = uiomove(baddr + on, (int)n, uio);
		}
		switch (vp->v_type) {
		case VREG:
			break;
		case VLNK:
			n = 0;
			break;
		default:
			printf(" nfsbioread: type %x unexpected\n", vp->v_type);
		}
		if (got_buf)
			brelse(bp);
	} while (error == 0 && uio->uio_resid > 0 && n > 0);
	return (error);
}

/*
 * Vnode op for write using bio
 */
int
nfs_write(v)
	void *v;
{
	struct vop_write_args *ap = v;
	int biosize;
	struct uio *uio = ap->a_uio;
	struct proc *p = uio->uio_procp;
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	struct ucred *cred = ap->a_cred;
	int ioflag = ap->a_ioflag;
	struct buf *bp;
	struct vattr vattr;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	daddr64_t lbn, bn;
	int n, on, error = 0, extended = 0, wrotedta = 0, truncated = 0;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("nfs_write mode");
	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
		panic("nfs_write proc");
#endif
	if (vp->v_type != VREG)
		return (EIO);
	if (np->n_flag & NWRITEERR) {
		np->n_flag &= ~NWRITEERR;
		return (np->n_error);
	}
	if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
		(void)nfs_fsinfo(nmp, vp, cred, p);
	if (ioflag & (IO_APPEND | IO_SYNC)) {
		if (np->n_flag & NMODIFIED) {
			np->n_attrstamp = 0;
			error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
			if (error)
				return (error);
		}
		if (ioflag & IO_APPEND) {
			np->n_attrstamp = 0;
			error = VOP_GETATTR(vp, &vattr, cred, p);
			if (error)
				return (error);
			uio->uio_offset = np->n_size;
		}
	}
	if (uio->uio_offset < 0)
		return (EINVAL);
	if (uio->uio_resid == 0)
		return (0);
	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, I don't think it matters.
	 */
	if (p && uio->uio_offset + uio->uio_resid >
	    p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
		psignal(p, SIGXFSZ);
		return (EFBIG);
	}

	/*
	 * update the cache write creds for this node.
	 */
	if (np->n_wcred)
		crfree(np->n_wcred);
	np->n_wcred = cred;
	crhold(cred);

	/*
	 * I use nm_rsize, not nm_wsize so that all buffer cache blocks
	 * will be the same size within a filesystem. nfs_writerpc will
	 * still use nm_wsize when sizing the rpc's.
	 */
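	/*
	 * Illustrative consequence: with nm_rsize = 8192 and
	 * nm_wsize = 4096, cache blocks here stay 8K while
	 * nfs_writerpc splits each one into two 4K write rpc's
	 * on the wire.
	 */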
	biosize = nmp->nm_rsize;
	do {

		/*
		 * XXX make sure we aren't cached in the VM page cache
		 */
		uvm_vnp_uncache(vp);

		nfsstats.biocache_writes++;
		lbn = uio->uio_offset / biosize;
		on = uio->uio_offset & (biosize - 1);
		n = min((unsigned)(biosize - on), uio->uio_resid);
		bn = lbn * (biosize / DEV_BSIZE);
again:
		bp = nfs_getcacheblk(vp, bn, biosize, p);
		if (!bp)
			return (EINTR);
		np->n_flag |= NMODIFIED;
		if (uio->uio_offset + n > np->n_size) {
			np->n_size = uio->uio_offset + n;
			uvm_vnp_setsize(vp, (u_long)np->n_size);
			extended = 1;
		} else if (uio->uio_offset + n < np->n_size)
			truncated = 1;

		/*
		 * If the new write will leave a contiguous dirty
		 * area, just update the b_dirtyoff and b_dirtyend,
		 * otherwise force a write rpc of the old dirty area.
		 */
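		/*
		 * Example (illustrative): with a dirty region of
		 * [512, 1024) in this block, a new write at on = 2048
		 * is not contiguous (on > b_dirtyend), so the old dirty
		 * area is pushed with VOP_BWRITE() and the loop retries
		 * at "again".
		 */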
		if (bp->b_dirtyend > 0 &&
		    (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
			bp->b_proc = p;
			if (VOP_BWRITE(bp) == EINTR)
				return (EINTR);
			goto again;
		}

		error = uiomove((char *)bp->b_data + on, n, uio);
		if (error) {
			bp->b_flags |= B_ERROR;
			brelse(bp);
			return (error);
		}
		if (bp->b_dirtyend > 0) {
			bp->b_dirtyoff = min(on, bp->b_dirtyoff);
			bp->b_dirtyend = max((on + n), bp->b_dirtyend);
		} else {
			bp->b_dirtyoff = on;
			bp->b_dirtyend = on + n;
		}
		if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
		    bp->b_validoff > bp->b_dirtyend) {
			bp->b_validoff = bp->b_dirtyoff;
			bp->b_validend = bp->b_dirtyend;
		} else {
			bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff);
			bp->b_validend = max(bp->b_validend, bp->b_dirtyend);
		}
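		/*
		 * E.g. valid [0, 4096) plus dirty [2048, 6144) merges
		 * to valid [0, 6144) in the else branch above.
		 */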

		wrotedta = 1;

		/*
		 * Since this block is being modified, it must be written
		 * again and not just committed.
		 */

		if (NFS_ISV3(vp)) {
			rw_enter_write(&np->n_commitlock);
			if (bp->b_flags & B_NEEDCOMMIT) {
				bp->b_flags &= ~B_NEEDCOMMIT;
				nfs_del_tobecommitted_range(vp, bp);
			}
			nfs_del_committed_range(vp, bp);
			rw_exit_write(&np->n_commitlock);
		} else
			bp->b_flags &= ~B_NEEDCOMMIT;
		/*
		 * If IO_SYNC, do a synchronous bwrite().
		 */
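		/*
		 * Summary of the three cases below: IO_SYNC writes go
		 * out synchronously and return any error; a write that
		 * fills the block exactly is started asynchronously
		 * right away; a partial block is delayed with bdwrite()
		 * in the hope that later writes will fill it first.
		 */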
		if (ioflag & IO_SYNC) {
			bp->b_proc = p;
			error = VOP_BWRITE(bp);
			if (error)
				return (error);
		} else if ((n + on) == biosize) {
			bp->b_proc = (struct proc *)0;
			bp->b_flags |= B_ASYNC;
			(void)nfs_writebp(bp, 0);
		} else {
			bdwrite(bp);
		}
	} while (uio->uio_resid > 0 && n > 0);

	if (wrotedta)
		VN_KNOTE(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0) |
		    (truncated ? NOTE_TRUNCATE : 0));

	return (0);
}

/*
 * Get an nfs cache block.
 * Allocate a new one if the block isn't currently in the cache
 * and return the block marked busy. If the calling process is
 * interrupted by a signal for an interruptible mount point, return
 * NULL.
 */
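/*
 * Typical call pattern (illustrative; taken from the callers above):
 *
 *	bp = nfs_getcacheblk(vp, bn, biosize, p);
 *	if (!bp)
 *		return (EINTR);
 */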
struct buf *
nfs_getcacheblk(vp, bn, size, p)
	struct vnode *vp;
	daddr64_t bn;
	int size;
	struct proc *p;
{
	struct buf *bp;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);

	if (nmp->nm_flag & NFSMNT_INT) {
		bp = getblk(vp, bn, size, PCATCH, 0);
		while (bp == (struct buf *)0) {
			if (nfs_sigintr(nmp, (struct nfsreq *)0, p))
				return ((struct buf *)0);
			bp = getblk(vp, bn, size, 0, 2 * hz);
		}
	} else
		bp = getblk(vp, bn, size, 0, 0);
	return (bp);
}

/*
 * Flush and invalidate all dirty buffers. If another process is already
 * doing the flush, just wait for completion.
 */
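/*
 * The NFLUSHINPROG/NFLUSHWANT handshake (sketch): the first flusher
 * sets NFLUSHINPROG; latecomers set NFLUSHWANT and tsleep() on
 * &np->n_flag until the owner clears both flags and wakeup()s them.
 */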
int
nfs_vinvalbuf(vp, flags, cred, p, intrflg)
	struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int intrflg;
{
	struct nfsnode *np = VTONFS(vp);
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int error = 0, slpflag, slptimeo;

	if ((nmp->nm_flag & NFSMNT_INT) == 0)
		intrflg = 0;
	if (intrflg) {
		slpflag = PCATCH;
		slptimeo = 2 * hz;
	} else {
		slpflag = 0;
		slptimeo = 0;
	}
	/*
	 * First wait for any other process doing a flush to complete.
	 */
	while (np->n_flag & NFLUSHINPROG) {
		np->n_flag |= NFLUSHWANT;
		error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval",
		    slptimeo);
		if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p))
			return (EINTR);
	}

	/*
	 * Now, flush as required.
	 */
	np->n_flag |= NFLUSHINPROG;
	error = vinvalbuf(vp, flags, cred, p, slpflag, 0);
	while (error) {
		if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
			np->n_flag &= ~NFLUSHINPROG;
			if (np->n_flag & NFLUSHWANT) {
				np->n_flag &= ~NFLUSHWANT;
				wakeup((caddr_t)&np->n_flag);
			}
			return (EINTR);
		}
		error = vinvalbuf(vp, flags, cred, p, 0, slptimeo);
	}
	np->n_flag &= ~(NMODIFIED | NFLUSHINPROG);
	if (np->n_flag & NFLUSHWANT) {
		np->n_flag &= ~NFLUSHWANT;
		wakeup((caddr_t)&np->n_flag);
	}
	return (0);
}

/*
 * Initiate asynchronous I/O. Return an error if no nfsiods are available.
 * This is mainly to avoid queueing async I/O requests when the nfsiods
 * are all hung on a dead server.
 */
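/*
 * The consumer side lives in the nfsiod loop (see nfs_syscalls.c),
 * which, roughly, registers itself in nfs_iodwant[], sleeps, and on
 * wakeup dequeues buffers from nfs_bufq and hands each to nfs_doio().
 */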
int
nfs_asyncio(bp)
	struct buf *bp;
{
	int i, s;

	if (nfs_numasync == 0)
		return (EIO);
	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
		if (nfs_iodwant[i]) {
			if ((bp->b_flags & B_READ) == 0) {
				bp->b_flags |= B_WRITEINPROG;
			}

			TAILQ_INSERT_TAIL(&nfs_bufq, bp, b_freelist);
			nfs_iodwant[i] = (struct proc *)0;
			wakeup((caddr_t)&nfs_iodwant[i]);
			return (0);
		}
	/*
	 * If it is a read, or a write already marked B_WRITEINPROG or
	 * B_NOCACHE, return EIO so the caller will fall back to nfs_doio()
	 * and do the I/O synchronously.
	 */
	if (bp->b_flags & (B_READ | B_WRITEINPROG | B_NOCACHE))
		return (EIO);

	/*
	 * Just turn the async write into a delayed write, instead of
	 * doing it synchronously.  Hopefully, at least one of the nfsiods
	 * is currently doing a write for this file and will pick up the
	 * delayed writes before going back to sleep.
	 */
	s = splbio();
	buf_dirty(bp);
	biodone(bp);
	splx(s);
	return (0);
}

/*
 * Do an I/O operation to/from a cache block. This may be called
 * synchronously or from an nfsiod.
 */
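/*
 * Three paths below (summary): B_PHYS transfers (physio-style, e.g.
 * reading /dev/drum), B_READ (read or readlink rpc's that fill the
 * cache block), and writes, which push the block's dirty region and
 * track NFSv3 unstable writes through the commit machinery.
 */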
int
nfs_doio(bp, p)
	struct buf *bp;
	struct proc *p;
{
	struct uio *uiop;
	struct vnode *vp;
	struct nfsnode *np;
	struct nfsmount *nmp;
	int s, error = 0, diff, len, iomode, must_commit = 0;
	struct uio uio;
	struct iovec io;

	vp = bp->b_vp;
	np = VTONFS(vp);
	nmp = VFSTONFS(vp->v_mount);
	uiop = &uio;
	uiop->uio_iov = &io;
	uiop->uio_iovcnt = 1;
	uiop->uio_segflg = UIO_SYSSPACE;
	uiop->uio_procp = p;

	/*
	 * Historically, paging was done with physio, but no more...
	 */
	if (bp->b_flags & B_PHYS) {
		/*
		 * ...though reading /dev/drum still gets us here.
		 */
		io.iov_len = uiop->uio_resid = bp->b_bcount;
		/* mapping was done by vmapbuf() */
		io.iov_base = bp->b_data;
		uiop->uio_offset = ((off_t)bp->b_blkno) << DEV_BSHIFT;
		if (bp->b_flags & B_READ) {
			uiop->uio_rw = UIO_READ;
			nfsstats.read_physios++;
			error = nfs_readrpc(vp, uiop);
		} else {
			iomode = NFSV3WRITE_DATASYNC;
			uiop->uio_rw = UIO_WRITE;
			nfsstats.write_physios++;
			error = nfs_writerpc(vp, uiop, &iomode, &must_commit);
		}
		if (error) {
			bp->b_flags |= B_ERROR;
			bp->b_error = error;
		}
	} else if (bp->b_flags & B_READ) {
		io.iov_len = uiop->uio_resid = bp->b_bcount;
		io.iov_base = bp->b_data;
		uiop->uio_rw = UIO_READ;
		switch (vp->v_type) {
		case VREG:
			uiop->uio_offset = ((off_t)bp->b_blkno) << DEV_BSHIFT;
			nfsstats.read_bios++;
			error = nfs_readrpc(vp, uiop);
			if (!error) {
				bp->b_validoff = 0;
				if (uiop->uio_resid) {
					/*
					 * If len > 0, there is a hole in the
					 * file and no writes after the hole
					 * have been pushed to the server yet.
					 * Just zero fill the rest of the
					 * valid area.
					 */
					diff = bp->b_bcount - uiop->uio_resid;
					len = np->n_size -
					    ((((off_t)bp->b_blkno) << DEV_BSHIFT)
					    + diff);
					if (len > 0) {
						len = min(len, uiop->uio_resid);
						bzero((char *)bp->b_data + diff,
						    len);
						bp->b_validend = diff + len;
					} else
						bp->b_validend = diff;
				} else
					bp->b_validend = bp->b_bcount;
			}
			if (p && (vp->v_flag & VTEXT) &&
			    (np->n_mtime != np->n_vattr.va_mtime.tv_sec)) {
				uprintf("Process killed due to text file modification\n");
				psignal(p, SIGKILL);
			}
			break;
		case VLNK:
			uiop->uio_offset = (off_t)0;
			nfsstats.readlink_bios++;
			error = nfs_readlinkrpc(vp, uiop, curproc->p_ucred);
			break;
		default:
			printf("nfs_doio: type %x unexpected\n", vp->v_type);
			break;
		}
		if (error) {
			bp->b_flags |= B_ERROR;
			bp->b_error = error;
		}
	} else {
		io.iov_len = uiop->uio_resid = bp->b_dirtyend
		    - bp->b_dirtyoff;
		uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE
		    + bp->b_dirtyoff;
		io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
		uiop->uio_rw = UIO_WRITE;
		nfsstats.write_bios++;
		if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE))
		    == B_ASYNC)
			iomode = NFSV3WRITE_UNSTABLE;
		else
			iomode = NFSV3WRITE_FILESYNC;
		bp->b_flags |= B_WRITEINPROG;
		error = nfs_writerpc(vp, uiop, &iomode, &must_commit);

		rw_enter_write(&np->n_commitlock);
		if (!error && iomode == NFSV3WRITE_UNSTABLE) {
			bp->b_flags |= B_NEEDCOMMIT;
			nfs_add_tobecommitted_range(vp, bp);
		} else {
			bp->b_flags &= ~B_NEEDCOMMIT;
			nfs_del_committed_range(vp, bp);
		}
		rw_exit_write(&np->n_commitlock);

		bp->b_flags &= ~B_WRITEINPROG;

		/*
		 * For an interrupted write, the buffer is still valid and the
		 * write hasn't been pushed to the server yet, so we can't set
		 * B_ERROR; instead, report the interruption by setting
		 * B_EINTR.  For the B_ASYNC case, B_EINTR is not relevant, so
		 * the rpc attempt is essentially a noop.
		 * For the case of a V3 write rpc not being committed to stable
		 * storage, the block is still dirty and requires either a
		 * commit rpc or another write rpc with
		 * iomode == NFSV3WRITE_FILESYNC before the block is reused.
		 * This is indicated by setting the B_DELWRI and B_NEEDCOMMIT
		 * flags.
		 */
		if (error == EINTR ||
		    (!error && (bp->b_flags & B_NEEDCOMMIT))) {
			s = splbio();
			buf_dirty(bp);
			splx(s);

			if (!(bp->b_flags & B_ASYNC) && error)
				bp->b_flags |= B_EINTR;
		} else {
			if (error) {
				bp->b_flags |= B_ERROR;
				bp->b_error = np->n_error = error;
				np->n_flag |= NWRITEERR;
			}
			bp->b_dirtyoff = bp->b_dirtyend = 0;
		}
	}
	bp->b_resid = uiop->uio_resid;
	if (must_commit)
		nfs_clearcommit(vp->v_mount);
	s = splbio();
	biodone(bp);
	splx(s);
	return (error);
}