1 /* $OpenBSD: ffs_balloc.c,v 1.34 2007/06/01 18:54:27 pedro Exp $ */
2 /* $NetBSD: ffs_balloc.c,v 1.3 1996/02/09 22:22:21 christos Exp $ */
3
4 /*
5 * Copyright (c) 2002 Networks Associates Technology, Inc.
6 * All rights reserved.
7 *
8 * This software was developed for the FreeBSD Project by Marshall
9 * Kirk McKusick and Network Associates Laboratories, the Security
10 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
11 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
12 * research program.
13 *
14 * Copyright (c) 1982, 1986, 1989, 1993
15 * The Regents of the University of California. All rights reserved.
16 *
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions
19 * are met:
20 * 1. Redistributions of source code must retain the above copyright
21 * notice, this list of conditions and the following disclaimer.
22 * 2. Redistributions in binary form must reproduce the above copyright
23 * notice, this list of conditions and the following disclaimer in the
24 * documentation and/or other materials provided with the distribution.
25 * 3. Neither the name of the University nor the names of its contributors
26 * may be used to endorse or promote products derived from this software
27 * without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 *
41 * @(#)ffs_balloc.c 8.4 (Berkeley) 9/23/93
42 */
43
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/buf.h>
47 #include <sys/proc.h>
48 #include <sys/file.h>
49 #include <sys/mount.h>
50 #include <sys/vnode.h>
51
52 #include <uvm/uvm_extern.h>
53
54 #include <ufs/ufs/quota.h>
55 #include <ufs/ufs/inode.h>
56 #include <ufs/ufs/ufsmount.h>
57 #include <ufs/ufs/ufs_extern.h>
58
59 #include <ufs/ffs/fs.h>
60 #include <ufs/ffs/ffs_extern.h>
61
/*
 * Forward declarations for the per-format allocators defined in this
 * file.  ffs2_balloc() is only built when FFS2 is configured.
 */
int	ffs1_balloc(struct inode *ip, off_t startoffset, int size,
	    struct ucred *cred, int flags, struct buf **bpp);
#ifdef FFS2
int	ffs2_balloc(struct inode *ip, off_t off, int size,
	    struct ucred *cred, int flags, struct buf **bpp);
#endif
66
67 /*
68 * Balloc defines the structure of file system storage
69 * by allocating the physical blocks on a device given
70 * the inode and the logical block number in a file.
71 */
72 int
73 ffs1_balloc(struct inode *ip, off_t startoffset, int size, struct ucred *cred,
74 int flags, struct buf **bpp)
75 {
76 daddr_t lbn;
77 struct fs *fs;
78 daddr_t nb;
79 struct buf *bp, *nbp;
80 struct vnode *vp;
81 struct proc *p;
82 struct indir indirs[NIADDR + 2];
83 int32_t newb, *bap, pref;
84 int deallocated, osize, nsize, num, i, error;
85 int32_t *allocib, *blkp, *allocblk, allociblk[NIADDR+1];
86 int unwindidx = -1;
87
88 vp = ITOV(ip);
89 fs = ip->i_fs;
90 p = curproc;
91 lbn = lblkno(fs, startoffset);
92 size = blkoff(fs, startoffset) + size;
93 if (size > fs->fs_bsize)
94 panic("ffs1_balloc: blk too big");
95 if (bpp != NULL)
96 *bpp = NULL;
97 if (lbn < 0)
98 return (EFBIG);
99
100 /*
101 * If the next write will extend the file into a new block,
102 * and the file is currently composed of a fragment
103 * this fragment has to be extended to be a full block.
104 */
105 nb = lblkno(fs, ip->i_ffs1_size);
106 if (nb < NDADDR && nb < lbn) {
107 osize = blksize(fs, ip, nb);
108 if (osize < fs->fs_bsize && osize > 0) {
109 error = ffs_realloccg(ip, nb,
110 ffs1_blkpref(ip, nb, (int)nb, &ip->i_ffs1_db[0]),
111 osize, (int)fs->fs_bsize, cred, bpp, &newb);
112 if (error)
113 return (error);
114 if (DOINGSOFTDEP(vp))
115 softdep_setup_allocdirect(ip, nb, newb,
116 ip->i_ffs1_db[nb], fs->fs_bsize, osize,
117 bpp ? *bpp : NULL);
118
119 ip->i_ffs1_size = lblktosize(fs, nb + 1);
120 uvm_vnp_setsize(vp, ip->i_ffs1_size);
121 ip->i_ffs1_db[nb] = newb;
122 ip->i_flag |= IN_CHANGE | IN_UPDATE;
123 if (bpp != NULL) {
124 if (flags & B_SYNC)
125 bwrite(*bpp);
126 else
127 bawrite(*bpp);
128 }
129 }
130 }
131 /*
132 * The first NDADDR blocks are direct blocks
133 */
134 if (lbn < NDADDR) {
135 nb = ip->i_ffs1_db[lbn];
136 if (nb != 0 && ip->i_ffs1_size >= lblktosize(fs, lbn + 1)) {
137 /*
138 * The block is an already-allocated direct block
139 * and the file already extends past this block,
140 * thus this must be a whole block.
141 * Just read the block (if requested).
142 */
143
144 if (bpp != NULL) {
145 error = bread(vp, lbn, fs->fs_bsize, NOCRED,
146 bpp);
147 if (error) {
148 brelse(*bpp);
149 return (error);
150 }
151 }
152 return (0);
153 }
154 if (nb != 0) {
155 /*
156 * Consider need to reallocate a fragment.
157 */
158 osize = fragroundup(fs, blkoff(fs, ip->i_ffs1_size));
159 nsize = fragroundup(fs, size);
160 if (nsize <= osize) {
161 /*
162 * The existing block is already
163 * at least as big as we want.
164 * Just read the block (if requested).
165 */
166 if (bpp != NULL) {
167 error = bread(vp, lbn, fs->fs_bsize,
168 NOCRED, bpp);
169 if (error) {
170 brelse(*bpp);
171 return (error);
172 }
173 (*bpp)->b_bcount = osize;
174 }
175 return (0);
176 } else {
177 /*
178 * The existing block is smaller than we
179 * want, grow it.
180 */
181 error = ffs_realloccg(ip, lbn,
182 ffs1_blkpref(ip, lbn, (int)lbn,
183 &ip->i_ffs1_db[0]),
184 osize, nsize, cred, bpp, &newb);
185 if (error)
186 return (error);
187 if (DOINGSOFTDEP(vp))
188 softdep_setup_allocdirect(ip, lbn,
189 newb, nb, nsize, osize,
190 bpp ? *bpp : NULL);
191 }
192 } else {
193 /*
194 * The block was not previously allocated,
195 * allocate a new block or fragment.
196 */
197
198 if (ip->i_ffs1_size < lblktosize(fs, lbn + 1))
199 nsize = fragroundup(fs, size);
200 else
201 nsize = fs->fs_bsize;
202 error = ffs_alloc(ip, lbn,
203 ffs1_blkpref(ip, lbn, (int)lbn, &ip->i_ffs1_db[0]),
204 nsize, cred, &newb);
205 if (error)
206 return (error);
207 if (bpp != NULL) {
208 *bpp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
209 if (nsize < fs->fs_bsize)
210 (*bpp)->b_bcount = nsize;
211 (*bpp)->b_blkno = fsbtodb(fs, newb);
212 if (flags & B_CLRBUF)
213 clrbuf(*bpp);
214 }
215 if (DOINGSOFTDEP(vp))
216 softdep_setup_allocdirect(ip, lbn, newb, 0,
217 nsize, 0, bpp ? *bpp : NULL);
218 }
219 ip->i_ffs1_db[lbn] = newb;
220 ip->i_flag |= IN_CHANGE | IN_UPDATE;
221 return (0);
222 }
223
224 /*
225 * Determine the number of levels of indirection.
226 */
227 pref = 0;
228 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
229 return(error);
230 #ifdef DIAGNOSTIC
231 if (num < 1)
232 panic ("ffs1_balloc: ufs_bmaparray returned indirect block");
233 #endif
234 /*
235 * Fetch the first indirect block allocating if necessary.
236 */
237 --num;
238 nb = ip->i_ffs1_ib[indirs[0].in_off];
239
240 allocib = NULL;
241 allocblk = allociblk;
242 if (nb == 0) {
243 pref = ffs1_blkpref(ip, lbn, 0, (daddr_t *)0);
244 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
245 cred, &newb);
246 if (error)
247 goto fail;
248 nb = newb;
249
250 *allocblk++ = nb;
251 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0);
252 bp->b_blkno = fsbtodb(fs, nb);
253 clrbuf(bp);
254
255 if (DOINGSOFTDEP(vp)) {
256 softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
257 newb, 0, fs->fs_bsize, 0, bp);
258 bdwrite(bp);
259 } else {
260 /*
261 * Write synchronously so that indirect blocks
262 * never point at garbage.
263 */
264 if ((error = bwrite(bp)) != 0)
265 goto fail;
266 }
267 allocib = &ip->i_ffs1_ib[indirs[0].in_off];
268 *allocib = nb;
269 ip->i_flag |= IN_CHANGE | IN_UPDATE;
270 }
271
272 /*
273 * Fetch through the indirect blocks, allocating as necessary.
274 */
275 for (i = 1;;) {
276 error = bread(vp,
277 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
278 if (error) {
279 brelse(bp);
280 goto fail;
281 }
282 bap = (daddr_t *)bp->b_data;
283 nb = bap[indirs[i].in_off];
284 if (i == num)
285 break;
286 i++;
287 if (nb != 0) {
288 brelse(bp);
289 continue;
290 }
291 if (pref == 0)
292 pref = ffs1_blkpref(ip, lbn, 0, (daddr_t *)0);
293 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
294 &newb);
295 if (error) {
296 brelse(bp);
297 goto fail;
298 }
299 nb = newb;
300 *allocblk++ = nb;
301 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0);
302 nbp->b_blkno = fsbtodb(fs, nb);
303 clrbuf(nbp);
304
305 if (DOINGSOFTDEP(vp)) {
306 softdep_setup_allocindir_meta(nbp, ip, bp,
307 indirs[i - 1].in_off, nb);
308 bdwrite(nbp);
309 } else {
310 /*
311 * Write synchronously so that indirect blocks
312 * never point at garbage.
313 */
314 if ((error = bwrite(nbp)) != 0) {
315 brelse(bp);
316 goto fail;
317 }
318 }
319 bap[indirs[i - 1].in_off] = nb;
320 if (allocib == NULL && unwindidx < 0)
321 unwindidx = i - 1;
322 /*
323 * If required, write synchronously, otherwise use
324 * delayed write.
325 */
326 if (flags & B_SYNC) {
327 bwrite(bp);
328 } else {
329 bdwrite(bp);
330 }
331 }
332 /*
333 * Get the data block, allocating if necessary.
334 */
335 if (nb == 0) {
336 pref = ffs1_blkpref(ip, lbn, indirs[i].in_off, &bap[0]);
337 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
338 &newb);
339 if (error) {
340 brelse(bp);
341 goto fail;
342 }
343 nb = newb;
344 *allocblk++ = nb;
345 if (bpp != NULL) {
346 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
347 nbp->b_blkno = fsbtodb(fs, nb);
348 if (flags & B_CLRBUF)
349 clrbuf(nbp);
350 *bpp = nbp;
351 }
352 if (DOINGSOFTDEP(vp))
353 softdep_setup_allocindir_page(ip, lbn, bp,
354 indirs[i].in_off, nb, 0, bpp ? *bpp : NULL);
355 bap[indirs[i].in_off] = nb;
356 /*
357 * If required, write synchronously, otherwise use
358 * delayed write.
359 */
360 if (flags & B_SYNC) {
361 bwrite(bp);
362 } else {
363 bdwrite(bp);
364 }
365 return (0);
366 }
367 brelse(bp);
368 if (bpp != NULL) {
369 if (flags & B_CLRBUF) {
370 error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
371 if (error) {
372 brelse(nbp);
373 goto fail;
374 }
375 } else {
376 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
377 nbp->b_blkno = fsbtodb(fs, nb);
378 }
379 *bpp = nbp;
380 }
381 return (0);
382
383 fail:
384 /*
385 * If we have failed to allocate any blocks, simply return the error.
386 * This is the usual case and avoids the need to fsync the file.
387 */
388 if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
389 return (error);
390 /*
391 * If we have failed part way through block allocation, we have to
392 * deallocate any indirect blocks that we have allocated. We have to
393 * fsync the file before we start to get rid of all of its
394 * dependencies so that we do not leave them dangling. We have to sync
395 * it at the end so that the softdep code does not find any untracked
396 * changes. Although this is really slow, running out of disk space is
397 * not expected to be a common occurence. The error return from fsync
398 * is ignored as we already have an error to return to the user.
399 */
400 VOP_FSYNC(vp, p->p_ucred, MNT_WAIT, p);
401 for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
402 ffs_blkfree(ip, *blkp, fs->fs_bsize);
403 deallocated += fs->fs_bsize;
404 }
405 if (allocib != NULL) {
406 *allocib = 0;
407 } else if (unwindidx >= 0) {
408 int r;
409
410 r = bread(vp, indirs[unwindidx].in_lbn,
411 (int)fs->fs_bsize, NOCRED, &bp);
412 if (r)
413 panic("Could not unwind indirect block, error %d", r);
414 bap = (daddr_t *)bp->b_data;
415 bap[indirs[unwindidx].in_off] = 0;
416 if (flags & B_SYNC) {
417 bwrite(bp);
418 } else {
419 bdwrite(bp);
420 }
421 }
422 if (deallocated) {
423 /*
424 * Restore user's disk quota because allocation failed.
425 */
426 (void)ufs_quota_free_blocks(ip, btodb(deallocated), cred);
427
428 ip->i_ffs1_blocks -= btodb(deallocated);
429 ip->i_flag |= IN_CHANGE | IN_UPDATE;
430 }
431 VOP_FSYNC(vp, p->p_ucred, MNT_WAIT, p);
432 return (error);
433 }
434
#ifdef FFS2
/*
 * ffs2_balloc() is the counterpart of ffs1_balloc() that works on the
 * i_ffs2_* inode fields and views indirect blocks as arrays of 64-bit
 * block numbers.
 *
 *	ip	inode of the file being extended or written.
 *	off	byte offset in the file; the logical block is
 *		lblkno(fs, off).
 *	size	number of bytes wanted starting at off; the in-block offset
 *		plus size must not exceed fs->fs_bsize, otherwise the
 *		function panics.
 *	cred	credentials handed to the allocator and used when giving
 *		quota back on failure.
 *	flags	B_SYNC makes modified buffers be written with bwrite()
 *		instead of a delayed/async write; B_CLRBUF asks for a newly
 *		allocated data buffer to be cleared (and, for an existing
 *		block reached through indirect blocks, for it to be read).
 *	bpp	may be NULL.  When non-NULL it is reset to NULL on entry and
 *		is used to hand back the buffer for the requested block.
 *
 * Returns 0 on success, EFBIG when the computed logical block number is
 * negative, or the error reported by the routine that failed.
 */
int
ffs2_balloc(struct inode *ip, off_t off, int size, struct ucred *cred,
    int flags, struct buf **bpp)
{
	daddr_t lbn, lastlbn, nb, newb, *blkp;
	/*
	 * allociblk[] records every block allocated by this call so that
	 * the fail path can free them; allocblk is the next free slot.
	 */
	daddr_t pref, *allocblk, allociblk[NIADDR + 1];
	/* allocib points at the inode's indirect pointer if we filled it. */
	daddr64_t *bap, *allocib;
	/*
	 * unwindidx: -1 when nothing has to be unwound, 0 when the inode's
	 * own indirect pointer was filled in, otherwise the index into
	 * indirs[] of the indirect block holding the first new pointer.
	 */
	int deallocated, osize, nsize, num, i, error, unwindidx, r;
	struct buf *bp, *nbp;
	struct indir indirs[NIADDR + 2];
	struct fs *fs;
	struct vnode *vp;
	struct proc *p;

	vp = ITOV(ip);
	fs = ip->i_fs;
	p = curproc;
	unwindidx = -1;

	lbn = lblkno(fs, off);
	/* From here on, size is the end offset of the request in its block. */
	size = blkoff(fs, off) + size;

	if (size > fs->fs_bsize)
		panic("ffs2_balloc: block too big");

	if (bpp != NULL)
		*bpp = NULL;

	if (lbn < 0)
		return (EFBIG);

	/*
	 * If the next write will extend the file into a new block, and the
	 * file is currently composed of a fragment, this fragment has to be
	 * extended to be a full block.
	 */
	lastlbn = lblkno(fs, ip->i_ffs2_size);
	if (lastlbn < NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			error = ffs_realloccg(ip, nb, ffs2_blkpref(ip,
			    lastlbn, nb, &ip->i_ffs2_db[0]), osize,
			    (int) fs->fs_bsize, cred, bpp, &newb);
			if (error)
				return (error);

			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, nb, newb,
				    ip->i_ffs2_db[nb], fs->fs_bsize, osize,
				    bpp ? *bpp : NULL);

			ip->i_ffs2_size = lblktosize(fs, nb + 1);
			uvm_vnp_setsize(vp, ip->i_ffs2_size);
			ip->i_ffs2_db[nb] = newb;
			ip->i_flag |= IN_CHANGE | IN_UPDATE;

			if (bpp) {
				if (flags & B_SYNC)
					bwrite(*bpp);
				else
					bawrite(*bpp);
			}
		}
	}

	/*
	 * The first NDADDR blocks are direct.
	 */
	if (lbn < NDADDR) {

		nb = ip->i_ffs2_db[lbn];

		if (nb != 0 && ip->i_ffs2_size >= lblktosize(fs, lbn + 1)) {
			/*
			 * The direct block is already allocated and the file
			 * extends past this block, thus this must be a whole
			 * block. Just read it, if requested.
			 */
			if (bpp != NULL) {
				error = bread(vp, lbn, fs->fs_bsize, NOCRED,
				    bpp);
				if (error) {
					brelse(*bpp);
					return (error);
				}
			}

			return (0);
		}

		if (nb != 0) {
			/*
			 * Consider the need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, ip->i_ffs2_size));
			nsize = fragroundup(fs, size);

			if (nsize <= osize) {
				/*
				 * The existing block is already at least as
				 * big as we want. Just read it, if requested.
				 */
				if (bpp != NULL) {
					error = bread(vp, lbn, fs->fs_bsize,
					    NOCRED, bpp);
					if (error) {
						brelse(*bpp);
						return (error);
					}
					(*bpp)->b_bcount = osize;
				}

				return (0);
			} else {
				/*
				 * The existing block is smaller than we want,
				 * grow it.
				 */
				error = ffs_realloccg(ip, lbn,
				    ffs2_blkpref(ip, lbn, (int) lbn,
				    &ip->i_ffs2_db[0]), osize, nsize, cred,
				    bpp, &newb);
				if (error)
					return (error);

				if (DOINGSOFTDEP(vp))
					softdep_setup_allocdirect(ip, lbn,
					    newb, nb, nsize, osize,
					    bpp ? *bpp : NULL);
			}
		} else {
			/*
			 * The block was not previously allocated, allocate a
			 * new block or fragment.
			 */
			if (ip->i_ffs2_size < lblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;

			error = ffs_alloc(ip, lbn, ffs2_blkpref(ip, lbn,
			    (int) lbn, &ip->i_ffs2_db[0]), nsize, cred, &newb);
			if (error)
				return (error);

			if (bpp != NULL) {
				bp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
				if (nsize < fs->fs_bsize)
					bp->b_bcount = nsize;
				bp->b_blkno = fsbtodb(fs, newb);
				if (flags & B_CLRBUF)
					clrbuf(bp);
				*bpp = bp;
			}

			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, lbn, newb, 0,
				    nsize, 0, bpp ? *bpp : NULL);
		}

		ip->i_ffs2_db[lbn] = newb;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;

		return (0);
	}

	/*
	 * Determine the number of levels of indirection.
	 */
	pref = 0;
	error = ufs_getlbns(vp, lbn, indirs, &num);
	if (error)
		return (error);

#ifdef DIAGNOSTIC
	if (num < 1)
		panic("ffs2_balloc: ufs_bmaparray returned indirect block");
#endif

	/*
	 * Fetch the first indirect block, allocating if necessary.
	 */
	--num;
	nb = ip->i_ffs2_ib[indirs[0].in_off];
	allocib = NULL;
	allocblk = allociblk;

	if (nb == 0) {
		pref = ffs2_blkpref(ip, lbn, 0, NULL);
		error = ffs_alloc(ip, lbn, pref, (int) fs->fs_bsize, cred,
		    &newb);
		if (error)
			goto fail;

		nb = newb;
		*allocblk++ = nb;
		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0);
		bp->b_blkno = fsbtodb(fs, nb);
		clrbuf(bp);

		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
			    newb, 0, fs->fs_bsize, 0, bp);
			bdwrite(bp);
		} else {
			/*
			 * Write synchronously so that indirect blocks never
			 * point at garbage.
			 */
			error = bwrite(bp);
			if (error)
				goto fail;
		}

		unwindidx = 0;
		allocib = &ip->i_ffs2_ib[indirs[0].in_off];
		*allocib = nb;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}

	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */
	for (i = 1;;) {
		error = bread(vp, indirs[i].in_lbn, (int) fs->fs_bsize,
		    NOCRED, &bp);
		if (error) {
			brelse(bp);
			goto fail;
		}

		bap = (int64_t *) bp->b_data;
		nb = bap[indirs[i].in_off];

		if (i == num)
			break;

		/*
		 * i now names the next level down; the entry just read
		 * lives at indirs[i - 1].in_off in the buffer bp.
		 */
		i++;

		if (nb != 0) {
			brelse(bp);
			continue;
		}

		if (pref == 0)
			pref = ffs2_blkpref(ip, lbn, 0, NULL);

		error = ffs_alloc(ip, lbn, pref, (int) fs->fs_bsize, cred,
		    &newb);
		if (error) {
			brelse(bp);
			goto fail;
		}

		nb = newb;
		*allocblk++ = nb;
		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		clrbuf(nbp);

		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocindir_meta(nbp, ip, bp,
			    indirs[i - 1].in_off, nb);
			bdwrite(nbp);
		} else {
			/*
			 * Write synchronously so that indirect blocks never
			 * point at garbage.
			 */
			error = bwrite(nbp);
			if (error) {
				brelse(bp);
				goto fail;
			}
		}

		if (unwindidx < 0)
			unwindidx = i - 1;

		bap[indirs[i - 1].in_off] = nb;

		/*
		 * If required, write synchronously, otherwise use delayed
		 * write.
		 */
		if (flags & B_SYNC)
			bwrite(bp);
		else
			bdwrite(bp);
	}

	/*
	 * Get the data block, allocating if necessary.
	 */
	if (nb == 0) {
		pref = ffs2_blkpref(ip, lbn, indirs[num].in_off, &bap[0]);

		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
		    &newb);
		if (error) {
			brelse(bp);
			goto fail;
		}

		nb = newb;
		*allocblk++ = nb;

		if (bpp != NULL) {
			nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
			nbp->b_blkno = fsbtodb(fs, nb);
			if (flags & B_CLRBUF)
				clrbuf(nbp);
			*bpp = nbp;
		}

		if (DOINGSOFTDEP(vp))
			softdep_setup_allocindir_page(ip, lbn, bp,
			    indirs[num].in_off, nb, 0, bpp ? *bpp : NULL);

		bap[indirs[num].in_off] = nb;

		/*
		 * Nothing below jumps to the fail path, so no unwind
		 * bookkeeping is recorded for the data block pointer.
		 *
		 * If required, write synchronously, otherwise use delayed
		 * write.
		 */
		if (flags & B_SYNC)
			bwrite(bp);
		else
			bdwrite(bp);

		return (0);
	}

	/* The data block already exists; the indirect block is unchanged. */
	brelse(bp);

	if (bpp != NULL) {
		if (flags & B_CLRBUF) {
			error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
			if (error) {
				brelse(nbp);
				goto fail;
			}
		} else {
			nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
			nbp->b_blkno = fsbtodb(fs, nb);
			/*
			 * NOTE(review): ffs1_balloc() does not clear the
			 * buffer in this branch; confirm the difference is
			 * intended.
			 */
			clrbuf(nbp);
		}

		*bpp = nbp;
	}

	return (0);

fail:
	/*
	 * If we have failed to allocate any blocks, simply return the error.
	 * This is the usual case and avoids the need to fsync the file.
	 */
	if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
		return (error);
	/*
	 * If we have failed part way through block allocation, we have to
	 * deallocate any indirect blocks that we have allocated. We have to
	 * fsync the file before we start to get rid of all of its
	 * dependencies so that we do not leave them dangling. We have to sync
	 * it at the end so that the softdep code does not find any untracked
	 * changes. Although this is really slow, running out of disk space is
	 * not expected to be a common occurrence. The error return from fsync
	 * is ignored as we already have an error to return to the user.
	 */
	VOP_FSYNC(vp, p->p_ucred, MNT_WAIT, p);
	if (unwindidx >= 0) {
		/*
		 * First write out any buffers we've created to resolve their
		 * softdeps. This must be done in reverse order of creation so
		 * that we resolve the dependencies in one pass.
		 * Write the cylinder group buffers for these buffers too.
		 */
		for (i = num; i >= unwindidx; i--) {
			/* indirs[0] is the inode's pointer, not a buffer. */
			if (i == 0)
				break;

			bp = getblk(vp, indirs[i].in_lbn, (int) fs->fs_bsize,
			    0, 0);
			if (bp->b_flags & B_DELWRI) {
				/*
				 * Work out the cylinder group block before
				 * bwrite() gives the buffer away.
				 */
				nb = fsbtodb(fs, cgtod(fs, dtog(fs,
				    dbtofsb(fs, bp->b_blkno))));
				bwrite(bp);
				bp = getblk(ip->i_devvp, nb,
				    (int) fs->fs_cgsize, 0, 0);
				if (bp->b_flags & B_DELWRI)
					bwrite(bp);
				else {
					bp->b_flags |= B_INVAL;
					brelse(bp);
				}
			} else {
				bp->b_flags |= B_INVAL;
				brelse(bp);
			}
		}

		if (DOINGSOFTDEP(vp) && unwindidx == 0) {
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			ffs_update(ip, NULL, NULL, MNT_WAIT);
		}

		/*
		 * Now that any dependencies that we created have been
		 * resolved, we can undo the partial allocation.
		 */
		if (unwindidx == 0) {
			*allocib = 0;
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (DOINGSOFTDEP(vp))
				ffs_update(ip, NULL, NULL, MNT_WAIT);
		} else {
			r = bread(vp, indirs[unwindidx].in_lbn,
			    (int) fs->fs_bsize, NOCRED, &bp);
			if (r)
				panic("ffs2_balloc: unwind failed");

			bap = (int64_t *) bp->b_data;
			bap[indirs[unwindidx].in_off] = 0;
			bwrite(bp);
		}

		/* Throw away cached buffers for the levels being freed. */
		for (i = unwindidx + 1; i <= num; i++) {
			bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
			    0);
			bp->b_flags |= B_INVAL;
			brelse(bp);
		}
	}

	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(ip, *blkp, fs->fs_bsize);
		deallocated += fs->fs_bsize;
	}

	if (deallocated) {
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void) ufs_quota_free_blocks(ip, btodb(deallocated), cred);

		ip->i_ffs2_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	VOP_FSYNC(vp, p->p_ucred, MNT_WAIT, p);
	return (error);
}
#endif /* FFS2 */
893
/*
 * Entry point for block allocation: given an inode and a byte range in
 * the file, make sure the backing block exists.  The work is delegated
 * to ffs2_balloc() when FFS2 support is compiled in and the filesystem
 * superblock carries FS_UFS2_MAGIC, and to ffs1_balloc() in every other
 * case.  All arguments and the return value are passed through
 * untouched.
 */
int
ffs_balloc(struct inode *ip, off_t off, int size, struct ucred *cred,
    int flags, struct buf **bpp)
{
#ifdef FFS2
	if (ip->i_fs->fs_magic == FS_UFS2_MAGIC)
		return (ffs2_balloc(ip, off, size, cred, flags, bpp));
#endif
	return (ffs1_balloc(ip, off, size, cred, flags, bpp));
}