summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorAnton Arapov <anton@redhat.com>2012-06-08 12:58:00 +0200
committerAnton Arapov <anton@redhat.com>2012-06-08 12:58:00 +0200
commit6792a3f47a2e42d7164292bf7f1a55cfc4c91652 (patch)
treeb90c002bfbbeaec92f5d8a2383dcabf6524016f7 /fs
parentfe2895d3d55146cac65b273c0f83e2c7e543cd0e (diff)
downloadkernel-uprobes-6792a3f47a2e42d7164292bf7f1a55cfc4c91652.tar.gz
kernel-uprobes-6792a3f47a2e42d7164292bf7f1a55cfc4c91652.tar.xz
kernel-uprobes-6792a3f47a2e42d7164292bf7f1a55cfc4c91652.zip
fedora kernel: b920e9b748c595f970bf80ede7832d39f8d567dav3.4.1-2
Signed-off-by: Anton Arapov <anton@redhat.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/v9fs.c16
-rw-r--r--fs/9p/vfs_super.c5
-rw-r--r--fs/Kconfig5
-rw-r--r--fs/Makefile1
-rw-r--r--fs/adfs/super.c3
-rw-r--r--fs/affs/super.c7
-rw-r--r--fs/afs/file.c2
-rw-r--r--fs/afs/fsclient.c8
-rw-r--r--fs/afs/mntpt.c4
-rw-r--r--fs/afs/super.c7
-rw-r--r--fs/aio.c175
-rw-r--r--fs/anon_inodes.c109
-rw-r--r--fs/attr.c2
-rw-r--r--fs/autofs4/dev-ioctl.c2
-rw-r--r--fs/autofs4/init.c6
-rw-r--r--fs/autofs4/inode.c10
-rw-r--r--fs/autofs4/waitq.c2
-rw-r--r--fs/bad_inode.c2
-rw-r--r--fs/befs/linuxvfs.c3
-rw-r--r--fs/bfs/inode.c3
-rw-r--r--fs/binfmt_aout.c37
-rw-r--r--fs/binfmt_elf.c92
-rw-r--r--fs/binfmt_elf_fdpic.c25
-rw-r--r--fs/binfmt_em86.c3
-rw-r--r--fs/binfmt_flat.c19
-rw-r--r--fs/binfmt_misc.c10
-rw-r--r--fs/binfmt_script.c3
-rw-r--r--fs/binfmt_som.c16
-rw-r--r--fs/bio-integrity.c10
-rw-r--r--fs/bio.c9
-rw-r--r--fs/block_dev.c11
-rw-r--r--fs/btrfs/async-thread.c15
-rw-r--r--fs/btrfs/async-thread.h4
-rw-r--r--fs/btrfs/backref.c34
-rw-r--r--fs/btrfs/check-integrity.c1
-rw-r--r--fs/btrfs/compression.c52
-rw-r--r--fs/btrfs/compression.h2
-rw-r--r--fs/btrfs/ctree.c412
-rw-r--r--fs/btrfs/ctree.h171
-rw-r--r--fs/btrfs/delayed-inode.c33
-rw-r--r--fs/btrfs/delayed-ref.c33
-rw-r--r--fs/btrfs/dir-item.c10
-rw-r--r--fs/btrfs/disk-io.c673
-rw-r--r--fs/btrfs/disk-io.h13
-rw-r--r--fs/btrfs/export.c2
-rw-r--r--fs/btrfs/extent-tree.c749
-rw-r--r--fs/btrfs/extent_io.c1103
-rw-r--r--fs/btrfs/extent_io.h64
-rw-r--r--fs/btrfs/file-item.c61
-rw-r--r--fs/btrfs/file.c61
-rw-r--r--fs/btrfs/free-space-cache.c26
-rw-r--r--fs/btrfs/inode-item.c6
-rw-r--r--fs/btrfs/inode-map.c25
-rw-r--r--fs/btrfs/inode.c537
-rw-r--r--fs/btrfs/ioctl.c195
-rw-r--r--fs/btrfs/ioctl.h4
-rw-r--r--fs/btrfs/locking.c6
-rw-r--r--fs/btrfs/locking.h4
-rw-r--r--fs/btrfs/lzo.c4
-rw-r--r--fs/btrfs/ordered-data.c60
-rw-r--r--fs/btrfs/ordered-data.h24
-rw-r--r--fs/btrfs/orphan.c2
-rw-r--r--fs/btrfs/reada.c58
-rw-r--r--fs/btrfs/relocation.c134
-rw-r--r--fs/btrfs/root-tree.c25
-rw-r--r--fs/btrfs/scrub.c1405
-rw-r--r--fs/btrfs/struct-funcs.c53
-rw-r--r--fs/btrfs/super.c207
-rw-r--r--fs/btrfs/transaction.c224
-rw-r--r--fs/btrfs/transaction.h3
-rw-r--r--fs/btrfs/tree-log.c98
-rw-r--r--fs/btrfs/tree-log.h2
-rw-r--r--fs/btrfs/volumes.c273
-rw-r--r--fs/btrfs/volumes.h4
-rw-r--r--fs/btrfs/zlib.c4
-rw-r--r--fs/buffer.c22
-rw-r--r--fs/cachefiles/namei.c3
-rw-r--r--fs/ceph/inode.c11
-rw-r--r--fs/ceph/mds_client.c7
-rw-r--r--fs/ceph/snap.c2
-rw-r--r--fs/ceph/super.c22
-rw-r--r--fs/ceph/super.h4
-rw-r--r--fs/ceph/xattr.c202
-rw-r--r--fs/cifs/README6
-rw-r--r--fs/cifs/cifs_debug.c71
-rw-r--r--fs/cifs/cifs_debug.h4
-rw-r--r--fs/cifs/cifsfs.c53
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsglob.h76
-rw-r--r--fs/cifs/cifsproto.h29
-rw-r--r--fs/cifs/cifssmb.c163
-rw-r--r--fs/cifs/connect.c1481
-rw-r--r--fs/cifs/dir.c17
-rw-r--r--fs/cifs/file.c272
-rw-r--r--fs/cifs/misc.c119
-rw-r--r--fs/cifs/netmisc.c6
-rw-r--r--fs/cifs/transport.c305
-rw-r--r--fs/coda/inode.c7
-rw-r--r--fs/coda/psdev.c1
-rw-r--r--fs/coda/upcall.c1
-rw-r--r--fs/compat.c27
-rw-r--r--fs/compat_ioctl.c3
-rw-r--r--fs/configfs/configfs_internal.h7
-rw-r--r--fs/configfs/dir.c72
-rw-r--r--fs/configfs/inode.c62
-rw-r--r--fs/configfs/mount.c16
-rw-r--r--fs/configfs/symlink.c12
-rw-r--r--fs/cramfs/inode.c12
-rw-r--r--fs/dcache.c75
-rw-r--r--fs/dcookies.c2
-rw-r--r--fs/debugfs/file.c16
-rw-r--r--fs/debugfs/inode.c149
-rw-r--r--fs/devpts/inode.c88
-rw-r--r--fs/dlm/debug_fs.c9
-rw-r--r--fs/dlm/dir.c17
-rw-r--r--fs/dlm/lock.c20
-rw-r--r--fs/dlm/lock.h3
-rw-r--r--fs/dlm/lowcomms.c24
-rw-r--r--fs/ecryptfs/file.c9
-rw-r--r--fs/ecryptfs/main.c19
-rw-r--r--fs/ecryptfs/super.c1
-rw-r--r--fs/efs/super.c3
-rw-r--r--fs/eventfd.c2
-rw-r--r--fs/eventpoll.c32
-rw-r--r--fs/exec.c41
-rw-r--r--fs/exofs/dir.c4
-rw-r--r--fs/exofs/namei.c13
-rw-r--r--fs/exofs/super.c11
-rw-r--r--fs/ext2/dir.c4
-rw-r--r--fs/ext2/ext2.h631
-rw-r--r--fs/ext2/namei.c13
-rw-r--r--fs/ext2/super.c4
-rw-r--r--fs/ext2/xattr_security.c5
-rw-r--r--fs/ext2/xattr_trusted.c5
-rw-r--r--fs/ext2/xip.c2
-rw-r--r--fs/ext3/acl.c8
-rw-r--r--fs/ext3/balloc.c94
-rw-r--r--fs/ext3/bitmap.c4
-rw-r--r--fs/ext3/dir.c7
-rw-r--r--fs/ext3/ext3.h1322
-rw-r--r--fs/ext3/ext3_jbd.c2
-rw-r--r--fs/ext3/file.c6
-rw-r--r--fs/ext3/fsync.c8
-rw-r--r--fs/ext3/hash.c4
-rw-r--r--fs/ext3/ialloc.c13
-rw-r--r--fs/ext3/inode.c21
-rw-r--r--fs/ext3/ioctl.c7
-rw-r--r--fs/ext3/namei.c14
-rw-r--r--fs/ext3/resize.c5
-rw-r--r--fs/ext3/super.c21
-rw-r--r--fs/ext3/symlink.c4
-rw-r--r--fs/ext3/xattr.c7
-rw-r--r--fs/ext3/xattr_security.c6
-rw-r--r--fs/ext3/xattr_trusted.c6
-rw-r--r--fs/ext3/xattr_user.c5
-rw-r--r--fs/ext4/balloc.c63
-rw-r--r--fs/ext4/dir.c227
-rw-r--r--fs/ext4/ext4.h38
-rw-r--r--fs/ext4/ext4_extents.h4
-rw-r--r--fs/ext4/ext4_jbd2.h72
-rw-r--r--fs/ext4/extents.c328
-rw-r--r--fs/ext4/hash.c4
-rw-r--r--fs/ext4/ialloc.c260
-rw-r--r--fs/ext4/inode.c49
-rw-r--r--fs/ext4/mballoc.c342
-rw-r--r--fs/ext4/mballoc.h20
-rw-r--r--fs/ext4/migrate.c2
-rw-r--r--fs/ext4/mmp.c4
-rw-r--r--fs/ext4/namei.c2
-rw-r--r--fs/ext4/resize.c23
-rw-r--r--fs/ext4/super.c1108
-rw-r--r--fs/ext4/xattr.c25
-rw-r--r--fs/fat/inode.c8
-rw-r--r--fs/fat/namei_vfat.c83
-rw-r--r--fs/fcntl.c18
-rw-r--r--fs/file.c54
-rw-r--r--fs/file_table.c3
-rw-r--r--fs/freevxfs/vxfs_super.c3
-rw-r--r--fs/fs-writeback.c26
-rw-r--r--fs/fs_struct.c31
-rw-r--r--fs/fuse/dev.c4
-rw-r--r--fs/fuse/dir.c25
-rw-r--r--fs/fuse/file.c133
-rw-r--r--fs/fuse/inode.c10
-rw-r--r--fs/gfs2/Kconfig7
-rw-r--r--fs/gfs2/aops.c16
-rw-r--r--fs/gfs2/bmap.c10
-rw-r--r--fs/gfs2/dir.c4
-rw-r--r--fs/gfs2/file.c16
-rw-r--r--fs/gfs2/glock.c210
-rw-r--r--fs/gfs2/incore.h50
-rw-r--r--fs/gfs2/inode.c17
-rw-r--r--fs/gfs2/lock_dlm.c133
-rw-r--r--fs/gfs2/log.c244
-rw-r--r--fs/gfs2/log.h5
-rw-r--r--fs/gfs2/lops.c103
-rw-r--r--fs/gfs2/main.c18
-rw-r--r--fs/gfs2/ops_fstype.c12
-rw-r--r--fs/gfs2/quota.c6
-rw-r--r--fs/gfs2/rgrp.c191
-rw-r--r--fs/gfs2/rgrp.h10
-rw-r--r--fs/gfs2/super.c3
-rw-r--r--fs/gfs2/trace_gfs2.h60
-rw-r--r--fs/gfs2/util.c1
-rw-r--r--fs/gfs2/util.h3
-rw-r--r--fs/gfs2/xattr.c16
-rw-r--r--fs/hfs/super.c6
-rw-r--r--fs/hfsplus/super.c17
-rw-r--r--fs/hostfs/hostfs.h3
-rw-r--r--fs/hostfs/hostfs_kern.c9
-rw-r--r--fs/hostfs/hostfs_user.c4
-rw-r--r--fs/hpfs/super.c6
-rw-r--r--fs/hppfs/hppfs.c9
-rw-r--r--fs/hugetlbfs/inode.c72
-rw-r--r--fs/inode.c28
-rw-r--r--fs/ioctl.c2
-rw-r--r--fs/isofs/inode.c3
-rw-r--r--fs/jbd/journal.c14
-rw-r--r--fs/jbd/transaction.c4
-rw-r--r--fs/jbd2/checkpoint.c140
-rw-r--r--fs/jbd2/commit.c52
-rw-r--r--fs/jbd2/journal.c376
-rw-r--r--fs/jbd2/recovery.c5
-rw-r--r--fs/jbd2/revoke.c12
-rw-r--r--fs/jbd2/transaction.c50
-rw-r--r--fs/jffs2/acl.c2
-rw-r--r--fs/jffs2/background.c29
-rw-r--r--fs/jffs2/build.c6
-rw-r--r--fs/jffs2/compr.c32
-rw-r--r--fs/jffs2/compr_lzo.c1
-rw-r--r--fs/jffs2/compr_rubin.c2
-rw-r--r--fs/jffs2/compr_zlib.c45
-rw-r--r--fs/jffs2/debug.c22
-rw-r--r--fs/jffs2/debug.h50
-rw-r--r--fs/jffs2/dir.c41
-rw-r--r--fs/jffs2/erase.c72
-rw-r--r--fs/jffs2/file.c33
-rw-r--r--fs/jffs2/fs.c73
-rw-r--r--fs/jffs2/gc.c324
-rw-r--r--fs/jffs2/malloc.c2
-rw-r--r--fs/jffs2/nodelist.c30
-rw-r--r--fs/jffs2/nodemgmt.c214
-rw-r--r--fs/jffs2/os-linux.h4
-rw-r--r--fs/jffs2/read.c70
-rw-r--r--fs/jffs2/readinode.c2
-rw-r--r--fs/jffs2/scan.c229
-rw-r--r--fs/jffs2/security.c4
-rw-r--r--fs/jffs2/summary.c16
-rw-r--r--fs/jffs2/super.c30
-rw-r--r--fs/jffs2/symlink.c7
-rw-r--r--fs/jffs2/wbuf.c148
-rw-r--r--fs/jffs2/write.c113
-rw-r--r--fs/jffs2/xattr.c2
-rw-r--r--fs/jfs/namei.c13
-rw-r--r--fs/jfs/super.c12
-rw-r--r--fs/libfs.c19
-rw-r--r--fs/lockd/clnt4xdr.c2
-rw-r--r--fs/lockd/clntlock.c3
-rw-r--r--fs/lockd/clntxdr.c8
-rw-r--r--fs/lockd/host.c42
-rw-r--r--fs/lockd/mon.c21
-rw-r--r--fs/lockd/netns.h12
-rw-r--r--fs/lockd/svc.c117
-rw-r--r--fs/lockd/svclock.c59
-rw-r--r--fs/logfs/dir.c21
-rw-r--r--fs/logfs/readwrite.c38
-rw-r--r--fs/logfs/segment.c4
-rw-r--r--fs/logfs/super.c12
-rw-r--r--fs/minix/dir.c4
-rw-r--r--fs/minix/inode.c38
-rw-r--r--fs/minix/minix.h1
-rw-r--r--fs/minix/namei.c14
-rw-r--r--fs/mpage.c2
-rw-r--r--fs/namei.c314
-rw-r--r--fs/ncpfs/file.c1
-rw-r--r--fs/ncpfs/inode.c7
-rw-r--r--fs/ncpfs/mmap.c1
-rw-r--r--fs/nfs/Kconfig20
-rw-r--r--fs/nfs/blocklayout/blocklayout.c165
-rw-r--r--fs/nfs/blocklayout/blocklayout.h11
-rw-r--r--fs/nfs/blocklayout/blocklayoutdev.c46
-rw-r--r--fs/nfs/blocklayout/blocklayoutdm.c33
-rw-r--r--fs/nfs/blocklayout/extents.c2
-rw-r--r--fs/nfs/cache_lib.c61
-rw-r--r--fs/nfs/cache_lib.h10
-rw-r--r--fs/nfs/callback.c19
-rw-r--r--fs/nfs/callback.h3
-rw-r--r--fs/nfs/callback_proc.c99
-rw-r--r--fs/nfs/callback_xdr.c17
-rw-r--r--fs/nfs/client.c253
-rw-r--r--fs/nfs/delegation.c57
-rw-r--r--fs/nfs/delegation.h3
-rw-r--r--fs/nfs/dir.c10
-rw-r--r--fs/nfs/direct.c7
-rw-r--r--fs/nfs/dns_resolve.c130
-rw-r--r--fs/nfs/dns_resolve.h14
-rw-r--r--fs/nfs/file.c3
-rw-r--r--fs/nfs/fscache.c2
-rw-r--r--fs/nfs/getroot.c7
-rw-r--r--fs/nfs/idmap.c721
-rw-r--r--fs/nfs/inode.c116
-rw-r--r--fs/nfs/internal.h23
-rw-r--r--fs/nfs/mount_clnt.c16
-rw-r--r--fs/nfs/namespace.c98
-rw-r--r--fs/nfs/netns.h27
-rw-r--r--fs/nfs/nfs2xdr.c2
-rw-r--r--fs/nfs/nfs3acl.c2
-rw-r--r--fs/nfs/nfs3proc.c24
-rw-r--r--fs/nfs/nfs3xdr.c4
-rw-r--r--fs/nfs/nfs4_fs.h66
-rw-r--r--fs/nfs/nfs4filelayout.c271
-rw-r--r--fs/nfs/nfs4filelayout.h7
-rw-r--r--fs/nfs/nfs4filelayoutdev.c92
-rw-r--r--fs/nfs/nfs4namespace.c96
-rw-r--r--fs/nfs/nfs4proc.c660
-rw-r--r--fs/nfs/nfs4state.c350
-rw-r--r--fs/nfs/nfs4xdr.c750
-rw-r--r--fs/nfs/nfsroot.c2
-rw-r--r--fs/nfs/objlayout/objio_osd.c54
-rw-r--r--fs/nfs/objlayout/objlayout.c144
-rw-r--r--fs/nfs/objlayout/objlayout.h2
-rw-r--r--fs/nfs/pagelist.c92
-rw-r--r--fs/nfs/pnfs.c48
-rw-r--r--fs/nfs/pnfs.h98
-rw-r--r--fs/nfs/pnfs_dev.c4
-rw-r--r--fs/nfs/proc.c24
-rw-r--r--fs/nfs/read.c15
-rw-r--r--fs/nfs/super.c172
-rw-r--r--fs/nfs/unlink.c45
-rw-r--r--fs/nfs/write.c214
-rw-r--r--fs/nfsd/current_stateid.h28
-rw-r--r--fs/nfsd/export.c2
-rw-r--r--fs/nfsd/fault_inject.c2
-rw-r--r--fs/nfsd/netns.h34
-rw-r--r--fs/nfsd/nfs4callback.c27
-rw-r--r--fs/nfsd/nfs4idmap.c53
-rw-r--r--fs/nfsd/nfs4proc.c118
-rw-r--r--fs/nfsd/nfs4recover.c647
-rw-r--r--fs/nfsd/nfs4state.c367
-rw-r--r--fs/nfsd/nfs4xdr.c132
-rw-r--r--fs/nfsd/nfsctl.c28
-rw-r--r--fs/nfsd/nfsd.h7
-rw-r--r--fs/nfsd/nfssvc.c48
-rw-r--r--fs/nfsd/state.h47
-rw-r--r--fs/nfsd/stats.c5
-rw-r--r--fs/nfsd/vfs.c44
-rw-r--r--fs/nfsd/vfs.h2
-rw-r--r--fs/nfsd/xdr4.h34
-rw-r--r--fs/nilfs2/cpfile.c94
-rw-r--r--fs/nilfs2/dat.c38
-rw-r--r--fs/nilfs2/dir.c4
-rw-r--r--fs/nilfs2/ifile.c4
-rw-r--r--fs/nilfs2/mdt.c4
-rw-r--r--fs/nilfs2/namei.c11
-rw-r--r--fs/nilfs2/page.c8
-rw-r--r--fs/nilfs2/recovery.c4
-rw-r--r--fs/nilfs2/segbuf.c4
-rw-r--r--fs/nilfs2/sufile.c68
-rw-r--r--fs/nilfs2/super.c4
-rw-r--r--fs/notify/notification.c3
-rw-r--r--fs/ntfs/aops.c20
-rw-r--r--fs/ntfs/attrib.c20
-rw-r--r--fs/ntfs/file.c16
-rw-r--r--fs/ntfs/layout.h4
-rw-r--r--fs/ntfs/super.c17
-rw-r--r--fs/ocfs2/aops.c16
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c14
-rw-r--r--fs/ocfs2/ioctl.c2
-rw-r--r--fs/ocfs2/super.c51
-rw-r--r--fs/omfs/inode.c6
-rw-r--r--fs/open.c4
-rw-r--r--fs/openpromfs/inode.c3
-rw-r--r--fs/pipe.c9
-rw-r--r--fs/posix_acl.c2
-rw-r--r--fs/proc/array.c119
-rw-r--r--fs/proc/base.c80
-rw-r--r--fs/proc/inode.c17
-rw-r--r--fs/proc/internal.h12
-rw-r--r--fs/proc/kcore.c8
-rw-r--r--fs/proc/namespaces.c6
-rw-r--r--fs/proc/page.c2
-rw-r--r--fs/proc/proc_sysctl.c1259
-rw-r--r--fs/proc/root.c9
-rw-r--r--fs/proc/stat.c96
-rw-r--r--fs/proc/task_mmu.c365
-rw-r--r--fs/proc/task_nommu.c69
-rw-r--r--fs/proc/vmcore.c23
-rw-r--r--fs/pstore/inode.c57
-rw-r--r--fs/pstore/platform.c30
-rw-r--r--fs/qnx4/inode.c88
-rw-r--r--fs/qnx4/namei.c9
-rw-r--r--fs/qnx4/qnx4.h2
-rw-r--r--fs/qnx6/Kconfig26
-rw-r--r--fs/qnx6/Makefile7
-rw-r--r--fs/qnx6/README8
-rw-r--r--fs/qnx6/dir.c291
-rw-r--r--fs/qnx6/inode.c698
-rw-r--r--fs/qnx6/namei.c42
-rw-r--r--fs/qnx6/qnx6.h135
-rw-r--r--fs/qnx6/super_mmi.c150
-rw-r--r--fs/quota/dquot.c190
-rw-r--r--fs/quota/quota.c3
-rw-r--r--fs/ramfs/inode.c30
-rw-r--r--fs/read_write.c2
-rw-r--r--fs/readdir.c2
-rw-r--r--fs/reiserfs/acl.h76
-rw-r--r--fs/reiserfs/bitmap.c4
-rw-r--r--fs/reiserfs/dir.c2
-rw-r--r--fs/reiserfs/do_balan.c2
-rw-r--r--fs/reiserfs/file.c6
-rw-r--r--fs/reiserfs/fix_node.c2
-rw-r--r--fs/reiserfs/hashes.c2
-rw-r--r--fs/reiserfs/ibalance.c2
-rw-r--r--fs/reiserfs/inode.c6
-rw-r--r--fs/reiserfs/ioctl.c2
-rw-r--r--fs/reiserfs/item_ops.c2
-rw-r--r--fs/reiserfs/journal.c3
-rw-r--r--fs/reiserfs/lbalance.c4
-rw-r--r--fs/reiserfs/lock.c2
-rw-r--r--fs/reiserfs/namei.c6
-rw-r--r--fs/reiserfs/objectid.c3
-rw-r--r--fs/reiserfs/prints.c4
-rw-r--r--fs/reiserfs/procfs.c3
-rw-r--r--fs/reiserfs/reiserfs.h2923
-rw-r--r--fs/reiserfs/resize.c3
-rw-r--r--fs/reiserfs/stree.c6
-rw-r--r--fs/reiserfs/super.c12
-rw-r--r--fs/reiserfs/tail_conversion.c6
-rw-r--r--fs/reiserfs/xattr.c6
-rw-r--r--fs/reiserfs/xattr.h122
-rw-r--r--fs/reiserfs/xattr_acl.c6
-rw-r--r--fs/reiserfs/xattr_security.c4
-rw-r--r--fs/reiserfs/xattr_trusted.c4
-rw-r--r--fs/reiserfs/xattr_user.c4
-rw-r--r--fs/romfs/storage.c2
-rw-r--r--fs/romfs/super.c6
-rw-r--r--fs/select.c4
-rw-r--r--fs/seq_file.c114
-rw-r--r--fs/splice.c9
-rw-r--r--fs/squashfs/block.c3
-rw-r--r--fs/squashfs/dir.c7
-rw-r--r--fs/squashfs/file.c8
-rw-r--r--fs/squashfs/namei.c5
-rw-r--r--fs/squashfs/squashfs_fs.h19
-rw-r--r--fs/squashfs/super.c8
-rw-r--r--fs/squashfs/symlink.c4
-rw-r--r--fs/stack.c2
-rw-r--r--fs/stat.c4
-rw-r--r--fs/statfs.c2
-rw-r--r--fs/super.c5
-rw-r--r--fs/sync.c2
-rw-r--r--fs/sysfs/dir.c227
-rw-r--r--fs/sysfs/group.c6
-rw-r--r--fs/sysfs/mount.c5
-rw-r--r--fs/sysfs/sysfs.h17
-rw-r--r--fs/sysv/namei.c12
-rw-r--r--fs/sysv/super.c27
-rw-r--r--fs/sysv/sysv.h1
-rw-r--r--fs/ubifs/debug.c410
-rw-r--r--fs/ubifs/debug.h3
-rw-r--r--fs/ubifs/dir.c18
-rw-r--r--fs/ubifs/file.c4
-rw-r--r--fs/ubifs/recovery.c3
-rw-r--r--fs/ubifs/sb.c19
-rw-r--r--fs/ubifs/super.c6
-rw-r--r--fs/ubifs/ubifs.h11
-rw-r--r--fs/udf/balloc.c84
-rw-r--r--fs/udf/file.c4
-rw-r--r--fs/udf/ialloc.c1
-rw-r--r--fs/udf/inode.c20
-rw-r--r--fs/udf/namei.c13
-rw-r--r--fs/udf/super.c11
-rw-r--r--fs/udf/udf_i.h1
-rw-r--r--fs/ufs/inode.c1
-rw-r--r--fs/ufs/namei.c14
-rw-r--r--fs/ufs/super.c8
-rw-r--r--fs/xattr.c42
-rw-r--r--fs/xattr_acl.c2
-rw-r--r--fs/xfs/Makefile3
-rw-r--r--fs/xfs/xfs_alloc.c36
-rw-r--r--fs/xfs/xfs_alloc.h12
-rw-r--r--fs/xfs/xfs_aops.c183
-rw-r--r--fs/xfs/xfs_aops.h4
-rw-r--r--fs/xfs/xfs_attr.c16
-rw-r--r--fs/xfs/xfs_attr_leaf.c40
-rw-r--r--fs/xfs/xfs_bmap.c22
-rw-r--r--fs/xfs/xfs_buf.c17
-rw-r--r--fs/xfs/xfs_buf.h1
-rw-r--r--fs/xfs/xfs_da_btree.c32
-rw-r--r--fs/xfs/xfs_dfrag.c24
-rw-r--r--fs/xfs/xfs_dir2_block.c1
-rw-r--r--fs/xfs/xfs_discard.c61
-rw-r--r--fs/xfs/xfs_dquot.c418
-rw-r--r--fs/xfs/xfs_dquot.h49
-rw-r--r--fs/xfs/xfs_file.c84
-rw-r--r--fs/xfs/xfs_iget.c31
-rw-r--r--fs/xfs/xfs_inode.c94
-rw-r--r--fs/xfs/xfs_inode.h27
-rw-r--r--fs/xfs/xfs_inode_item.c297
-rw-r--r--fs/xfs/xfs_inode_item.h16
-rw-r--r--fs/xfs/xfs_ioctl.c28
-rw-r--r--fs/xfs/xfs_ioctl32.c2
-rw-r--r--fs/xfs/xfs_iomap.c19
-rw-r--r--fs/xfs/xfs_iops.c71
-rw-r--r--fs/xfs/xfs_itable.c24
-rw-r--r--fs/xfs/xfs_log.c615
-rw-r--r--fs/xfs/xfs_log.h16
-rw-r--r--fs/xfs/xfs_log_priv.h28
-rw-r--r--fs/xfs/xfs_log_recover.c6
-rw-r--r--fs/xfs/xfs_mount.c8
-rw-r--r--fs/xfs/xfs_mount.h5
-rw-r--r--fs/xfs/xfs_qm.c628
-rw-r--r--fs/xfs/xfs_qm.h49
-rw-r--r--fs/xfs/xfs_qm_bhv.c42
-rw-r--r--fs/xfs/xfs_qm_stats.c105
-rw-r--r--fs/xfs/xfs_qm_stats.h53
-rw-r--r--fs/xfs/xfs_qm_syscalls.c130
-rw-r--r--fs/xfs/xfs_quota.h2
-rw-r--r--fs/xfs/xfs_quota_priv.h11
-rw-r--r--fs/xfs/xfs_rename.c11
-rw-r--r--fs/xfs/xfs_rtalloc.c9
-rw-r--r--fs/xfs/xfs_sb.h1
-rw-r--r--fs/xfs/xfs_stats.c99
-rw-r--r--fs/xfs/xfs_stats.h10
-rw-r--r--fs/xfs/xfs_super.c204
-rw-r--r--fs/xfs/xfs_super.h8
-rw-r--r--fs/xfs/xfs_sync.c46
-rw-r--r--fs/xfs/xfs_sync.h2
-rw-r--r--fs/xfs/xfs_trace.h106
-rw-r--r--fs/xfs/xfs_trans.c31
-rw-r--r--fs/xfs/xfs_trans_ail.c83
-rw-r--r--fs/xfs/xfs_trans_buf.c25
-rw-r--r--fs/xfs/xfs_trans_dquot.c21
-rw-r--r--fs/xfs/xfs_trans_inode.c8
-rw-r--r--fs/xfs/xfs_trans_priv.h3
-rw-r--r--fs/xfs/xfs_utils.c2
-rw-r--r--fs/xfs/xfs_vnode.h1
-rw-r--r--fs/xfs/xfs_vnodeops.c16
-rw-r--r--fs/xfs/xfs_vnodeops.h3
539 files changed, 26539 insertions, 13592 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 1964f98e74b..b85efa77394 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -594,21 +594,21 @@ static int __init init_v9fs(void)
int err;
pr_info("Installing v9fs 9p2000 file system support\n");
/* TODO: Setup list of registered trasnport modules */
- err = register_filesystem(&v9fs_fs_type);
- if (err < 0) {
- pr_err("Failed to register filesystem\n");
- return err;
- }
err = v9fs_cache_register();
if (err < 0) {
pr_err("Failed to register v9fs for caching\n");
- goto out_fs_unreg;
+ return err;
}
err = v9fs_sysfs_init();
if (err < 0) {
pr_err("Failed to register with sysfs\n");
+ goto out_cache;
+ }
+ err = register_filesystem(&v9fs_fs_type);
+ if (err < 0) {
+ pr_err("Failed to register filesystem\n");
goto out_sysfs_cleanup;
}
@@ -617,8 +617,8 @@ static int __init init_v9fs(void)
out_sysfs_cleanup:
v9fs_sysfs_cleanup();
-out_fs_unreg:
- unregister_filesystem(&v9fs_fs_type);
+out_cache:
+ v9fs_cache_unregister();
return err;
}
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 7b0cd87b07c..8c92a9ba833 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -155,9 +155,8 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
goto release_sb;
}
- root = d_alloc_root(inode);
+ root = d_make_root(inode);
if (!root) {
- iput(inode);
retval = -ENOMEM;
goto release_sb;
}
@@ -260,7 +259,7 @@ static int v9fs_statfs(struct dentry *dentry, struct kstatfs *buf)
if (v9fs_proto_dotl(v9ses)) {
res = p9_client_statfs(fid, &rs);
if (res == 0) {
- buf->f_type = V9FS_MAGIC;
+ buf->f_type = rs.type;
buf->f_bsize = rs.bsize;
buf->f_blocks = rs.blocks;
buf->f_bfree = rs.bfree;
diff --git a/fs/Kconfig b/fs/Kconfig
index d621f02a3f9..f95ae3a027f 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -4,6 +4,10 @@
menu "File systems"
+# Use unaligned word dcache accesses
+config DCACHE_WORD_ACCESS
+ bool
+
if BLOCK
source "fs/ext2/Kconfig"
@@ -210,6 +214,7 @@ source "fs/minix/Kconfig"
source "fs/omfs/Kconfig"
source "fs/hpfs/Kconfig"
source "fs/qnx4/Kconfig"
+source "fs/qnx6/Kconfig"
source "fs/romfs/Kconfig"
source "fs/pstore/Kconfig"
source "fs/sysv/Kconfig"
diff --git a/fs/Makefile b/fs/Makefile
index 93804d4d66e..2fb97793467 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -102,6 +102,7 @@ obj-$(CONFIG_UBIFS_FS) += ubifs/
obj-$(CONFIG_AFFS_FS) += affs/
obj-$(CONFIG_ROMFS_FS) += romfs/
obj-$(CONFIG_QNX4FS_FS) += qnx4/
+obj-$(CONFIG_QNX6FS_FS) += qnx6/
obj-$(CONFIG_AUTOFS4_FS) += autofs4/
obj-$(CONFIG_ADFS_FS) += adfs/
obj-$(CONFIG_FUSE_FS) += fuse/
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 8e3b36ace30..06fdcc9382c 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -483,10 +483,9 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
sb->s_d_op = &adfs_dentry_operations;
root = adfs_iget(sb, &root_obj);
- sb->s_root = d_alloc_root(root);
+ sb->s_root = d_make_root(root);
if (!sb->s_root) {
int i;
- iput(root);
for (i = 0; i < asb->s_map_size; i++)
brelse(asb->s_map[i].dm_bh);
kfree(asb->s_map);
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 8ba73fed796..0782653a05a 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -473,7 +473,7 @@ got_root:
root_inode = affs_iget(sb, root_block);
if (IS_ERR(root_inode)) {
ret = PTR_ERR(root_inode);
- goto out_error_noinode;
+ goto out_error;
}
if (AFFS_SB(sb)->s_flags & SF_INTL)
@@ -481,7 +481,7 @@ got_root:
else
sb->s_d_op = &affs_dentry_operations;
- sb->s_root = d_alloc_root(root_inode);
+ sb->s_root = d_make_root(root_inode);
if (!sb->s_root) {
printk(KERN_ERR "AFFS: Get root inode failed\n");
goto out_error;
@@ -494,9 +494,6 @@ got_root:
* Begin the cascaded cleanup ...
*/
out_error:
- if (root_inode)
- iput(root_inode);
-out_error_noinode:
kfree(sbi->s_bitmap);
affs_brelse(root_bh);
kfree(sbi->s_prefix);
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 14d89fa58fe..8f6e9234d56 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -251,7 +251,7 @@ static int afs_readpages(struct file *file, struct address_space *mapping,
ASSERT(key != NULL);
vnode = AFS_FS_I(mapping->host);
- if (vnode->flags & AFS_VNODE_DELETED) {
+ if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
_leave(" = -ESTALE");
return -ESTALE;
}
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 2f213d109c2..b960ff05ea0 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -365,10 +365,10 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
_debug("extract data");
if (call->count > 0) {
page = call->reply3;
- buffer = kmap_atomic(page, KM_USER0);
+ buffer = kmap_atomic(page);
ret = afs_extract_data(call, skb, last, buffer,
call->count);
- kunmap_atomic(buffer, KM_USER0);
+ kunmap_atomic(buffer);
switch (ret) {
case 0: break;
case -EAGAIN: return 0;
@@ -411,9 +411,9 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
if (call->count < PAGE_SIZE) {
_debug("clear");
page = call->reply3;
- buffer = kmap_atomic(page, KM_USER0);
+ buffer = kmap_atomic(page);
memset(buffer + call->count, 0, PAGE_SIZE - call->count);
- kunmap_atomic(buffer, KM_USER0);
+ kunmap_atomic(buffer);
}
_leave(" = 0 [done]");
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 8f4ce2658b7..298cf8919ec 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -200,9 +200,9 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
if (PageError(page))
goto error;
- buf = kmap_atomic(page, KM_USER0);
+ buf = kmap_atomic(page);
memcpy(devname, buf, size);
- kunmap_atomic(buf, KM_USER0);
+ kunmap_atomic(buf);
page_cache_release(page);
page = NULL;
}
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 983ec59fc80..f02b31e7e64 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -301,7 +301,6 @@ static int afs_fill_super(struct super_block *sb,
{
struct afs_super_info *as = sb->s_fs_info;
struct afs_fid fid;
- struct dentry *root = NULL;
struct inode *inode = NULL;
int ret;
@@ -327,18 +326,16 @@ static int afs_fill_super(struct super_block *sb,
set_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(inode)->flags);
ret = -ENOMEM;
- root = d_alloc_root(inode);
- if (!root)
+ sb->s_root = d_make_root(inode);
+ if (!sb->s_root)
goto error;
sb->s_d_op = &afs_fs_dentry_operations;
- sb->s_root = root;
_leave(" = 0");
return 0;
error:
- iput(inode);
_leave(" = %d", ret);
return ret;
}
diff --git a/fs/aio.c b/fs/aio.c
index b9d64d89a04..e7f2fad7b4c 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -13,7 +13,7 @@
#include <linux/errno.h>
#include <linux/time.h>
#include <linux/aio_abi.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/syscalls.h>
#include <linux/backing-dev.h>
#include <linux/uio.h>
@@ -93,9 +93,8 @@ static void aio_free_ring(struct kioctx *ctx)
put_page(info->ring_pages[i]);
if (info->mmap_size) {
- down_write(&ctx->mm->mmap_sem);
- do_munmap(ctx->mm, info->mmap_base, info->mmap_size);
- up_write(&ctx->mm->mmap_sem);
+ BUG_ON(ctx->mm != current->mm);
+ vm_munmap(info->mmap_base, info->mmap_size);
}
if (info->ring_pages && info->ring_pages != info->internal_pages)
@@ -160,7 +159,7 @@ static int aio_setup_ring(struct kioctx *ctx)
info->nr = nr_events; /* trusted copy */
- ring = kmap_atomic(info->ring_pages[0], KM_USER0);
+ ring = kmap_atomic(info->ring_pages[0]);
ring->nr = nr_events; /* user copy */
ring->id = ctx->user_id;
ring->head = ring->tail = 0;
@@ -168,47 +167,38 @@ static int aio_setup_ring(struct kioctx *ctx)
ring->compat_features = AIO_RING_COMPAT_FEATURES;
ring->incompat_features = AIO_RING_INCOMPAT_FEATURES;
ring->header_length = sizeof(struct aio_ring);
- kunmap_atomic(ring, KM_USER0);
+ kunmap_atomic(ring);
return 0;
}
/* aio_ring_event: returns a pointer to the event at the given index from
- * kmap_atomic(, km). Release the pointer with put_aio_ring_event();
+ * kmap_atomic(). Release the pointer with put_aio_ring_event();
*/
#define AIO_EVENTS_PER_PAGE (PAGE_SIZE / sizeof(struct io_event))
#define AIO_EVENTS_FIRST_PAGE ((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event))
#define AIO_EVENTS_OFFSET (AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE)
-#define aio_ring_event(info, nr, km) ({ \
+#define aio_ring_event(info, nr) ({ \
unsigned pos = (nr) + AIO_EVENTS_OFFSET; \
struct io_event *__event; \
__event = kmap_atomic( \
- (info)->ring_pages[pos / AIO_EVENTS_PER_PAGE], km); \
+ (info)->ring_pages[pos / AIO_EVENTS_PER_PAGE]); \
__event += pos % AIO_EVENTS_PER_PAGE; \
__event; \
})
-#define put_aio_ring_event(event, km) do { \
+#define put_aio_ring_event(event) do { \
struct io_event *__event = (event); \
(void)__event; \
- kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK), km); \
+ kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK)); \
} while(0)
static void ctx_rcu_free(struct rcu_head *head)
{
struct kioctx *ctx = container_of(head, struct kioctx, rcu_head);
- unsigned nr_events = ctx->max_reqs;
-
kmem_cache_free(kioctx_cachep, ctx);
-
- if (nr_events) {
- spin_lock(&aio_nr_lock);
- BUG_ON(aio_nr - nr_events > aio_nr);
- aio_nr -= nr_events;
- spin_unlock(&aio_nr_lock);
- }
}
/* __put_ioctx
@@ -217,13 +207,19 @@ static void ctx_rcu_free(struct rcu_head *head)
*/
static void __put_ioctx(struct kioctx *ctx)
{
+ unsigned nr_events = ctx->max_reqs;
BUG_ON(ctx->reqs_active);
- cancel_delayed_work(&ctx->wq);
- cancel_work_sync(&ctx->wq.work);
+ cancel_delayed_work_sync(&ctx->wq);
aio_free_ring(ctx);
mmdrop(ctx->mm);
ctx->mm = NULL;
+ if (nr_events) {
+ spin_lock(&aio_nr_lock);
+ BUG_ON(aio_nr - nr_events > aio_nr);
+ aio_nr -= nr_events;
+ spin_unlock(&aio_nr_lock);
+ }
pr_debug("__put_ioctx: freeing %p\n", ctx);
call_rcu(&ctx->rcu_head, ctx_rcu_free);
}
@@ -247,7 +243,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
{
struct mm_struct *mm;
struct kioctx *ctx;
- int did_sync = 0;
+ int err = -ENOMEM;
/* Prevent overflows */
if ((nr_events > (0x10000000U / sizeof(struct io_event))) ||
@@ -256,7 +252,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
return ERR_PTR(-EINVAL);
}
- if ((unsigned long)nr_events > aio_max_nr)
+ if (!nr_events || (unsigned long)nr_events > aio_max_nr)
return ERR_PTR(-EAGAIN);
ctx = kmem_cache_zalloc(kioctx_cachep, GFP_KERNEL);
@@ -280,25 +276,14 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
goto out_freectx;
/* limit the number of system wide aios */
- do {
- spin_lock_bh(&aio_nr_lock);
- if (aio_nr + nr_events > aio_max_nr ||
- aio_nr + nr_events < aio_nr)
- ctx->max_reqs = 0;
- else
- aio_nr += ctx->max_reqs;
- spin_unlock_bh(&aio_nr_lock);
- if (ctx->max_reqs || did_sync)
- break;
-
- /* wait for rcu callbacks to have completed before giving up */
- synchronize_rcu();
- did_sync = 1;
- ctx->max_reqs = nr_events;
- } while (1);
-
- if (ctx->max_reqs == 0)
+ spin_lock(&aio_nr_lock);
+ if (aio_nr + nr_events > aio_max_nr ||
+ aio_nr + nr_events < aio_nr) {
+ spin_unlock(&aio_nr_lock);
goto out_cleanup;
+ }
+ aio_nr += ctx->max_reqs;
+ spin_unlock(&aio_nr_lock);
/* now link into global list. */
spin_lock(&mm->ioctx_lock);
@@ -310,27 +295,27 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
return ctx;
out_cleanup:
- __put_ioctx(ctx);
- return ERR_PTR(-EAGAIN);
-
+ err = -EAGAIN;
+ aio_free_ring(ctx);
out_freectx:
mmdrop(mm);
kmem_cache_free(kioctx_cachep, ctx);
- ctx = ERR_PTR(-ENOMEM);
-
- dprintk("aio: error allocating ioctx %p\n", ctx);
- return ctx;
+ dprintk("aio: error allocating ioctx %d\n", err);
+ return ERR_PTR(err);
}
-/* aio_cancel_all
+/* kill_ctx
* Cancels all outstanding aio requests on an aio context. Used
* when the processes owning a context have all exited to encourage
* the rapid destruction of the kioctx.
*/
-static void aio_cancel_all(struct kioctx *ctx)
+static void kill_ctx(struct kioctx *ctx)
{
int (*cancel)(struct kiocb *, struct io_event *);
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
struct io_event res;
+
spin_lock_irq(&ctx->ctx_lock);
ctx->dead = 1;
while (!list_empty(&ctx->active_reqs)) {
@@ -346,15 +331,7 @@ static void aio_cancel_all(struct kioctx *ctx)
spin_lock_irq(&ctx->ctx_lock);
}
}
- spin_unlock_irq(&ctx->ctx_lock);
-}
-
-static void wait_for_all_aios(struct kioctx *ctx)
-{
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
- spin_lock_irq(&ctx->ctx_lock);
if (!ctx->reqs_active)
goto out;
@@ -404,19 +381,24 @@ void exit_aio(struct mm_struct *mm)
ctx = hlist_entry(mm->ioctx_list.first, struct kioctx, list);
hlist_del_rcu(&ctx->list);
- aio_cancel_all(ctx);
-
- wait_for_all_aios(ctx);
- /*
- * Ensure we don't leave the ctx on the aio_wq
- */
- cancel_work_sync(&ctx->wq.work);
+ kill_ctx(ctx);
if (1 != atomic_read(&ctx->users))
printk(KERN_DEBUG
"exit_aio:ioctx still alive: %d %d %d\n",
atomic_read(&ctx->users), ctx->dead,
ctx->reqs_active);
+ /*
+ * We don't need to bother with munmap() here -
+ * exit_mmap(mm) is coming and it'll unmap everything.
+ * Since aio_free_ring() uses non-zero ->mmap_size
+ * as indicator that it needs to unmap the area,
+ * just set it to 0; aio_free_ring() is the only
+ * place that uses ->mmap_size, so it's safe.
+ * That way we get all munmap done to current->mm -
+ * all other callers have ctx->mm == current->mm.
+ */
+ ctx->ring_info.mmap_size = 0;
put_ioctx(ctx);
}
}
@@ -920,7 +902,7 @@ static void aio_kick_handler(struct work_struct *work)
unuse_mm(mm);
set_fs(oldfs);
/*
- * we're in a worker thread already, don't use queue_delayed_work,
+ * we're in a worker thread already; no point using non-zero delay
*/
if (requeue)
queue_delayed_work(aio_wq, &ctx->wq, 0);
@@ -1019,10 +1001,10 @@ int aio_complete(struct kiocb *iocb, long res, long res2)
if (kiocbIsCancelled(iocb))
goto put_rq;
- ring = kmap_atomic(info->ring_pages[0], KM_IRQ1);
+ ring = kmap_atomic(info->ring_pages[0]);
tail = info->tail;
- event = aio_ring_event(info, tail, KM_IRQ0);
+ event = aio_ring_event(info, tail);
if (++tail >= info->nr)
tail = 0;
@@ -1043,8 +1025,8 @@ int aio_complete(struct kiocb *iocb, long res, long res2)
info->tail = tail;
ring->tail = tail;
- put_aio_ring_event(event, KM_IRQ0);
- kunmap_atomic(ring, KM_IRQ1);
+ put_aio_ring_event(event);
+ kunmap_atomic(ring);
pr_debug("added to ring %p at [%lu]\n", iocb, tail);
@@ -1089,7 +1071,7 @@ static int aio_read_evt(struct kioctx *ioctx, struct io_event *ent)
unsigned long head;
int ret = 0;
- ring = kmap_atomic(info->ring_pages[0], KM_USER0);
+ ring = kmap_atomic(info->ring_pages[0]);
dprintk("in aio_read_evt h%lu t%lu m%lu\n",
(unsigned long)ring->head, (unsigned long)ring->tail,
(unsigned long)ring->nr);
@@ -1101,18 +1083,18 @@ static int aio_read_evt(struct kioctx *ioctx, struct io_event *ent)
head = ring->head % info->nr;
if (head != ring->tail) {
- struct io_event *evp = aio_ring_event(info, head, KM_USER1);
+ struct io_event *evp = aio_ring_event(info, head);
*ent = *evp;
head = (head + 1) % info->nr;
smp_mb(); /* finish reading the event before updatng the head */
ring->head = head;
ret = 1;
- put_aio_ring_event(evp, KM_USER1);
+ put_aio_ring_event(evp);
}
spin_unlock(&info->ring_lock);
out:
- kunmap_atomic(ring, KM_USER0);
+ kunmap_atomic(ring);
dprintk("leaving aio_read_evt: %d h%lu t%lu\n", ret,
(unsigned long)ring->head, (unsigned long)ring->tail);
return ret;
@@ -1290,8 +1272,7 @@ static void io_destroy(struct kioctx *ioctx)
if (likely(!was_dead))
put_ioctx(ioctx); /* twice for the list */
- aio_cancel_all(ioctx);
- wait_for_all_aios(ioctx);
+ kill_ctx(ioctx);
/*
* Wake up any waiters. The setting of ctx->dead must be seen
@@ -1299,7 +1280,6 @@ static void io_destroy(struct kioctx *ioctx)
* locking done by the above calls to ensure this consistency.
*/
wake_up_all(&ioctx->wait);
- put_ioctx(ioctx); /* once for the lookup */
}
/* sys_io_setup:
@@ -1336,11 +1316,9 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
ret = PTR_ERR(ioctx);
if (!IS_ERR(ioctx)) {
ret = put_user(ioctx->user_id, ctxp);
- if (!ret) {
- put_ioctx(ioctx);
- return 0;
- }
- io_destroy(ioctx);
+ if (ret)
+ io_destroy(ioctx);
+ put_ioctx(ioctx);
}
out:
@@ -1358,6 +1336,7 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
struct kioctx *ioctx = lookup_ioctx(ctx);
if (likely(NULL != ioctx)) {
io_destroy(ioctx);
+ put_ioctx(ioctx);
return 0;
}
pr_debug("EINVAL: io_destroy: invalid context id\n");
@@ -1477,6 +1456,10 @@ static ssize_t aio_setup_vectored_rw(int type, struct kiocb *kiocb, bool compat)
if (ret < 0)
goto out;
+ ret = rw_verify_area(type, kiocb->ki_filp, &kiocb->ki_pos, ret);
+ if (ret < 0)
+ goto out;
+
kiocb->ki_nr_segs = kiocb->ki_nbytes;
kiocb->ki_cur_seg = 0;
/* ki_nbytes/left now reflect bytes instead of segs */
@@ -1488,11 +1471,17 @@ out:
return ret;
}
-static ssize_t aio_setup_single_vector(struct kiocb *kiocb)
+static ssize_t aio_setup_single_vector(int type, struct file * file, struct kiocb *kiocb)
{
+ int bytes;
+
+ bytes = rw_verify_area(type, file, &kiocb->ki_pos, kiocb->ki_left);
+ if (bytes < 0)
+ return bytes;
+
kiocb->ki_iovec = &kiocb->ki_inline_vec;
kiocb->ki_iovec->iov_base = kiocb->ki_buf;
- kiocb->ki_iovec->iov_len = kiocb->ki_left;
+ kiocb->ki_iovec->iov_len = bytes;
kiocb->ki_nr_segs = 1;
kiocb->ki_cur_seg = 0;
return 0;
@@ -1517,10 +1506,7 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat)
if (unlikely(!access_ok(VERIFY_WRITE, kiocb->ki_buf,
kiocb->ki_left)))
break;
- ret = security_file_permission(file, MAY_READ);
- if (unlikely(ret))
- break;
- ret = aio_setup_single_vector(kiocb);
+ ret = aio_setup_single_vector(READ, file, kiocb);
if (ret)
break;
ret = -EINVAL;
@@ -1535,10 +1521,7 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat)
if (unlikely(!access_ok(VERIFY_READ, kiocb->ki_buf,
kiocb->ki_left)))
break;
- ret = security_file_permission(file, MAY_WRITE);
- if (unlikely(ret))
- break;
- ret = aio_setup_single_vector(kiocb);
+ ret = aio_setup_single_vector(WRITE, file, kiocb);
if (ret)
break;
ret = -EINVAL;
@@ -1549,9 +1532,6 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat)
ret = -EBADF;
if (unlikely(!(file->f_mode & FMODE_READ)))
break;
- ret = security_file_permission(file, MAY_READ);
- if (unlikely(ret))
- break;
ret = aio_setup_vectored_rw(READ, kiocb, compat);
if (ret)
break;
@@ -1563,9 +1543,6 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat)
ret = -EBADF;
if (unlikely(!(file->f_mode & FMODE_WRITE)))
break;
- ret = security_file_permission(file, MAY_WRITE);
- if (unlikely(ret))
- break;
ret = aio_setup_vectored_rw(WRITE, kiocb, compat);
if (ret)
break;
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index f11e43ed907..28d39fb84ae 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -39,19 +39,6 @@ static const struct dentry_operations anon_inodefs_dentry_operations = {
.d_dname = anon_inodefs_dname,
};
-static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
-{
- return mount_pseudo(fs_type, "anon_inode:", NULL,
- &anon_inodefs_dentry_operations, ANON_INODE_FS_MAGIC);
-}
-
-static struct file_system_type anon_inode_fs_type = {
- .name = "anon_inodefs",
- .mount = anon_inodefs_mount,
- .kill_sb = kill_anon_super,
-};
-
/*
* nop .set_page_dirty method so that people can use .page_mkwrite on
* anon inodes.
@@ -65,6 +52,62 @@ static const struct address_space_operations anon_aops = {
.set_page_dirty = anon_set_page_dirty,
};
+/*
+ * A single inode exists for all anon_inode files. Contrary to pipes,
+ * anon_inode inodes have no associated per-instance data, so we need
+ * only allocate one of them.
+ */
+static struct inode *anon_inode_mkinode(struct super_block *s)
+{
+ struct inode *inode = new_inode_pseudo(s);
+
+ if (!inode)
+ return ERR_PTR(-ENOMEM);
+
+ inode->i_ino = get_next_ino();
+ inode->i_fop = &anon_inode_fops;
+
+ inode->i_mapping->a_ops = &anon_aops;
+
+ /*
+ * Mark the inode dirty from the very beginning,
+ * that way it will never be moved to the dirty
+ * list because mark_inode_dirty() will think
+ * that it already _is_ on the dirty list.
+ */
+ inode->i_state = I_DIRTY;
+ inode->i_mode = S_IRUSR | S_IWUSR;
+ inode->i_uid = current_fsuid();
+ inode->i_gid = current_fsgid();
+ inode->i_flags |= S_PRIVATE;
+ inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ return inode;
+}
+
+static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
+{
+ struct dentry *root;
+ root = mount_pseudo(fs_type, "anon_inode:", NULL,
+ &anon_inodefs_dentry_operations, ANON_INODE_FS_MAGIC);
+ if (!IS_ERR(root)) {
+ struct super_block *s = root->d_sb;
+ anon_inode_inode = anon_inode_mkinode(s);
+ if (IS_ERR(anon_inode_inode)) {
+ dput(root);
+ deactivate_locked_super(s);
+ root = ERR_CAST(anon_inode_inode);
+ }
+ }
+ return root;
+}
+
+static struct file_system_type anon_inode_fs_type = {
+ .name = "anon_inodefs",
+ .mount = anon_inodefs_mount,
+ .kill_sb = kill_anon_super,
+};
+
/**
* anon_inode_getfile - creates a new file instance by hooking it up to an
* anonymous inode, and a dentry that describe the "class"
@@ -180,38 +223,6 @@ err_put_unused_fd:
}
EXPORT_SYMBOL_GPL(anon_inode_getfd);
-/*
- * A single inode exists for all anon_inode files. Contrary to pipes,
- * anon_inode inodes have no associated per-instance data, so we need
- * only allocate one of them.
- */
-static struct inode *anon_inode_mkinode(void)
-{
- struct inode *inode = new_inode_pseudo(anon_inode_mnt->mnt_sb);
-
- if (!inode)
- return ERR_PTR(-ENOMEM);
-
- inode->i_ino = get_next_ino();
- inode->i_fop = &anon_inode_fops;
-
- inode->i_mapping->a_ops = &anon_aops;
-
- /*
- * Mark the inode dirty from the very beginning,
- * that way it will never be moved to the dirty
- * list because mark_inode_dirty() will think
- * that it already _is_ on the dirty list.
- */
- inode->i_state = I_DIRTY;
- inode->i_mode = S_IRUSR | S_IWUSR;
- inode->i_uid = current_fsuid();
- inode->i_gid = current_fsgid();
- inode->i_flags |= S_PRIVATE;
- inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
- return inode;
-}
-
static int __init anon_inode_init(void)
{
int error;
@@ -224,16 +235,8 @@ static int __init anon_inode_init(void)
error = PTR_ERR(anon_inode_mnt);
goto err_unregister_filesystem;
}
- anon_inode_inode = anon_inode_mkinode();
- if (IS_ERR(anon_inode_inode)) {
- error = PTR_ERR(anon_inode_inode);
- goto err_mntput;
- }
-
return 0;
-err_mntput:
- kern_unmount(anon_inode_mnt);
err_unregister_filesystem:
unregister_filesystem(&anon_inode_fs_type);
err_exit:
diff --git a/fs/attr.c b/fs/attr.c
index 95053ad8abc..73f69a6ce9e 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -5,7 +5,7 @@
* changes by Thomas Schoebel-Theuer
*/
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/string.h>
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index d06d95a5c63..aa9103f8f01 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -230,7 +230,7 @@ static void autofs_dev_ioctl_fd_install(unsigned int fd, struct file *file)
fdt = files_fdtable(files);
BUG_ON(fdt->fd[fd] != NULL);
rcu_assign_pointer(fdt->fd[fd], file);
- FD_SET(fd, fdt->close_on_exec);
+ __set_close_on_exec(fd, fdt);
spin_unlock(&files->file_lock);
}
diff --git a/fs/autofs4/init.c b/fs/autofs4/init.c
index c038727b405..cddc74b9cdb 100644
--- a/fs/autofs4/init.c
+++ b/fs/autofs4/init.c
@@ -31,11 +31,11 @@ static int __init init_autofs4_fs(void)
{
int err;
+ autofs_dev_ioctl_init();
+
err = register_filesystem(&autofs_fs_type);
if (err)
- return err;
-
- autofs_dev_ioctl_init();
+ autofs_dev_ioctl_exit();
return err;
}
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 9ef53a68ea0..6e488ebe778 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -245,12 +245,9 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
if (!ino)
goto fail_free;
root_inode = autofs4_get_inode(s, S_IFDIR | 0755);
- if (!root_inode)
- goto fail_ino;
-
- root = d_alloc_root(root_inode);
+ root = d_make_root(root_inode);
if (!root)
- goto fail_iput;
+ goto fail_ino;
pipe = NULL;
root->d_fsdata = ino;
@@ -315,9 +312,6 @@ fail_fput:
fail_dput:
dput(root);
goto fail_free;
-fail_iput:
- printk("autofs: get root dentry failed\n");
- iput(root_inode);
fail_ino:
kfree(ino);
fail_free:
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index f624cd0ff84..da8876d38a7 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -91,7 +91,7 @@ static int autofs4_write(struct autofs_sb_info *sbi,
return (bytes > 0);
}
-
+
static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
struct autofs_wait_queue *wq,
int type)
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 22e9a78872f..37268c5bb98 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -9,7 +9,7 @@
*/
#include <linux/fs.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/stat.h>
#include <linux/time.h>
#include <linux/namei.h>
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 6e6d536767f..e18da23d42b 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -852,9 +852,8 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
ret = PTR_ERR(root);
goto unacquire_priv_sbp;
}
- sb->s_root = d_alloc_root(root);
+ sb->s_root = d_make_root(root);
if (!sb->s_root) {
- iput(root);
befs_error(sb, "get root inode failed");
goto unacquire_priv_sbp;
}
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index b0391bc402b..e23dc7c8b88 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -367,9 +367,8 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
ret = PTR_ERR(inode);
goto out2;
}
- s->s_root = d_alloc_root(inode);
+ s->s_root = d_make_root(inode);
if (!s->s_root) {
- iput(inode);
ret = -ENOMEM;
goto out2;
}
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index 1ff94054d35..d146e181d10 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -26,7 +26,6 @@
#include <linux/coredump.h>
#include <linux/slab.h>
-#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/cacheflush.h>
#include <asm/a.out-core.h>
@@ -51,9 +50,7 @@ static int set_brk(unsigned long start, unsigned long end)
end = PAGE_ALIGN(end);
if (end > start) {
unsigned long addr;
- down_write(&current->mm->mmap_sem);
- addr = do_brk(start, end - start);
- up_write(&current->mm->mmap_sem);
+ addr = vm_brk(start, end - start);
if (BAD_ADDR(addr))
return addr;
}
@@ -267,7 +264,6 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
}
install_exec_creds(bprm);
- current->flags &= ~PF_FORKNOEXEC;
if (N_MAGIC(ex) == OMAGIC) {
unsigned long text_addr, map_size;
@@ -282,9 +278,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
pos = 32;
map_size = ex.a_text+ex.a_data;
#endif
- down_write(&current->mm->mmap_sem);
- error = do_brk(text_addr & PAGE_MASK, map_size);
- up_write(&current->mm->mmap_sem);
+ error = vm_brk(text_addr & PAGE_MASK, map_size);
if (error != (text_addr & PAGE_MASK)) {
send_sig(SIGKILL, current, 0);
return error;
@@ -315,9 +309,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) {
loff_t pos = fd_offset;
- down_write(&current->mm->mmap_sem);
- do_brk(N_TXTADDR(ex), ex.a_text+ex.a_data);
- up_write(&current->mm->mmap_sem);
+ vm_brk(N_TXTADDR(ex), ex.a_text+ex.a_data);
bprm->file->f_op->read(bprm->file,
(char __user *)N_TXTADDR(ex),
ex.a_text+ex.a_data, &pos);
@@ -327,24 +319,20 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
goto beyond_if;
}
- down_write(&current->mm->mmap_sem);
- error = do_mmap(bprm->file, N_TXTADDR(ex), ex.a_text,
+ error = vm_mmap(bprm->file, N_TXTADDR(ex), ex.a_text,
PROT_READ | PROT_EXEC,
MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE,
fd_offset);
- up_write(&current->mm->mmap_sem);
if (error != N_TXTADDR(ex)) {
send_sig(SIGKILL, current, 0);
return error;
}
- down_write(&current->mm->mmap_sem);
- error = do_mmap(bprm->file, N_DATADDR(ex), ex.a_data,
+ error = vm_mmap(bprm->file, N_DATADDR(ex), ex.a_data,
PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE,
fd_offset + ex.a_text);
- up_write(&current->mm->mmap_sem);
if (error != N_DATADDR(ex)) {
send_sig(SIGKILL, current, 0);
return error;
@@ -414,9 +402,7 @@ static int load_aout_library(struct file *file)
"N_TXTOFF is not page aligned. Please convert library: %s\n",
file->f_path.dentry->d_name.name);
}
- down_write(&current->mm->mmap_sem);
- do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
- up_write(&current->mm->mmap_sem);
+ vm_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
file->f_op->read(file, (char __user *)start_addr,
ex.a_text + ex.a_data, &pos);
@@ -427,12 +413,10 @@ static int load_aout_library(struct file *file)
goto out;
}
/* Now use mmap to map the library into memory. */
- down_write(&current->mm->mmap_sem);
- error = do_mmap(file, start_addr, ex.a_text + ex.a_data,
+ error = vm_mmap(file, start_addr, ex.a_text + ex.a_data,
PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
N_TXTOFF(ex));
- up_write(&current->mm->mmap_sem);
retval = error;
if (error != start_addr)
goto out;
@@ -440,9 +424,7 @@ static int load_aout_library(struct file *file)
len = PAGE_ALIGN(ex.a_text + ex.a_data);
bss = ex.a_text + ex.a_data + ex.a_bss;
if (bss > len) {
- down_write(&current->mm->mmap_sem);
- error = do_brk(start_addr + len, bss - len);
- up_write(&current->mm->mmap_sem);
+ error = vm_brk(start_addr + len, bss - len);
retval = error;
if (error != start_addr + len)
goto out;
@@ -454,7 +436,8 @@ out:
static int __init init_aout_binfmt(void)
{
- return register_binfmt(&aout_format);
+ register_binfmt(&aout_format);
+ return 0;
}
static void __exit exit_aout_binfmt(void)
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index fa7483fc4a4..16f73541707 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -35,6 +35,7 @@
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>
+#include <asm/exec.h>
static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
static int load_elf_library(struct file *);
@@ -81,9 +82,7 @@ static int set_brk(unsigned long start, unsigned long end)
end = ELF_PAGEALIGN(end);
if (end > start) {
unsigned long addr;
- down_write(&current->mm->mmap_sem);
- addr = do_brk(start, end - start);
- up_write(&current->mm->mmap_sem);
+ addr = vm_brk(start, end - start);
if (BAD_ADDR(addr))
return addr;
}
@@ -513,9 +512,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
/* Map the last of the bss segment */
- down_write(&current->mm->mmap_sem);
- error = do_brk(elf_bss, last_bss - elf_bss);
- up_write(&current->mm->mmap_sem);
+ error = vm_brk(elf_bss, last_bss - elf_bss);
if (BAD_ADDR(error))
goto out_close;
}
@@ -711,17 +708,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
if (retval)
goto out_free_dentry;
-#ifdef CONFIG_X86_32
- /*
- * Turn off the CS limit completely if exec-shield disabled or
- * NX active:
- */
- if (disable_nx || executable_stack != EXSTACK_DISABLE_X || (__supported_pte_mask & _PAGE_NX))
- arch_add_exec_range(current->mm, -1);
-#endif
-
/* OK, This is the point of no return */
- current->flags &= ~PF_FORKNOEXEC;
current->mm->def_flags = def_flags;
/* Do this immediately, since STACK_TOP as used in setup_arg_pages
@@ -943,7 +930,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
install_exec_creds(bprm);
- current->flags &= ~PF_FORKNOEXEC;
retval = create_elf_tables(bprm, &loc->elf_ex,
load_addr, interp_load_addr);
if (retval < 0) {
@@ -972,10 +958,8 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
and some applications "depend" upon this behavior.
Since we do not have the power to recompile these, we
emulate the SVr4 behavior. Sigh. */
- down_write(&current->mm->mmap_sem);
- error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
+ error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
MAP_FIXED | MAP_PRIVATE, 0);
- up_write(&current->mm->mmap_sem);
}
#ifdef ELF_PLAT_INIT
@@ -1060,8 +1044,7 @@ static int load_elf_library(struct file *file)
eppnt++;
/* Now use mmap to map the library into memory. */
- down_write(&current->mm->mmap_sem);
- error = do_mmap(file,
+ error = vm_mmap(file,
ELF_PAGESTART(eppnt->p_vaddr),
(eppnt->p_filesz +
ELF_PAGEOFFSET(eppnt->p_vaddr)),
@@ -1069,7 +1052,6 @@ static int load_elf_library(struct file *file)
MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
(eppnt->p_offset -
ELF_PAGEOFFSET(eppnt->p_vaddr)));
- up_write(&current->mm->mmap_sem);
if (error != ELF_PAGESTART(eppnt->p_vaddr))
goto out_free_ph;
@@ -1082,11 +1064,8 @@ static int load_elf_library(struct file *file)
len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
ELF_MIN_ALIGN - 1);
bss = eppnt->p_memsz + eppnt->p_vaddr;
- if (bss > len) {
- down_write(&current->mm->mmap_sem);
- do_brk(len, bss - len);
- up_write(&current->mm->mmap_sem);
- }
+ if (bss > len)
+ vm_brk(len, bss - len);
error = 0;
out_free_ph:
@@ -1104,6 +1083,29 @@ out:
*/
/*
+ * The purpose of always_dump_vma() is to make sure that special kernel mappings
+ * that are useful for post-mortem analysis are included in every core dump.
+ * In that way we ensure that the core dump is fully interpretable later
+ * without matching up the same kernel and hardware config to see what PC values
+ * meant. These special mappings include - vDSO, vsyscall, and other
+ * architecture specific mappings
+ */
+static bool always_dump_vma(struct vm_area_struct *vma)
+{
+ /* Any vsyscall mappings? */
+ if (vma == get_gate_vma(vma->vm_mm))
+ return true;
+ /*
+ * arch_vma_name() returns non-NULL for special architecture mappings,
+ * such as vDSO sections.
+ */
+ if (arch_vma_name(vma))
+ return true;
+
+ return false;
+}
+
+/*
* Decide what to dump of a segment, part, all or none.
*/
static unsigned long vma_dump_size(struct vm_area_struct *vma,
@@ -1111,10 +1113,13 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma,
{
#define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type))
- /* The vma can be set up to tell us the answer directly. */
- if (vma->vm_flags & VM_ALWAYSDUMP)
+ /* always dump the vdso and vsyscall sections */
+ if (always_dump_vma(vma))
goto whole;
+ if (vma->vm_flags & VM_NODUMP)
+ return 0;
+
/* Hugetlb memory check */
if (vma->vm_flags & VM_HUGETLB) {
if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
@@ -1399,6 +1404,22 @@ static void do_thread_regset_writeback(struct task_struct *task,
regset->writeback(task, regset, 1);
}
+#ifndef PR_REG_SIZE
+#define PR_REG_SIZE(S) sizeof(S)
+#endif
+
+#ifndef PRSTATUS_SIZE
+#define PRSTATUS_SIZE(S) sizeof(S)
+#endif
+
+#ifndef PR_REG_PTR
+#define PR_REG_PTR(S) (&((S)->pr_reg))
+#endif
+
+#ifndef SET_PR_FPVALID
+#define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
+#endif
+
static int fill_thread_core_info(struct elf_thread_core_info *t,
const struct user_regset_view *view,
long signr, size_t *total)
@@ -1413,11 +1434,11 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
*/
fill_prstatus(&t->prstatus, t->task, signr);
(void) view->regsets[0].get(t->task, &view->regsets[0],
- 0, sizeof(t->prstatus.pr_reg),
- &t->prstatus.pr_reg, NULL);
+ 0, PR_REG_SIZE(t->prstatus.pr_reg),
+ PR_REG_PTR(&t->prstatus), NULL);
fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
- sizeof(t->prstatus), &t->prstatus);
+ PRSTATUS_SIZE(t->prstatus), &t->prstatus);
*total += notesize(&t->notes[0]);
do_thread_regset_writeback(t->task, &view->regsets[0]);
@@ -1447,7 +1468,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
regset->core_note_type,
size, data);
else {
- t->prstatus.pr_fpvalid = 1;
+ SET_PR_FPVALID(&t->prstatus, 1);
fill_note(&t->notes[i], "CORE",
NT_PRFPREG, size, data);
}
@@ -2086,7 +2107,8 @@ out:
static int __init init_elf_binfmt(void)
{
- return register_binfmt(&elf_format);
+ register_binfmt(&elf_format);
+ return 0;
}
static void __exit exit_elf_binfmt(void)
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 30745f459fa..d390a0fffc6 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -39,6 +39,7 @@
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/pgalloc.h>
+#include <asm/exec.h>
typedef char *elf_caddr_t;
@@ -91,7 +92,8 @@ static struct linux_binfmt elf_fdpic_format = {
static int __init init_elf_fdpic_binfmt(void)
{
- return register_binfmt(&elf_fdpic_format);
+ register_binfmt(&elf_fdpic_format);
+ return 0;
}
static void __exit exit_elf_fdpic_binfmt(void)
@@ -334,8 +336,6 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
current->mm->context.exec_fdpic_loadmap = 0;
current->mm->context.interp_fdpic_loadmap = 0;
- current->flags &= ~PF_FORKNOEXEC;
-
#ifdef CONFIG_MMU
elf_fdpic_arch_lay_out_mm(&exec_params,
&interp_params,
@@ -390,21 +390,17 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
(executable_stack == EXSTACK_DEFAULT && VM_STACK_FLAGS & VM_EXEC))
stack_prot |= PROT_EXEC;
- down_write(&current->mm->mmap_sem);
- current->mm->start_brk = do_mmap(NULL, 0, stack_size, stack_prot,
+ current->mm->start_brk = vm_mmap(NULL, 0, stack_size, stack_prot,
MAP_PRIVATE | MAP_ANONYMOUS |
MAP_UNINITIALIZED | MAP_GROWSDOWN,
0);
if (IS_ERR_VALUE(current->mm->start_brk)) {
- up_write(&current->mm->mmap_sem);
retval = current->mm->start_brk;
current->mm->start_brk = 0;
goto error_kill;
}
- up_write(&current->mm->mmap_sem);
-
current->mm->brk = current->mm->start_brk;
current->mm->context.end_brk = current->mm->start_brk;
current->mm->context.end_brk +=
@@ -413,7 +409,6 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
#endif
install_exec_creds(bprm);
- current->flags &= ~PF_FORKNOEXEC;
if (create_elf_fdpic_tables(bprm, current->mm,
&exec_params, &interp_params) < 0)
goto error_kill;
@@ -956,10 +951,8 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(
if (params->flags & ELF_FDPIC_FLAG_EXECUTABLE)
mflags |= MAP_EXECUTABLE;
- down_write(&mm->mmap_sem);
- maddr = do_mmap(NULL, load_addr, top - base,
+ maddr = vm_mmap(NULL, load_addr, top - base,
PROT_READ | PROT_WRITE | PROT_EXEC, mflags, 0);
- up_write(&mm->mmap_sem);
if (IS_ERR_VALUE(maddr))
return (int) maddr;
@@ -1097,10 +1090,8 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
/* create the mapping */
disp = phdr->p_vaddr & ~PAGE_MASK;
- down_write(&mm->mmap_sem);
- maddr = do_mmap(file, maddr, phdr->p_memsz + disp, prot, flags,
+ maddr = vm_mmap(file, maddr, phdr->p_memsz + disp, prot, flags,
phdr->p_offset - disp);
- up_write(&mm->mmap_sem);
kdebug("mmap[%d] <file> sz=%lx pr=%x fl=%x of=%lx --> %08lx",
loop, phdr->p_memsz + disp, prot, flags,
@@ -1144,10 +1135,8 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
unsigned long xmaddr;
flags |= MAP_FIXED | MAP_ANONYMOUS;
- down_write(&mm->mmap_sem);
- xmaddr = do_mmap(NULL, xaddr, excess - excess1,
+ xmaddr = vm_mmap(NULL, xaddr, excess - excess1,
prot, flags, 0);
- up_write(&mm->mmap_sem);
kdebug("mmap[%d] <anon>"
" ad=%lx sz=%lx pr=%x fl=%x of=0 --> %08lx",
diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c
index b8e8b0acf9b..2790c7e1912 100644
--- a/fs/binfmt_em86.c
+++ b/fs/binfmt_em86.c
@@ -100,7 +100,8 @@ static struct linux_binfmt em86_format = {
static int __init init_em86_binfmt(void)
{
- return register_binfmt(&em86_format);
+ register_binfmt(&em86_format);
+ return 0;
}
static void __exit exit_em86_binfmt(void)
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 1bffbe0ed77..6b2daf99fab 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -15,7 +15,7 @@
* JAN/99 -- coded full program relocation (gerg@snapgear.com)
*/
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
@@ -37,7 +37,6 @@
#include <linux/syscalls.h>
#include <asm/byteorder.h>
-#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/unaligned.h>
#include <asm/cacheflush.h>
@@ -543,10 +542,8 @@ static int load_flat_file(struct linux_binprm * bprm,
*/
DBG_FLT("BINFMT_FLAT: ROM mapping of file (we hope)\n");
- down_write(&current->mm->mmap_sem);
- textpos = do_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC,
+ textpos = vm_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC,
MAP_PRIVATE|MAP_EXECUTABLE, 0);
- up_write(&current->mm->mmap_sem);
if (!textpos || IS_ERR_VALUE(textpos)) {
if (!textpos)
textpos = (unsigned long) -ENOMEM;
@@ -557,10 +554,8 @@ static int load_flat_file(struct linux_binprm * bprm,
len = data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long);
len = PAGE_ALIGN(len);
- down_write(&current->mm->mmap_sem);
- realdatastart = do_mmap(0, 0, len,
+ realdatastart = vm_mmap(0, 0, len,
PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, 0);
- up_write(&current->mm->mmap_sem);
if (realdatastart == 0 || IS_ERR_VALUE(realdatastart)) {
if (!realdatastart)
@@ -604,10 +599,8 @@ static int load_flat_file(struct linux_binprm * bprm,
len = text_len + data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long);
len = PAGE_ALIGN(len);
- down_write(&current->mm->mmap_sem);
- textpos = do_mmap(0, 0, len,
+ textpos = vm_mmap(0, 0, len,
PROT_READ | PROT_EXEC | PROT_WRITE, MAP_PRIVATE, 0);
- up_write(&current->mm->mmap_sem);
if (!textpos || IS_ERR_VALUE(textpos)) {
if (!textpos)
@@ -902,7 +895,6 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs)
libinfo.lib_list[j].start_data:UNLOADED_LIB;
install_exec_creds(bprm);
- current->flags &= ~PF_FORKNOEXEC;
set_binfmt(&flat_format);
@@ -950,7 +942,8 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs)
static int __init init_flat_binfmt(void)
{
- return register_binfmt(&flat_format);
+ register_binfmt(&flat_format);
+ return 0;
}
/****************************************************************************/
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index a9198dfd5f8..613aa061823 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -19,6 +19,7 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/sched.h>
+#include <linux/magic.h>
#include <linux/binfmts.h>
#include <linux/slab.h>
#include <linux/ctype.h>
@@ -699,7 +700,7 @@ static int bm_fill_super(struct super_block * sb, void * data, int silent)
[3] = {"register", &bm_register_operations, S_IWUSR},
/* last one */ {""}
};
- int err = simple_fill_super(sb, 0x42494e4d, bm_files);
+ int err = simple_fill_super(sb, BINFMTFS_MAGIC, bm_files);
if (!err)
sb->s_op = &s_ops;
return err;
@@ -726,11 +727,8 @@ static struct file_system_type bm_fs_type = {
static int __init init_misc_binfmt(void)
{
int err = register_filesystem(&bm_fs_type);
- if (!err) {
- err = insert_binfmt(&misc_format);
- if (err)
- unregister_filesystem(&bm_fs_type);
- }
+ if (!err)
+ insert_binfmt(&misc_format);
return err;
}
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index 396a9884591..d3b8c1f6315 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -105,7 +105,8 @@ static struct linux_binfmt script_format = {
static int __init init_script_binfmt(void)
{
- return register_binfmt(&script_format);
+ register_binfmt(&script_format);
+ return 0;
}
static void __exit exit_script_binfmt(void)
diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c
index cc8560f6c9b..4517aaff61b 100644
--- a/fs/binfmt_som.c
+++ b/fs/binfmt_som.c
@@ -147,10 +147,8 @@ static int map_som_binary(struct file *file,
code_size = SOM_PAGEALIGN(hpuxhdr->exec_tsize);
current->mm->start_code = code_start;
current->mm->end_code = code_start + code_size;
- down_write(&current->mm->mmap_sem);
- retval = do_mmap(file, code_start, code_size, prot,
+ retval = vm_mmap(file, code_start, code_size, prot,
flags, SOM_PAGESTART(hpuxhdr->exec_tfile));
- up_write(&current->mm->mmap_sem);
if (retval < 0 && retval > -1024)
goto out;
@@ -158,20 +156,16 @@ static int map_som_binary(struct file *file,
data_size = SOM_PAGEALIGN(hpuxhdr->exec_dsize);
current->mm->start_data = data_start;
current->mm->end_data = bss_start = data_start + data_size;
- down_write(&current->mm->mmap_sem);
- retval = do_mmap(file, data_start, data_size,
+ retval = vm_mmap(file, data_start, data_size,
prot | PROT_WRITE, flags,
SOM_PAGESTART(hpuxhdr->exec_dfile));
- up_write(&current->mm->mmap_sem);
if (retval < 0 && retval > -1024)
goto out;
som_brk = bss_start + SOM_PAGEALIGN(hpuxhdr->exec_bsize);
current->mm->start_brk = current->mm->brk = som_brk;
- down_write(&current->mm->mmap_sem);
- retval = do_mmap(NULL, bss_start, som_brk - bss_start,
+ retval = vm_mmap(NULL, bss_start, som_brk - bss_start,
prot | PROT_WRITE, MAP_FIXED | MAP_PRIVATE, 0);
- up_write(&current->mm->mmap_sem);
if (retval > 0 || retval < -1024)
retval = 0;
out:
@@ -225,7 +219,6 @@ load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs)
goto out_free;
/* OK, This is the point of no return */
- current->flags &= ~PF_FORKNOEXEC;
current->personality = PER_HPUX;
setup_new_exec(bprm);
@@ -289,7 +282,8 @@ static int load_som_library(struct file *f)
static int __init init_som_binfmt(void)
{
- return register_binfmt(&som_format);
+ register_binfmt(&som_format);
+ return 0;
}
static void __exit exit_som_binfmt(void)
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index c2183f3917c..e85c04b9f61 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -357,7 +357,7 @@ static void bio_integrity_generate(struct bio *bio)
bix.sector_size = bi->sector_size;
bio_for_each_segment(bv, bio, i) {
- void *kaddr = kmap_atomic(bv->bv_page, KM_USER0);
+ void *kaddr = kmap_atomic(bv->bv_page);
bix.data_buf = kaddr + bv->bv_offset;
bix.data_size = bv->bv_len;
bix.prot_buf = prot_buf;
@@ -371,7 +371,7 @@ static void bio_integrity_generate(struct bio *bio)
total += sectors * bi->tuple_size;
BUG_ON(total > bio->bi_integrity->bip_size);
- kunmap_atomic(kaddr, KM_USER0);
+ kunmap_atomic(kaddr);
}
}
@@ -498,7 +498,7 @@ static int bio_integrity_verify(struct bio *bio)
bix.sector_size = bi->sector_size;
bio_for_each_segment(bv, bio, i) {
- void *kaddr = kmap_atomic(bv->bv_page, KM_USER0);
+ void *kaddr = kmap_atomic(bv->bv_page);
bix.data_buf = kaddr + bv->bv_offset;
bix.data_size = bv->bv_len;
bix.prot_buf = prot_buf;
@@ -507,7 +507,7 @@ static int bio_integrity_verify(struct bio *bio)
ret = bi->verify_fn(&bix);
if (ret) {
- kunmap_atomic(kaddr, KM_USER0);
+ kunmap_atomic(kaddr);
return ret;
}
@@ -517,7 +517,7 @@ static int bio_integrity_verify(struct bio *bio)
total += sectors * bi->tuple_size;
BUG_ON(total > bio->bi_integrity->bip_size);
- kunmap_atomic(kaddr, KM_USER0);
+ kunmap_atomic(kaddr);
}
return ret;
diff --git a/fs/bio.c b/fs/bio.c
index b980ecde026..84da8853904 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -22,7 +22,7 @@
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/mempool.h>
#include <linux/workqueue.h>
#include <scsi/sg.h> /* for struct sg_iovec */
@@ -505,9 +505,14 @@ EXPORT_SYMBOL(bio_clone);
int bio_get_nr_vecs(struct block_device *bdev)
{
struct request_queue *q = bdev_get_queue(bdev);
- return min_t(unsigned,
+ int nr_pages;
+
+ nr_pages = min_t(unsigned,
queue_max_segments(q),
queue_max_sectors(q) / (PAGE_SIZE >> 9) + 1);
+
+ return min_t(unsigned, nr_pages, BIO_MAX_PAGES);
+
}
EXPORT_SYMBOL(bio_get_nr_vecs);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 5e9f198f771..ba11c30f302 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -16,6 +16,7 @@
#include <linux/blkdev.h>
#include <linux/module.h>
#include <linux/blkpg.h>
+#include <linux/magic.h>
#include <linux/buffer_head.h>
#include <linux/swap.h>
#include <linux/pagevec.h>
@@ -69,7 +70,7 @@ static void bdev_inode_switch_bdi(struct inode *inode,
spin_unlock(&dst->wb.list_lock);
}
-static sector_t max_block(struct block_device *bdev)
+sector_t blkdev_max_block(struct block_device *bdev)
{
sector_t retval = ~((sector_t)0);
loff_t sz = i_size_read(bdev->bd_inode);
@@ -109,7 +110,7 @@ void invalidate_bdev(struct block_device *bdev)
/* 99% of the time, we don't need to flush the cleancache on the bdev.
* But, for the strange corners, lets be cautious
*/
- cleancache_flush_inode(mapping);
+ cleancache_invalidate_inode(mapping);
}
EXPORT_SYMBOL(invalidate_bdev);
@@ -162,7 +163,7 @@ static int
blkdev_get_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh, int create)
{
- if (iblock >= max_block(I_BDEV(inode))) {
+ if (iblock >= blkdev_max_block(I_BDEV(inode))) {
if (create)
return -EIO;
@@ -184,7 +185,7 @@ static int
blkdev_get_blocks(struct inode *inode, sector_t iblock,
struct buffer_head *bh, int create)
{
- sector_t end_block = max_block(I_BDEV(inode));
+ sector_t end_block = blkdev_max_block(I_BDEV(inode));
unsigned long max_blocks = bh->b_size >> inode->i_blkbits;
if ((iblock + max_blocks) > end_block) {
@@ -506,7 +507,7 @@ static const struct super_operations bdev_sops = {
static struct dentry *bd_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
- return mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, 0x62646576);
+ return mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, BDEVFS_MAGIC);
}
static struct file_system_type bd_type = {
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 0cc20b35c1c..42704149b72 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -171,11 +171,11 @@ out:
spin_unlock_irqrestore(&workers->lock, flags);
}
-static noinline int run_ordered_completions(struct btrfs_workers *workers,
+static noinline void run_ordered_completions(struct btrfs_workers *workers,
struct btrfs_work *work)
{
if (!workers->ordered)
- return 0;
+ return;
set_bit(WORK_DONE_BIT, &work->flags);
@@ -213,7 +213,6 @@ static noinline int run_ordered_completions(struct btrfs_workers *workers,
}
spin_unlock(&workers->order_lock);
- return 0;
}
static void put_worker(struct btrfs_worker_thread *worker)
@@ -399,7 +398,7 @@ again:
/*
* this will wait for all the worker threads to shutdown
*/
-int btrfs_stop_workers(struct btrfs_workers *workers)
+void btrfs_stop_workers(struct btrfs_workers *workers)
{
struct list_head *cur;
struct btrfs_worker_thread *worker;
@@ -427,7 +426,6 @@ int btrfs_stop_workers(struct btrfs_workers *workers)
put_worker(worker);
}
spin_unlock_irq(&workers->lock);
- return 0;
}
/*
@@ -615,14 +613,14 @@ found:
* it was taken from. It is intended for use with long running work functions
* that make some progress and want to give the cpu up for others.
*/
-int btrfs_requeue_work(struct btrfs_work *work)
+void btrfs_requeue_work(struct btrfs_work *work)
{
struct btrfs_worker_thread *worker = work->worker;
unsigned long flags;
int wake = 0;
if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags))
- goto out;
+ return;
spin_lock_irqsave(&worker->lock, flags);
if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags))
@@ -649,9 +647,6 @@ int btrfs_requeue_work(struct btrfs_work *work)
if (wake)
wake_up_process(worker->task);
spin_unlock_irqrestore(&worker->lock, flags);
-out:
-
- return 0;
}
void btrfs_set_work_high_prio(struct btrfs_work *work)
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index f34cc31fa3c..063698b90ce 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -111,9 +111,9 @@ struct btrfs_workers {
void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work);
int btrfs_start_workers(struct btrfs_workers *workers);
-int btrfs_stop_workers(struct btrfs_workers *workers);
+void btrfs_stop_workers(struct btrfs_workers *workers);
void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max,
struct btrfs_workers *async_starter);
-int btrfs_requeue_work(struct btrfs_work *work);
+void btrfs_requeue_work(struct btrfs_work *work);
void btrfs_set_work_high_prio(struct btrfs_work *work);
#endif
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 56136d9046f..bcec0675023 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -22,6 +22,7 @@
#include "ulist.h"
#include "transaction.h"
#include "delayed-ref.h"
+#include "locking.h"
/*
* this structure records all encountered refs on the way up to the root
@@ -893,18 +894,22 @@ static char *iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
s64 bytes_left = size - 1;
struct extent_buffer *eb = eb_in;
struct btrfs_key found_key;
+ int leave_spinning = path->leave_spinning;
if (bytes_left >= 0)
dest[bytes_left] = '\0';
+ path->leave_spinning = 1;
while (1) {
len = btrfs_inode_ref_name_len(eb, iref);
bytes_left -= len;
if (bytes_left >= 0)
read_extent_buffer(eb, dest + bytes_left,
(unsigned long)(iref + 1), len);
- if (eb != eb_in)
+ if (eb != eb_in) {
+ btrfs_tree_read_unlock_blocking(eb);
free_extent_buffer(eb);
+ }
ret = inode_ref_info(parent, 0, fs_root, path, &found_key);
if (ret > 0)
ret = -ENOENT;
@@ -919,8 +924,11 @@ static char *iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
slot = path->slots[0];
eb = path->nodes[0];
/* make sure we can use eb after releasing the path */
- if (eb != eb_in)
+ if (eb != eb_in) {
atomic_inc(&eb->refs);
+ btrfs_tree_read_lock(eb);
+ btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
+ }
btrfs_release_path(path);
iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
@@ -931,6 +939,7 @@ static char *iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
}
btrfs_release_path(path);
+ path->leave_spinning = leave_spinning;
if (ret)
return ERR_PTR(ret);
@@ -1247,7 +1256,7 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root,
struct btrfs_path *path,
iterate_irefs_t *iterate, void *ctx)
{
- int ret;
+ int ret = 0;
int slot;
u32 cur;
u32 len;
@@ -1259,7 +1268,8 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root,
struct btrfs_inode_ref *iref;
struct btrfs_key found_key;
- while (1) {
+ while (!ret) {
+ path->leave_spinning = 1;
ret = inode_ref_info(inum, parent ? parent+1 : 0, fs_root, path,
&found_key);
if (ret < 0)
@@ -1275,6 +1285,8 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root,
eb = path->nodes[0];
/* make sure we can use eb after releasing the path */
atomic_inc(&eb->refs);
+ btrfs_tree_read_lock(eb);
+ btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
btrfs_release_path(path);
item = btrfs_item_nr(eb, slot);
@@ -1288,13 +1300,12 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root,
(unsigned long long)found_key.objectid,
(unsigned long long)fs_root->objectid);
ret = iterate(parent, iref, eb, ctx);
- if (ret) {
- free_extent_buffer(eb);
+ if (ret)
break;
- }
len = sizeof(*iref) + name_len;
iref = (struct btrfs_inode_ref *)((char *)iref + len);
}
+ btrfs_tree_read_unlock_blocking(eb);
free_extent_buffer(eb);
}
@@ -1359,12 +1370,6 @@ int paths_from_inode(u64 inum, struct inode_fs_paths *ipath)
inode_to_path, ipath);
}
-/*
- * allocates space to return multiple file system paths for an inode.
- * total_bytes to allocate are passed, note that space usable for actual path
- * information will be total_bytes - sizeof(struct inode_fs_paths).
- * the returned pointer must be freed with free_ipath() in the end.
- */
struct btrfs_data_container *init_data_container(u32 total_bytes)
{
struct btrfs_data_container *data;
@@ -1420,5 +1425,8 @@ struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
void free_ipath(struct inode_fs_paths *ipath)
{
+ if (!ipath)
+ return;
+ kfree(ipath->fspath);
kfree(ipath);
}
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index d986824bb2b..c053e90f200 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -89,7 +89,6 @@
#include "disk-io.h"
#include "transaction.h"
#include "extent_io.h"
-#include "disk-io.h"
#include "volumes.h"
#include "print-tree.h"
#include "locking.h"
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index d02c27cd14c..86eff48dab7 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -120,10 +120,10 @@ static int check_compressed_csum(struct inode *inode,
page = cb->compressed_pages[i];
csum = ~(u32)0;
- kaddr = kmap_atomic(page, KM_USER0);
+ kaddr = kmap_atomic(page);
csum = btrfs_csum_data(root, kaddr, csum, PAGE_CACHE_SIZE);
btrfs_csum_final(csum, (char *)&csum);
- kunmap_atomic(kaddr, KM_USER0);
+ kunmap_atomic(kaddr);
if (csum != *cb_sum) {
printk(KERN_INFO "btrfs csum failed ino %llu "
@@ -226,8 +226,8 @@ out:
* Clear the writeback bits on all of the file
* pages for a compressed write
*/
-static noinline int end_compressed_writeback(struct inode *inode, u64 start,
- unsigned long ram_size)
+static noinline void end_compressed_writeback(struct inode *inode, u64 start,
+ unsigned long ram_size)
{
unsigned long index = start >> PAGE_CACHE_SHIFT;
unsigned long end_index = (start + ram_size - 1) >> PAGE_CACHE_SHIFT;
@@ -253,7 +253,6 @@ static noinline int end_compressed_writeback(struct inode *inode, u64 start,
index += ret;
}
/* the inode may be gone now */
- return 0;
}
/*
@@ -392,20 +391,21 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
*/
atomic_inc(&cb->pending_bios);
ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
if (!skip_sum) {
ret = btrfs_csum_one_bio(root, inode, bio,
start, 1);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
}
ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
bio_put(bio);
bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
+ BUG_ON(!bio);
bio->bi_private = cb;
bio->bi_end_io = end_compressed_bio_write;
bio_add_page(bio, page, PAGE_CACHE_SIZE, 0);
@@ -421,15 +421,15 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
bio_get(bio);
ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
if (!skip_sum) {
ret = btrfs_csum_one_bio(root, inode, bio, start, 1);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
}
ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
bio_put(bio);
return 0;
@@ -497,7 +497,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
* sure they map to this compressed extent on disk.
*/
set_page_extent_mapped(page);
- lock_extent(tree, last_offset, end, GFP_NOFS);
+ lock_extent(tree, last_offset, end);
read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, last_offset,
PAGE_CACHE_SIZE);
@@ -507,7 +507,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
(last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) ||
(em->block_start >> 9) != cb->orig_bio->bi_sector) {
free_extent_map(em);
- unlock_extent(tree, last_offset, end, GFP_NOFS);
+ unlock_extent(tree, last_offset, end);
unlock_page(page);
page_cache_release(page);
break;
@@ -521,10 +521,10 @@ static noinline int add_ra_bio_pages(struct inode *inode,
if (zero_offset) {
int zeros;
zeros = PAGE_CACHE_SIZE - zero_offset;
- userpage = kmap_atomic(page, KM_USER0);
+ userpage = kmap_atomic(page);
memset(userpage + zero_offset, 0, zeros);
flush_dcache_page(page);
- kunmap_atomic(userpage, KM_USER0);
+ kunmap_atomic(userpage);
}
}
@@ -535,7 +535,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
nr_pages++;
page_cache_release(page);
} else {
- unlock_extent(tree, last_offset, end, GFP_NOFS);
+ unlock_extent(tree, last_offset, end);
unlock_page(page);
page_cache_release(page);
break;
@@ -662,7 +662,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
bio_get(comp_bio);
ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
/*
* inc the count before we submit the bio so
@@ -675,19 +675,20 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
ret = btrfs_lookup_bio_sums(root, inode,
comp_bio, sums);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
}
sums += (comp_bio->bi_size + root->sectorsize - 1) /
root->sectorsize;
ret = btrfs_map_bio(root, READ, comp_bio,
mirror_num, 0);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
bio_put(comp_bio);
comp_bio = compressed_bio_alloc(bdev, cur_disk_byte,
GFP_NOFS);
+ BUG_ON(!comp_bio);
comp_bio->bi_private = cb;
comp_bio->bi_end_io = end_compressed_bio_read;
@@ -698,15 +699,15 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
bio_get(comp_bio);
ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
ret = btrfs_lookup_bio_sums(root, inode, comp_bio, sums);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
}
ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
bio_put(comp_bio);
return 0;
@@ -734,7 +735,7 @@ struct btrfs_compress_op *btrfs_compress_op[] = {
&btrfs_lzo_compress,
};
-int __init btrfs_init_compress(void)
+void __init btrfs_init_compress(void)
{
int i;
@@ -744,7 +745,6 @@ int __init btrfs_init_compress(void)
atomic_set(&comp_alloc_workspace[i], 0);
init_waitqueue_head(&comp_workspace_wait[i]);
}
- return 0;
}
/*
@@ -993,9 +993,9 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
bytes = min(PAGE_CACHE_SIZE - *pg_offset,
PAGE_CACHE_SIZE - buf_offset);
bytes = min(bytes, working_bytes);
- kaddr = kmap_atomic(page_out, KM_USER0);
+ kaddr = kmap_atomic(page_out);
memcpy(kaddr + *pg_offset, buf + buf_offset, bytes);
- kunmap_atomic(kaddr, KM_USER0);
+ kunmap_atomic(kaddr);
flush_dcache_page(page_out);
*pg_offset += bytes;
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index a12059f4f0f..9afb0a62ae8 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -19,7 +19,7 @@
#ifndef __BTRFS_COMPRESSION_
#define __BTRFS_COMPRESSION_
-int btrfs_init_compress(void);
+void btrfs_init_compress(void);
void btrfs_exit_compress(void);
int btrfs_compress_pages(int type, struct address_space *mapping,
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 0639a555e16..4106264fbc6 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -36,7 +36,7 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *dst_buf,
struct extent_buffer *src_buf);
-static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct btrfs_path *path, int level, int slot);
struct btrfs_path *btrfs_alloc_path(void)
@@ -156,10 +156,23 @@ struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
{
struct extent_buffer *eb;
- rcu_read_lock();
- eb = rcu_dereference(root->node);
- extent_buffer_get(eb);
- rcu_read_unlock();
+ while (1) {
+ rcu_read_lock();
+ eb = rcu_dereference(root->node);
+
+ /*
+ * RCU really hurts here, we could free up the root node because
+ * it was cow'ed but we may not get the new root node yet so do
+ * the inc_not_zero dance and if it doesn't work then
+ * synchronize_rcu and try again.
+ */
+ if (atomic_inc_not_zero(&eb->refs)) {
+ rcu_read_unlock();
+ break;
+ }
+ rcu_read_unlock();
+ synchronize_rcu();
+ }
return eb;
}
@@ -207,10 +220,12 @@ struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
*/
static void add_root_to_dirty_list(struct btrfs_root *root)
{
+ spin_lock(&root->fs_info->trans_lock);
if (root->track_dirty && list_empty(&root->dirty_list)) {
list_add(&root->dirty_list,
&root->fs_info->dirty_cowonly_roots);
}
+ spin_unlock(&root->fs_info->trans_lock);
}
/*
@@ -331,8 +346,13 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
if (btrfs_block_can_be_shared(root, buf)) {
ret = btrfs_lookup_extent_info(trans, root, buf->start,
buf->len, &refs, &flags);
- BUG_ON(ret);
- BUG_ON(refs == 0);
+ if (ret)
+ return ret;
+ if (refs == 0) {
+ ret = -EROFS;
+ btrfs_std_error(root->fs_info, ret);
+ return ret;
+ }
} else {
refs = 1;
if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
@@ -351,14 +371,14 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
ret = btrfs_inc_ref(trans, root, buf, 1, 1);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
if (root->root_key.objectid ==
BTRFS_TREE_RELOC_OBJECTID) {
ret = btrfs_dec_ref(trans, root, buf, 0, 1);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
ret = btrfs_inc_ref(trans, root, cow, 1, 1);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
}
new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
} else {
@@ -368,14 +388,15 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
ret = btrfs_inc_ref(trans, root, cow, 1, 1);
else
ret = btrfs_inc_ref(trans, root, cow, 0, 1);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
}
if (new_flags != 0) {
ret = btrfs_set_disk_extent_flags(trans, root,
buf->start,
buf->len,
new_flags, 0);
- BUG_ON(ret);
+ if (ret)
+ return ret;
}
} else {
if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
@@ -384,9 +405,9 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
ret = btrfs_inc_ref(trans, root, cow, 1, 1);
else
ret = btrfs_inc_ref(trans, root, cow, 0, 1);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
ret = btrfs_dec_ref(trans, root, buf, 1, 1);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
}
clean_tree_block(trans, root, buf);
*last_ref = 1;
@@ -415,7 +436,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
{
struct btrfs_disk_key disk_key;
struct extent_buffer *cow;
- int level;
+ int level, ret;
int last_ref = 0;
int unlock_orig = 0;
u64 parent_start;
@@ -467,7 +488,11 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
(unsigned long)btrfs_header_fsid(cow),
BTRFS_FSID_SIZE);
- update_ref_for_cow(trans, root, buf, cow, &last_ref);
+ ret = update_ref_for_cow(trans, root, buf, cow, &last_ref);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ return ret;
+ }
if (root->ref_cows)
btrfs_reloc_cow_block(trans, root, buf, cow);
@@ -504,7 +529,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
}
if (unlock_orig)
btrfs_tree_unlock(buf);
- free_extent_buffer(buf);
+ free_extent_buffer_stale(buf);
btrfs_mark_buffer_dirty(cow);
*cow_ret = cow;
return 0;
@@ -700,7 +725,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
cur = btrfs_find_tree_block(root, blocknr, blocksize);
if (cur)
- uptodate = btrfs_buffer_uptodate(cur, gen);
+ uptodate = btrfs_buffer_uptodate(cur, gen, 0);
else
uptodate = 0;
if (!cur || !uptodate) {
@@ -934,7 +959,12 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
/* promote the child to a root */
child = read_node_slot(root, mid, 0);
- BUG_ON(!child);
+ if (!child) {
+ ret = -EROFS;
+ btrfs_std_error(root->fs_info, ret);
+ goto enospc;
+ }
+
btrfs_tree_lock(child);
btrfs_set_lock_blocking(child);
ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
@@ -959,7 +989,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
root_sub_used(root, mid->len);
btrfs_free_tree_block(trans, root, mid, 0, 1, 0);
/* once for the root ptr */
- free_extent_buffer(mid);
+ free_extent_buffer_stale(mid);
return 0;
}
if (btrfs_header_nritems(mid) >
@@ -1010,13 +1040,10 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
if (btrfs_header_nritems(right) == 0) {
clean_tree_block(trans, root, right);
btrfs_tree_unlock(right);
- wret = del_ptr(trans, root, path, level + 1, pslot +
- 1);
- if (wret)
- ret = wret;
+ del_ptr(trans, root, path, level + 1, pslot + 1);
root_sub_used(root, right->len);
btrfs_free_tree_block(trans, root, right, 0, 1, 0);
- free_extent_buffer(right);
+ free_extent_buffer_stale(right);
right = NULL;
} else {
struct btrfs_disk_key right_key;
@@ -1035,7 +1062,11 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
* otherwise we would have pulled some pointers from the
* right
*/
- BUG_ON(!left);
+ if (!left) {
+ ret = -EROFS;
+ btrfs_std_error(root->fs_info, ret);
+ goto enospc;
+ }
wret = balance_node_right(trans, root, mid, left);
if (wret < 0) {
ret = wret;
@@ -1051,12 +1082,10 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
if (btrfs_header_nritems(mid) == 0) {
clean_tree_block(trans, root, mid);
btrfs_tree_unlock(mid);
- wret = del_ptr(trans, root, path, level + 1, pslot);
- if (wret)
- ret = wret;
+ del_ptr(trans, root, path, level + 1, pslot);
root_sub_used(root, mid->len);
btrfs_free_tree_block(trans, root, mid, 0, 1, 0);
- free_extent_buffer(mid);
+ free_extent_buffer_stale(mid);
mid = NULL;
} else {
/* update the parent key to reflect our changes */
@@ -1331,7 +1360,12 @@ static noinline int reada_for_balance(struct btrfs_root *root,
block1 = btrfs_node_blockptr(parent, slot - 1);
gen = btrfs_node_ptr_generation(parent, slot - 1);
eb = btrfs_find_tree_block(root, block1, blocksize);
- if (eb && btrfs_buffer_uptodate(eb, gen))
+ /*
+ * if we get -eagain from btrfs_buffer_uptodate, we
+ * don't want to return eagain here. That will loop
+ * forever
+ */
+ if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0)
block1 = 0;
free_extent_buffer(eb);
}
@@ -1339,7 +1373,7 @@ static noinline int reada_for_balance(struct btrfs_root *root,
block2 = btrfs_node_blockptr(parent, slot + 1);
gen = btrfs_node_ptr_generation(parent, slot + 1);
eb = btrfs_find_tree_block(root, block2, blocksize);
- if (eb && btrfs_buffer_uptodate(eb, gen))
+ if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0)
block2 = 0;
free_extent_buffer(eb);
}
@@ -1382,7 +1416,8 @@ static noinline int reada_for_balance(struct btrfs_root *root,
* if lowest_unlock is 1, level 0 won't be unlocked
*/
static noinline void unlock_up(struct btrfs_path *path, int level,
- int lowest_unlock)
+ int lowest_unlock, int min_write_lock_level,
+ int *write_lock_level)
{
int i;
int skip_level = level;
@@ -1414,6 +1449,11 @@ static noinline void unlock_up(struct btrfs_path *path, int level,
if (i >= lowest_unlock && i > skip_level && path->locks[i]) {
btrfs_tree_unlock_rw(t, path->locks[i]);
path->locks[i] = 0;
+ if (write_lock_level &&
+ i > min_write_lock_level &&
+ i <= *write_lock_level) {
+ *write_lock_level = i - 1;
+ }
}
}
}
@@ -1471,8 +1511,9 @@ read_block_for_search(struct btrfs_trans_handle *trans,
tmp = btrfs_find_tree_block(root, blocknr, blocksize);
if (tmp) {
- if (btrfs_buffer_uptodate(tmp, 0)) {
- if (btrfs_buffer_uptodate(tmp, gen)) {
+ /* first we do an atomic uptodate check */
+ if (btrfs_buffer_uptodate(tmp, 0, 1) > 0) {
+ if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
/*
* we found an up to date block without
* sleeping, return
@@ -1490,8 +1531,9 @@ read_block_for_search(struct btrfs_trans_handle *trans,
free_extent_buffer(tmp);
btrfs_set_path_blocking(p);
+ /* now we're allowed to do a blocking uptodate check */
tmp = read_tree_block(root, blocknr, blocksize, gen);
- if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
+ if (tmp && btrfs_buffer_uptodate(tmp, gen, 0) > 0) {
*eb_ret = tmp;
return 0;
}
@@ -1526,7 +1568,7 @@ read_block_for_search(struct btrfs_trans_handle *trans,
* and give up so that our caller doesn't loop forever
* on our EAGAINs.
*/
- if (!btrfs_buffer_uptodate(tmp, 0))
+ if (!btrfs_buffer_uptodate(tmp, 0, 0))
ret = -EIO;
free_extent_buffer(tmp);
}
@@ -1637,6 +1679,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
/* everything at write_lock_level or lower must be write locked */
int write_lock_level = 0;
u8 lowest_level = 0;
+ int min_write_lock_level;
lowest_level = p->lowest_level;
WARN_ON(lowest_level && ins_len > 0);
@@ -1664,6 +1707,8 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
if (cow && (p->keep_locks || p->lowest_level))
write_lock_level = BTRFS_MAX_LEVEL;
+ min_write_lock_level = write_lock_level;
+
again:
/*
* we try very hard to do read locks on the root
@@ -1795,7 +1840,8 @@ cow_done:
goto again;
}
- unlock_up(p, level, lowest_unlock);
+ unlock_up(p, level, lowest_unlock,
+ min_write_lock_level, &write_lock_level);
if (level == lowest_level) {
if (dec)
@@ -1857,7 +1903,8 @@ cow_done:
}
}
if (!p->search_for_split)
- unlock_up(p, level, lowest_unlock);
+ unlock_up(p, level, lowest_unlock,
+ min_write_lock_level, &write_lock_level);
goto done;
}
}
@@ -1881,15 +1928,12 @@ done:
* fixing up pointers when a given leaf/node is not in slot 0 of the
* higher levels
*
- * If this fails to write a tree block, it returns -1, but continues
- * fixing up the blocks in ram so the tree is consistent.
*/
-static int fixup_low_keys(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct btrfs_path *path,
- struct btrfs_disk_key *key, int level)
+static void fixup_low_keys(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct btrfs_path *path,
+ struct btrfs_disk_key *key, int level)
{
int i;
- int ret = 0;
struct extent_buffer *t;
for (i = level; i < BTRFS_MAX_LEVEL; i++) {
@@ -1902,7 +1946,6 @@ static int fixup_low_keys(struct btrfs_trans_handle *trans,
if (tslot != 0)
break;
}
- return ret;
}
/*
@@ -1911,9 +1954,9 @@ static int fixup_low_keys(struct btrfs_trans_handle *trans,
* This function isn't completely safe. It's the caller's responsibility
* that the new key won't break the order
*/
-int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct btrfs_path *path,
- struct btrfs_key *new_key)
+void btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct btrfs_path *path,
+ struct btrfs_key *new_key)
{
struct btrfs_disk_key disk_key;
struct extent_buffer *eb;
@@ -1923,13 +1966,11 @@ int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
slot = path->slots[0];
if (slot > 0) {
btrfs_item_key(eb, &disk_key, slot - 1);
- if (comp_keys(&disk_key, new_key) >= 0)
- return -1;
+ BUG_ON(comp_keys(&disk_key, new_key) >= 0);
}
if (slot < btrfs_header_nritems(eb) - 1) {
btrfs_item_key(eb, &disk_key, slot + 1);
- if (comp_keys(&disk_key, new_key) <= 0)
- return -1;
+ BUG_ON(comp_keys(&disk_key, new_key) <= 0);
}
btrfs_cpu_key_to_disk(&disk_key, new_key);
@@ -1937,7 +1978,6 @@ int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(eb);
if (slot == 0)
fixup_low_keys(trans, root, path, &disk_key, 1);
- return 0;
}
/*
@@ -2140,12 +2180,11 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
*
* slot and level indicate where you want the key to go, and
* blocknr is the block the key points to.
- *
- * returns zero on success and < 0 on any error
*/
-static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
- *root, struct btrfs_path *path, struct btrfs_disk_key
- *key, u64 bytenr, int slot, int level)
+static void insert_ptr(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct btrfs_path *path,
+ struct btrfs_disk_key *key, u64 bytenr,
+ int slot, int level)
{
struct extent_buffer *lower;
int nritems;
@@ -2155,8 +2194,7 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
lower = path->nodes[level];
nritems = btrfs_header_nritems(lower);
BUG_ON(slot > nritems);
- if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
- BUG();
+ BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(root));
if (slot != nritems) {
memmove_extent_buffer(lower,
btrfs_node_key_ptr_offset(slot + 1),
@@ -2169,7 +2207,6 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
btrfs_set_node_ptr_generation(lower, slot, trans->transid);
btrfs_set_header_nritems(lower, nritems + 1);
btrfs_mark_buffer_dirty(lower);
- return 0;
}
/*
@@ -2190,7 +2227,6 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
struct btrfs_disk_key disk_key;
int mid;
int ret;
- int wret;
u32 c_nritems;
c = path->nodes[level];
@@ -2247,11 +2283,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(c);
btrfs_mark_buffer_dirty(split);
- wret = insert_ptr(trans, root, path, &disk_key, split->start,
- path->slots[level + 1] + 1,
- level + 1);
- if (wret)
- ret = wret;
+ insert_ptr(trans, root, path, &disk_key, split->start,
+ path->slots[level + 1] + 1, level + 1);
if (path->slots[level] >= mid) {
path->slots[level] -= mid;
@@ -2320,6 +2353,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
{
struct extent_buffer *left = path->nodes[0];
struct extent_buffer *upper = path->nodes[1];
+ struct btrfs_map_token token;
struct btrfs_disk_key disk_key;
int slot;
u32 i;
@@ -2331,6 +2365,8 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
u32 data_end;
u32 this_item_size;
+ btrfs_init_map_token(&token);
+
if (empty)
nr = 0;
else
@@ -2408,8 +2444,8 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
push_space = BTRFS_LEAF_DATA_SIZE(root);
for (i = 0; i < right_nritems; i++) {
item = btrfs_item_nr(right, i);
- push_space -= btrfs_item_size(right, item);
- btrfs_set_item_offset(right, item, push_space);
+ push_space -= btrfs_token_item_size(right, item, &token);
+ btrfs_set_token_item_offset(right, item, push_space, &token);
}
left_nritems -= push_items;
@@ -2537,9 +2573,11 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
u32 old_left_nritems;
u32 nr;
int ret = 0;
- int wret;
u32 this_item_size;
u32 old_left_item_size;
+ struct btrfs_map_token token;
+
+ btrfs_init_map_token(&token);
if (empty)
nr = min(right_nritems, max_slot);
@@ -2600,9 +2638,10 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
item = btrfs_item_nr(left, i);
- ioff = btrfs_item_offset(left, item);
- btrfs_set_item_offset(left, item,
- ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
+ ioff = btrfs_token_item_offset(left, item, &token);
+ btrfs_set_token_item_offset(left, item,
+ ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size),
+ &token);
}
btrfs_set_header_nritems(left, old_left_nritems + push_items);
@@ -2632,8 +2671,9 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
for (i = 0; i < right_nritems; i++) {
item = btrfs_item_nr(right, i);
- push_space = push_space - btrfs_item_size(right, item);
- btrfs_set_item_offset(right, item, push_space);
+ push_space = push_space - btrfs_token_item_size(right,
+ item, &token);
+ btrfs_set_token_item_offset(right, item, push_space, &token);
}
btrfs_mark_buffer_dirty(left);
@@ -2643,9 +2683,7 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
clean_tree_block(trans, root, right);
btrfs_item_key(right, &disk_key, 0);
- wret = fixup_low_keys(trans, root, path, &disk_key, 1);
- if (wret)
- ret = wret;
+ fixup_low_keys(trans, root, path, &disk_key, 1);
/* then fixup the leaf pointer in the path */
if (path->slots[0] < push_items) {
@@ -2716,7 +2754,8 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
path->nodes[1], slot - 1, &left);
if (ret) {
/* we hit -ENOSPC, but it isn't fatal here */
- ret = 1;
+ if (ret == -ENOSPC)
+ ret = 1;
goto out;
}
@@ -2738,22 +2777,21 @@ out:
/*
* split the path's leaf in two, making sure there is at least data_size
* available for the resulting leaf level of the path.
- *
- * returns 0 if all went well and < 0 on failure.
*/
-static noinline int copy_for_split(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct extent_buffer *l,
- struct extent_buffer *right,
- int slot, int mid, int nritems)
+static noinline void copy_for_split(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct extent_buffer *l,
+ struct extent_buffer *right,
+ int slot, int mid, int nritems)
{
int data_copy_size;
int rt_data_off;
int i;
- int ret = 0;
- int wret;
struct btrfs_disk_key disk_key;
+ struct btrfs_map_token token;
+
+ btrfs_init_map_token(&token);
nritems = nritems - mid;
btrfs_set_header_nritems(right, nritems);
@@ -2775,17 +2813,15 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans,
struct btrfs_item *item = btrfs_item_nr(right, i);
u32 ioff;
- ioff = btrfs_item_offset(right, item);
- btrfs_set_item_offset(right, item, ioff + rt_data_off);
+ ioff = btrfs_token_item_offset(right, item, &token);
+ btrfs_set_token_item_offset(right, item,
+ ioff + rt_data_off, &token);
}
btrfs_set_header_nritems(l, mid);
- ret = 0;
btrfs_item_key(right, &disk_key, 0);
- wret = insert_ptr(trans, root, path, &disk_key, right->start,
- path->slots[1] + 1, 1);
- if (wret)
- ret = wret;
+ insert_ptr(trans, root, path, &disk_key, right->start,
+ path->slots[1] + 1, 1);
btrfs_mark_buffer_dirty(right);
btrfs_mark_buffer_dirty(l);
@@ -2803,8 +2839,6 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans,
}
BUG_ON(path->slots[0] < 0);
-
- return ret;
}
/*
@@ -2993,12 +3027,8 @@ again:
if (split == 0) {
if (mid <= slot) {
btrfs_set_header_nritems(right, 0);
- wret = insert_ptr(trans, root, path,
- &disk_key, right->start,
- path->slots[1] + 1, 1);
- if (wret)
- ret = wret;
-
+ insert_ptr(trans, root, path, &disk_key, right->start,
+ path->slots[1] + 1, 1);
btrfs_tree_unlock(path->nodes[0]);
free_extent_buffer(path->nodes[0]);
path->nodes[0] = right;
@@ -3006,29 +3036,21 @@ again:
path->slots[1] += 1;
} else {
btrfs_set_header_nritems(right, 0);
- wret = insert_ptr(trans, root, path,
- &disk_key,
- right->start,
+ insert_ptr(trans, root, path, &disk_key, right->start,
path->slots[1], 1);
- if (wret)
- ret = wret;
btrfs_tree_unlock(path->nodes[0]);
free_extent_buffer(path->nodes[0]);
path->nodes[0] = right;
path->slots[0] = 0;
- if (path->slots[1] == 0) {
- wret = fixup_low_keys(trans, root,
- path, &disk_key, 1);
- if (wret)
- ret = wret;
- }
+ if (path->slots[1] == 0)
+ fixup_low_keys(trans, root, path,
+ &disk_key, 1);
}
btrfs_mark_buffer_dirty(right);
return ret;
}
- ret = copy_for_split(trans, root, path, l, right, slot, mid, nritems);
- BUG_ON(ret);
+ copy_for_split(trans, root, path, l, right, slot, mid, nritems);
if (split == 2) {
BUG_ON(num_doubles != 0);
@@ -3036,7 +3058,7 @@ again:
goto again;
}
- return ret;
+ return 0;
push_for_double:
push_for_double_split(trans, root, path, data_size);
@@ -3238,11 +3260,9 @@ int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
return ret;
path->slots[0]++;
- ret = setup_items_for_insert(trans, root, path, new_key, &item_size,
- item_size, item_size +
- sizeof(struct btrfs_item), 1);
- BUG_ON(ret);
-
+ setup_items_for_insert(trans, root, path, new_key, &item_size,
+ item_size, item_size +
+ sizeof(struct btrfs_item), 1);
leaf = path->nodes[0];
memcpy_extent_buffer(leaf,
btrfs_item_ptr_offset(leaf, path->slots[0]),
@@ -3257,10 +3277,10 @@ int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
* off the end of the item or if we shift the item to chop bytes off
* the front.
*/
-int btrfs_truncate_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- u32 new_size, int from_end)
+void btrfs_truncate_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ u32 new_size, int from_end)
{
int slot;
struct extent_buffer *leaf;
@@ -3271,13 +3291,16 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
unsigned int old_size;
unsigned int size_diff;
int i;
+ struct btrfs_map_token token;
+
+ btrfs_init_map_token(&token);
leaf = path->nodes[0];
slot = path->slots[0];
old_size = btrfs_item_size_nr(leaf, slot);
if (old_size == new_size)
- return 0;
+ return;
nritems = btrfs_header_nritems(leaf);
data_end = leaf_data_end(root, leaf);
@@ -3297,8 +3320,9 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
u32 ioff;
item = btrfs_item_nr(leaf, i);
- ioff = btrfs_item_offset(leaf, item);
- btrfs_set_item_offset(leaf, item, ioff + size_diff);
+ ioff = btrfs_token_item_offset(leaf, item, &token);
+ btrfs_set_token_item_offset(leaf, item,
+ ioff + size_diff, &token);
}
/* shift the data */
@@ -3350,15 +3374,14 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
btrfs_print_leaf(root, leaf);
BUG();
}
- return 0;
}
/*
* make the item pointed to by the path bigger, data_size is the new size.
*/
-int btrfs_extend_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct btrfs_path *path,
- u32 data_size)
+void btrfs_extend_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct btrfs_path *path,
+ u32 data_size)
{
int slot;
struct extent_buffer *leaf;
@@ -3368,6 +3391,9 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
unsigned int old_data;
unsigned int old_size;
int i;
+ struct btrfs_map_token token;
+
+ btrfs_init_map_token(&token);
leaf = path->nodes[0];
@@ -3397,8 +3423,9 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
u32 ioff;
item = btrfs_item_nr(leaf, i);
- ioff = btrfs_item_offset(leaf, item);
- btrfs_set_item_offset(leaf, item, ioff - data_size);
+ ioff = btrfs_token_item_offset(leaf, item, &token);
+ btrfs_set_token_item_offset(leaf, item,
+ ioff - data_size, &token);
}
/* shift the data */
@@ -3416,7 +3443,6 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
btrfs_print_leaf(root, leaf);
BUG();
}
- return 0;
}
/*
@@ -3441,6 +3467,9 @@ int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
unsigned int data_end;
struct btrfs_disk_key disk_key;
struct btrfs_key found_key;
+ struct btrfs_map_token token;
+
+ btrfs_init_map_token(&token);
for (i = 0; i < nr; i++) {
if (total_size + data_size[i] + sizeof(struct btrfs_item) >
@@ -3506,8 +3535,9 @@ int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
u32 ioff;
item = btrfs_item_nr(leaf, i);
- ioff = btrfs_item_offset(leaf, item);
- btrfs_set_item_offset(leaf, item, ioff - total_data);
+ ioff = btrfs_token_item_offset(leaf, item, &token);
+ btrfs_set_token_item_offset(leaf, item,
+ ioff - total_data, &token);
}
/* shift the items */
memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
@@ -3534,9 +3564,10 @@ int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
btrfs_set_item_key(leaf, &disk_key, slot + i);
item = btrfs_item_nr(leaf, slot + i);
- btrfs_set_item_offset(leaf, item, data_end - data_size[i]);
+ btrfs_set_token_item_offset(leaf, item,
+ data_end - data_size[i], &token);
data_end -= data_size[i];
- btrfs_set_item_size(leaf, item, data_size[i]);
+ btrfs_set_token_item_size(leaf, item, data_size[i], &token);
}
btrfs_set_header_nritems(leaf, nritems + nr);
btrfs_mark_buffer_dirty(leaf);
@@ -3544,7 +3575,7 @@ int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
ret = 0;
if (slot == 0) {
btrfs_cpu_key_to_disk(&disk_key, cpu_key);
- ret = fixup_low_keys(trans, root, path, &disk_key, 1);
+ fixup_low_keys(trans, root, path, &disk_key, 1);
}
if (btrfs_leaf_free_space(root, leaf) < 0) {
@@ -3562,19 +3593,21 @@ out:
* to save stack depth by doing the bulk of the work in a function
* that doesn't call btrfs_search_slot
*/
-int setup_items_for_insert(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct btrfs_path *path,
- struct btrfs_key *cpu_key, u32 *data_size,
- u32 total_data, u32 total_size, int nr)
+void setup_items_for_insert(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct btrfs_path *path,
+ struct btrfs_key *cpu_key, u32 *data_size,
+ u32 total_data, u32 total_size, int nr)
{
struct btrfs_item *item;
int i;
u32 nritems;
unsigned int data_end;
struct btrfs_disk_key disk_key;
- int ret;
struct extent_buffer *leaf;
int slot;
+ struct btrfs_map_token token;
+
+ btrfs_init_map_token(&token);
leaf = path->nodes[0];
slot = path->slots[0];
@@ -3606,8 +3639,9 @@ int setup_items_for_insert(struct btrfs_trans_handle *trans,
u32 ioff;
item = btrfs_item_nr(leaf, i);
- ioff = btrfs_item_offset(leaf, item);
- btrfs_set_item_offset(leaf, item, ioff - total_data);
+ ioff = btrfs_token_item_offset(leaf, item, &token);
+ btrfs_set_token_item_offset(leaf, item,
+ ioff - total_data, &token);
}
/* shift the items */
memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
@@ -3626,17 +3660,17 @@ int setup_items_for_insert(struct btrfs_trans_handle *trans,
btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
btrfs_set_item_key(leaf, &disk_key, slot + i);
item = btrfs_item_nr(leaf, slot + i);
- btrfs_set_item_offset(leaf, item, data_end - data_size[i]);
+ btrfs_set_token_item_offset(leaf, item,
+ data_end - data_size[i], &token);
data_end -= data_size[i];
- btrfs_set_item_size(leaf, item, data_size[i]);
+ btrfs_set_token_item_size(leaf, item, data_size[i], &token);
}
btrfs_set_header_nritems(leaf, nritems + nr);
- ret = 0;
if (slot == 0) {
btrfs_cpu_key_to_disk(&disk_key, cpu_key);
- ret = fixup_low_keys(trans, root, path, &disk_key, 1);
+ fixup_low_keys(trans, root, path, &disk_key, 1);
}
btrfs_unlock_up_safe(path, 1);
btrfs_mark_buffer_dirty(leaf);
@@ -3645,7 +3679,6 @@ int setup_items_for_insert(struct btrfs_trans_handle *trans,
btrfs_print_leaf(root, leaf);
BUG();
}
- return ret;
}
/*
@@ -3672,16 +3705,14 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
if (ret == 0)
return -EEXIST;
if (ret < 0)
- goto out;
+ return ret;
slot = path->slots[0];
BUG_ON(slot < 0);
- ret = setup_items_for_insert(trans, root, path, cpu_key, data_size,
+ setup_items_for_insert(trans, root, path, cpu_key, data_size,
total_data, total_size, nr);
-
-out:
- return ret;
+ return 0;
}
/*
@@ -3717,13 +3748,11 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
* the tree should have been previously balanced so the deletion does not
* empty a node.
*/
-static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct btrfs_path *path, int level, int slot)
+static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ struct btrfs_path *path, int level, int slot)
{
struct extent_buffer *parent = path->nodes[level];
u32 nritems;
- int ret = 0;
- int wret;
nritems = btrfs_header_nritems(parent);
if (slot != nritems - 1) {
@@ -3743,12 +3772,9 @@ static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct btrfs_disk_key disk_key;
btrfs_node_key(parent, &disk_key, 0);
- wret = fixup_low_keys(trans, root, path, &disk_key, level + 1);
- if (wret)
- ret = wret;
+ fixup_low_keys(trans, root, path, &disk_key, level + 1);
}
btrfs_mark_buffer_dirty(parent);
- return ret;
}
/*
@@ -3761,17 +3787,13 @@ static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
* The path must have already been setup for deleting the leaf, including
* all the proper balancing. path->nodes[1] must be locked.
*/
-static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct extent_buffer *leaf)
+static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct extent_buffer *leaf)
{
- int ret;
-
WARN_ON(btrfs_header_generation(leaf) != trans->transid);
- ret = del_ptr(trans, root, path, 1, path->slots[1]);
- if (ret)
- return ret;
+ del_ptr(trans, root, path, 1, path->slots[1]);
/*
* btrfs_free_extent is expensive, we want to make sure we
@@ -3781,8 +3803,9 @@ static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
root_sub_used(root, leaf->len);
+ extent_buffer_get(leaf);
btrfs_free_tree_block(trans, root, leaf, 0, 1, 0);
- return 0;
+ free_extent_buffer_stale(leaf);
}
/*
* delete the item at the leaf level in path. If that empties
@@ -3799,6 +3822,9 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
int wret;
int i;
u32 nritems;
+ struct btrfs_map_token token;
+
+ btrfs_init_map_token(&token);
leaf = path->nodes[0];
last_off = btrfs_item_offset_nr(leaf, slot + nr - 1);
@@ -3820,8 +3846,9 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
u32 ioff;
item = btrfs_item_nr(leaf, i);
- ioff = btrfs_item_offset(leaf, item);
- btrfs_set_item_offset(leaf, item, ioff + dsize);
+ ioff = btrfs_token_item_offset(leaf, item, &token);
+ btrfs_set_token_item_offset(leaf, item,
+ ioff + dsize, &token);
}
memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
@@ -3839,8 +3866,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
} else {
btrfs_set_path_blocking(path);
clean_tree_block(trans, root, leaf);
- ret = btrfs_del_leaf(trans, root, path, leaf);
- BUG_ON(ret);
+ btrfs_del_leaf(trans, root, path, leaf);
}
} else {
int used = leaf_space_used(leaf, 0, nritems);
@@ -3848,10 +3874,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct btrfs_disk_key disk_key;
btrfs_item_key(leaf, &disk_key, 0);
- wret = fixup_low_keys(trans, root, path,
- &disk_key, 1);
- if (wret)
- ret = wret;
+ fixup_low_keys(trans, root, path, &disk_key, 1);
}
/* delete the leaf if it is mostly empty */
@@ -3879,9 +3902,9 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
if (btrfs_header_nritems(leaf) == 0) {
path->slots[1] = slot;
- ret = btrfs_del_leaf(trans, root, path, leaf);
- BUG_ON(ret);
+ btrfs_del_leaf(trans, root, path, leaf);
free_extent_buffer(leaf);
+ ret = 0;
} else {
/* if we're still in the path, make sure
* we're dirty. Otherwise, one of the
@@ -4029,7 +4052,7 @@ again:
tmp = btrfs_find_tree_block(root, blockptr,
btrfs_level_size(root, level - 1));
- if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
+ if (tmp && btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
free_extent_buffer(tmp);
break;
}
@@ -4059,18 +4082,18 @@ find_next_key:
path->slots[level] = slot;
if (level == path->lowest_level) {
ret = 0;
- unlock_up(path, level, 1);
+ unlock_up(path, level, 1, 0, NULL);
goto out;
}
btrfs_set_path_blocking(path);
cur = read_node_slot(root, cur, slot);
- BUG_ON(!cur);
+ BUG_ON(!cur); /* -ENOMEM */
btrfs_tree_read_lock(cur);
path->locks[level - 1] = BTRFS_READ_LOCK;
path->nodes[level - 1] = cur;
- unlock_up(path, level, 1);
+ unlock_up(path, level, 1, 0, NULL);
btrfs_clear_path_blocking(path, NULL, 0);
}
out:
@@ -4152,7 +4175,8 @@ next:
struct extent_buffer *cur;
cur = btrfs_find_tree_block(root, blockptr,
btrfs_level_size(root, level - 1));
- if (!cur || !btrfs_buffer_uptodate(cur, gen)) {
+ if (!cur ||
+ btrfs_buffer_uptodate(cur, gen, 1) <= 0) {
slot++;
if (cur)
free_extent_buffer(cur);
@@ -4306,7 +4330,7 @@ again:
}
ret = 0;
done:
- unlock_up(path, 0, 1);
+ unlock_up(path, 0, 1, 0, NULL);
path->leave_spinning = old_spinning;
if (!old_spinning)
btrfs_set_path_blocking(path);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 91128d7e9d3..8fd72331d60 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -48,6 +48,8 @@ struct btrfs_ordered_sum;
#define BTRFS_MAGIC "_BHRfS_M"
+#define BTRFS_MAX_MIRRORS 2
+
#define BTRFS_MAX_LEVEL 8
#define BTRFS_COMPAT_EXTENT_TREE_V0
@@ -138,6 +140,12 @@ struct btrfs_ordered_sum;
#define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID 2
/*
+ * the max metadata block size. This limit is somewhat artificial,
+ * but the memmove costs go through the roof for larger blocks.
+ */
+#define BTRFS_MAX_METADATA_BLOCKSIZE 65536
+
+/*
* we can actually store much bigger names, but lets not confuse the rest
* of linux
*/
@@ -461,6 +469,19 @@ struct btrfs_super_block {
#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1)
#define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2)
#define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO (1ULL << 3)
+/*
+ * some patches floated around with a second compression method
+ * lets save that incompat here for when they do get in
+ * Note we don't actually support it, we're just reserving the
+ * number
+ */
+#define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZOv2 (1ULL << 4)
+
+/*
+ * older kernels tried to do bigger metadata blocks, but the
+ * code was pretty buggy. Lets not let them try anymore.
+ */
+#define BTRFS_FEATURE_INCOMPAT_BIG_METADATA (1ULL << 5)
#define BTRFS_FEATURE_COMPAT_SUPP 0ULL
#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL
@@ -468,6 +489,7 @@ struct btrfs_super_block {
(BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \
BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \
BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \
+ BTRFS_FEATURE_INCOMPAT_BIG_METADATA | \
BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO)
/*
@@ -829,6 +851,21 @@ struct btrfs_csum_item {
*/
#define BTRFS_AVAIL_ALLOC_BIT_SINGLE (1ULL << 48)
+#define BTRFS_EXTENDED_PROFILE_MASK (BTRFS_BLOCK_GROUP_PROFILE_MASK | \
+ BTRFS_AVAIL_ALLOC_BIT_SINGLE)
+
+static inline u64 chunk_to_extended(u64 flags)
+{
+ if ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0)
+ flags |= BTRFS_AVAIL_ALLOC_BIT_SINGLE;
+
+ return flags;
+}
+static inline u64 extended_to_chunk(u64 flags)
+{
+ return flags & ~BTRFS_AVAIL_ALLOC_BIT_SINGLE;
+}
+
struct btrfs_block_group_item {
__le64 used;
__le64 chunk_objectid;
@@ -1041,7 +1078,7 @@ struct btrfs_fs_info {
* is required instead of the faster short fsync log commits
*/
u64 last_trans_log_full_commit;
- unsigned long mount_opt:21;
+ unsigned long mount_opt;
unsigned long compress_type:4;
u64 max_inline;
u64 alloc_start;
@@ -1503,6 +1540,7 @@ struct btrfs_ioctl_defrag_range_args {
#define BTRFS_MOUNT_SKIP_BALANCE (1 << 19)
#define BTRFS_MOUNT_CHECK_INTEGRITY (1 << 20)
#define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21)
+#define BTRFS_MOUNT_PANIC_ON_FATAL_ERROR (1 << 22)
#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
@@ -1526,6 +1564,17 @@ struct btrfs_ioctl_defrag_range_args {
#define BTRFS_INODE_ROOT_ITEM_INIT (1 << 31)
+struct btrfs_map_token {
+ struct extent_buffer *eb;
+ char *kaddr;
+ unsigned long offset;
+};
+
+static inline void btrfs_init_map_token (struct btrfs_map_token *token)
+{
+ memset(token, 0, sizeof(*token));
+}
+
/* some macros to generate set/get funcs for the struct fields. This
* assumes there is a lefoo_to_cpu for every type, so lets make a simple
* one for u8:
@@ -1549,20 +1598,22 @@ struct btrfs_ioctl_defrag_range_args {
#ifndef BTRFS_SETGET_FUNCS
#define BTRFS_SETGET_FUNCS(name, type, member, bits) \
u##bits btrfs_##name(struct extent_buffer *eb, type *s); \
+u##bits btrfs_token_##name(struct extent_buffer *eb, type *s, struct btrfs_map_token *token); \
+void btrfs_set_token_##name(struct extent_buffer *eb, type *s, u##bits val, struct btrfs_map_token *token);\
void btrfs_set_##name(struct extent_buffer *eb, type *s, u##bits val);
#endif
#define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \
static inline u##bits btrfs_##name(struct extent_buffer *eb) \
{ \
- type *p = page_address(eb->first_page); \
+ type *p = page_address(eb->pages[0]); \
u##bits res = le##bits##_to_cpu(p->member); \
return res; \
} \
static inline void btrfs_set_##name(struct extent_buffer *eb, \
u##bits val) \
{ \
- type *p = page_address(eb->first_page); \
+ type *p = page_address(eb->pages[0]); \
p->member = cpu_to_le##bits(val); \
}
@@ -2466,8 +2517,7 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 num_bytes, u64 min_alloc_size,
u64 empty_size, u64 hint_byte,
- u64 search_end, struct btrfs_key *ins,
- u64 data);
+ struct btrfs_key *ins, u64 data);
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct extent_buffer *buf, int full_backref, int for_cow);
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
@@ -2484,8 +2534,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len);
int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root,
u64 start, u64 len);
-int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
- struct btrfs_root *root);
+void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root);
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
@@ -2548,8 +2598,8 @@ void btrfs_block_rsv_release(struct btrfs_root *root,
u64 num_bytes);
int btrfs_set_block_group_ro(struct btrfs_root *root,
struct btrfs_block_group_cache *cache);
-int btrfs_set_block_group_rw(struct btrfs_root *root,
- struct btrfs_block_group_cache *cache);
+void btrfs_set_block_group_rw(struct btrfs_root *root,
+ struct btrfs_block_group_cache *cache);
void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
int btrfs_error_unpin_extent_range(struct btrfs_root *root,
@@ -2568,9 +2618,9 @@ int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2);
int btrfs_previous_item(struct btrfs_root *root,
struct btrfs_path *path, u64 min_objectid,
int type);
-int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct btrfs_path *path,
- struct btrfs_key *new_key);
+void btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct btrfs_path *path,
+ struct btrfs_key *new_key);
struct extent_buffer *btrfs_root_node(struct btrfs_root *root);
struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root);
int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
@@ -2590,12 +2640,13 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
struct extent_buffer **cow_ret, u64 new_root_objectid);
int btrfs_block_can_be_shared(struct btrfs_root *root,
struct extent_buffer *buf);
-int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root
- *root, struct btrfs_path *path, u32 data_size);
-int btrfs_truncate_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- u32 new_size, int from_end);
+void btrfs_extend_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct btrfs_path *path,
+ u32 data_size);
+void btrfs_truncate_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ u32 new_size, int from_end);
int btrfs_split_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
@@ -2629,10 +2680,10 @@ static inline int btrfs_del_item(struct btrfs_trans_handle *trans,
return btrfs_del_items(trans, root, path, path->slots[0], 1);
}
-int setup_items_for_insert(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct btrfs_path *path,
- struct btrfs_key *cpu_key, u32 *data_size,
- u32 total_data, u32 total_size, int nr);
+void setup_items_for_insert(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct btrfs_path *path,
+ struct btrfs_key *cpu_key, u32 *data_size,
+ u32 total_data, u32 total_size, int nr);
int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
*root, struct btrfs_key *key, void *data, u32 data_size);
int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
@@ -2659,9 +2710,9 @@ static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p)
}
int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf);
-void btrfs_drop_snapshot(struct btrfs_root *root,
- struct btrfs_block_rsv *block_rsv, int update_ref,
- int for_reloc);
+int __must_check btrfs_drop_snapshot(struct btrfs_root *root,
+ struct btrfs_block_rsv *block_rsv,
+ int update_ref, int for_reloc);
int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *node,
@@ -2687,24 +2738,6 @@ static inline void free_fs_info(struct btrfs_fs_info *fs_info)
kfree(fs_info->super_for_commit);
kfree(fs_info);
}
-/**
- * profile_is_valid - tests whether a given profile is valid and reduced
- * @flags: profile to validate
- * @extended: if true @flags is treated as an extended profile
- */
-static inline int profile_is_valid(u64 flags, int extended)
-{
- u64 mask = ~BTRFS_BLOCK_GROUP_PROFILE_MASK;
-
- flags &= ~BTRFS_BLOCK_GROUP_TYPE_MASK;
- if (extended)
- mask &= ~BTRFS_AVAIL_ALLOC_BIT_SINGLE;
-
- if (flags & mask)
- return 0;
- /* true if zero or exactly one bit set */
- return (flags & (~flags + 1)) == flags;
-}
/* root-item.c */
int btrfs_find_root_ref(struct btrfs_root *tree_root,
@@ -2723,9 +2756,10 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root
*root, struct btrfs_key *key, struct btrfs_root_item
*item);
-int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
- *root, struct btrfs_key *key, struct btrfs_root_item
- *item);
+int __must_check btrfs_update_root(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_key *key,
+ struct btrfs_root_item *item);
int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct
btrfs_root_item *item, struct btrfs_key *key);
int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid);
@@ -2909,7 +2943,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root);
void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size);
-int btrfs_invalidate_inodes(struct btrfs_root *root);
+void btrfs_invalidate_inodes(struct btrfs_root *root);
void btrfs_add_delayed_iput(struct inode *inode);
void btrfs_run_delayed_iputs(struct btrfs_root *root);
int btrfs_prealloc_file_range(struct inode *inode, int mode,
@@ -2961,13 +2995,41 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
/* super.c */
int btrfs_parse_options(struct btrfs_root *root, char *options);
int btrfs_sync_fs(struct super_block *sb, int wait);
+void btrfs_printk(struct btrfs_fs_info *fs_info, const char *fmt, ...);
void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
- unsigned int line, int errno);
+ unsigned int line, int errno, const char *fmt, ...);
+
+void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, const char *function,
+ unsigned int line, int errno);
+
+#define btrfs_abort_transaction(trans, root, errno) \
+do { \
+ __btrfs_abort_transaction(trans, root, __func__, \
+ __LINE__, errno); \
+} while (0)
#define btrfs_std_error(fs_info, errno) \
do { \
if ((errno)) \
- __btrfs_std_error((fs_info), __func__, __LINE__, (errno));\
+ __btrfs_std_error((fs_info), __func__, \
+ __LINE__, (errno), NULL); \
+} while (0)
+
+#define btrfs_error(fs_info, errno, fmt, args...) \
+do { \
+ __btrfs_std_error((fs_info), __func__, __LINE__, \
+ (errno), fmt, ##args); \
+} while (0)
+
+void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
+ unsigned int line, int errno, const char *fmt, ...);
+
+#define btrfs_panic(fs_info, errno, fmt, args...) \
+do { \
+ struct btrfs_fs_info *_i = (fs_info); \
+ __btrfs_panic(_i, __func__, __LINE__, errno, fmt, ##args); \
+ BUG_ON(!(_i->mount_opt & BTRFS_MOUNT_PANIC_ON_FATAL_ERROR)); \
} while (0)
/* acl.c */
@@ -3003,16 +3065,17 @@ void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans,
struct btrfs_pending_snapshot *pending,
u64 *bytes_to_reserve);
-void btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
+int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
struct btrfs_pending_snapshot *pending);
/* scrub.c */
int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end,
struct btrfs_scrub_progress *progress, int readonly);
-int btrfs_scrub_pause(struct btrfs_root *root);
-int btrfs_scrub_pause_super(struct btrfs_root *root);
-int btrfs_scrub_continue(struct btrfs_root *root);
-int btrfs_scrub_continue_super(struct btrfs_root *root);
+void btrfs_scrub_pause(struct btrfs_root *root);
+void btrfs_scrub_pause_super(struct btrfs_root *root);
+void btrfs_scrub_continue(struct btrfs_root *root);
+void btrfs_scrub_continue_super(struct btrfs_root *root);
+int __btrfs_scrub_cancel(struct btrfs_fs_info *info);
int btrfs_scrub_cancel(struct btrfs_root *root);
int btrfs_scrub_cancel_dev(struct btrfs_root *root, struct btrfs_device *dev);
int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid);
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index fe4cd0f1cef..03e3748d84d 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -115,6 +115,7 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(struct inode *inode)
return NULL;
}
+/* Will return either the node or PTR_ERR(-ENOMEM) */
static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
struct inode *inode)
{
@@ -836,10 +837,8 @@ static int btrfs_batch_insert_items(struct btrfs_trans_handle *trans,
btrfs_clear_path_blocking(path, NULL, 0);
/* insert the keys of the items */
- ret = setup_items_for_insert(trans, root, path, keys, data_size,
- total_data_size, total_size, nitems);
- if (ret)
- goto error;
+ setup_items_for_insert(trans, root, path, keys, data_size,
+ total_data_size, total_size, nitems);
/* insert the dir index items */
slot = path->slots[0];
@@ -1108,16 +1107,25 @@ static int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
return 0;
}
-/* Called when committing the transaction. */
+/*
+ * Called when committing the transaction.
+ * Returns 0 on success.
+ * Returns < 0 on error and returns with an aborted transaction with any
+ * outstanding delayed items cleaned up.
+ */
int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
+ struct btrfs_root *curr_root = root;
struct btrfs_delayed_root *delayed_root;
struct btrfs_delayed_node *curr_node, *prev_node;
struct btrfs_path *path;
struct btrfs_block_rsv *block_rsv;
int ret = 0;
+ if (trans->aborted)
+ return -EIO;
+
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
@@ -1130,17 +1138,18 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
curr_node = btrfs_first_delayed_node(delayed_root);
while (curr_node) {
- root = curr_node->root;
- ret = btrfs_insert_delayed_items(trans, path, root,
+ curr_root = curr_node->root;
+ ret = btrfs_insert_delayed_items(trans, path, curr_root,
curr_node);
if (!ret)
- ret = btrfs_delete_delayed_items(trans, path, root,
- curr_node);
+ ret = btrfs_delete_delayed_items(trans, path,
+ curr_root, curr_node);
if (!ret)
- ret = btrfs_update_delayed_inode(trans, root, path,
- curr_node);
+ ret = btrfs_update_delayed_inode(trans, curr_root,
+ path, curr_node);
if (ret) {
btrfs_release_delayed_node(curr_node);
+ btrfs_abort_transaction(trans, root, ret);
break;
}
@@ -1151,6 +1160,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
btrfs_free_path(path);
trans->block_rsv = block_rsv;
+
return ret;
}
@@ -1371,6 +1381,7 @@ void btrfs_balance_delayed_items(struct btrfs_root *root)
btrfs_wq_run_delayed_node(delayed_root, root, 0);
}
+/* Will return 0 or -ENOMEM */
int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
struct btrfs_root *root, const char *name,
int name_len, struct inode *dir,
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 66e4f29505a..69f22e3ab3b 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -420,7 +420,7 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
* this does all the dirty work in terms of maintaining the correct
* overall modification count.
*/
-static noinline int add_delayed_ref_head(struct btrfs_fs_info *fs_info,
+static noinline void add_delayed_ref_head(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *ref,
u64 bytenr, u64 num_bytes,
@@ -487,20 +487,19 @@ static noinline int add_delayed_ref_head(struct btrfs_fs_info *fs_info,
* we've updated the existing ref, free the newly
* allocated ref
*/
- kfree(ref);
+ kfree(head_ref);
} else {
delayed_refs->num_heads++;
delayed_refs->num_heads_ready++;
delayed_refs->num_entries++;
trans->delayed_ref_updates++;
}
- return 0;
}
/*
* helper to insert a delayed tree ref into the rbtree.
*/
-static noinline int add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
+static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *ref,
u64 bytenr, u64 num_bytes, u64 parent,
@@ -549,18 +548,17 @@ static noinline int add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
* we've updated the existing ref, free the newly
* allocated ref
*/
- kfree(ref);
+ kfree(full_ref);
} else {
delayed_refs->num_entries++;
trans->delayed_ref_updates++;
}
- return 0;
}
/*
* helper to insert a delayed data ref into the rbtree.
*/
-static noinline int add_delayed_data_ref(struct btrfs_fs_info *fs_info,
+static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *ref,
u64 bytenr, u64 num_bytes, u64 parent,
@@ -611,12 +609,11 @@ static noinline int add_delayed_data_ref(struct btrfs_fs_info *fs_info,
* we've updated the existing ref, free the newly
* allocated ref
*/
- kfree(ref);
+ kfree(full_ref);
} else {
delayed_refs->num_entries++;
trans->delayed_ref_updates++;
}
- return 0;
}
/*
@@ -634,7 +631,6 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_tree_ref *ref;
struct btrfs_delayed_ref_head *head_ref;
struct btrfs_delayed_ref_root *delayed_refs;
- int ret;
BUG_ON(extent_op && extent_op->is_data);
ref = kmalloc(sizeof(*ref), GFP_NOFS);
@@ -656,14 +652,12 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
* insert both the head node and the new ref without dropping
* the spin lock
*/
- ret = add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
+ add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
num_bytes, action, 0);
- BUG_ON(ret);
- ret = add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr,
+ add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr,
num_bytes, parent, ref_root, level, action,
for_cow);
- BUG_ON(ret);
if (!need_ref_seq(for_cow, ref_root) &&
waitqueue_active(&delayed_refs->seq_wait))
wake_up(&delayed_refs->seq_wait);
@@ -685,7 +679,6 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_data_ref *ref;
struct btrfs_delayed_ref_head *head_ref;
struct btrfs_delayed_ref_root *delayed_refs;
- int ret;
BUG_ON(extent_op && !extent_op->is_data);
ref = kmalloc(sizeof(*ref), GFP_NOFS);
@@ -707,14 +700,12 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
* insert both the head node and the new ref without dropping
* the spin lock
*/
- ret = add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
+ add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
num_bytes, action, 1);
- BUG_ON(ret);
- ret = add_delayed_data_ref(fs_info, trans, &ref->node, bytenr,
+ add_delayed_data_ref(fs_info, trans, &ref->node, bytenr,
num_bytes, parent, ref_root, owner, offset,
action, for_cow);
- BUG_ON(ret);
if (!need_ref_seq(for_cow, ref_root) &&
waitqueue_active(&delayed_refs->seq_wait))
wake_up(&delayed_refs->seq_wait);
@@ -729,7 +720,6 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
{
struct btrfs_delayed_ref_head *head_ref;
struct btrfs_delayed_ref_root *delayed_refs;
- int ret;
head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
if (!head_ref)
@@ -740,10 +730,9 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
delayed_refs = &trans->transaction->delayed_refs;
spin_lock(&delayed_refs->lock);
- ret = add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
+ add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
extent_op->is_data);
- BUG_ON(ret);
if (waitqueue_active(&delayed_refs->seq_wait))
wake_up(&delayed_refs->seq_wait);
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index 31d84e78129..c1a074d0696 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -49,9 +49,8 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle
di = btrfs_match_dir_item_name(root, path, name, name_len);
if (di)
return ERR_PTR(-EEXIST);
- ret = btrfs_extend_item(trans, root, path, data_size);
- }
- if (ret < 0)
+ btrfs_extend_item(trans, root, path, data_size);
+ } else if (ret < 0)
return ERR_PTR(ret);
WARN_ON(ret > 0);
leaf = path->nodes[0];
@@ -116,6 +115,7 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
* 'location' is the key to stuff into the directory item, 'type' is the
* type of the inode we're pointing to, and 'index' is the sequence number
* to use for the second index (if one is created).
+ * Will return 0 or -ENOMEM
*/
int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
*root, const char *name, int name_len,
@@ -383,8 +383,8 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
start = btrfs_item_ptr_offset(leaf, path->slots[0]);
memmove_extent_buffer(leaf, ptr, ptr + sub_item_len,
item_len - (ptr + sub_item_len - start));
- ret = btrfs_truncate_item(trans, root, path,
- item_len - sub_item_len, 1);
+ btrfs_truncate_item(trans, root, path,
+ item_len - sub_item_len, 1);
}
return ret;
}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 534266fe505..a7ffc88a7db 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -48,20 +48,19 @@
static struct extent_io_ops btree_extent_io_ops;
static void end_workqueue_fn(struct btrfs_work *work);
static void free_fs_root(struct btrfs_root *root);
-static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
+static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
int read_only);
-static int btrfs_destroy_ordered_operations(struct btrfs_root *root);
-static int btrfs_destroy_ordered_extents(struct btrfs_root *root);
+static void btrfs_destroy_ordered_operations(struct btrfs_root *root);
+static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
struct btrfs_root *root);
-static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t);
-static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root);
+static void btrfs_destroy_pending_snapshots(struct btrfs_transaction *t);
+static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root);
static int btrfs_destroy_marked_extents(struct btrfs_root *root,
struct extent_io_tree *dirty_pages,
int mark);
static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
struct extent_io_tree *pinned_extents);
-static int btrfs_cleanup_transaction(struct btrfs_root *root);
/*
* end_io_wq structs are used to do processing in task context when an IO is
@@ -99,6 +98,7 @@ struct async_submit_bio {
*/
u64 bio_offset;
struct btrfs_work work;
+ int error;
};
/*
@@ -323,7 +323,8 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
* in the wrong place.
*/
static int verify_parent_transid(struct extent_io_tree *io_tree,
- struct extent_buffer *eb, u64 parent_transid)
+ struct extent_buffer *eb, u64 parent_transid,
+ int atomic)
{
struct extent_state *cached_state = NULL;
int ret;
@@ -331,9 +332,12 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
if (!parent_transid || btrfs_header_generation(eb) == parent_transid)
return 0;
+ if (atomic)
+ return -EAGAIN;
+
lock_extent_bits(io_tree, eb->start, eb->start + eb->len - 1,
- 0, &cached_state, GFP_NOFS);
- if (extent_buffer_uptodate(io_tree, eb, cached_state) &&
+ 0, &cached_state);
+ if (extent_buffer_uptodate(eb) &&
btrfs_header_generation(eb) == parent_transid) {
ret = 0;
goto out;
@@ -344,7 +348,7 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
(unsigned long long)parent_transid,
(unsigned long long)btrfs_header_generation(eb));
ret = 1;
- clear_extent_buffer_uptodate(io_tree, eb, &cached_state);
+ clear_extent_buffer_uptodate(eb);
out:
unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1,
&cached_state, GFP_NOFS);
@@ -360,9 +364,11 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
u64 start, u64 parent_transid)
{
struct extent_io_tree *io_tree;
+ int failed = 0;
int ret;
int num_copies = 0;
int mirror_num = 0;
+ int failed_mirror = 0;
clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree;
@@ -370,9 +376,9 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
ret = read_extent_buffer_pages(io_tree, eb, start,
WAIT_COMPLETE,
btree_get_extent, mirror_num);
- if (!ret &&
- !verify_parent_transid(io_tree, eb, parent_transid))
- return ret;
+ if (!ret && !verify_parent_transid(io_tree, eb,
+ parent_transid, 0))
+ break;
/*
* This buffer's crc is fine, but its contents are corrupted, so
@@ -380,18 +386,30 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
* any less wrong.
*/
if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags))
- return ret;
+ break;
num_copies = btrfs_num_copies(&root->fs_info->mapping_tree,
eb->start, eb->len);
if (num_copies == 1)
- return ret;
+ break;
+
+ if (!failed_mirror) {
+ failed = 1;
+ failed_mirror = eb->read_mirror;
+ }
mirror_num++;
+ if (mirror_num == failed_mirror)
+ mirror_num++;
+
if (mirror_num > num_copies)
- return ret;
+ break;
}
- return -EIO;
+
+ if (failed && !ret)
+ repair_eb_io_failure(root, eb, failed_mirror);
+
+ return ret;
}
/*
@@ -404,50 +422,27 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
struct extent_io_tree *tree;
u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
u64 found_start;
- unsigned long len;
struct extent_buffer *eb;
- int ret;
tree = &BTRFS_I(page->mapping->host)->io_tree;
- if (page->private == EXTENT_PAGE_PRIVATE) {
- WARN_ON(1);
- goto out;
- }
- if (!page->private) {
- WARN_ON(1);
- goto out;
- }
- len = page->private >> 2;
- WARN_ON(len == 0);
-
- eb = alloc_extent_buffer(tree, start, len, page);
- if (eb == NULL) {
- WARN_ON(1);
- goto out;
- }
- ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE,
- btrfs_header_generation(eb));
- BUG_ON(ret);
- WARN_ON(!btrfs_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN));
-
+ eb = (struct extent_buffer *)page->private;
+ if (page != eb->pages[0])
+ return 0;
found_start = btrfs_header_bytenr(eb);
if (found_start != start) {
WARN_ON(1);
- goto err;
+ return 0;
}
- if (eb->first_page != page) {
+ if (eb->pages[0] != page) {
WARN_ON(1);
- goto err;
+ return 0;
}
if (!PageUptodate(page)) {
WARN_ON(1);
- goto err;
+ return 0;
}
csum_tree_block(root, eb, 0);
-err:
- free_extent_buffer(eb);
-out:
return 0;
}
@@ -537,34 +532,75 @@ static noinline int check_leaf(struct btrfs_root *root,
return 0;
}
+struct extent_buffer *find_eb_for_page(struct extent_io_tree *tree,
+ struct page *page, int max_walk)
+{
+ struct extent_buffer *eb;
+ u64 start = page_offset(page);
+ u64 target = start;
+ u64 min_start;
+
+ if (start < max_walk)
+ min_start = 0;
+ else
+ min_start = start - max_walk;
+
+ while (start >= min_start) {
+ eb = find_extent_buffer(tree, start, 0);
+ if (eb) {
+ /*
+ * we found an extent buffer and it contains our page
+ * horray!
+ */
+ if (eb->start <= target &&
+ eb->start + eb->len > target)
+ return eb;
+
+ /* we found an extent buffer that wasn't for us */
+ free_extent_buffer(eb);
+ return NULL;
+ }
+ if (start == 0)
+ break;
+ start -= PAGE_CACHE_SIZE;
+ }
+ return NULL;
+}
+
static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
- struct extent_state *state)
+ struct extent_state *state, int mirror)
{
struct extent_io_tree *tree;
u64 found_start;
int found_level;
- unsigned long len;
struct extent_buffer *eb;
struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
int ret = 0;
+ int reads_done;
- tree = &BTRFS_I(page->mapping->host)->io_tree;
- if (page->private == EXTENT_PAGE_PRIVATE)
- goto out;
if (!page->private)
goto out;
- len = page->private >> 2;
- WARN_ON(len == 0);
+ tree = &BTRFS_I(page->mapping->host)->io_tree;
+ eb = (struct extent_buffer *)page->private;
+
+ /* the pending IO might have been the only thing that kept this buffer
+ * in memory. Make sure we have a ref for all this other checks
+ */
+ extent_buffer_get(eb);
+
+ reads_done = atomic_dec_and_test(&eb->io_pages);
+ if (!reads_done)
+ goto err;
- eb = alloc_extent_buffer(tree, start, len, page);
- if (eb == NULL) {
+ eb->read_mirror = mirror;
+ if (test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) {
ret = -EIO;
- goto out;
+ goto err;
}
found_start = btrfs_header_bytenr(eb);
- if (found_start != start) {
+ if (found_start != eb->start) {
printk_ratelimited(KERN_INFO "btrfs bad tree block start "
"%llu %llu\n",
(unsigned long long)found_start,
@@ -572,13 +608,6 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
ret = -EIO;
goto err;
}
- if (eb->first_page != page) {
- printk(KERN_INFO "btrfs bad first page %lu %lu\n",
- eb->first_page->index, page->index);
- WARN_ON(1);
- ret = -EIO;
- goto err;
- }
if (check_tree_block_fsid(root, eb)) {
printk_ratelimited(KERN_INFO "btrfs bad fsid on block %llu\n",
(unsigned long long)eb->start);
@@ -606,48 +635,31 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
ret = -EIO;
}
- end = min_t(u64, eb->len, PAGE_CACHE_SIZE);
- end = eb->start + end - 1;
+ if (!ret)
+ set_extent_buffer_uptodate(eb);
err:
if (test_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) {
clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags);
btree_readahead_hook(root, eb, eb->start, ret);
}
+ if (ret)
+ clear_extent_buffer_uptodate(eb);
free_extent_buffer(eb);
out:
return ret;
}
-static int btree_io_failed_hook(struct bio *failed_bio,
- struct page *page, u64 start, u64 end,
- int mirror_num, struct extent_state *state)
+static int btree_io_failed_hook(struct page *page, int failed_mirror)
{
- struct extent_io_tree *tree;
- unsigned long len;
struct extent_buffer *eb;
struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
- tree = &BTRFS_I(page->mapping->host)->io_tree;
- if (page->private == EXTENT_PAGE_PRIVATE)
- goto out;
- if (!page->private)
- goto out;
-
- len = page->private >> 2;
- WARN_ON(len == 0);
-
- eb = alloc_extent_buffer(tree, start, len, page);
- if (eb == NULL)
- goto out;
-
- if (test_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) {
- clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags);
+ eb = (struct extent_buffer *)page->private;
+ set_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
+ eb->read_mirror = failed_mirror;
+ if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
btree_readahead_hook(root, eb, eb->start, -EIO);
- }
- free_extent_buffer(eb);
-
-out:
return -EIO; /* we fixed nothing */
}
@@ -719,11 +731,14 @@ unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info)
static void run_one_async_start(struct btrfs_work *work)
{
struct async_submit_bio *async;
+ int ret;
async = container_of(work, struct async_submit_bio, work);
- async->submit_bio_start(async->inode, async->rw, async->bio,
- async->mirror_num, async->bio_flags,
- async->bio_offset);
+ ret = async->submit_bio_start(async->inode, async->rw, async->bio,
+ async->mirror_num, async->bio_flags,
+ async->bio_offset);
+ if (ret)
+ async->error = ret;
}
static void run_one_async_done(struct btrfs_work *work)
@@ -744,6 +759,12 @@ static void run_one_async_done(struct btrfs_work *work)
waitqueue_active(&fs_info->async_submit_wait))
wake_up(&fs_info->async_submit_wait);
+ /* If an error occured we just want to clean up the bio and move on */
+ if (async->error) {
+ bio_endio(async->bio, async->error);
+ return;
+ }
+
async->submit_bio_done(async->inode, async->rw, async->bio,
async->mirror_num, async->bio_flags,
async->bio_offset);
@@ -785,6 +806,8 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
async->bio_flags = bio_flags;
async->bio_offset = bio_offset;
+ async->error = 0;
+
atomic_inc(&fs_info->nr_async_submits);
if (rw & REQ_SYNC)
@@ -806,15 +829,18 @@ static int btree_csum_one_bio(struct bio *bio)
struct bio_vec *bvec = bio->bi_io_vec;
int bio_index = 0;
struct btrfs_root *root;
+ int ret = 0;
WARN_ON(bio->bi_vcnt <= 0);
while (bio_index < bio->bi_vcnt) {
root = BTRFS_I(bvec->bv_page->mapping->host)->root;
- csum_dirty_buffer(root, bvec->bv_page);
+ ret = csum_dirty_buffer(root, bvec->bv_page);
+ if (ret)
+ break;
bio_index++;
bvec++;
}
- return 0;
+ return ret;
}
static int __btree_submit_bio_start(struct inode *inode, int rw,
@@ -826,8 +852,7 @@ static int __btree_submit_bio_start(struct inode *inode, int rw,
* when we're called for a write, we're already in the async
* submission context. Just jump into btrfs_map_bio
*/
- btree_csum_one_bio(bio);
- return 0;
+ return btree_csum_one_bio(bio);
}
static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
@@ -847,15 +872,16 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
{
int ret;
- ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info,
- bio, 1);
- BUG_ON(ret);
-
if (!(rw & REQ_WRITE)) {
+
/*
* called for a read, do the setup so that checksum validation
* can happen in the async kernel threads
*/
+ ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info,
+ bio, 1);
+ if (ret)
+ return ret;
return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
mirror_num, 0);
}
@@ -893,34 +919,6 @@ static int btree_migratepage(struct address_space *mapping,
}
#endif
-static int btree_writepage(struct page *page, struct writeback_control *wbc)
-{
- struct extent_io_tree *tree;
- struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
- struct extent_buffer *eb;
- int was_dirty;
-
- tree = &BTRFS_I(page->mapping->host)->io_tree;
- if (!(current->flags & PF_MEMALLOC)) {
- return extent_write_full_page(tree, page,
- btree_get_extent, wbc);
- }
-
- redirty_page_for_writepage(wbc, page);
- eb = btrfs_find_tree_block(root, page_offset(page), PAGE_CACHE_SIZE);
- WARN_ON(!eb);
-
- was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
- if (!was_dirty) {
- spin_lock(&root->fs_info->delalloc_lock);
- root->fs_info->dirty_metadata_bytes += PAGE_CACHE_SIZE;
- spin_unlock(&root->fs_info->delalloc_lock);
- }
- free_extent_buffer(eb);
-
- unlock_page(page);
- return 0;
-}
static int btree_writepages(struct address_space *mapping,
struct writeback_control *wbc)
@@ -940,7 +938,7 @@ static int btree_writepages(struct address_space *mapping,
if (num_dirty < thresh)
return 0;
}
- return extent_writepages(tree, mapping, btree_get_extent, wbc);
+ return btree_write_cache_pages(mapping, wbc);
}
static int btree_readpage(struct file *file, struct page *page)
@@ -952,16 +950,8 @@ static int btree_readpage(struct file *file, struct page *page)
static int btree_releasepage(struct page *page, gfp_t gfp_flags)
{
- struct extent_io_tree *tree;
- struct extent_map_tree *map;
- int ret;
-
if (PageWriteback(page) || PageDirty(page))
return 0;
-
- tree = &BTRFS_I(page->mapping->host)->io_tree;
- map = &BTRFS_I(page->mapping->host)->extent_tree;
-
/*
* We need to mask out eg. __GFP_HIGHMEM and __GFP_DMA32 as we're doing
* slab allocation from alloc_extent_state down the callchain where
@@ -969,18 +959,7 @@ static int btree_releasepage(struct page *page, gfp_t gfp_flags)
*/
gfp_flags &= ~GFP_SLAB_BUG_MASK;
- ret = try_release_extent_state(map, tree, page, gfp_flags);
- if (!ret)
- return 0;
-
- ret = try_release_extent_buffer(tree, page);
- if (ret == 1) {
- ClearPagePrivate(page);
- set_page_private(page, 0);
- page_cache_release(page);
- }
-
- return ret;
+ return try_release_extent_buffer(page, gfp_flags);
}
static void btree_invalidatepage(struct page *page, unsigned long offset)
@@ -998,15 +977,28 @@ static void btree_invalidatepage(struct page *page, unsigned long offset)
}
}
+static int btree_set_page_dirty(struct page *page)
+{
+ struct extent_buffer *eb;
+
+ BUG_ON(!PagePrivate(page));
+ eb = (struct extent_buffer *)page->private;
+ BUG_ON(!eb);
+ BUG_ON(!test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
+ BUG_ON(!atomic_read(&eb->refs));
+ btrfs_assert_tree_locked(eb);
+ return __set_page_dirty_nobuffers(page);
+}
+
static const struct address_space_operations btree_aops = {
.readpage = btree_readpage,
- .writepage = btree_writepage,
.writepages = btree_writepages,
.releasepage = btree_releasepage,
.invalidatepage = btree_invalidatepage,
#ifdef CONFIG_MIGRATION
.migratepage = btree_migratepage,
#endif
+ .set_page_dirty = btree_set_page_dirty,
};
int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize,
@@ -1049,7 +1041,7 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize,
if (test_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags)) {
free_extent_buffer(buf);
return -EIO;
- } else if (extent_buffer_uptodate(io_tree, buf, NULL)) {
+ } else if (extent_buffer_uptodate(buf)) {
*eb = buf;
} else {
free_extent_buffer(buf);
@@ -1074,20 +1066,20 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
struct extent_buffer *eb;
eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree,
- bytenr, blocksize, NULL);
+ bytenr, blocksize);
return eb;
}
int btrfs_write_tree_block(struct extent_buffer *buf)
{
- return filemap_fdatawrite_range(buf->first_page->mapping, buf->start,
+ return filemap_fdatawrite_range(buf->pages[0]->mapping, buf->start,
buf->start + buf->len - 1);
}
int btrfs_wait_tree_block_writeback(struct extent_buffer *buf)
{
- return filemap_fdatawait_range(buf->first_page->mapping,
+ return filemap_fdatawait_range(buf->pages[0]->mapping,
buf->start, buf->start + buf->len - 1);
}
@@ -1102,17 +1094,13 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
return NULL;
ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
-
- if (ret == 0)
- set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags);
return buf;
}
-int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct extent_buffer *buf)
+void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ struct extent_buffer *buf)
{
- struct inode *btree_inode = root->fs_info->btree_inode;
if (btrfs_header_generation(buf) ==
root->fs_info->running_transaction->transid) {
btrfs_assert_tree_locked(buf);
@@ -1121,23 +1109,27 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
spin_lock(&root->fs_info->delalloc_lock);
if (root->fs_info->dirty_metadata_bytes >= buf->len)
root->fs_info->dirty_metadata_bytes -= buf->len;
- else
- WARN_ON(1);
+ else {
+ spin_unlock(&root->fs_info->delalloc_lock);
+ btrfs_panic(root->fs_info, -EOVERFLOW,
+ "Can't clear %lu bytes from "
+ " dirty_mdatadata_bytes (%lu)",
+ buf->len,
+ root->fs_info->dirty_metadata_bytes);
+ }
spin_unlock(&root->fs_info->delalloc_lock);
}
/* ugh, clear_extent_buffer_dirty needs to lock the page */
btrfs_set_lock_blocking(buf);
- clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree,
- buf);
+ clear_extent_buffer_dirty(buf);
}
- return 0;
}
-static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
- u32 stripesize, struct btrfs_root *root,
- struct btrfs_fs_info *fs_info,
- u64 objectid)
+static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
+ u32 stripesize, struct btrfs_root *root,
+ struct btrfs_fs_info *fs_info,
+ u64 objectid)
{
root->node = NULL;
root->commit_root = NULL;
@@ -1189,13 +1181,12 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
root->defrag_running = 0;
root->root_key.objectid = objectid;
root->anon_dev = 0;
- return 0;
}
-static int find_and_setup_root(struct btrfs_root *tree_root,
- struct btrfs_fs_info *fs_info,
- u64 objectid,
- struct btrfs_root *root)
+static int __must_check find_and_setup_root(struct btrfs_root *tree_root,
+ struct btrfs_fs_info *fs_info,
+ u64 objectid,
+ struct btrfs_root *root)
{
int ret;
u32 blocksize;
@@ -1208,14 +1199,15 @@ static int find_and_setup_root(struct btrfs_root *tree_root,
&root->root_item, &root->root_key);
if (ret > 0)
return -ENOENT;
- BUG_ON(ret);
+ else if (ret < 0)
+ return ret;
generation = btrfs_root_generation(&root->root_item);
blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
root->commit_root = NULL;
root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
blocksize, generation);
- if (!root->node || !btrfs_buffer_uptodate(root->node, generation)) {
+ if (!root->node || !btrfs_buffer_uptodate(root->node, generation, 0)) {
free_extent_buffer(root->node);
root->node = NULL;
return -EIO;
@@ -1377,7 +1369,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
blocksize, generation);
root->commit_root = btrfs_root_node(root);
- BUG_ON(!root->node);
+ BUG_ON(!root->node); /* -ENOMEM */
out:
if (location->objectid != BTRFS_TREE_LOG_OBJECTID) {
root->ref_cows = 1;
@@ -1513,41 +1505,6 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
return 0;
}
-static int bio_ready_for_csum(struct bio *bio)
-{
- u64 length = 0;
- u64 buf_len = 0;
- u64 start = 0;
- struct page *page;
- struct extent_io_tree *io_tree = NULL;
- struct bio_vec *bvec;
- int i;
- int ret;
-
- bio_for_each_segment(bvec, bio, i) {
- page = bvec->bv_page;
- if (page->private == EXTENT_PAGE_PRIVATE) {
- length += bvec->bv_len;
- continue;
- }
- if (!page->private) {
- length += bvec->bv_len;
- continue;
- }
- length = bvec->bv_len;
- buf_len = page->private >> 2;
- start = page_offset(page) + bvec->bv_offset;
- io_tree = &BTRFS_I(page->mapping->host)->io_tree;
- }
- /* are we fully contained in this bio? */
- if (buf_len <= length)
- return 1;
-
- ret = extent_range_uptodate(io_tree, start + length,
- start + buf_len - 1);
- return ret;
-}
-
/*
* called by the kthread helper functions to finally call the bio end_io
* functions. This is where read checksum verification actually happens
@@ -1563,17 +1520,6 @@ static void end_workqueue_fn(struct btrfs_work *work)
bio = end_io_wq->bio;
fs_info = end_io_wq->info;
- /* metadata bio reads are special because the whole tree block must
- * be checksummed at once. This makes sure the entire block is in
- * ram and up to date before trying to verify things. For
- * blocksize <= pagesize, it is basically a noop
- */
- if (!(bio->bi_rw & REQ_WRITE) && end_io_wq->metadata &&
- !bio_ready_for_csum(bio)) {
- btrfs_queue_worker(&fs_info->endio_meta_workers,
- &end_io_wq->work);
- return;
- }
error = end_io_wq->error;
bio->bi_private = end_io_wq->private;
bio->bi_end_io = end_io_wq->end_io;
@@ -1614,9 +1560,10 @@ static int transaction_kthread(void *arg)
u64 transid;
unsigned long now;
unsigned long delay;
- int ret;
+ bool cannot_commit;
do {
+ cannot_commit = false;
delay = HZ * 30;
vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
mutex_lock(&root->fs_info->transaction_kthread_mutex);
@@ -1638,11 +1585,14 @@ static int transaction_kthread(void *arg)
transid = cur->transid;
spin_unlock(&root->fs_info->trans_lock);
+ /* If the file system is aborted, this will always fail. */
trans = btrfs_join_transaction(root);
- BUG_ON(IS_ERR(trans));
+ if (IS_ERR(trans)) {
+ cannot_commit = true;
+ goto sleep;
+ }
if (transid == trans->transid) {
- ret = btrfs_commit_transaction(trans, root);
- BUG_ON(ret);
+ btrfs_commit_transaction(trans, root);
} else {
btrfs_end_transaction(trans, root);
}
@@ -1653,7 +1603,8 @@ sleep:
if (!try_to_freeze()) {
set_current_state(TASK_INTERRUPTIBLE);
if (!kthread_should_stop() &&
- !btrfs_transaction_blocked(root->fs_info))
+ (!btrfs_transaction_blocked(root->fs_info) ||
+ cannot_commit))
schedule_timeout(delay);
__set_current_state(TASK_RUNNING);
}
@@ -2042,6 +1993,7 @@ int open_ctree(struct super_block *sb,
RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node);
extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree,
fs_info->btree_inode->i_mapping);
+ BTRFS_I(fs_info->btree_inode)->io_tree.track_uptodate = 0;
extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree);
BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops;
@@ -2084,6 +2036,7 @@ int open_ctree(struct super_block *sb,
__setup_root(4096, 4096, 4096, 4096, tree_root,
fs_info, BTRFS_ROOT_TREE_OBJECTID);
+ invalidate_bdev(fs_devices->latest_bdev);
bh = btrfs_read_dev_super(fs_devices->latest_bdev);
if (!bh) {
err = -EINVAL;
@@ -2104,7 +2057,12 @@ int open_ctree(struct super_block *sb,
/* check FS state, whether FS is broken. */
fs_info->fs_state |= btrfs_super_flags(disk_super);
- btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
+ ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
+ if (ret) {
+ printk(KERN_ERR "btrfs: superblock contains fatal errors\n");
+ err = ret;
+ goto fail_alloc;
+ }
/*
* run through our array of backup supers and setup
@@ -2135,10 +2093,55 @@ int open_ctree(struct super_block *sb,
goto fail_alloc;
}
+ if (btrfs_super_leafsize(disk_super) !=
+ btrfs_super_nodesize(disk_super)) {
+ printk(KERN_ERR "BTRFS: couldn't mount because metadata "
+ "blocksizes don't match. node %d leaf %d\n",
+ btrfs_super_nodesize(disk_super),
+ btrfs_super_leafsize(disk_super));
+ err = -EINVAL;
+ goto fail_alloc;
+ }
+ if (btrfs_super_leafsize(disk_super) > BTRFS_MAX_METADATA_BLOCKSIZE) {
+ printk(KERN_ERR "BTRFS: couldn't mount because metadata "
+ "blocksize (%d) was too large\n",
+ btrfs_super_leafsize(disk_super));
+ err = -EINVAL;
+ goto fail_alloc;
+ }
+
features = btrfs_super_incompat_flags(disk_super);
features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
if (tree_root->fs_info->compress_type & BTRFS_COMPRESS_LZO)
features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
+
+ /*
+ * flag our filesystem as having big metadata blocks if
+ * they are bigger than the page size
+ */
+ if (btrfs_super_leafsize(disk_super) > PAGE_CACHE_SIZE) {
+ if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA))
+ printk(KERN_INFO "btrfs flagging fs with big metadata feature\n");
+ features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
+ }
+
+ nodesize = btrfs_super_nodesize(disk_super);
+ leafsize = btrfs_super_leafsize(disk_super);
+ sectorsize = btrfs_super_sectorsize(disk_super);
+ stripesize = btrfs_super_stripesize(disk_super);
+
+ /*
+ * mixed block groups end up with duplicate but slightly offset
+ * extent buffers for the same range. It leads to corruptions
+ */
+ if ((features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) &&
+ (sectorsize != leafsize)) {
+ printk(KERN_WARNING "btrfs: unequal leaf/node/sector sizes "
+ "are not allowed for mixed block groups on %s\n",
+ sb->s_id);
+ goto fail_alloc;
+ }
+
btrfs_set_super_incompat_flags(disk_super, features);
features = btrfs_super_compat_ro_flags(disk_super) &
@@ -2242,10 +2245,6 @@ int open_ctree(struct super_block *sb,
fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
4 * 1024 * 1024 / PAGE_CACHE_SIZE);
- nodesize = btrfs_super_nodesize(disk_super);
- leafsize = btrfs_super_leafsize(disk_super);
- sectorsize = btrfs_super_sectorsize(disk_super);
- stripesize = btrfs_super_stripesize(disk_super);
tree_root->nodesize = nodesize;
tree_root->leafsize = leafsize;
tree_root->sectorsize = sectorsize;
@@ -2260,9 +2259,9 @@ int open_ctree(struct super_block *sb,
goto fail_sb_buffer;
}
- if (sectorsize < PAGE_SIZE) {
- printk(KERN_WARNING "btrfs: Incompatible sector size "
- "found on %s\n", sb->s_id);
+ if (sectorsize != PAGE_SIZE) {
+ printk(KERN_WARNING "btrfs: Incompatible sector size(%lu) "
+ "found on %s\n", (unsigned long)sectorsize, sb->s_id);
goto fail_sb_buffer;
}
@@ -2285,7 +2284,7 @@ int open_ctree(struct super_block *sb,
chunk_root->node = read_tree_block(chunk_root,
btrfs_super_chunk_root(disk_super),
blocksize, generation);
- BUG_ON(!chunk_root->node);
+ BUG_ON(!chunk_root->node); /* -ENOMEM */
if (!test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) {
printk(KERN_WARNING "btrfs: failed to read chunk root on %s\n",
sb->s_id);
@@ -2425,21 +2424,31 @@ retry_root_backup:
log_tree_root->node = read_tree_block(tree_root, bytenr,
blocksize,
generation + 1);
+ /* returns with log_tree_root freed on success */
ret = btrfs_recover_log_trees(log_tree_root);
- BUG_ON(ret);
+ if (ret) {
+ btrfs_error(tree_root->fs_info, ret,
+ "Failed to recover log tree");
+ free_extent_buffer(log_tree_root->node);
+ kfree(log_tree_root);
+ goto fail_trans_kthread;
+ }
if (sb->s_flags & MS_RDONLY) {
- ret = btrfs_commit_super(tree_root);
- BUG_ON(ret);
+ ret = btrfs_commit_super(tree_root);
+ if (ret)
+ goto fail_trans_kthread;
}
}
ret = btrfs_find_orphan_roots(tree_root);
- BUG_ON(ret);
+ if (ret)
+ goto fail_trans_kthread;
if (!(sb->s_flags & MS_RDONLY)) {
ret = btrfs_cleanup_fs_roots(fs_info);
- BUG_ON(ret);
+ if (ret) {
+ }
ret = btrfs_recover_relocation(tree_root);
if (ret < 0) {
@@ -2859,6 +2868,8 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors)
if (total_errors > max_errors) {
printk(KERN_ERR "btrfs: %d errors while writing supers\n",
total_errors);
+
+ /* This shouldn't happen. FUA is masked off if unsupported */
BUG();
}
@@ -2875,9 +2886,9 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors)
}
mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
if (total_errors > max_errors) {
- printk(KERN_ERR "btrfs: %d errors while writing supers\n",
- total_errors);
- BUG();
+ btrfs_error(root->fs_info, -EIO,
+ "%d errors while writing supers", total_errors);
+ return -EIO;
}
return 0;
}
@@ -2891,7 +2902,20 @@ int write_ctree_super(struct btrfs_trans_handle *trans,
return ret;
}
-int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
+/* Kill all outstanding I/O */
+void btrfs_abort_devices(struct btrfs_root *root)
+{
+ struct list_head *head;
+ struct btrfs_device *dev;
+ mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
+ head = &root->fs_info->fs_devices->devices;
+ list_for_each_entry_rcu(dev, head, dev_list) {
+ blk_abort_queue(dev->bdev->bd_disk->queue);
+ }
+ mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
+}
+
+void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
{
spin_lock(&fs_info->fs_roots_radix_lock);
radix_tree_delete(&fs_info->fs_roots_radix,
@@ -2904,7 +2928,6 @@ int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
__btrfs_remove_free_space_cache(root->free_ino_pinned);
__btrfs_remove_free_space_cache(root->free_ino_ctl);
free_fs_root(root);
- return 0;
}
static void free_fs_root(struct btrfs_root *root)
@@ -2921,7 +2944,7 @@ static void free_fs_root(struct btrfs_root *root)
kfree(root);
}
-static int del_fs_roots(struct btrfs_fs_info *fs_info)
+static void del_fs_roots(struct btrfs_fs_info *fs_info)
{
int ret;
struct btrfs_root *gang[8];
@@ -2950,7 +2973,6 @@ static int del_fs_roots(struct btrfs_fs_info *fs_info)
for (i = 0; i < ret; i++)
btrfs_free_fs_root(fs_info, gang[i]);
}
- return 0;
}
int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
@@ -2999,14 +3021,21 @@ int btrfs_commit_super(struct btrfs_root *root)
if (IS_ERR(trans))
return PTR_ERR(trans);
ret = btrfs_commit_transaction(trans, root);
- BUG_ON(ret);
+ if (ret)
+ return ret;
/* run commit again to drop the original snapshot */
trans = btrfs_join_transaction(root);
if (IS_ERR(trans))
return PTR_ERR(trans);
- btrfs_commit_transaction(trans, root);
+ ret = btrfs_commit_transaction(trans, root);
+ if (ret)
+ return ret;
ret = btrfs_write_and_wait_transaction(NULL, root);
- BUG_ON(ret);
+ if (ret) {
+ btrfs_error(root->fs_info, ret,
+ "Failed to sync btree inode to disk.");
+ return ret;
+ }
ret = write_ctree_super(NULL, root, 0);
return ret;
@@ -3119,33 +3148,32 @@ int close_ctree(struct btrfs_root *root)
return 0;
}
-int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid)
+int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
+ int atomic)
{
int ret;
- struct inode *btree_inode = buf->first_page->mapping->host;
+ struct inode *btree_inode = buf->pages[0]->mapping->host;
- ret = extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf,
- NULL);
+ ret = extent_buffer_uptodate(buf);
if (!ret)
return ret;
ret = verify_parent_transid(&BTRFS_I(btree_inode)->io_tree, buf,
- parent_transid);
+ parent_transid, atomic);
+ if (ret == -EAGAIN)
+ return ret;
return !ret;
}
int btrfs_set_buffer_uptodate(struct extent_buffer *buf)
{
- struct inode *btree_inode = buf->first_page->mapping->host;
- return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree,
- buf);
+ return set_extent_buffer_uptodate(buf);
}
void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
{
- struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
+ struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
u64 transid = btrfs_header_generation(buf);
- struct inode *btree_inode = root->fs_info->btree_inode;
int was_dirty;
btrfs_assert_tree_locked(buf);
@@ -3157,8 +3185,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
(unsigned long long)root->fs_info->generation);
WARN_ON(1);
}
- was_dirty = set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree,
- buf);
+ was_dirty = set_extent_buffer_dirty(buf);
if (!was_dirty) {
spin_lock(&root->fs_info->delalloc_lock);
root->fs_info->dirty_metadata_bytes += buf->len;
@@ -3212,12 +3239,8 @@ void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
{
- struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
- int ret;
- ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
- if (ret == 0)
- set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags);
- return ret;
+ struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
+ return btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
}
static int btree_lock_page_hook(struct page *page, void *data,
@@ -3225,17 +3248,21 @@ static int btree_lock_page_hook(struct page *page, void *data,
{
struct inode *inode = page->mapping->host;
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct extent_buffer *eb;
- unsigned long len;
- u64 bytenr = page_offset(page);
- if (page->private == EXTENT_PAGE_PRIVATE)
+ /*
+ * We culled this eb but the page is still hanging out on the mapping,
+ * carry on.
+ */
+ if (!PagePrivate(page))
goto out;
- len = page->private >> 2;
- eb = find_extent_buffer(io_tree, bytenr, len);
- if (!eb)
+ eb = (struct extent_buffer *)page->private;
+ if (!eb) {
+ WARN_ON(1);
+ goto out;
+ }
+ if (page != eb->pages[0])
goto out;
if (!btrfs_try_tree_write_lock(eb)) {
@@ -3254,7 +3281,6 @@ static int btree_lock_page_hook(struct page *page, void *data,
}
btrfs_tree_unlock(eb);
- free_extent_buffer(eb);
out:
if (!trylock_page(page)) {
flush_fn(data);
@@ -3263,15 +3289,23 @@ out:
return 0;
}
-static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
+static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
int read_only)
{
+ if (btrfs_super_csum_type(fs_info->super_copy) >= ARRAY_SIZE(btrfs_csum_sizes)) {
+ printk(KERN_ERR "btrfs: unsupported checksum algorithm\n");
+ return -EINVAL;
+ }
+
if (read_only)
- return;
+ return 0;
- if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
+ if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
printk(KERN_WARNING "warning: mount fs with errors, "
"running btrfsck is recommended\n");
+ }
+
+ return 0;
}
int btrfs_error_commit_super(struct btrfs_root *root)
@@ -3293,7 +3327,7 @@ int btrfs_error_commit_super(struct btrfs_root *root)
return ret;
}
-static int btrfs_destroy_ordered_operations(struct btrfs_root *root)
+static void btrfs_destroy_ordered_operations(struct btrfs_root *root)
{
struct btrfs_inode *btrfs_inode;
struct list_head splice;
@@ -3315,11 +3349,9 @@ static int btrfs_destroy_ordered_operations(struct btrfs_root *root)
spin_unlock(&root->fs_info->ordered_extent_lock);
mutex_unlock(&root->fs_info->ordered_operations_mutex);
-
- return 0;
}
-static int btrfs_destroy_ordered_extents(struct btrfs_root *root)
+static void btrfs_destroy_ordered_extents(struct btrfs_root *root)
{
struct list_head splice;
struct btrfs_ordered_extent *ordered;
@@ -3351,12 +3383,10 @@ static int btrfs_destroy_ordered_extents(struct btrfs_root *root)
}
spin_unlock(&root->fs_info->ordered_extent_lock);
-
- return 0;
}
-static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
- struct btrfs_root *root)
+int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
+ struct btrfs_root *root)
{
struct rb_node *node;
struct btrfs_delayed_ref_root *delayed_refs;
@@ -3365,6 +3395,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
delayed_refs = &trans->delayed_refs;
+again:
spin_lock(&delayed_refs->lock);
if (delayed_refs->num_entries == 0) {
spin_unlock(&delayed_refs->lock);
@@ -3386,6 +3417,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
struct btrfs_delayed_ref_head *head;
head = btrfs_delayed_node_to_head(ref);
+ spin_unlock(&delayed_refs->lock);
mutex_lock(&head->mutex);
kfree(head->extent_op);
delayed_refs->num_heads--;
@@ -3393,8 +3425,9 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
delayed_refs->num_heads_ready--;
list_del_init(&head->cluster);
mutex_unlock(&head->mutex);
+ btrfs_put_delayed_ref(ref);
+ goto again;
}
-
spin_unlock(&delayed_refs->lock);
btrfs_put_delayed_ref(ref);
@@ -3407,7 +3440,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
return ret;
}
-static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t)
+static void btrfs_destroy_pending_snapshots(struct btrfs_transaction *t)
{
struct btrfs_pending_snapshot *snapshot;
struct list_head splice;
@@ -3425,11 +3458,9 @@ static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t)
kfree(snapshot);
}
-
- return 0;
}
-static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
+static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
{
struct btrfs_inode *btrfs_inode;
struct list_head splice;
@@ -3449,8 +3480,6 @@ static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
}
spin_unlock(&root->fs_info->delalloc_lock);
-
- return 0;
}
static int btrfs_destroy_marked_extents(struct btrfs_root *root,
@@ -3541,13 +3570,43 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
return 0;
}
-static int btrfs_cleanup_transaction(struct btrfs_root *root)
+void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
+ struct btrfs_root *root)
+{
+ btrfs_destroy_delayed_refs(cur_trans, root);
+ btrfs_block_rsv_release(root, &root->fs_info->trans_block_rsv,
+ cur_trans->dirty_pages.dirty_bytes);
+
+ /* FIXME: cleanup wait for commit */
+ cur_trans->in_commit = 1;
+ cur_trans->blocked = 1;
+ if (waitqueue_active(&root->fs_info->transaction_blocked_wait))
+ wake_up(&root->fs_info->transaction_blocked_wait);
+
+ cur_trans->blocked = 0;
+ if (waitqueue_active(&root->fs_info->transaction_wait))
+ wake_up(&root->fs_info->transaction_wait);
+
+ cur_trans->commit_done = 1;
+ if (waitqueue_active(&cur_trans->commit_wait))
+ wake_up(&cur_trans->commit_wait);
+
+ btrfs_destroy_pending_snapshots(cur_trans);
+
+ btrfs_destroy_marked_extents(root, &cur_trans->dirty_pages,
+ EXTENT_DIRTY);
+
+ /*
+ memset(cur_trans, 0, sizeof(*cur_trans));
+ kmem_cache_free(btrfs_transaction_cachep, cur_trans);
+ */
+}
+
+int btrfs_cleanup_transaction(struct btrfs_root *root)
{
struct btrfs_transaction *t;
LIST_HEAD(list);
- WARN_ON(1);
-
mutex_lock(&root->fs_info->transaction_kthread_mutex);
spin_lock(&root->fs_info->trans_lock);
@@ -3612,6 +3671,17 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
return 0;
}
+static int btree_writepage_io_failed_hook(struct bio *bio, struct page *page,
+ u64 start, u64 end,
+ struct extent_state *state)
+{
+ struct super_block *sb = page->mapping->host->i_sb;
+ struct btrfs_fs_info *fs_info = btrfs_sb(sb);
+ btrfs_error(fs_info, -EIO,
+ "Error occured while writing out btree at %llu", start);
+ return -EIO;
+}
+
static struct extent_io_ops btree_extent_io_ops = {
.write_cache_pages_lock_hook = btree_lock_page_hook,
.readpage_end_io_hook = btree_readpage_end_io_hook,
@@ -3619,4 +3689,5 @@ static struct extent_io_ops btree_extent_io_ops = {
.submit_bio_hook = btree_submit_bio_hook,
/* note we're sharing with inode.c for the merge bio hook */
.merge_bio_hook = btrfs_merge_bio_hook,
+ .writepage_io_failed_hook = btree_writepage_io_failed_hook,
};
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index e4bc4741319..ab1830aaf0e 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -44,8 +44,8 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize,
int mirror_num, struct extent_buffer **eb);
struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
u64 bytenr, u32 blocksize);
-int clean_tree_block(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct extent_buffer *buf);
+void clean_tree_block(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct extent_buffer *buf);
int open_ctree(struct super_block *sb,
struct btrfs_fs_devices *fs_devices,
char *options);
@@ -64,9 +64,10 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info);
void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr);
void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr);
-int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root);
+void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root);
void btrfs_mark_buffer_dirty(struct extent_buffer *buf);
-int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid);
+int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
+ int atomic);
int btrfs_set_buffer_uptodate(struct extent_buffer *buf);
int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid);
u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len);
@@ -85,6 +86,10 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
+int btrfs_cleanup_transaction(struct btrfs_root *root);
+void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans,
+ struct btrfs_root *root);
+void btrfs_abort_devices(struct btrfs_root *root);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
void btrfs_init_lockdep(void);
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 5f77166fd01..e887ee62b6d 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -193,7 +193,7 @@ static struct dentry *btrfs_get_parent(struct dentry *child)
if (ret < 0)
goto fail;
- BUG_ON(ret == 0);
+ BUG_ON(ret == 0); /* Key with offset of -1 found */
if (path->slots[0] == 0) {
ret = -ENOENT;
goto fail;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f6f584fd549..49fd7b66d57 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -245,7 +245,7 @@ static int exclude_super_stripes(struct btrfs_root *root,
cache->bytes_super += stripe_len;
ret = add_excluded_extent(root, cache->key.objectid,
stripe_len);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
}
for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
@@ -253,13 +253,13 @@ static int exclude_super_stripes(struct btrfs_root *root,
ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
cache->key.objectid, bytenr,
0, &logical, &nr, &stripe_len);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
while (nr--) {
cache->bytes_super += stripe_len;
ret = add_excluded_extent(root, logical[nr],
stripe_len);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
}
kfree(logical);
@@ -321,7 +321,7 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
total_added += size;
ret = btrfs_add_free_space(block_group, start,
size);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM or logic error */
start = extent_end + 1;
} else {
break;
@@ -332,7 +332,7 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
size = end - start;
total_added += size;
ret = btrfs_add_free_space(block_group, start, size);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM or logic error */
}
return total_added;
@@ -474,7 +474,8 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
int ret = 0;
caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
- BUG_ON(!caching_ctl);
+ if (!caching_ctl)
+ return -ENOMEM;
INIT_LIST_HEAD(&caching_ctl->list);
mutex_init(&caching_ctl->mutex);
@@ -528,9 +529,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
* allocate blocks for the tree root we can't do the fast caching since
* we likely hold important locks.
*/
- if (trans && (!trans->transaction->in_commit) &&
- (root && root != root->fs_info->tree_root) &&
- btrfs_test_opt(root, SPACE_CACHE)) {
+ if (fs_info->mount_opt & BTRFS_MOUNT_SPACE_CACHE) {
ret = load_free_space_cache(fs_info, cache);
spin_lock(&cache->lock);
@@ -982,7 +981,7 @@ static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
ret = btrfs_next_leaf(root, path);
if (ret < 0)
return ret;
- BUG_ON(ret > 0);
+ BUG_ON(ret > 0); /* Corruption */
leaf = path->nodes[0];
}
btrfs_item_key_to_cpu(leaf, &found_key,
@@ -1008,9 +1007,9 @@ static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
new_size + extra_size, 1);
if (ret < 0)
return ret;
- BUG_ON(ret);
+ BUG_ON(ret); /* Corruption */
- ret = btrfs_extend_item(trans, root, path, new_size);
+ btrfs_extend_item(trans, root, path, new_size);
leaf = path->nodes[0];
item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
@@ -1478,7 +1477,11 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
err = ret;
goto out;
}
- BUG_ON(ret);
+ if (ret && !insert) {
+ err = -ENOENT;
+ goto out;
+ }
+ BUG_ON(ret); /* Corruption */
leaf = path->nodes[0];
item_size = btrfs_item_size_nr(leaf, path->slots[0]);
@@ -1592,13 +1595,13 @@ out:
* helper to add new inline back ref
*/
static noinline_for_stack
-int setup_inline_extent_backref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct btrfs_extent_inline_ref *iref,
- u64 parent, u64 root_objectid,
- u64 owner, u64 offset, int refs_to_add,
- struct btrfs_delayed_extent_op *extent_op)
+void setup_inline_extent_backref(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct btrfs_extent_inline_ref *iref,
+ u64 parent, u64 root_objectid,
+ u64 owner, u64 offset, int refs_to_add,
+ struct btrfs_delayed_extent_op *extent_op)
{
struct extent_buffer *leaf;
struct btrfs_extent_item *ei;
@@ -1608,7 +1611,6 @@ int setup_inline_extent_backref(struct btrfs_trans_handle *trans,
u64 refs;
int size;
int type;
- int ret;
leaf = path->nodes[0];
ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
@@ -1617,7 +1619,7 @@ int setup_inline_extent_backref(struct btrfs_trans_handle *trans,
type = extent_ref_type(parent, owner);
size = btrfs_extent_inline_ref_size(type);
- ret = btrfs_extend_item(trans, root, path, size);
+ btrfs_extend_item(trans, root, path, size);
ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
refs = btrfs_extent_refs(leaf, ei);
@@ -1652,7 +1654,6 @@ int setup_inline_extent_backref(struct btrfs_trans_handle *trans,
btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
}
btrfs_mark_buffer_dirty(leaf);
- return 0;
}
static int lookup_extent_backref(struct btrfs_trans_handle *trans,
@@ -1687,12 +1688,12 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans,
* helper to update/remove inline back ref
*/
static noinline_for_stack
-int update_inline_extent_backref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct btrfs_extent_inline_ref *iref,
- int refs_to_mod,
- struct btrfs_delayed_extent_op *extent_op)
+void update_inline_extent_backref(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct btrfs_extent_inline_ref *iref,
+ int refs_to_mod,
+ struct btrfs_delayed_extent_op *extent_op)
{
struct extent_buffer *leaf;
struct btrfs_extent_item *ei;
@@ -1703,7 +1704,6 @@ int update_inline_extent_backref(struct btrfs_trans_handle *trans,
u32 item_size;
int size;
int type;
- int ret;
u64 refs;
leaf = path->nodes[0];
@@ -1745,10 +1745,9 @@ int update_inline_extent_backref(struct btrfs_trans_handle *trans,
memmove_extent_buffer(leaf, ptr, ptr + size,
end - ptr - size);
item_size -= size;
- ret = btrfs_truncate_item(trans, root, path, item_size, 1);
+ btrfs_truncate_item(trans, root, path, item_size, 1);
}
btrfs_mark_buffer_dirty(leaf);
- return 0;
}
static noinline_for_stack
@@ -1768,13 +1767,13 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
root_objectid, owner, offset, 1);
if (ret == 0) {
BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
- ret = update_inline_extent_backref(trans, root, path, iref,
- refs_to_add, extent_op);
+ update_inline_extent_backref(trans, root, path, iref,
+ refs_to_add, extent_op);
} else if (ret == -ENOENT) {
- ret = setup_inline_extent_backref(trans, root, path, iref,
- parent, root_objectid,
- owner, offset, refs_to_add,
- extent_op);
+ setup_inline_extent_backref(trans, root, path, iref, parent,
+ root_objectid, owner, offset,
+ refs_to_add, extent_op);
+ ret = 0;
}
return ret;
}
@@ -1804,12 +1803,12 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
struct btrfs_extent_inline_ref *iref,
int refs_to_drop, int is_data)
{
- int ret;
+ int ret = 0;
BUG_ON(!is_data && refs_to_drop != 1);
if (iref) {
- ret = update_inline_extent_backref(trans, root, path, iref,
- -refs_to_drop, NULL);
+ update_inline_extent_backref(trans, root, path, iref,
+ -refs_to_drop, NULL);
} else if (is_data) {
ret = remove_extent_data_ref(trans, root, path, refs_to_drop);
} else {
@@ -1835,6 +1834,7 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
/* Tell the block device(s) that the sectors can be discarded */
ret = btrfs_map_block(&root->fs_info->mapping_tree, REQ_DISCARD,
bytenr, &num_bytes, &bbio, 0);
+ /* Error condition is -ENOMEM */
if (!ret) {
struct btrfs_bio_stripe *stripe = bbio->stripes;
int i;
@@ -1850,7 +1850,7 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
if (!ret)
discarded_bytes += stripe->length;
else if (ret != -EOPNOTSUPP)
- break;
+ break; /* Logic errors or -ENOMEM, or -EIO but I don't know how that could happen JDM */
/*
* Just in case we get back EOPNOTSUPP for some reason,
@@ -1869,6 +1869,7 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
return ret;
}
+/* Can return -ENOMEM */
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent,
@@ -1944,7 +1945,8 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
ret = insert_extent_backref(trans, root->fs_info->extent_root,
path, bytenr, parent, root_objectid,
owner, offset, refs_to_add);
- BUG_ON(ret);
+ if (ret)
+ btrfs_abort_transaction(trans, root, ret);
out:
btrfs_free_path(path);
return err;
@@ -2031,6 +2033,9 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
int ret;
int err = 0;
+ if (trans->aborted)
+ return 0;
+
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
@@ -2128,7 +2133,11 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
struct btrfs_delayed_extent_op *extent_op,
int insert_reserved)
{
- int ret;
+ int ret = 0;
+
+ if (trans->aborted)
+ return 0;
+
if (btrfs_delayed_ref_is_head(node)) {
struct btrfs_delayed_ref_head *head;
/*
@@ -2146,11 +2155,10 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
ret = btrfs_del_csums(trans, root,
node->bytenr,
node->num_bytes);
- BUG_ON(ret);
}
}
mutex_unlock(&head->mutex);
- return 0;
+ return ret;
}
if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
@@ -2197,6 +2205,10 @@ again:
return NULL;
}
+/*
+ * Returns 0 on success or if called with an already aborted transaction.
+ * Returns -ENOMEM or -EIO on failure and will abort the transaction.
+ */
static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct list_head *cluster)
@@ -2285,9 +2297,14 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
ret = run_delayed_extent_op(trans, root,
ref, extent_op);
- BUG_ON(ret);
kfree(extent_op);
+ if (ret) {
+ printk(KERN_DEBUG "btrfs: run_delayed_extent_op returned %d\n", ret);
+ spin_lock(&delayed_refs->lock);
+ return ret;
+ }
+
goto next;
}
@@ -2308,11 +2325,17 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
ret = run_one_delayed_ref(trans, root, ref, extent_op,
must_insert_reserved);
- BUG_ON(ret);
btrfs_put_delayed_ref(ref);
kfree(extent_op);
count++;
+
+ if (ret) {
+ printk(KERN_DEBUG "btrfs: run_one_delayed_ref returned %d\n", ret);
+ spin_lock(&delayed_refs->lock);
+ return ret;
+ }
+
next:
do_chunk_alloc(trans, root->fs_info->extent_root,
2 * 1024 * 1024,
@@ -2347,6 +2370,9 @@ static void wait_for_more_refs(struct btrfs_delayed_ref_root *delayed_refs,
* 0, which means to process everything in the tree at the start
* of the run (but not newly added entries), or it can be some target
* number you'd like to process.
+ *
+ * Returns 0 on success or if called with an aborted transaction
+ * Returns <0 on error and aborts the transaction
*/
int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_root *root, unsigned long count)
@@ -2362,6 +2388,10 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
unsigned long num_refs = 0;
int consider_waiting;
+ /* We'll clean this up in btrfs_cleanup_transaction */
+ if (trans->aborted)
+ return 0;
+
if (root == root->fs_info->extent_root)
root = root->fs_info->tree_root;
@@ -2419,7 +2449,11 @@ again:
}
ret = run_clustered_refs(trans, root, &cluster);
- BUG_ON(ret < 0);
+ if (ret < 0) {
+ spin_unlock(&delayed_refs->lock);
+ btrfs_abort_transaction(trans, root, ret);
+ return ret;
+ }
count -= min_t(unsigned long, ret, count);
@@ -2584,7 +2618,7 @@ static noinline int check_committed_ref(struct btrfs_trans_handle *trans,
ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
if (ret < 0)
goto out;
- BUG_ON(ret == 0);
+ BUG_ON(ret == 0); /* Corruption */
ret = -ENOENT;
if (path->slots[0] == 0)
@@ -2738,7 +2772,6 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
}
return 0;
fail:
- BUG();
return ret;
}
@@ -2767,7 +2800,7 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
if (ret < 0)
goto fail;
- BUG_ON(ret);
+ BUG_ON(ret); /* Corruption */
leaf = path->nodes[0];
bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
@@ -2775,8 +2808,10 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(leaf);
btrfs_release_path(path);
fail:
- if (ret)
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
return ret;
+ }
return 0;
}
@@ -2949,7 +2984,8 @@ again:
if (last == 0) {
err = btrfs_run_delayed_refs(trans, root,
(unsigned long)-1);
- BUG_ON(err);
+ if (err) /* File system offline */
+ goto out;
}
cache = btrfs_lookup_first_block_group(root->fs_info, last);
@@ -2976,7 +3012,9 @@ again:
last = cache->key.objectid + cache->key.offset;
err = write_one_cache_group(trans, root, path, cache);
- BUG_ON(err);
+ if (err) /* File system offline */
+ goto out;
+
btrfs_put_block_group(cache);
}
@@ -2989,7 +3027,8 @@ again:
if (last == 0) {
err = btrfs_run_delayed_refs(trans, root,
(unsigned long)-1);
- BUG_ON(err);
+ if (err) /* File system offline */
+ goto out;
}
cache = btrfs_lookup_first_block_group(root->fs_info, last);
@@ -3014,20 +3053,21 @@ again:
continue;
}
- btrfs_write_out_cache(root, trans, cache, path);
+ err = btrfs_write_out_cache(root, trans, cache, path);
/*
* If we didn't have an error then the cache state is still
* NEED_WRITE, so we can set it to WRITTEN.
*/
- if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
+ if (!err && cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
cache->disk_cache_state = BTRFS_DC_WRITTEN;
last = cache->key.objectid + cache->key.offset;
btrfs_put_block_group(cache);
}
+out:
btrfs_free_path(path);
- return 0;
+ return err;
}
int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
@@ -3098,11 +3138,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
{
- u64 extra_flags = flags & BTRFS_BLOCK_GROUP_PROFILE_MASK;
-
- /* chunk -> extended profile */
- if (extra_flags == 0)
- extra_flags = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
+ u64 extra_flags = chunk_to_extended(flags) &
+ BTRFS_EXTENDED_PROFILE_MASK;
if (flags & BTRFS_BLOCK_GROUP_DATA)
fs_info->avail_data_alloc_bits |= extra_flags;
@@ -3113,6 +3150,34 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
}
/*
+ * returns target flags in extended format or 0 if restripe for this
+ * chunk_type is not in progress
+ *
+ * should be called with either volume_mutex or balance_lock held
+ */
+static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags)
+{
+ struct btrfs_balance_control *bctl = fs_info->balance_ctl;
+ u64 target = 0;
+
+ if (!bctl)
+ return 0;
+
+ if (flags & BTRFS_BLOCK_GROUP_DATA &&
+ bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) {
+ target = BTRFS_BLOCK_GROUP_DATA | bctl->data.target;
+ } else if (flags & BTRFS_BLOCK_GROUP_SYSTEM &&
+ bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
+ target = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target;
+ } else if (flags & BTRFS_BLOCK_GROUP_METADATA &&
+ bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) {
+ target = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target;
+ }
+
+ return target;
+}
+
+/*
* @flags: available profiles in extended format (see ctree.h)
*
* Returns reduced profile in chunk format. If profile changing is in
@@ -3128,31 +3193,19 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
*/
u64 num_devices = root->fs_info->fs_devices->rw_devices +
root->fs_info->fs_devices->missing_devices;
+ u64 target;
- /* pick restriper's target profile if it's available */
+ /*
+ * see if restripe for this chunk_type is in progress, if so
+ * try to reduce to the target profile
+ */
spin_lock(&root->fs_info->balance_lock);
- if (root->fs_info->balance_ctl) {
- struct btrfs_balance_control *bctl = root->fs_info->balance_ctl;
- u64 tgt = 0;
-
- if ((flags & BTRFS_BLOCK_GROUP_DATA) &&
- (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
- (flags & bctl->data.target)) {
- tgt = BTRFS_BLOCK_GROUP_DATA | bctl->data.target;
- } else if ((flags & BTRFS_BLOCK_GROUP_SYSTEM) &&
- (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
- (flags & bctl->sys.target)) {
- tgt = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target;
- } else if ((flags & BTRFS_BLOCK_GROUP_METADATA) &&
- (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
- (flags & bctl->meta.target)) {
- tgt = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target;
- }
-
- if (tgt) {
+ target = get_restripe_target(root->fs_info, flags);
+ if (target) {
+ /* pick target profile only if it's already available */
+ if ((flags & target) & BTRFS_EXTENDED_PROFILE_MASK) {
spin_unlock(&root->fs_info->balance_lock);
- flags = tgt;
- goto out;
+ return extended_to_chunk(target);
}
}
spin_unlock(&root->fs_info->balance_lock);
@@ -3180,10 +3233,7 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
flags &= ~BTRFS_BLOCK_GROUP_RAID0;
}
-out:
- /* extended -> chunk profile */
- flags &= ~BTRFS_AVAIL_ALLOC_BIT_SINGLE;
- return flags;
+ return extended_to_chunk(flags);
}
static u64 get_alloc_profile(struct btrfs_root *root, u64 flags)
@@ -3312,8 +3362,7 @@ commit_trans:
}
data_sinfo->bytes_may_use += bytes;
trace_btrfs_space_reservation(root->fs_info, "space_info",
- (u64)(unsigned long)data_sinfo,
- bytes, 1);
+ data_sinfo->flags, bytes, 1);
spin_unlock(&data_sinfo->lock);
return 0;
@@ -3334,8 +3383,7 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
spin_lock(&data_sinfo->lock);
data_sinfo->bytes_may_use -= bytes;
trace_btrfs_space_reservation(root->fs_info, "space_info",
- (u64)(unsigned long)data_sinfo,
- bytes, 0);
+ data_sinfo->flags, bytes, 0);
spin_unlock(&data_sinfo->lock);
}
@@ -3396,6 +3444,50 @@ static int should_alloc_chunk(struct btrfs_root *root,
return 1;
}
+static u64 get_system_chunk_thresh(struct btrfs_root *root, u64 type)
+{
+ u64 num_dev;
+
+ if (type & BTRFS_BLOCK_GROUP_RAID10 ||
+ type & BTRFS_BLOCK_GROUP_RAID0)
+ num_dev = root->fs_info->fs_devices->rw_devices;
+ else if (type & BTRFS_BLOCK_GROUP_RAID1)
+ num_dev = 2;
+ else
+ num_dev = 1; /* DUP or single */
+
+ /* metadata for updaing devices and chunk tree */
+ return btrfs_calc_trans_metadata_size(root, num_dev + 1);
+}
+
+static void check_system_chunk(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 type)
+{
+ struct btrfs_space_info *info;
+ u64 left;
+ u64 thresh;
+
+ info = __find_space_info(root->fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
+ spin_lock(&info->lock);
+ left = info->total_bytes - info->bytes_used - info->bytes_pinned -
+ info->bytes_reserved - info->bytes_readonly;
+ spin_unlock(&info->lock);
+
+ thresh = get_system_chunk_thresh(root, type);
+ if (left < thresh && btrfs_test_opt(root, ENOSPC_DEBUG)) {
+ printk(KERN_INFO "left=%llu, need=%llu, flags=%llu\n",
+ left, thresh, type);
+ dump_space_info(info, 0, 0);
+ }
+
+ if (left < thresh) {
+ u64 flags;
+
+ flags = btrfs_get_alloc_profile(root->fs_info->chunk_root, 0);
+ btrfs_alloc_chunk(trans, root, flags);
+ }
+}
+
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
struct btrfs_root *extent_root, u64 alloc_bytes,
u64 flags, int force)
@@ -3405,15 +3497,13 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
int wait_for_alloc = 0;
int ret = 0;
- BUG_ON(!profile_is_valid(flags, 0));
-
space_info = __find_space_info(extent_root->fs_info, flags);
if (!space_info) {
ret = update_space_info(extent_root->fs_info, flags,
0, 0, &space_info);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
}
- BUG_ON(!space_info);
+ BUG_ON(!space_info); /* Logic error */
again:
spin_lock(&space_info->lock);
@@ -3468,6 +3558,12 @@ again:
force_metadata_allocation(fs_info);
}
+ /*
+ * Check if we have enough space in SYSTEM chunk because we may need
+ * to update devices.
+ */
+ check_system_chunk(trans, extent_root, flags);
+
ret = btrfs_alloc_chunk(trans, extent_root, flags);
if (ret < 0 && ret != -ENOSPC)
goto out;
@@ -3675,9 +3771,8 @@ again:
*/
if (current->journal_info)
return -EAGAIN;
- ret = wait_event_interruptible(space_info->wait,
- !space_info->flush);
- /* Must have been interrupted, return */
+ ret = wait_event_killable(space_info->wait, !space_info->flush);
+ /* Must have been killed, return */
if (ret)
return -EINTR;
@@ -3700,9 +3795,7 @@ again:
if (used + orig_bytes <= space_info->total_bytes) {
space_info->bytes_may_use += orig_bytes;
trace_btrfs_space_reservation(root->fs_info,
- "space_info",
- (u64)(unsigned long)space_info,
- orig_bytes, 1);
+ "space_info", space_info->flags, orig_bytes, 1);
ret = 0;
} else {
/*
@@ -3771,9 +3864,7 @@ again:
if (used + num_bytes < space_info->total_bytes + avail) {
space_info->bytes_may_use += orig_bytes;
trace_btrfs_space_reservation(root->fs_info,
- "space_info",
- (u64)(unsigned long)space_info,
- orig_bytes, 1);
+ "space_info", space_info->flags, orig_bytes, 1);
ret = 0;
} else {
wait_ordered = true;
@@ -3836,8 +3927,9 @@ out:
return ret;
}
-static struct btrfs_block_rsv *get_block_rsv(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
+static struct btrfs_block_rsv *get_block_rsv(
+ const struct btrfs_trans_handle *trans,
+ const struct btrfs_root *root)
{
struct btrfs_block_rsv *block_rsv = NULL;
@@ -3918,8 +4010,7 @@ static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
spin_lock(&space_info->lock);
space_info->bytes_may_use -= num_bytes;
trace_btrfs_space_reservation(fs_info, "space_info",
- (u64)(unsigned long)space_info,
- num_bytes, 0);
+ space_info->flags, num_bytes, 0);
space_info->reservation_progress++;
spin_unlock(&space_info->lock);
}
@@ -4123,8 +4214,8 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
num_bytes = calc_global_metadata_size(fs_info);
- spin_lock(&block_rsv->lock);
spin_lock(&sinfo->lock);
+ spin_lock(&block_rsv->lock);
block_rsv->size = num_bytes;
@@ -4137,21 +4228,21 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
block_rsv->reserved += num_bytes;
sinfo->bytes_may_use += num_bytes;
trace_btrfs_space_reservation(fs_info, "space_info",
- (u64)(unsigned long)sinfo, num_bytes, 1);
+ sinfo->flags, num_bytes, 1);
}
if (block_rsv->reserved >= block_rsv->size) {
num_bytes = block_rsv->reserved - block_rsv->size;
sinfo->bytes_may_use -= num_bytes;
trace_btrfs_space_reservation(fs_info, "space_info",
- (u64)(unsigned long)sinfo, num_bytes, 0);
+ sinfo->flags, num_bytes, 0);
sinfo->reservation_progress++;
block_rsv->reserved = block_rsv->size;
block_rsv->full = 1;
}
- spin_unlock(&sinfo->lock);
spin_unlock(&block_rsv->lock);
+ spin_unlock(&sinfo->lock);
}
static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
@@ -4198,12 +4289,12 @@ void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
return;
trace_btrfs_space_reservation(root->fs_info, "transaction",
- (u64)(unsigned long)trans,
- trans->bytes_reserved, 0);
+ trans->transid, trans->bytes_reserved, 0);
btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved);
trans->bytes_reserved = 0;
}
+/* Can only return 0 or -ENOSPC */
int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
struct inode *inode)
{
@@ -4540,7 +4631,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
while (total) {
cache = btrfs_lookup_block_group(info, bytenr);
if (!cache)
- return -1;
+ return -ENOENT;
if (cache->flags & (BTRFS_BLOCK_GROUP_DUP |
BTRFS_BLOCK_GROUP_RAID1 |
BTRFS_BLOCK_GROUP_RAID10))
@@ -4643,7 +4734,7 @@ int btrfs_pin_extent(struct btrfs_root *root,
struct btrfs_block_group_cache *cache;
cache = btrfs_lookup_block_group(root->fs_info, bytenr);
- BUG_ON(!cache);
+ BUG_ON(!cache); /* Logic error */
pin_down_extent(root, cache, bytenr, num_bytes, reserved);
@@ -4661,7 +4752,7 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *cache;
cache = btrfs_lookup_block_group(root->fs_info, bytenr);
- BUG_ON(!cache);
+ BUG_ON(!cache); /* Logic error */
/*
* pull in the free space cache (if any) so that our pin
@@ -4706,6 +4797,7 @@ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
{
struct btrfs_space_info *space_info = cache->space_info;
int ret = 0;
+
spin_lock(&space_info->lock);
spin_lock(&cache->lock);
if (reserve != RESERVE_FREE) {
@@ -4716,9 +4808,8 @@ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
space_info->bytes_reserved += num_bytes;
if (reserve == RESERVE_ALLOC) {
trace_btrfs_space_reservation(cache->fs_info,
- "space_info",
- (u64)(unsigned long)space_info,
- num_bytes, 0);
+ "space_info", space_info->flags,
+ num_bytes, 0);
space_info->bytes_may_use -= num_bytes;
}
}
@@ -4734,7 +4825,7 @@ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
return ret;
}
-int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
+void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -4764,7 +4855,6 @@ int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
up_write(&fs_info->extent_commit_sem);
update_global_block_rsv(fs_info);
- return 0;
}
static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
@@ -4779,7 +4869,7 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
if (cache)
btrfs_put_block_group(cache);
cache = btrfs_lookup_block_group(fs_info, start);
- BUG_ON(!cache);
+ BUG_ON(!cache); /* Logic error */
}
len = cache->key.objectid + cache->key.offset - start;
@@ -4816,6 +4906,9 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
u64 end;
int ret;
+ if (trans->aborted)
+ return 0;
+
if (fs_info->pinned_extents == &fs_info->freed_extents[0])
unpin = &fs_info->freed_extents[1];
else
@@ -4901,7 +4994,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
ret = remove_extent_backref(trans, extent_root, path,
NULL, refs_to_drop,
is_data);
- BUG_ON(ret);
+ if (ret)
+ goto abort;
btrfs_release_path(path);
path->leave_spinning = 1;
@@ -4919,10 +5013,11 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
btrfs_print_leaf(extent_root,
path->nodes[0]);
}
- BUG_ON(ret);
+ if (ret < 0)
+ goto abort;
extent_slot = path->slots[0];
}
- } else {
+ } else if (ret == -ENOENT) {
btrfs_print_leaf(extent_root, path->nodes[0]);
WARN_ON(1);
printk(KERN_ERR "btrfs unable to find ref byte nr %llu "
@@ -4932,6 +5027,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
(unsigned long long)root_objectid,
(unsigned long long)owner_objectid,
(unsigned long long)owner_offset);
+ } else {
+ goto abort;
}
leaf = path->nodes[0];
@@ -4941,7 +5038,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
BUG_ON(found_extent || extent_slot != path->slots[0]);
ret = convert_extent_item_v0(trans, extent_root, path,
owner_objectid, 0);
- BUG_ON(ret < 0);
+ if (ret < 0)
+ goto abort;
btrfs_release_path(path);
path->leave_spinning = 1;
@@ -4958,7 +5056,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
(unsigned long long)bytenr);
btrfs_print_leaf(extent_root, path->nodes[0]);
}
- BUG_ON(ret);
+ if (ret < 0)
+ goto abort;
extent_slot = path->slots[0];
leaf = path->nodes[0];
item_size = btrfs_item_size_nr(leaf, extent_slot);
@@ -4995,7 +5094,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
ret = remove_extent_backref(trans, extent_root, path,
iref, refs_to_drop,
is_data);
- BUG_ON(ret);
+ if (ret)
+ goto abort;
}
} else {
if (found_extent) {
@@ -5012,23 +5112,27 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
num_to_del);
- BUG_ON(ret);
+ if (ret)
+ goto abort;
btrfs_release_path(path);
if (is_data) {
ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
- BUG_ON(ret);
- } else {
- invalidate_mapping_pages(info->btree_inode->i_mapping,
- bytenr >> PAGE_CACHE_SHIFT,
- (bytenr + num_bytes - 1) >> PAGE_CACHE_SHIFT);
+ if (ret)
+ goto abort;
}
ret = update_block_group(trans, root, bytenr, num_bytes, 0);
- BUG_ON(ret);
+ if (ret)
+ goto abort;
}
+out:
btrfs_free_path(path);
return ret;
+
+abort:
+ btrfs_abort_transaction(trans, extent_root, ret);
+ goto out;
}
/*
@@ -5124,7 +5228,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
parent, root->root_key.objectid,
btrfs_header_level(buf),
BTRFS_DROP_DELAYED_REF, NULL, for_cow);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
}
if (!last_ref)
@@ -5158,6 +5262,7 @@ out:
btrfs_put_block_group(cache);
}
+/* Can return -ENOMEM */
int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
u64 owner, u64 offset, int for_cow)
@@ -5179,14 +5284,12 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root,
num_bytes,
parent, root_objectid, (int)owner,
BTRFS_DROP_DELAYED_REF, NULL, for_cow);
- BUG_ON(ret);
} else {
ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
num_bytes,
parent, root_objectid, owner,
offset, BTRFS_DROP_DELAYED_REF,
NULL, for_cow);
- BUG_ON(ret);
}
return ret;
}
@@ -5243,28 +5346,34 @@ wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
return 0;
}
-static int get_block_group_index(struct btrfs_block_group_cache *cache)
+static int __get_block_group_index(u64 flags)
{
int index;
- if (cache->flags & BTRFS_BLOCK_GROUP_RAID10)
+
+ if (flags & BTRFS_BLOCK_GROUP_RAID10)
index = 0;
- else if (cache->flags & BTRFS_BLOCK_GROUP_RAID1)
+ else if (flags & BTRFS_BLOCK_GROUP_RAID1)
index = 1;
- else if (cache->flags & BTRFS_BLOCK_GROUP_DUP)
+ else if (flags & BTRFS_BLOCK_GROUP_DUP)
index = 2;
- else if (cache->flags & BTRFS_BLOCK_GROUP_RAID0)
+ else if (flags & BTRFS_BLOCK_GROUP_RAID0)
index = 3;
else
index = 4;
+
return index;
}
+static int get_block_group_index(struct btrfs_block_group_cache *cache)
+{
+ return __get_block_group_index(cache->flags);
+}
+
enum btrfs_loop_type {
- LOOP_FIND_IDEAL = 0,
- LOOP_CACHING_NOWAIT = 1,
- LOOP_CACHING_WAIT = 2,
- LOOP_ALLOC_CHUNK = 3,
- LOOP_NO_EMPTY_SIZE = 4,
+ LOOP_CACHING_NOWAIT = 0,
+ LOOP_CACHING_WAIT = 1,
+ LOOP_ALLOC_CHUNK = 2,
+ LOOP_NO_EMPTY_SIZE = 3,
};
/*
@@ -5278,7 +5387,6 @@ enum btrfs_loop_type {
static noinline int find_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *orig_root,
u64 num_bytes, u64 empty_size,
- u64 search_start, u64 search_end,
u64 hint_byte, struct btrfs_key *ins,
u64 data)
{
@@ -5287,6 +5395,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_free_cluster *last_ptr = NULL;
struct btrfs_block_group_cache *block_group = NULL;
struct btrfs_block_group_cache *used_block_group;
+ u64 search_start = 0;
int empty_cluster = 2 * 1024 * 1024;
int allowed_chunk_alloc = 0;
int done_chunk_alloc = 0;
@@ -5300,8 +5409,6 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
bool failed_alloc = false;
bool use_cluster = true;
bool have_caching_bg = false;
- u64 ideal_cache_percent = 0;
- u64 ideal_cache_offset = 0;
WARN_ON(num_bytes < root->sectorsize);
btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
@@ -5351,7 +5458,6 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
empty_cluster = 0;
if (search_start == hint_byte) {
-ideal_cache:
block_group = btrfs_lookup_block_group(root->fs_info,
search_start);
used_block_group = block_group;
@@ -5363,8 +5469,7 @@ ideal_cache:
* picked out then we don't care that the block group is cached.
*/
if (block_group && block_group_bits(block_group, data) &&
- (block_group->cached != BTRFS_CACHE_NO ||
- search_start == ideal_cache_offset)) {
+ block_group->cached != BTRFS_CACHE_NO) {
down_read(&space_info->groups_sem);
if (list_empty(&block_group->list) ||
block_group->ro) {
@@ -5418,44 +5523,13 @@ search:
have_block_group:
cached = block_group_cache_done(block_group);
if (unlikely(!cached)) {
- u64 free_percent;
-
found_uncached_bg = true;
ret = cache_block_group(block_group, trans,
- orig_root, 1);
- if (block_group->cached == BTRFS_CACHE_FINISHED)
- goto alloc;
-
- free_percent = btrfs_block_group_used(&block_group->item);
- free_percent *= 100;
- free_percent = div64_u64(free_percent,
- block_group->key.offset);
- free_percent = 100 - free_percent;
- if (free_percent > ideal_cache_percent &&
- likely(!block_group->ro)) {
- ideal_cache_offset = block_group->key.objectid;
- ideal_cache_percent = free_percent;
- }
-
- /*
- * The caching workers are limited to 2 threads, so we
- * can queue as much work as we care to.
- */
- if (loop > LOOP_FIND_IDEAL) {
- ret = cache_block_group(block_group, trans,
- orig_root, 0);
- BUG_ON(ret);
- }
-
- /*
- * If loop is set for cached only, try the next block
- * group.
- */
- if (loop == LOOP_FIND_IDEAL)
- goto loop;
+ orig_root, 0);
+ BUG_ON(ret < 0);
+ ret = 0;
}
-alloc:
if (unlikely(block_group->ro))
goto loop;
@@ -5606,11 +5680,6 @@ unclustered_alloc:
}
checks:
search_start = stripe_align(root, offset);
- /* move on to the next group */
- if (search_start + num_bytes >= search_end) {
- btrfs_add_free_space(used_block_group, offset, num_bytes);
- goto loop;
- }
/* move on to the next group */
if (search_start + num_bytes >
@@ -5661,9 +5730,7 @@ loop:
if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES)
goto search;
- /* LOOP_FIND_IDEAL, only search caching/cached bg's, and don't wait for
- * for them to make caching progress. Also
- * determine the best possible bg to cache
+ /*
* LOOP_CACHING_NOWAIT, search partially cached block groups, kicking
* caching kthreads as we move along
* LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
@@ -5673,50 +5740,17 @@ loop:
*/
if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE) {
index = 0;
- if (loop == LOOP_FIND_IDEAL && found_uncached_bg) {
- found_uncached_bg = false;
- loop++;
- if (!ideal_cache_percent)
- goto search;
-
- /*
- * 1 of the following 2 things have happened so far
- *
- * 1) We found an ideal block group for caching that
- * is mostly full and will cache quickly, so we might
- * as well wait for it.
- *
- * 2) We searched for cached only and we didn't find
- * anything, and we didn't start any caching kthreads
- * either, so chances are we will loop through and
- * start a couple caching kthreads, and then come back
- * around and just wait for them. This will be slower
- * because we will have 2 caching kthreads reading at
- * the same time when we could have just started one
- * and waited for it to get far enough to give us an
- * allocation, so go ahead and go to the wait caching
- * loop.
- */
- loop = LOOP_CACHING_WAIT;
- search_start = ideal_cache_offset;
- ideal_cache_percent = 0;
- goto ideal_cache;
- } else if (loop == LOOP_FIND_IDEAL) {
- /*
- * Didn't find a uncached bg, wait on anything we find
- * next.
- */
- loop = LOOP_CACHING_WAIT;
- goto search;
- }
-
loop++;
-
if (loop == LOOP_ALLOC_CHUNK) {
if (allowed_chunk_alloc) {
ret = do_chunk_alloc(trans, root, num_bytes +
2 * 1024 * 1024, data,
CHUNK_ALLOC_LIMITED);
+ if (ret < 0) {
+ btrfs_abort_transaction(trans,
+ root, ret);
+ goto out;
+ }
allowed_chunk_alloc = 0;
if (ret == 1)
done_chunk_alloc = 1;
@@ -5745,6 +5779,7 @@ loop:
} else if (ins->objectid) {
ret = 0;
}
+out:
return ret;
}
@@ -5798,12 +5833,10 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 num_bytes, u64 min_alloc_size,
u64 empty_size, u64 hint_byte,
- u64 search_end, struct btrfs_key *ins,
- u64 data)
+ struct btrfs_key *ins, u64 data)
{
bool final_tried = false;
int ret;
- u64 search_start = 0;
data = btrfs_get_alloc_profile(root, data);
again:
@@ -5811,23 +5844,31 @@ again:
* the only place that sets empty_size is btrfs_realloc_node, which
* is not called recursively on allocations
*/
- if (empty_size || root->ref_cows)
+ if (empty_size || root->ref_cows) {
ret = do_chunk_alloc(trans, root->fs_info->extent_root,
num_bytes + 2 * 1024 * 1024, data,
CHUNK_ALLOC_NO_FORCE);
+ if (ret < 0 && ret != -ENOSPC) {
+ btrfs_abort_transaction(trans, root, ret);
+ return ret;
+ }
+ }
WARN_ON(num_bytes < root->sectorsize);
ret = find_free_extent(trans, root, num_bytes, empty_size,
- search_start, search_end, hint_byte,
- ins, data);
+ hint_byte, ins, data);
if (ret == -ENOSPC) {
if (!final_tried) {
num_bytes = num_bytes >> 1;
num_bytes = num_bytes & ~(root->sectorsize - 1);
num_bytes = max(num_bytes, min_alloc_size);
- do_chunk_alloc(trans, root->fs_info->extent_root,
+ ret = do_chunk_alloc(trans, root->fs_info->extent_root,
num_bytes, data, CHUNK_ALLOC_FORCE);
+ if (ret < 0 && ret != -ENOSPC) {
+ btrfs_abort_transaction(trans, root, ret);
+ return ret;
+ }
if (num_bytes == min_alloc_size)
final_tried = true;
goto again;
@@ -5838,7 +5879,8 @@ again:
printk(KERN_ERR "btrfs allocation failed flags %llu, "
"wanted %llu\n", (unsigned long long)data,
(unsigned long long)num_bytes);
- dump_space_info(sinfo, num_bytes, 1);
+ if (sinfo)
+ dump_space_info(sinfo, num_bytes, 1);
}
}
@@ -5917,7 +5959,10 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
path->leave_spinning = 1;
ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
ins, size);
- BUG_ON(ret);
+ if (ret) {
+ btrfs_free_path(path);
+ return ret;
+ }
leaf = path->nodes[0];
extent_item = btrfs_item_ptr(leaf, path->slots[0],
@@ -5947,7 +5992,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
btrfs_free_path(path);
ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
- if (ret) {
+ if (ret) { /* -ENOENT, logic error */
printk(KERN_ERR "btrfs update block group failed for %llu "
"%llu\n", (unsigned long long)ins->objectid,
(unsigned long long)ins->offset);
@@ -5978,7 +6023,10 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
path->leave_spinning = 1;
ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
ins, size);
- BUG_ON(ret);
+ if (ret) {
+ btrfs_free_path(path);
+ return ret;
+ }
leaf = path->nodes[0];
extent_item = btrfs_item_ptr(leaf, path->slots[0],
@@ -6008,7 +6056,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
btrfs_free_path(path);
ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
- if (ret) {
+ if (ret) { /* -ENOENT, logic error */
printk(KERN_ERR "btrfs update block group failed for %llu "
"%llu\n", (unsigned long long)ins->objectid,
(unsigned long long)ins->offset);
@@ -6056,28 +6104,28 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
if (!caching_ctl) {
BUG_ON(!block_group_cache_done(block_group));
ret = btrfs_remove_free_space(block_group, start, num_bytes);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
} else {
mutex_lock(&caching_ctl->mutex);
if (start >= caching_ctl->progress) {
ret = add_excluded_extent(root, start, num_bytes);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
} else if (start + num_bytes <= caching_ctl->progress) {
ret = btrfs_remove_free_space(block_group,
start, num_bytes);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
} else {
num_bytes = caching_ctl->progress - start;
ret = btrfs_remove_free_space(block_group,
start, num_bytes);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
start = caching_ctl->progress;
num_bytes = ins->objectid + ins->offset -
caching_ctl->progress;
ret = add_excluded_extent(root, start, num_bytes);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
}
mutex_unlock(&caching_ctl->mutex);
@@ -6086,7 +6134,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
ret = btrfs_update_reserved_bytes(block_group, ins->offset,
RESERVE_ALLOC_NO_ACCOUNT);
- BUG_ON(ret);
+ BUG_ON(ret); /* logic error */
btrfs_put_block_group(block_group);
ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
0, owner, offset, ins, 1);
@@ -6107,6 +6155,7 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
btrfs_tree_lock(buf);
clean_tree_block(trans, root, buf);
+ clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
btrfs_set_lock_blocking(buf);
btrfs_set_buffer_uptodate(buf);
@@ -6214,7 +6263,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
return ERR_CAST(block_rsv);
ret = btrfs_reserve_extent(trans, root, blocksize, blocksize,
- empty_size, hint, (u64)-1, &ins, 0);
+ empty_size, hint, &ins, 0);
if (ret) {
unuse_block_rsv(root->fs_info, block_rsv, blocksize);
return ERR_PTR(ret);
@@ -6222,7 +6271,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
buf = btrfs_init_new_buffer(trans, root, ins.objectid,
blocksize, level);
- BUG_ON(IS_ERR(buf));
+ BUG_ON(IS_ERR(buf)); /* -ENOMEM */
if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
if (parent == 0)
@@ -6234,7 +6283,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
struct btrfs_delayed_extent_op *extent_op;
extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
- BUG_ON(!extent_op);
+ BUG_ON(!extent_op); /* -ENOMEM */
if (key)
memcpy(&extent_op->key, key, sizeof(extent_op->key));
else
@@ -6249,7 +6298,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
ins.offset, parent, root_objectid,
level, BTRFS_ADD_DELAYED_EXTENT,
extent_op, for_cow);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
}
return buf;
}
@@ -6319,7 +6368,9 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
/* We don't lock the tree block, it's OK to be racy here */
ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
&refs, &flags);
- BUG_ON(ret);
+ /* We don't care about errors in readahead. */
+ if (ret < 0)
+ continue;
BUG_ON(refs == 0);
if (wc->stage == DROP_REFERENCE) {
@@ -6386,7 +6437,9 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
eb->start, eb->len,
&wc->refs[level],
&wc->flags[level]);
- BUG_ON(ret);
+ BUG_ON(ret == -ENOMEM);
+ if (ret)
+ return ret;
BUG_ON(wc->refs[level] == 0);
}
@@ -6405,12 +6458,12 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
if (!(wc->flags[level] & flag)) {
BUG_ON(!path->locks[level]);
ret = btrfs_inc_ref(trans, root, eb, 1, wc->for_reloc);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
ret = btrfs_dec_ref(trans, root, eb, 0, wc->for_reloc);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
ret = btrfs_set_disk_extent_flags(trans, root, eb->start,
eb->len, flag, 0);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
wc->flags[level] |= flag;
}
@@ -6482,7 +6535,11 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
&wc->refs[level - 1],
&wc->flags[level - 1]);
- BUG_ON(ret);
+ if (ret < 0) {
+ btrfs_tree_unlock(next);
+ return ret;
+ }
+
BUG_ON(wc->refs[level - 1] == 0);
*lookup_info = 0;
@@ -6511,7 +6568,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
goto skip;
}
- if (!btrfs_buffer_uptodate(next, generation)) {
+ if (!btrfs_buffer_uptodate(next, generation, 0)) {
btrfs_tree_unlock(next);
free_extent_buffer(next);
next = NULL;
@@ -6551,7 +6608,7 @@ skip:
ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
root->root_key.objectid, level - 1, 0, 0);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
}
btrfs_tree_unlock(next);
free_extent_buffer(next);
@@ -6609,7 +6666,10 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
eb->start, eb->len,
&wc->refs[level],
&wc->flags[level]);
- BUG_ON(ret);
+ if (ret < 0) {
+ btrfs_tree_unlock_rw(eb, path->locks[level]);
+ return ret;
+ }
BUG_ON(wc->refs[level] == 0);
if (wc->refs[level] == 1) {
btrfs_tree_unlock_rw(eb, path->locks[level]);
@@ -6629,7 +6689,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
else
ret = btrfs_dec_ref(trans, root, eb, 0,
wc->for_reloc);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
}
/* make block locked assertion in clean_tree_block happy */
if (!path->locks[level] &&
@@ -6738,7 +6798,7 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
* also make sure backrefs for the shared block and all lower level
* blocks are properly updated.
*/
-void btrfs_drop_snapshot(struct btrfs_root *root,
+int btrfs_drop_snapshot(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv, int update_ref,
int for_reloc)
{
@@ -6766,7 +6826,10 @@ void btrfs_drop_snapshot(struct btrfs_root *root,
}
trans = btrfs_start_transaction(tree_root, 0);
- BUG_ON(IS_ERR(trans));
+ if (IS_ERR(trans)) {
+ err = PTR_ERR(trans);
+ goto out_free;
+ }
if (block_rsv)
trans->block_rsv = block_rsv;
@@ -6791,7 +6854,7 @@ void btrfs_drop_snapshot(struct btrfs_root *root,
path->lowest_level = 0;
if (ret < 0) {
err = ret;
- goto out_free;
+ goto out_end_trans;
}
WARN_ON(ret > 0);
@@ -6811,7 +6874,10 @@ void btrfs_drop_snapshot(struct btrfs_root *root,
path->nodes[level]->len,
&wc->refs[level],
&wc->flags[level]);
- BUG_ON(ret);
+ if (ret < 0) {
+ err = ret;
+ goto out_end_trans;
+ }
BUG_ON(wc->refs[level] == 0);
if (level == root_item->drop_level)
@@ -6862,26 +6928,40 @@ void btrfs_drop_snapshot(struct btrfs_root *root,
ret = btrfs_update_root(trans, tree_root,
&root->root_key,
root_item);
- BUG_ON(ret);
+ if (ret) {
+ btrfs_abort_transaction(trans, tree_root, ret);
+ err = ret;
+ goto out_end_trans;
+ }
btrfs_end_transaction_throttle(trans, tree_root);
trans = btrfs_start_transaction(tree_root, 0);
- BUG_ON(IS_ERR(trans));
+ if (IS_ERR(trans)) {
+ err = PTR_ERR(trans);
+ goto out_free;
+ }
if (block_rsv)
trans->block_rsv = block_rsv;
}
}
btrfs_release_path(path);
- BUG_ON(err);
+ if (err)
+ goto out_end_trans;
ret = btrfs_del_root(trans, tree_root, &root->root_key);
- BUG_ON(ret);
+ if (ret) {
+ btrfs_abort_transaction(trans, tree_root, ret);
+ goto out_end_trans;
+ }
if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
ret = btrfs_find_last_root(tree_root, root->root_key.objectid,
NULL, NULL);
- BUG_ON(ret < 0);
- if (ret > 0) {
+ if (ret < 0) {
+ btrfs_abort_transaction(trans, tree_root, ret);
+ err = ret;
+ goto out_end_trans;
+ } else if (ret > 0) {
/* if we fail to delete the orphan item this time
* around, it'll get picked up the next time.
*
@@ -6899,14 +6979,15 @@ void btrfs_drop_snapshot(struct btrfs_root *root,
free_extent_buffer(root->commit_root);
kfree(root);
}
-out_free:
+out_end_trans:
btrfs_end_transaction_throttle(trans, tree_root);
+out_free:
kfree(wc);
btrfs_free_path(path);
out:
if (err)
btrfs_std_error(root->fs_info, err);
- return;
+ return err;
}
/*
@@ -6983,31 +7064,15 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
{
u64 num_devices;
- u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
- BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
-
- if (root->fs_info->balance_ctl) {
- struct btrfs_balance_control *bctl = root->fs_info->balance_ctl;
- u64 tgt = 0;
-
- /* pick restriper's target profile and return */
- if (flags & BTRFS_BLOCK_GROUP_DATA &&
- bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) {
- tgt = BTRFS_BLOCK_GROUP_DATA | bctl->data.target;
- } else if (flags & BTRFS_BLOCK_GROUP_SYSTEM &&
- bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
- tgt = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target;
- } else if (flags & BTRFS_BLOCK_GROUP_METADATA &&
- bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) {
- tgt = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target;
- }
+ u64 stripped;
- if (tgt) {
- /* extended -> chunk profile */
- tgt &= ~BTRFS_AVAIL_ALLOC_BIT_SINGLE;
- return tgt;
- }
- }
+ /*
+ * if restripe for this chunk_type is on pick target profile and
+ * return, otherwise do the usual balance
+ */
+ stripped = get_restripe_target(root->fs_info, flags);
+ if (stripped)
+ return extended_to_chunk(stripped);
/*
* we add in the count of missing devices because we want
@@ -7017,6 +7082,9 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
num_devices = root->fs_info->fs_devices->rw_devices +
root->fs_info->fs_devices->missing_devices;
+ stripped = BTRFS_BLOCK_GROUP_RAID0 |
+ BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
+
if (num_devices == 1) {
stripped |= BTRFS_BLOCK_GROUP_DUP;
stripped = flags & ~stripped;
@@ -7029,7 +7097,6 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
BTRFS_BLOCK_GROUP_RAID10))
return stripped | BTRFS_BLOCK_GROUP_DUP;
- return flags;
} else {
/* they already had raid on here, just return */
if (flags & stripped)
@@ -7042,9 +7109,9 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
if (flags & BTRFS_BLOCK_GROUP_DUP)
return stripped | BTRFS_BLOCK_GROUP_RAID1;
- /* turn single device chunks into raid0 */
- return stripped | BTRFS_BLOCK_GROUP_RAID0;
+ /* this is drive concat, leave it alone */
}
+
return flags;
}
@@ -7103,12 +7170,16 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
BUG_ON(cache->ro);
trans = btrfs_join_transaction(root);
- BUG_ON(IS_ERR(trans));
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
alloc_flags = update_block_group_flags(root, cache->flags);
- if (alloc_flags != cache->flags)
- do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
- CHUNK_ALLOC_FORCE);
+ if (alloc_flags != cache->flags) {
+ ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
+ CHUNK_ALLOC_FORCE);
+ if (ret < 0)
+ goto out;
+ }
ret = set_block_group_ro(cache, 0);
if (!ret)
@@ -7188,7 +7259,7 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
return free_bytes;
}
-int btrfs_set_block_group_rw(struct btrfs_root *root,
+void btrfs_set_block_group_rw(struct btrfs_root *root,
struct btrfs_block_group_cache *cache)
{
struct btrfs_space_info *sinfo = cache->space_info;
@@ -7204,7 +7275,6 @@ int btrfs_set_block_group_rw(struct btrfs_root *root,
cache->ro = 0;
spin_unlock(&cache->lock);
spin_unlock(&sinfo->lock);
- return 0;
}
/*
@@ -7222,6 +7292,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
u64 min_free;
u64 dev_min = 1;
u64 dev_nr = 0;
+ u64 target;
int index;
int full = 0;
int ret = 0;
@@ -7262,13 +7333,11 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
/*
* ok we don't have enough space, but maybe we have free space on our
* devices to allocate new chunks for relocation, so loop through our
- * alloc devices and guess if we have enough space. However, if we
- * were marked as full, then we know there aren't enough chunks, and we
- * can just return.
+ * alloc devices and guess if we have enough space. if this block
+ * group is going to be restriped, run checks against the target
+ * profile instead of the current one.
*/
ret = -1;
- if (full)
- goto out;
/*
* index:
@@ -7278,7 +7347,20 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
* 3: raid0
* 4: single
*/
- index = get_block_group_index(block_group);
+ target = get_restripe_target(root->fs_info, block_group->flags);
+ if (target) {
+ index = __get_block_group_index(extended_to_chunk(target));
+ } else {
+ /*
+ * this is just a balance, so if we were marked as full
+ * we know there is no space for a new chunk
+ */
+ if (full)
+ goto out;
+
+ index = get_block_group_index(block_group);
+ }
+
if (index == 0) {
dev_min = 4;
/* Divide by 2 */
@@ -7572,7 +7654,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
ret = update_space_info(info, cache->flags, found_key.offset,
btrfs_block_group_used(&cache->item),
&space_info);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
cache->space_info = space_info;
spin_lock(&cache->space_info->lock);
cache->space_info->bytes_readonly += cache->bytes_super;
@@ -7581,7 +7663,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
__link_block_group(space_info, cache);
ret = btrfs_add_block_group_cache(root->fs_info, cache);
- BUG_ON(ret);
+ BUG_ON(ret); /* Logic error */
set_avail_alloc_bits(root->fs_info, cache->flags);
if (btrfs_chunk_readonly(root, cache->key.objectid))
@@ -7663,7 +7745,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
&cache->space_info);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
update_global_block_rsv(root->fs_info);
spin_lock(&cache->space_info->lock);
@@ -7673,11 +7755,14 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
__link_block_group(cache->space_info, cache);
ret = btrfs_add_block_group_cache(root->fs_info, cache);
- BUG_ON(ret);
+ BUG_ON(ret); /* Logic error */
ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
sizeof(cache->item));
- BUG_ON(ret);
+ if (ret) {
+ btrfs_abort_transaction(trans, extent_root, ret);
+ return ret;
+ }
set_avail_alloc_bits(extent_root->fs_info, type);
@@ -7686,11 +7771,8 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
{
- u64 extra_flags = flags & BTRFS_BLOCK_GROUP_PROFILE_MASK;
-
- /* chunk -> extended profile */
- if (extra_flags == 0)
- extra_flags = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
+ u64 extra_flags = chunk_to_extended(flags) &
+ BTRFS_EXTENDED_PROFILE_MASK;
if (flags & BTRFS_BLOCK_GROUP_DATA)
fs_info->avail_data_alloc_bits &= ~extra_flags;
@@ -7758,7 +7840,10 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
inode = lookup_free_space_inode(tree_root, block_group, path);
if (!IS_ERR(inode)) {
ret = btrfs_orphan_add(trans, inode);
- BUG_ON(ret);
+ if (ret) {
+ btrfs_add_delayed_iput(inode);
+ goto out;
+ }
clear_nlink(inode);
/* One for the block groups ref */
spin_lock(&block_group->lock);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index a55fbe6252d..c9018a05036 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -19,6 +19,7 @@
#include "btrfs_inode.h"
#include "volumes.h"
#include "check-integrity.h"
+#include "locking.h"
static struct kmem_cache *extent_state_cache;
static struct kmem_cache *extent_buffer_cache;
@@ -53,6 +54,13 @@ struct extent_page_data {
unsigned int sync_io:1;
};
+static noinline void flush_write_bio(void *data);
+static inline struct btrfs_fs_info *
+tree_fs_info(struct extent_io_tree *tree)
+{
+ return btrfs_sb(tree->mapping->host->i_sb);
+}
+
int __init extent_io_init(void)
{
extent_state_cache = kmem_cache_create("extent_state",
@@ -136,6 +144,7 @@ static struct extent_state *alloc_extent_state(gfp_t mask)
#endif
atomic_set(&state->refs, 1);
init_waitqueue_head(&state->wq);
+ trace_alloc_extent_state(state, mask, _RET_IP_);
return state;
}
@@ -153,6 +162,7 @@ void free_extent_state(struct extent_state *state)
list_del(&state->leak_list);
spin_unlock_irqrestore(&leak_lock, flags);
#endif
+ trace_free_extent_state(state, _RET_IP_);
kmem_cache_free(extent_state_cache, state);
}
}
@@ -392,20 +402,28 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
return 0;
}
+static struct extent_state *next_state(struct extent_state *state)
+{
+ struct rb_node *next = rb_next(&state->rb_node);
+ if (next)
+ return rb_entry(next, struct extent_state, rb_node);
+ else
+ return NULL;
+}
+
/*
* utility function to clear some bits in an extent state struct.
- * it will optionally wake up any one waiting on this state (wake == 1), or
- * forcibly remove the state from the tree (delete == 1).
+ * it will optionally wake up any one waiting on this state (wake == 1)
*
* If no bits are set on the state struct after clearing things, the
* struct is freed and removed from the tree
*/
-static int clear_state_bit(struct extent_io_tree *tree,
- struct extent_state *state,
- int *bits, int wake)
+static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
+ struct extent_state *state,
+ int *bits, int wake)
{
+ struct extent_state *next;
int bits_to_clear = *bits & ~EXTENT_CTLBITS;
- int ret = state->state & bits_to_clear;
if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
u64 range = state->end - state->start + 1;
@@ -417,6 +435,7 @@ static int clear_state_bit(struct extent_io_tree *tree,
if (wake)
wake_up(&state->wq);
if (state->state == 0) {
+ next = next_state(state);
if (state->tree) {
rb_erase(&state->rb_node, &tree->state);
state->tree = NULL;
@@ -426,8 +445,9 @@ static int clear_state_bit(struct extent_io_tree *tree,
}
} else {
merge_state(tree, state);
+ next = next_state(state);
}
- return ret;
+ return next;
}
static struct extent_state *
@@ -439,6 +459,13 @@ alloc_extent_state_atomic(struct extent_state *prealloc)
return prealloc;
}
+void extent_io_tree_panic(struct extent_io_tree *tree, int err)
+{
+ btrfs_panic(tree_fs_info(tree), err, "Locking error: "
+ "Extent tree was modified by another "
+ "thread while locked.");
+}
+
/*
* clear some bits on a range in the tree. This may require splitting
* or inserting elements in the tree, so the gfp mask is used to
@@ -449,8 +476,7 @@ alloc_extent_state_atomic(struct extent_state *prealloc)
*
* the range [start, end] is inclusive.
*
- * This takes the tree lock, and returns < 0 on error, > 0 if any of the
- * bits were already set, or zero if none of the bits were already set.
+ * This takes the tree lock, and returns 0 on success and < 0 on error.
*/
int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
int bits, int wake, int delete,
@@ -460,11 +486,9 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state *state;
struct extent_state *cached;
struct extent_state *prealloc = NULL;
- struct rb_node *next_node;
struct rb_node *node;
u64 last_end;
int err;
- int set = 0;
int clear = 0;
if (delete)
@@ -513,14 +537,11 @@ hit_next:
WARN_ON(state->end < start);
last_end = state->end;
- if (state->end < end && !need_resched())
- next_node = rb_next(&state->rb_node);
- else
- next_node = NULL;
-
/* the state doesn't have the wanted bits, go ahead */
- if (!(state->state & bits))
+ if (!(state->state & bits)) {
+ state = next_state(state);
goto next;
+ }
/*
* | ---- desired range ---- |
@@ -542,12 +563,14 @@ hit_next:
prealloc = alloc_extent_state_atomic(prealloc);
BUG_ON(!prealloc);
err = split_state(tree, state, prealloc, start);
- BUG_ON(err == -EEXIST);
+ if (err)
+ extent_io_tree_panic(tree, err);
+
prealloc = NULL;
if (err)
goto out;
if (state->end <= end) {
- set |= clear_state_bit(tree, state, &bits, wake);
+ clear_state_bit(tree, state, &bits, wake);
if (last_end == (u64)-1)
goto out;
start = last_end + 1;
@@ -564,26 +587,25 @@ hit_next:
prealloc = alloc_extent_state_atomic(prealloc);
BUG_ON(!prealloc);
err = split_state(tree, state, prealloc, end + 1);
- BUG_ON(err == -EEXIST);
+ if (err)
+ extent_io_tree_panic(tree, err);
+
if (wake)
wake_up(&state->wq);
- set |= clear_state_bit(tree, prealloc, &bits, wake);
+ clear_state_bit(tree, prealloc, &bits, wake);
prealloc = NULL;
goto out;
}
- set |= clear_state_bit(tree, state, &bits, wake);
+ state = clear_state_bit(tree, state, &bits, wake);
next:
if (last_end == (u64)-1)
goto out;
start = last_end + 1;
- if (start <= end && next_node) {
- state = rb_entry(next_node, struct extent_state,
- rb_node);
+ if (start <= end && state && !need_resched())
goto hit_next;
- }
goto search_again;
out:
@@ -591,7 +613,7 @@ out:
if (prealloc)
free_extent_state(prealloc);
- return set;
+ return 0;
search_again:
if (start > end)
@@ -602,8 +624,8 @@ search_again:
goto again;
}
-static int wait_on_state(struct extent_io_tree *tree,
- struct extent_state *state)
+static void wait_on_state(struct extent_io_tree *tree,
+ struct extent_state *state)
__releases(tree->lock)
__acquires(tree->lock)
{
@@ -613,7 +635,6 @@ static int wait_on_state(struct extent_io_tree *tree,
schedule();
spin_lock(&tree->lock);
finish_wait(&state->wq, &wait);
- return 0;
}
/*
@@ -621,7 +642,7 @@ static int wait_on_state(struct extent_io_tree *tree,
* The range [start, end] is inclusive.
* The tree lock is taken by this function
*/
-int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits)
+void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits)
{
struct extent_state *state;
struct rb_node *node;
@@ -658,7 +679,6 @@ again:
}
out:
spin_unlock(&tree->lock);
- return 0;
}
static void set_state_bits(struct extent_io_tree *tree,
@@ -706,9 +726,10 @@ static void uncache_state(struct extent_state **cached_ptr)
* [start, end] is inclusive This takes the tree lock.
*/
-int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
- int bits, int exclusive_bits, u64 *failed_start,
- struct extent_state **cached_state, gfp_t mask)
+static int __must_check
+__set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
+ int bits, int exclusive_bits, u64 *failed_start,
+ struct extent_state **cached_state, gfp_t mask)
{
struct extent_state *state;
struct extent_state *prealloc = NULL;
@@ -742,8 +763,10 @@ again:
prealloc = alloc_extent_state_atomic(prealloc);
BUG_ON(!prealloc);
err = insert_state(tree, prealloc, start, end, &bits);
+ if (err)
+ extent_io_tree_panic(tree, err);
+
prealloc = NULL;
- BUG_ON(err == -EEXIST);
goto out;
}
state = rb_entry(node, struct extent_state, rb_node);
@@ -809,7 +832,9 @@ hit_next:
prealloc = alloc_extent_state_atomic(prealloc);
BUG_ON(!prealloc);
err = split_state(tree, state, prealloc, start);
- BUG_ON(err == -EEXIST);
+ if (err)
+ extent_io_tree_panic(tree, err);
+
prealloc = NULL;
if (err)
goto out;
@@ -846,12 +871,9 @@ hit_next:
*/
err = insert_state(tree, prealloc, start, this_end,
&bits);
- BUG_ON(err == -EEXIST);
- if (err) {
- free_extent_state(prealloc);
- prealloc = NULL;
- goto out;
- }
+ if (err)
+ extent_io_tree_panic(tree, err);
+
cache_state(prealloc, cached_state);
prealloc = NULL;
start = this_end + 1;
@@ -873,7 +895,8 @@ hit_next:
prealloc = alloc_extent_state_atomic(prealloc);
BUG_ON(!prealloc);
err = split_state(tree, state, prealloc, end + 1);
- BUG_ON(err == -EEXIST);
+ if (err)
+ extent_io_tree_panic(tree, err);
set_state_bits(tree, prealloc, &bits);
cache_state(prealloc, cached_state);
@@ -900,6 +923,15 @@ search_again:
goto again;
}
+int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits,
+ u64 *failed_start, struct extent_state **cached_state,
+ gfp_t mask)
+{
+ return __set_extent_bit(tree, start, end, bits, 0, failed_start,
+ cached_state, mask);
+}
+
+
/**
* convert_extent - convert all bits in a given range from one bit to another
* @tree: the io tree to search
@@ -946,7 +978,8 @@ again:
}
err = insert_state(tree, prealloc, start, end, &bits);
prealloc = NULL;
- BUG_ON(err == -EEXIST);
+ if (err)
+ extent_io_tree_panic(tree, err);
goto out;
}
state = rb_entry(node, struct extent_state, rb_node);
@@ -1002,7 +1035,8 @@ hit_next:
goto out;
}
err = split_state(tree, state, prealloc, start);
- BUG_ON(err == -EEXIST);
+ if (err)
+ extent_io_tree_panic(tree, err);
prealloc = NULL;
if (err)
goto out;
@@ -1041,12 +1075,8 @@ hit_next:
*/
err = insert_state(tree, prealloc, start, this_end,
&bits);
- BUG_ON(err == -EEXIST);
- if (err) {
- free_extent_state(prealloc);
- prealloc = NULL;
- goto out;
- }
+ if (err)
+ extent_io_tree_panic(tree, err);
prealloc = NULL;
start = this_end + 1;
goto search_again;
@@ -1065,7 +1095,8 @@ hit_next:
}
err = split_state(tree, state, prealloc, end + 1);
- BUG_ON(err == -EEXIST);
+ if (err)
+ extent_io_tree_panic(tree, err);
set_state_bits(tree, prealloc, &bits);
clear_state_bit(tree, prealloc, &clear_bits, 0);
@@ -1095,14 +1126,14 @@ search_again:
int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask)
{
- return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL,
+ return set_extent_bit(tree, start, end, EXTENT_DIRTY, NULL,
NULL, mask);
}
int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
int bits, gfp_t mask)
{
- return set_extent_bit(tree, start, end, bits, 0, NULL,
+ return set_extent_bit(tree, start, end, bits, NULL,
NULL, mask);
}
@@ -1117,7 +1148,7 @@ int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
{
return set_extent_bit(tree, start, end,
EXTENT_DELALLOC | EXTENT_UPTODATE,
- 0, NULL, cached_state, mask);
+ NULL, cached_state, mask);
}
int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
@@ -1131,7 +1162,7 @@ int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask)
{
- return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL,
+ return set_extent_bit(tree, start, end, EXTENT_NEW, NULL,
NULL, mask);
}
@@ -1139,7 +1170,7 @@ int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached_state, gfp_t mask)
{
return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0,
- NULL, cached_state, mask);
+ cached_state, mask);
}
static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
@@ -1155,42 +1186,40 @@ static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
* us if waiting is desired.
*/
int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
- int bits, struct extent_state **cached_state, gfp_t mask)
+ int bits, struct extent_state **cached_state)
{
int err;
u64 failed_start;
while (1) {
- err = set_extent_bit(tree, start, end, EXTENT_LOCKED | bits,
- EXTENT_LOCKED, &failed_start,
- cached_state, mask);
- if (err == -EEXIST && (mask & __GFP_WAIT)) {
+ err = __set_extent_bit(tree, start, end, EXTENT_LOCKED | bits,
+ EXTENT_LOCKED, &failed_start,
+ cached_state, GFP_NOFS);
+ if (err == -EEXIST) {
wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
start = failed_start;
- } else {
+ } else
break;
- }
WARN_ON(start > end);
}
return err;
}
-int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
+int lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
{
- return lock_extent_bits(tree, start, end, 0, NULL, mask);
+ return lock_extent_bits(tree, start, end, 0, NULL);
}
-int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
- gfp_t mask)
+int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
{
int err;
u64 failed_start;
- err = set_extent_bit(tree, start, end, EXTENT_LOCKED, EXTENT_LOCKED,
- &failed_start, NULL, mask);
+ err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, EXTENT_LOCKED,
+ &failed_start, NULL, GFP_NOFS);
if (err == -EEXIST) {
if (failed_start > start)
clear_extent_bit(tree, start, failed_start - 1,
- EXTENT_LOCKED, 1, 0, NULL, mask);
+ EXTENT_LOCKED, 1, 0, NULL, GFP_NOFS);
return 0;
}
return 1;
@@ -1203,10 +1232,10 @@ int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
mask);
}
-int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
+int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end)
{
return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL,
- mask);
+ GFP_NOFS);
}
/*
@@ -1220,7 +1249,7 @@ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
while (index <= end_index) {
page = find_get_page(tree->mapping, index);
- BUG_ON(!page);
+ BUG_ON(!page); /* Pages should be in the extent_io_tree */
set_page_writeback(page);
page_cache_release(page);
index++;
@@ -1343,9 +1372,9 @@ out:
return found;
}
-static noinline int __unlock_for_delalloc(struct inode *inode,
- struct page *locked_page,
- u64 start, u64 end)
+static noinline void __unlock_for_delalloc(struct inode *inode,
+ struct page *locked_page,
+ u64 start, u64 end)
{
int ret;
struct page *pages[16];
@@ -1355,7 +1384,7 @@ static noinline int __unlock_for_delalloc(struct inode *inode,
int i;
if (index == locked_page->index && end_index == index)
- return 0;
+ return;
while (nr_pages > 0) {
ret = find_get_pages_contig(inode->i_mapping, index,
@@ -1370,7 +1399,6 @@ static noinline int __unlock_for_delalloc(struct inode *inode,
index += ret;
cond_resched();
}
- return 0;
}
static noinline int lock_delalloc_pages(struct inode *inode,
@@ -1500,11 +1528,10 @@ again:
goto out_failed;
}
}
- BUG_ON(ret);
+ BUG_ON(ret); /* Only valid values are 0 and -EAGAIN */
/* step three, lock the state bits for the whole range */
- lock_extent_bits(tree, delalloc_start, delalloc_end,
- 0, &cached_state, GFP_NOFS);
+ lock_extent_bits(tree, delalloc_start, delalloc_end, 0, &cached_state);
/* then test to make sure it is all still delalloc */
ret = test_range_bit(tree, delalloc_start, delalloc_end,
@@ -1761,39 +1788,34 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
* helper function to set a given page up to date if all the
* extents in the tree for that page are up to date
*/
-static int check_page_uptodate(struct extent_io_tree *tree,
- struct page *page)
+static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
{
u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
u64 end = start + PAGE_CACHE_SIZE - 1;
if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL))
SetPageUptodate(page);
- return 0;
}
/*
* helper function to unlock a page if all the extents in the tree
* for that page are unlocked
*/
-static int check_page_locked(struct extent_io_tree *tree,
- struct page *page)
+static void check_page_locked(struct extent_io_tree *tree, struct page *page)
{
u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
u64 end = start + PAGE_CACHE_SIZE - 1;
if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL))
unlock_page(page);
- return 0;
}
/*
* helper function to end page writeback if all the extents
* in the tree for that page are done with writeback
*/
-static int check_page_writeback(struct extent_io_tree *tree,
- struct page *page)
+static void check_page_writeback(struct extent_io_tree *tree,
+ struct page *page)
{
end_page_writeback(page);
- return 0;
}
/*
@@ -1912,6 +1934,26 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
return 0;
}
+int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
+ int mirror_num)
+{
+ struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
+ u64 start = eb->start;
+ unsigned long i, num_pages = num_extent_pages(eb->start, eb->len);
+ int ret = 0;
+
+ for (i = 0; i < num_pages; i++) {
+ struct page *p = extent_buffer_page(eb, i);
+ ret = repair_io_failure(map_tree, start, PAGE_CACHE_SIZE,
+ start, p, mirror_num);
+ if (ret)
+ break;
+ start += PAGE_CACHE_SIZE;
+ }
+
+ return ret;
+}
+
/*
* each time an IO finishes, we do a fast check in the IO failure tree
* to see if we need to process or clean up an io_failure_record
@@ -2141,6 +2183,10 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
}
bio = bio_alloc(GFP_NOFS, 1);
+ if (!bio) {
+ free_io_failure(inode, failrec, 0);
+ return -EIO;
+ }
bio->bi_private = state;
bio->bi_end_io = failed_bio->bi_end_io;
bio->bi_sector = failrec->logical >> 9;
@@ -2258,6 +2304,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
u64 start;
u64 end;
int whole_page;
+ int mirror;
int ret;
if (err)
@@ -2296,17 +2343,22 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
}
spin_unlock(&tree->lock);
+ mirror = (int)(unsigned long)bio->bi_bdev;
if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
ret = tree->ops->readpage_end_io_hook(page, start, end,
- state);
+ state, mirror);
if (ret)
uptodate = 0;
else
clean_io_failure(start, page);
}
- if (!uptodate) {
- int failed_mirror;
- failed_mirror = (int)(unsigned long)bio->bi_bdev;
+
+ if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) {
+ ret = tree->ops->readpage_io_failed_hook(page, mirror);
+ if (!ret && !err &&
+ test_bit(BIO_UPTODATE, &bio->bi_flags))
+ uptodate = 1;
+ } else if (!uptodate) {
/*
* The generic bio_readpage_error handles errors the
* following way: If possible, new read requests are
@@ -2317,10 +2369,8 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
* can't handle the error it will return -EIO and we
* remain responsible for that page.
*/
- ret = bio_readpage_error(bio, page, start, end,
- failed_mirror, NULL);
+ ret = bio_readpage_error(bio, page, start, end, mirror, NULL);
if (ret == 0) {
-error_handled:
uptodate =
test_bit(BIO_UPTODATE, &bio->bi_flags);
if (err)
@@ -2328,16 +2378,9 @@ error_handled:
uncache_state(&cached);
continue;
}
- if (tree->ops && tree->ops->readpage_io_failed_hook) {
- ret = tree->ops->readpage_io_failed_hook(
- bio, page, start, end,
- failed_mirror, state);
- if (ret == 0)
- goto error_handled;
- }
}
- if (uptodate) {
+ if (uptodate && tree->track_uptodate) {
set_extent_uptodate(tree, start, end, &cached,
GFP_ATOMIC);
}
@@ -2386,8 +2429,12 @@ btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
return bio;
}
-static int submit_one_bio(int rw, struct bio *bio, int mirror_num,
- unsigned long bio_flags)
+/*
+ * Since writes are async, they will only return -ENOMEM.
+ * Reads can return the full range of I/O error conditions.
+ */
+static int __must_check submit_one_bio(int rw, struct bio *bio,
+ int mirror_num, unsigned long bio_flags)
{
int ret = 0;
struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
@@ -2413,6 +2460,19 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num,
return ret;
}
+static int merge_bio(struct extent_io_tree *tree, struct page *page,
+ unsigned long offset, size_t size, struct bio *bio,
+ unsigned long bio_flags)
+{
+ int ret = 0;
+ if (tree->ops && tree->ops->merge_bio_hook)
+ ret = tree->ops->merge_bio_hook(page, offset, size, bio,
+ bio_flags);
+ BUG_ON(ret < 0);
+ return ret;
+
+}
+
static int submit_extent_page(int rw, struct extent_io_tree *tree,
struct page *page, sector_t sector,
size_t size, unsigned long offset,
@@ -2441,12 +2501,12 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
sector;
if (prev_bio_flags != bio_flags || !contig ||
- (tree->ops && tree->ops->merge_bio_hook &&
- tree->ops->merge_bio_hook(page, offset, page_size, bio,
- bio_flags)) ||
+ merge_bio(tree, page, offset, page_size, bio, bio_flags) ||
bio_add_page(bio, page, page_size, offset) < page_size) {
ret = submit_one_bio(rw, bio, mirror_num,
prev_bio_flags);
+ if (ret < 0)
+ return ret;
bio = NULL;
} else {
return 0;
@@ -2473,25 +2533,31 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
return ret;
}
-void set_page_extent_mapped(struct page *page)
+void attach_extent_buffer_page(struct extent_buffer *eb, struct page *page)
{
if (!PagePrivate(page)) {
SetPagePrivate(page);
page_cache_get(page);
- set_page_private(page, EXTENT_PAGE_PRIVATE);
+ set_page_private(page, (unsigned long)eb);
+ } else {
+ WARN_ON(page->private != (unsigned long)eb);
}
}
-static void set_page_extent_head(struct page *page, unsigned long len)
+void set_page_extent_mapped(struct page *page)
{
- WARN_ON(!PagePrivate(page));
- set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2);
+ if (!PagePrivate(page)) {
+ SetPagePrivate(page);
+ page_cache_get(page);
+ set_page_private(page, EXTENT_PAGE_PRIVATE);
+ }
}
/*
* basic readpage implementation. Locked extent state structs are inserted
* into the tree that are removed when the IO is done (by the end_io
* handlers)
+ * XXX JDM: This needs looking at to ensure proper page locking
*/
static int __extent_read_full_page(struct extent_io_tree *tree,
struct page *page,
@@ -2531,11 +2597,11 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
end = page_end;
while (1) {
- lock_extent(tree, start, end, GFP_NOFS);
+ lock_extent(tree, start, end);
ordered = btrfs_lookup_ordered_extent(inode, start);
if (!ordered)
break;
- unlock_extent(tree, start, end, GFP_NOFS);
+ unlock_extent(tree, start, end);
btrfs_start_ordered_extent(inode, ordered, 1);
btrfs_put_ordered_extent(ordered);
}
@@ -2546,10 +2612,10 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
if (zero_offset) {
iosize = PAGE_CACHE_SIZE - zero_offset;
- userpage = kmap_atomic(page, KM_USER0);
+ userpage = kmap_atomic(page);
memset(userpage + zero_offset, 0, iosize);
flush_dcache_page(page);
- kunmap_atomic(userpage, KM_USER0);
+ kunmap_atomic(userpage);
}
}
while (cur <= end) {
@@ -2558,10 +2624,10 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
struct extent_state *cached = NULL;
iosize = PAGE_CACHE_SIZE - pg_offset;
- userpage = kmap_atomic(page, KM_USER0);
+ userpage = kmap_atomic(page);
memset(userpage + pg_offset, 0, iosize);
flush_dcache_page(page);
- kunmap_atomic(userpage, KM_USER0);
+ kunmap_atomic(userpage);
set_extent_uptodate(tree, cur, cur + iosize - 1,
&cached, GFP_NOFS);
unlock_extent_cached(tree, cur, cur + iosize - 1,
@@ -2572,7 +2638,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
end - cur + 1, 0);
if (IS_ERR_OR_NULL(em)) {
SetPageError(page);
- unlock_extent(tree, cur, end, GFP_NOFS);
+ unlock_extent(tree, cur, end);
break;
}
extent_offset = cur - em->start;
@@ -2607,10 +2673,10 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
char *userpage;
struct extent_state *cached = NULL;
- userpage = kmap_atomic(page, KM_USER0);
+ userpage = kmap_atomic(page);
memset(userpage + pg_offset, 0, iosize);
flush_dcache_page(page);
- kunmap_atomic(userpage, KM_USER0);
+ kunmap_atomic(userpage);
set_extent_uptodate(tree, cur, cur + iosize - 1,
&cached, GFP_NOFS);
@@ -2624,7 +2690,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
if (test_range_bit(tree, cur, cur_end,
EXTENT_UPTODATE, 1, NULL)) {
check_page_uptodate(tree, page);
- unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
+ unlock_extent(tree, cur, cur + iosize - 1);
cur = cur + iosize;
pg_offset += iosize;
continue;
@@ -2634,7 +2700,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
*/
if (block_start == EXTENT_MAP_INLINE) {
SetPageError(page);
- unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
+ unlock_extent(tree, cur, cur + iosize - 1);
cur = cur + iosize;
pg_offset += iosize;
continue;
@@ -2654,6 +2720,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
end_bio_extent_readpage, mirror_num,
*bio_flags,
this_bio_flag);
+ BUG_ON(ret == -ENOMEM);
nr++;
*bio_flags = this_bio_flag;
}
@@ -2756,10 +2823,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
if (page->index == end_index) {
char *userpage;
- userpage = kmap_atomic(page, KM_USER0);
+ userpage = kmap_atomic(page);
memset(userpage + pg_offset, 0,
PAGE_CACHE_SIZE - pg_offset);
- kunmap_atomic(userpage, KM_USER0);
+ kunmap_atomic(userpage);
flush_dcache_page(page);
}
pg_offset = 0;
@@ -2795,7 +2862,11 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
delalloc_end,
&page_started,
&nr_written);
- BUG_ON(ret);
+ /* File system has been set read-only */
+ if (ret) {
+ SetPageError(page);
+ goto done;
+ }
/*
* delalloc_end is already one less than the total
* length, so we don't subtract one from
@@ -2968,6 +3039,275 @@ done_unlocked:
return 0;
}
+static int eb_wait(void *word)
+{
+ io_schedule();
+ return 0;
+}
+
+static void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
+{
+ wait_on_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK, eb_wait,
+ TASK_UNINTERRUPTIBLE);
+}
+
+static int lock_extent_buffer_for_io(struct extent_buffer *eb,
+ struct btrfs_fs_info *fs_info,
+ struct extent_page_data *epd)
+{
+ unsigned long i, num_pages;
+ int flush = 0;
+ int ret = 0;
+
+ if (!btrfs_try_tree_write_lock(eb)) {
+ flush = 1;
+ flush_write_bio(epd);
+ btrfs_tree_lock(eb);
+ }
+
+ if (test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags)) {
+ btrfs_tree_unlock(eb);
+ if (!epd->sync_io)
+ return 0;
+ if (!flush) {
+ flush_write_bio(epd);
+ flush = 1;
+ }
+ while (1) {
+ wait_on_extent_buffer_writeback(eb);
+ btrfs_tree_lock(eb);
+ if (!test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags))
+ break;
+ btrfs_tree_unlock(eb);
+ }
+ }
+
+ if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
+ set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
+ btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
+ spin_lock(&fs_info->delalloc_lock);
+ if (fs_info->dirty_metadata_bytes >= eb->len)
+ fs_info->dirty_metadata_bytes -= eb->len;
+ else
+ WARN_ON(1);
+ spin_unlock(&fs_info->delalloc_lock);
+ ret = 1;
+ }
+
+ btrfs_tree_unlock(eb);
+
+ if (!ret)
+ return ret;
+
+ num_pages = num_extent_pages(eb->start, eb->len);
+ for (i = 0; i < num_pages; i++) {
+ struct page *p = extent_buffer_page(eb, i);
+
+ if (!trylock_page(p)) {
+ if (!flush) {
+ flush_write_bio(epd);
+ flush = 1;
+ }
+ lock_page(p);
+ }
+ }
+
+ return ret;
+}
+
+static void end_extent_buffer_writeback(struct extent_buffer *eb)
+{
+ clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
+ smp_mb__after_clear_bit();
+ wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);
+}
+
+static void end_bio_extent_buffer_writepage(struct bio *bio, int err)
+{
+ int uptodate = err == 0;
+ struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
+ struct extent_buffer *eb;
+ int done;
+
+ do {
+ struct page *page = bvec->bv_page;
+
+ bvec--;
+ eb = (struct extent_buffer *)page->private;
+ BUG_ON(!eb);
+ done = atomic_dec_and_test(&eb->io_pages);
+
+ if (!uptodate || test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) {
+ set_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
+ ClearPageUptodate(page);
+ SetPageError(page);
+ }
+
+ end_page_writeback(page);
+
+ if (!done)
+ continue;
+
+ end_extent_buffer_writeback(eb);
+ } while (bvec >= bio->bi_io_vec);
+
+ bio_put(bio);
+
+}
+
+static int write_one_eb(struct extent_buffer *eb,
+ struct btrfs_fs_info *fs_info,
+ struct writeback_control *wbc,
+ struct extent_page_data *epd)
+{
+ struct block_device *bdev = fs_info->fs_devices->latest_bdev;
+ u64 offset = eb->start;
+ unsigned long i, num_pages;
+ int rw = (epd->sync_io ? WRITE_SYNC : WRITE);
+ int ret;
+
+ clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
+ num_pages = num_extent_pages(eb->start, eb->len);
+ atomic_set(&eb->io_pages, num_pages);
+ for (i = 0; i < num_pages; i++) {
+ struct page *p = extent_buffer_page(eb, i);
+
+ clear_page_dirty_for_io(p);
+ set_page_writeback(p);
+ ret = submit_extent_page(rw, eb->tree, p, offset >> 9,
+ PAGE_CACHE_SIZE, 0, bdev, &epd->bio,
+ -1, end_bio_extent_buffer_writepage,
+ 0, 0, 0);
+ if (ret) {
+ set_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
+ SetPageError(p);
+ if (atomic_sub_and_test(num_pages - i, &eb->io_pages))
+ end_extent_buffer_writeback(eb);
+ ret = -EIO;
+ break;
+ }
+ offset += PAGE_CACHE_SIZE;
+ update_nr_written(p, wbc, 1);
+ unlock_page(p);
+ }
+
+ if (unlikely(ret)) {
+ for (; i < num_pages; i++) {
+ struct page *p = extent_buffer_page(eb, i);
+ unlock_page(p);
+ }
+ }
+
+ return ret;
+}
+
+int btree_write_cache_pages(struct address_space *mapping,
+ struct writeback_control *wbc)
+{
+ struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree;
+ struct btrfs_fs_info *fs_info = BTRFS_I(mapping->host)->root->fs_info;
+ struct extent_buffer *eb, *prev_eb = NULL;
+ struct extent_page_data epd = {
+ .bio = NULL,
+ .tree = tree,
+ .extent_locked = 0,
+ .sync_io = wbc->sync_mode == WB_SYNC_ALL,
+ };
+ int ret = 0;
+ int done = 0;
+ int nr_to_write_done = 0;
+ struct pagevec pvec;
+ int nr_pages;
+ pgoff_t index;
+ pgoff_t end; /* Inclusive */
+ int scanned = 0;
+ int tag;
+
+ pagevec_init(&pvec, 0);
+ if (wbc->range_cyclic) {
+ index = mapping->writeback_index; /* Start from prev offset */
+ end = -1;
+ } else {
+ index = wbc->range_start >> PAGE_CACHE_SHIFT;
+ end = wbc->range_end >> PAGE_CACHE_SHIFT;
+ scanned = 1;
+ }
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ tag = PAGECACHE_TAG_TOWRITE;
+ else
+ tag = PAGECACHE_TAG_DIRTY;
+retry:
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ tag_pages_for_writeback(mapping, index, end);
+ while (!done && !nr_to_write_done && (index <= end) &&
+ (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
+ min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
+ unsigned i;
+
+ scanned = 1;
+ for (i = 0; i < nr_pages; i++) {
+ struct page *page = pvec.pages[i];
+
+ if (!PagePrivate(page))
+ continue;
+
+ if (!wbc->range_cyclic && page->index > end) {
+ done = 1;
+ break;
+ }
+
+ eb = (struct extent_buffer *)page->private;
+ if (!eb) {
+ WARN_ON(1);
+ continue;
+ }
+
+ if (eb == prev_eb)
+ continue;
+
+ if (!atomic_inc_not_zero(&eb->refs)) {
+ WARN_ON(1);
+ continue;
+ }
+
+ prev_eb = eb;
+ ret = lock_extent_buffer_for_io(eb, fs_info, &epd);
+ if (!ret) {
+ free_extent_buffer(eb);
+ continue;
+ }
+
+ ret = write_one_eb(eb, fs_info, wbc, &epd);
+ if (ret) {
+ done = 1;
+ free_extent_buffer(eb);
+ break;
+ }
+ free_extent_buffer(eb);
+
+ /*
+ * the filesystem may choose to bump up nr_to_write.
+ * We have to make sure to honor the new nr_to_write
+ * at any time
+ */
+ nr_to_write_done = wbc->nr_to_write <= 0;
+ }
+ pagevec_release(&pvec);
+ cond_resched();
+ }
+ if (!scanned && !done) {
+ /*
+ * We hit the last page and there is more work to be done: wrap
+ * back to the start of the file
+ */
+ scanned = 1;
+ index = 0;
+ goto retry;
+ }
+ flush_write_bio(&epd);
+ return ret;
+}
+
/**
* write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
* @mapping: address space structure to write
@@ -3099,10 +3439,14 @@ retry:
static void flush_epd_write_bio(struct extent_page_data *epd)
{
if (epd->bio) {
+ int rw = WRITE;
+ int ret;
+
if (epd->sync_io)
- submit_one_bio(WRITE_SYNC, epd->bio, 0, 0);
- else
- submit_one_bio(WRITE, epd->bio, 0, 0);
+ rw = WRITE_SYNC;
+
+ ret = submit_one_bio(rw, epd->bio, 0, 0);
+ BUG_ON(ret < 0); /* -ENOMEM */
epd->bio = NULL;
}
}
@@ -3219,7 +3563,7 @@ int extent_readpages(struct extent_io_tree *tree,
}
BUG_ON(!list_empty(pages));
if (bio)
- submit_one_bio(READ, bio, 0, bio_flags);
+ return submit_one_bio(READ, bio, 0, bio_flags);
return 0;
}
@@ -3240,7 +3584,7 @@ int extent_invalidatepage(struct extent_io_tree *tree,
if (start > end)
return 0;
- lock_extent_bits(tree, start, end, 0, &cached_state, GFP_NOFS);
+ lock_extent_bits(tree, start, end, 0, &cached_state);
wait_on_page_writeback(page);
clear_extent_bit(tree, start, end,
EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
@@ -3454,7 +3798,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
}
lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
- &cached_state, GFP_NOFS);
+ &cached_state);
em = get_extent_skip_holes(inode, start, last_for_get_extent,
get_extent);
@@ -3548,26 +3892,7 @@ out:
inline struct page *extent_buffer_page(struct extent_buffer *eb,
unsigned long i)
{
- struct page *p;
- struct address_space *mapping;
-
- if (i == 0)
- return eb->first_page;
- i += eb->start >> PAGE_CACHE_SHIFT;
- mapping = eb->first_page->mapping;
- if (!mapping)
- return NULL;
-
- /*
- * extent_buffer_page is only called after pinning the page
- * by increasing the reference count. So we know the page must
- * be in the radix tree.
- */
- rcu_read_lock();
- p = radix_tree_lookup(&mapping->page_tree, i);
- rcu_read_unlock();
-
- return p;
+ return eb->pages[i];
}
inline unsigned long num_extent_pages(u64 start, u64 len)
@@ -3576,6 +3901,19 @@ inline unsigned long num_extent_pages(u64 start, u64 len)
(start >> PAGE_CACHE_SHIFT);
}
+static void __free_extent_buffer(struct extent_buffer *eb)
+{
+#if LEAK_DEBUG
+ unsigned long flags;
+ spin_lock_irqsave(&leak_lock, flags);
+ list_del(&eb->leak_list);
+ spin_unlock_irqrestore(&leak_lock, flags);
+#endif
+ if (eb->pages && eb->pages != eb->inline_pages)
+ kfree(eb->pages);
+ kmem_cache_free(extent_buffer_cache, eb);
+}
+
static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
u64 start,
unsigned long len,
@@ -3591,6 +3929,7 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
return NULL;
eb->start = start;
eb->len = len;
+ eb->tree = tree;
rwlock_init(&eb->lock);
atomic_set(&eb->write_locks, 0);
atomic_set(&eb->read_locks, 0);
@@ -3607,20 +3946,32 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
list_add(&eb->leak_list, &buffers);
spin_unlock_irqrestore(&leak_lock, flags);
#endif
+ spin_lock_init(&eb->refs_lock);
atomic_set(&eb->refs, 1);
+ atomic_set(&eb->io_pages, 0);
+
+ if (len > MAX_INLINE_EXTENT_BUFFER_SIZE) {
+ struct page **pages;
+ int num_pages = (len + PAGE_CACHE_SIZE - 1) >>
+ PAGE_CACHE_SHIFT;
+ pages = kzalloc(num_pages, mask);
+ if (!pages) {
+ __free_extent_buffer(eb);
+ return NULL;
+ }
+ eb->pages = pages;
+ } else {
+ eb->pages = eb->inline_pages;
+ }
return eb;
}
-static void __free_extent_buffer(struct extent_buffer *eb)
+static int extent_buffer_under_io(struct extent_buffer *eb)
{
-#if LEAK_DEBUG
- unsigned long flags;
- spin_lock_irqsave(&leak_lock, flags);
- list_del(&eb->leak_list);
- spin_unlock_irqrestore(&leak_lock, flags);
-#endif
- kmem_cache_free(extent_buffer_cache, eb);
+ return (atomic_read(&eb->io_pages) ||
+ test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
+ test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
}
/*
@@ -3632,8 +3983,7 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
unsigned long index;
struct page *page;
- if (!eb->first_page)
- return;
+ BUG_ON(extent_buffer_under_io(eb));
index = num_extent_pages(eb->start, eb->len);
if (start_idx >= index)
@@ -3642,8 +3992,34 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
do {
index--;
page = extent_buffer_page(eb, index);
- if (page)
+ if (page) {
+ spin_lock(&page->mapping->private_lock);
+ /*
+ * We do this since we'll remove the pages after we've
+ * removed the eb from the radix tree, so we could race
+ * and have this page now attached to the new eb. So
+ * only clear page_private if it's still connected to
+ * this eb.
+ */
+ if (PagePrivate(page) &&
+ page->private == (unsigned long)eb) {
+ BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
+ BUG_ON(PageDirty(page));
+ BUG_ON(PageWriteback(page));
+ /*
+ * We need to make sure we haven't be attached
+ * to a new eb.
+ */
+ ClearPagePrivate(page);
+ set_page_private(page, 0);
+ /* One for the page private */
+ page_cache_release(page);
+ }
+ spin_unlock(&page->mapping->private_lock);
+
+ /* One for when we alloced the page */
page_cache_release(page);
+ }
} while (index != start_idx);
}
@@ -3656,9 +4032,50 @@ static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
__free_extent_buffer(eb);
}
+static void check_buffer_tree_ref(struct extent_buffer *eb)
+{
+ /* the ref bit is tricky. We have to make sure it is set
+ * if we have the buffer dirty. Otherwise the
+ * code to free a buffer can end up dropping a dirty
+ * page
+ *
+ * Once the ref bit is set, it won't go away while the
+ * buffer is dirty or in writeback, and it also won't
+ * go away while we have the reference count on the
+ * eb bumped.
+ *
+ * We can't just set the ref bit without bumping the
+ * ref on the eb because free_extent_buffer might
+ * see the ref bit and try to clear it. If this happens
+ * free_extent_buffer might end up dropping our original
+ * ref by mistake and freeing the page before we are able
+ * to add one more ref.
+ *
+ * So bump the ref count first, then set the bit. If someone
+ * beat us to it, drop the ref we added.
+ */
+ if (!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) {
+ atomic_inc(&eb->refs);
+ if (test_and_set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
+ atomic_dec(&eb->refs);
+ }
+}
+
+static void mark_extent_buffer_accessed(struct extent_buffer *eb)
+{
+ unsigned long num_pages, i;
+
+ check_buffer_tree_ref(eb);
+
+ num_pages = num_extent_pages(eb->start, eb->len);
+ for (i = 0; i < num_pages; i++) {
+ struct page *p = extent_buffer_page(eb, i);
+ mark_page_accessed(p);
+ }
+}
+
struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
- u64 start, unsigned long len,
- struct page *page0)
+ u64 start, unsigned long len)
{
unsigned long num_pages = num_extent_pages(start, len);
unsigned long i;
@@ -3674,7 +4091,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
if (eb && atomic_inc_not_zero(&eb->refs)) {
rcu_read_unlock();
- mark_page_accessed(eb->first_page);
+ mark_extent_buffer_accessed(eb);
return eb;
}
rcu_read_unlock();
@@ -3683,32 +4100,44 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
if (!eb)
return NULL;
- if (page0) {
- eb->first_page = page0;
- i = 1;
- index++;
- page_cache_get(page0);
- mark_page_accessed(page0);
- set_page_extent_mapped(page0);
- set_page_extent_head(page0, len);
- uptodate = PageUptodate(page0);
- } else {
- i = 0;
- }
- for (; i < num_pages; i++, index++) {
+ for (i = 0; i < num_pages; i++, index++) {
p = find_or_create_page(mapping, index, GFP_NOFS);
if (!p) {
WARN_ON(1);
goto free_eb;
}
- set_page_extent_mapped(p);
- mark_page_accessed(p);
- if (i == 0) {
- eb->first_page = p;
- set_page_extent_head(p, len);
- } else {
- set_page_private(p, EXTENT_PAGE_PRIVATE);
+
+ spin_lock(&mapping->private_lock);
+ if (PagePrivate(p)) {
+ /*
+ * We could have already allocated an eb for this page
+ * and attached one so lets see if we can get a ref on
+ * the existing eb, and if we can we know it's good and
+ * we can just return that one, else we know we can just
+ * overwrite page->private.
+ */
+ exists = (struct extent_buffer *)p->private;
+ if (atomic_inc_not_zero(&exists->refs)) {
+ spin_unlock(&mapping->private_lock);
+ unlock_page(p);
+ page_cache_release(p);
+ mark_extent_buffer_accessed(exists);
+ goto free_eb;
+ }
+
+ /*
+ * Do this so attach doesn't complain and we need to
+ * drop the ref the old guy had.
+ */
+ ClearPagePrivate(p);
+ WARN_ON(PageDirty(p));
+ page_cache_release(p);
}
+ attach_extent_buffer_page(eb, p);
+ spin_unlock(&mapping->private_lock);
+ WARN_ON(PageDirty(p));
+ mark_page_accessed(p);
+ eb->pages[i] = p;
if (!PageUptodate(p))
uptodate = 0;
@@ -3716,12 +4145,10 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
* see below about how we avoid a nasty race with release page
* and why we unlock later
*/
- if (i != 0)
- unlock_page(p);
}
if (uptodate)
set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
-
+again:
ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
if (ret)
goto free_eb;
@@ -3731,14 +4158,21 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
if (ret == -EEXIST) {
exists = radix_tree_lookup(&tree->buffer,
start >> PAGE_CACHE_SHIFT);
- /* add one reference for the caller */
- atomic_inc(&exists->refs);
+ if (!atomic_inc_not_zero(&exists->refs)) {
+ spin_unlock(&tree->buffer_lock);
+ radix_tree_preload_end();
+ exists = NULL;
+ goto again;
+ }
spin_unlock(&tree->buffer_lock);
radix_tree_preload_end();
+ mark_extent_buffer_accessed(exists);
goto free_eb;
}
/* add one reference for the tree */
- atomic_inc(&eb->refs);
+ spin_lock(&eb->refs_lock);
+ check_buffer_tree_ref(eb);
+ spin_unlock(&eb->refs_lock);
spin_unlock(&tree->buffer_lock);
radix_tree_preload_end();
@@ -3751,18 +4185,22 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
* after the extent buffer is in the radix tree so
* it doesn't get lost
*/
- set_page_extent_mapped(eb->first_page);
- set_page_extent_head(eb->first_page, eb->len);
- if (!page0)
- unlock_page(eb->first_page);
+ SetPageChecked(eb->pages[0]);
+ for (i = 1; i < num_pages; i++) {
+ p = extent_buffer_page(eb, i);
+ ClearPageChecked(p);
+ unlock_page(p);
+ }
+ unlock_page(eb->pages[0]);
return eb;
free_eb:
- if (eb->first_page && !page0)
- unlock_page(eb->first_page);
+ for (i = 0; i < num_pages; i++) {
+ if (eb->pages[i])
+ unlock_page(eb->pages[i]);
+ }
- if (!atomic_dec_and_test(&eb->refs))
- return exists;
+ WARN_ON(!atomic_dec_and_test(&eb->refs));
btrfs_release_extent_buffer(eb);
return exists;
}
@@ -3776,7 +4214,7 @@ struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
if (eb && atomic_inc_not_zero(&eb->refs)) {
rcu_read_unlock();
- mark_page_accessed(eb->first_page);
+ mark_extent_buffer_accessed(eb);
return eb;
}
rcu_read_unlock();
@@ -3784,19 +4222,71 @@ struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
return NULL;
}
+static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
+{
+ struct extent_buffer *eb =
+ container_of(head, struct extent_buffer, rcu_head);
+
+ __free_extent_buffer(eb);
+}
+
+/* Expects to have eb->eb_lock already held */
+static void release_extent_buffer(struct extent_buffer *eb, gfp_t mask)
+{
+ WARN_ON(atomic_read(&eb->refs) == 0);
+ if (atomic_dec_and_test(&eb->refs)) {
+ struct extent_io_tree *tree = eb->tree;
+
+ spin_unlock(&eb->refs_lock);
+
+ spin_lock(&tree->buffer_lock);
+ radix_tree_delete(&tree->buffer,
+ eb->start >> PAGE_CACHE_SHIFT);
+ spin_unlock(&tree->buffer_lock);
+
+ /* Should be safe to release our pages at this point */
+ btrfs_release_extent_buffer_page(eb, 0);
+
+ call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
+ return;
+ }
+ spin_unlock(&eb->refs_lock);
+}
+
void free_extent_buffer(struct extent_buffer *eb)
{
if (!eb)
return;
- if (!atomic_dec_and_test(&eb->refs))
+ spin_lock(&eb->refs_lock);
+ if (atomic_read(&eb->refs) == 2 &&
+ test_bit(EXTENT_BUFFER_STALE, &eb->bflags) &&
+ !extent_buffer_under_io(eb) &&
+ test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
+ atomic_dec(&eb->refs);
+
+ /*
+ * I know this is terrible, but it's temporary until we stop tracking
+ * the uptodate bits and such for the extent buffers.
+ */
+ release_extent_buffer(eb, GFP_ATOMIC);
+}
+
+void free_extent_buffer_stale(struct extent_buffer *eb)
+{
+ if (!eb)
return;
- WARN_ON(1);
+ spin_lock(&eb->refs_lock);
+ set_bit(EXTENT_BUFFER_STALE, &eb->bflags);
+
+ if (atomic_read(&eb->refs) == 2 && !extent_buffer_under_io(eb) &&
+ test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
+ atomic_dec(&eb->refs);
+ release_extent_buffer(eb, GFP_NOFS);
}
-int clear_extent_buffer_dirty(struct extent_io_tree *tree,
- struct extent_buffer *eb)
+void clear_extent_buffer_dirty(struct extent_buffer *eb)
{
unsigned long i;
unsigned long num_pages;
@@ -3812,10 +4302,6 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
lock_page(page);
WARN_ON(!PagePrivate(page));
- set_page_extent_mapped(page);
- if (i == 0)
- set_page_extent_head(page, eb->len);
-
clear_page_dirty_for_io(page);
spin_lock_irq(&page->mapping->tree_lock);
if (!PageDirty(page)) {
@@ -3827,24 +4313,29 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
ClearPageError(page);
unlock_page(page);
}
- return 0;
+ WARN_ON(atomic_read(&eb->refs) == 0);
}
-int set_extent_buffer_dirty(struct extent_io_tree *tree,
- struct extent_buffer *eb)
+int set_extent_buffer_dirty(struct extent_buffer *eb)
{
unsigned long i;
unsigned long num_pages;
int was_dirty = 0;
+ check_buffer_tree_ref(eb);
+
was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
+
num_pages = num_extent_pages(eb->start, eb->len);
+ WARN_ON(atomic_read(&eb->refs) == 0);
+ WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags));
+
for (i = 0; i < num_pages; i++)
- __set_page_dirty_nobuffers(extent_buffer_page(eb, i));
+ set_page_dirty(extent_buffer_page(eb, i));
return was_dirty;
}
-static int __eb_straddles_pages(u64 start, u64 len)
+static int range_straddles_pages(u64 start, u64 len)
{
if (len < PAGE_CACHE_SIZE)
return 1;
@@ -3855,25 +4346,14 @@ static int __eb_straddles_pages(u64 start, u64 len)
return 0;
}
-static int eb_straddles_pages(struct extent_buffer *eb)
-{
- return __eb_straddles_pages(eb->start, eb->len);
-}
-
-int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
- struct extent_buffer *eb,
- struct extent_state **cached_state)
+int clear_extent_buffer_uptodate(struct extent_buffer *eb)
{
unsigned long i;
struct page *page;
unsigned long num_pages;
- num_pages = num_extent_pages(eb->start, eb->len);
clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
-
- clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
- cached_state, GFP_NOFS);
-
+ num_pages = num_extent_pages(eb->start, eb->len);
for (i = 0; i < num_pages; i++) {
page = extent_buffer_page(eb, i);
if (page)
@@ -3882,27 +4362,16 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
return 0;
}
-int set_extent_buffer_uptodate(struct extent_io_tree *tree,
- struct extent_buffer *eb)
+int set_extent_buffer_uptodate(struct extent_buffer *eb)
{
unsigned long i;
struct page *page;
unsigned long num_pages;
+ set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
num_pages = num_extent_pages(eb->start, eb->len);
-
- if (eb_straddles_pages(eb)) {
- set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
- NULL, GFP_NOFS);
- }
for (i = 0; i < num_pages; i++) {
page = extent_buffer_page(eb, i);
- if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
- ((i == num_pages - 1) &&
- ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) {
- check_page_uptodate(tree, page);
- continue;
- }
SetPageUptodate(page);
}
return 0;
@@ -3917,7 +4386,7 @@ int extent_range_uptodate(struct extent_io_tree *tree,
int uptodate;
unsigned long index;
- if (__eb_straddles_pages(start, end - start + 1)) {
+ if (range_straddles_pages(start, end - start + 1)) {
ret = test_range_bit(tree, start, end,
EXTENT_UPTODATE, 1, NULL);
if (ret)
@@ -3939,35 +4408,9 @@ int extent_range_uptodate(struct extent_io_tree *tree,
return pg_uptodate;
}
-int extent_buffer_uptodate(struct extent_io_tree *tree,
- struct extent_buffer *eb,
- struct extent_state *cached_state)
+int extent_buffer_uptodate(struct extent_buffer *eb)
{
- int ret = 0;
- unsigned long num_pages;
- unsigned long i;
- struct page *page;
- int pg_uptodate = 1;
-
- if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
- return 1;
-
- if (eb_straddles_pages(eb)) {
- ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1,
- EXTENT_UPTODATE, 1, cached_state);
- if (ret)
- return ret;
- }
-
- num_pages = num_extent_pages(eb->start, eb->len);
- for (i = 0; i < num_pages; i++) {
- page = extent_buffer_page(eb, i);
- if (!PageUptodate(page)) {
- pg_uptodate = 0;
- break;
- }
- }
- return pg_uptodate;
+ return test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
}
int read_extent_buffer_pages(struct extent_io_tree *tree,
@@ -3981,21 +4424,14 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
int ret = 0;
int locked_pages = 0;
int all_uptodate = 1;
- int inc_all_pages = 0;
unsigned long num_pages;
+ unsigned long num_reads = 0;
struct bio *bio = NULL;
unsigned long bio_flags = 0;
if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
return 0;
- if (eb_straddles_pages(eb)) {
- if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
- EXTENT_UPTODATE, 1, NULL)) {
- return 0;
- }
- }
-
if (start) {
WARN_ON(start < eb->start);
start_i = (start >> PAGE_CACHE_SHIFT) -
@@ -4014,8 +4450,10 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
lock_page(page);
}
locked_pages++;
- if (!PageUptodate(page))
+ if (!PageUptodate(page)) {
+ num_reads++;
all_uptodate = 0;
+ }
}
if (all_uptodate) {
if (start_i == 0)
@@ -4023,20 +4461,12 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
goto unlock_exit;
}
+ clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
+ eb->read_mirror = 0;
+ atomic_set(&eb->io_pages, num_reads);
for (i = start_i; i < num_pages; i++) {
page = extent_buffer_page(eb, i);
-
- WARN_ON(!PagePrivate(page));
-
- set_page_extent_mapped(page);
- if (i == 0)
- set_page_extent_head(page, eb->len);
-
- if (inc_all_pages)
- page_cache_get(page);
if (!PageUptodate(page)) {
- if (start_i == 0)
- inc_all_pages = 1;
ClearPageError(page);
err = __extent_read_full_page(tree, page,
get_extent, &bio,
@@ -4048,8 +4478,11 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
}
}
- if (bio)
- submit_one_bio(READ, bio, mirror_num, bio_flags);
+ if (bio) {
+ err = submit_one_bio(READ, bio, mirror_num, bio_flags);
+ if (err)
+ return err;
+ }
if (ret || wait != WAIT_COMPLETE)
return ret;
@@ -4061,8 +4494,6 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
ret = -EIO;
}
- if (!ret)
- set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
return ret;
unlock_exit:
@@ -4304,15 +4735,20 @@ static void copy_pages(struct page *dst_page, struct page *src_page,
{
char *dst_kaddr = page_address(dst_page);
char *src_kaddr;
+ int must_memmove = 0;
if (dst_page != src_page) {
src_kaddr = page_address(src_page);
} else {
src_kaddr = dst_kaddr;
- BUG_ON(areas_overlap(src_off, dst_off, len));
+ if (areas_overlap(src_off, dst_off, len))
+ must_memmove = 1;
}
- memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
+ if (must_memmove)
+ memmove(dst_kaddr + dst_off, src_kaddr + src_off, len);
+ else
+ memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
}
void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
@@ -4382,7 +4818,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
"len %lu len %lu\n", dst_offset, len, dst->len);
BUG_ON(1);
}
- if (!areas_overlap(src_offset, dst_offset, len)) {
+ if (dst_offset < src_offset) {
memcpy_extent_buffer(dst, dst_offset, src_offset, len);
return;
}
@@ -4408,47 +4844,48 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
}
}
-static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
-{
- struct extent_buffer *eb =
- container_of(head, struct extent_buffer, rcu_head);
-
- btrfs_release_extent_buffer(eb);
-}
-
-int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page)
+int try_release_extent_buffer(struct page *page, gfp_t mask)
{
- u64 start = page_offset(page);
struct extent_buffer *eb;
- int ret = 1;
- spin_lock(&tree->buffer_lock);
- eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
- if (!eb) {
- spin_unlock(&tree->buffer_lock);
- return ret;
+ /*
+ * We need to make sure noboody is attaching this page to an eb right
+ * now.
+ */
+ spin_lock(&page->mapping->private_lock);
+ if (!PagePrivate(page)) {
+ spin_unlock(&page->mapping->private_lock);
+ return 1;
}
- if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
- ret = 0;
- goto out;
- }
+ eb = (struct extent_buffer *)page->private;
+ BUG_ON(!eb);
/*
- * set @eb->refs to 0 if it is already 1, and then release the @eb.
- * Or go back.
+ * This is a little awful but should be ok, we need to make sure that
+ * the eb doesn't disappear out from under us while we're looking at
+ * this page.
*/
- if (atomic_cmpxchg(&eb->refs, 1, 0) != 1) {
- ret = 0;
- goto out;
+ spin_lock(&eb->refs_lock);
+ if (atomic_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) {
+ spin_unlock(&eb->refs_lock);
+ spin_unlock(&page->mapping->private_lock);
+ return 0;
}
+ spin_unlock(&page->mapping->private_lock);
- radix_tree_delete(&tree->buffer, start >> PAGE_CACHE_SHIFT);
-out:
- spin_unlock(&tree->buffer_lock);
+ if ((mask & GFP_NOFS) == GFP_NOFS)
+ mask = GFP_NOFS;
- /* at this point we can safely release the extent buffer */
- if (atomic_read(&eb->refs) == 0)
- call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
- return ret;
+ /*
+ * If tree ref isn't set then we know the ref on this eb is a real ref,
+ * so just return, this page will likely be freed soon anyway.
+ */
+ if (!test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) {
+ spin_unlock(&eb->refs_lock);
+ return 0;
+ }
+ release_extent_buffer(eb, mask);
+
+ return 1;
}
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index cecc3518c12..b516c3b8dec 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -35,6 +35,10 @@
#define EXTENT_BUFFER_DIRTY 2
#define EXTENT_BUFFER_CORRUPT 3
#define EXTENT_BUFFER_READAHEAD 4 /* this got triggered by readahead */
+#define EXTENT_BUFFER_TREE_REF 5
+#define EXTENT_BUFFER_STALE 6
+#define EXTENT_BUFFER_WRITEBACK 7
+#define EXTENT_BUFFER_IOERR 8
/* these are flags for extent_clear_unlock_delalloc */
#define EXTENT_CLEAR_UNLOCK_PAGE 0x1
@@ -54,6 +58,7 @@
#define EXTENT_PAGE_PRIVATE_FIRST_PAGE 3
struct extent_state;
+struct btrfs_root;
typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw,
struct bio *bio, int mirror_num,
@@ -69,14 +74,12 @@ struct extent_io_ops {
size_t size, struct bio *bio,
unsigned long bio_flags);
int (*readpage_io_hook)(struct page *page, u64 start, u64 end);
- int (*readpage_io_failed_hook)(struct bio *bio, struct page *page,
- u64 start, u64 end, int failed_mirror,
- struct extent_state *state);
+ int (*readpage_io_failed_hook)(struct page *page, int failed_mirror);
int (*writepage_io_failed_hook)(struct bio *bio, struct page *page,
u64 start, u64 end,
struct extent_state *state);
int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end,
- struct extent_state *state);
+ struct extent_state *state, int mirror);
int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
struct extent_state *state, int uptodate);
void (*set_bit_hook)(struct inode *inode, struct extent_state *state,
@@ -97,6 +100,7 @@ struct extent_io_tree {
struct radix_tree_root buffer;
struct address_space *mapping;
u64 dirty_bytes;
+ int track_uptodate;
spinlock_t lock;
spinlock_t buffer_lock;
struct extent_io_ops *ops;
@@ -119,16 +123,21 @@ struct extent_state {
struct list_head leak_list;
};
+#define INLINE_EXTENT_BUFFER_PAGES 16
+#define MAX_INLINE_EXTENT_BUFFER_SIZE (INLINE_EXTENT_BUFFER_PAGES * PAGE_CACHE_SIZE)
struct extent_buffer {
u64 start;
unsigned long len;
unsigned long map_start;
unsigned long map_len;
- struct page *first_page;
unsigned long bflags;
+ struct extent_io_tree *tree;
+ spinlock_t refs_lock;
+ atomic_t refs;
+ atomic_t io_pages;
+ int read_mirror;
struct list_head leak_list;
struct rcu_head rcu_head;
- atomic_t refs;
pid_t lock_owner;
/* count of read lock holders on the extent buffer */
@@ -152,6 +161,9 @@ struct extent_buffer {
* to unlock
*/
wait_queue_head_t read_lock_wq;
+ wait_queue_head_t lock_wq;
+ struct page *inline_pages[INLINE_EXTENT_BUFFER_PAGES];
+ struct page **pages;
};
static inline void extent_set_compress_type(unsigned long *bio_flags,
@@ -178,18 +190,17 @@ void extent_io_tree_init(struct extent_io_tree *tree,
int try_release_extent_mapping(struct extent_map_tree *map,
struct extent_io_tree *tree, struct page *page,
gfp_t mask);
-int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page);
+int try_release_extent_buffer(struct page *page, gfp_t mask);
int try_release_extent_state(struct extent_map_tree *map,
struct extent_io_tree *tree, struct page *page,
gfp_t mask);
-int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask);
+int lock_extent(struct extent_io_tree *tree, u64 start, u64 end);
int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
- int bits, struct extent_state **cached, gfp_t mask);
-int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask);
+ int bits, struct extent_state **cached);
+int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end);
int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached, gfp_t mask);
-int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
- gfp_t mask);
+int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end);
int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
get_extent_t *get_extent, int mirror_num);
int __init extent_io_init(void);
@@ -210,7 +221,7 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
int bits, gfp_t mask);
int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
- int bits, int exclusive_bits, u64 *failed_start,
+ int bits, u64 *failed_start,
struct extent_state **cached_state, gfp_t mask);
int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached_state, gfp_t mask);
@@ -240,6 +251,8 @@ int extent_writepages(struct extent_io_tree *tree,
struct address_space *mapping,
get_extent_t *get_extent,
struct writeback_control *wbc);
+int btree_write_cache_pages(struct address_space *mapping,
+ struct writeback_control *wbc);
int extent_readpages(struct extent_io_tree *tree,
struct address_space *mapping,
struct list_head *pages, unsigned nr_pages,
@@ -251,11 +264,11 @@ int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private);
void set_page_extent_mapped(struct page *page);
struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
- u64 start, unsigned long len,
- struct page *page0);
+ u64 start, unsigned long len);
struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
u64 start, unsigned long len);
void free_extent_buffer(struct extent_buffer *eb);
+void free_extent_buffer_stale(struct extent_buffer *eb);
#define WAIT_NONE 0
#define WAIT_COMPLETE 1
#define WAIT_PAGE_LOCK 2
@@ -287,19 +300,12 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
unsigned long src_offset, unsigned long len);
void memset_extent_buffer(struct extent_buffer *eb, char c,
unsigned long start, unsigned long len);
-int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits);
-int clear_extent_buffer_dirty(struct extent_io_tree *tree,
- struct extent_buffer *eb);
-int set_extent_buffer_dirty(struct extent_io_tree *tree,
- struct extent_buffer *eb);
-int set_extent_buffer_uptodate(struct extent_io_tree *tree,
- struct extent_buffer *eb);
-int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
- struct extent_buffer *eb,
- struct extent_state **cached_state);
-int extent_buffer_uptodate(struct extent_io_tree *tree,
- struct extent_buffer *eb,
- struct extent_state *cached_state);
+void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits);
+void clear_extent_buffer_dirty(struct extent_buffer *eb);
+int set_extent_buffer_dirty(struct extent_buffer *eb);
+int set_extent_buffer_uptodate(struct extent_buffer *eb);
+int clear_extent_buffer_uptodate(struct extent_buffer *eb);
+int extent_buffer_uptodate(struct extent_buffer *eb);
int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
unsigned long min_len, char **map,
unsigned long *map_start,
@@ -320,4 +326,6 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
u64 length, u64 logical, struct page *page,
int mirror_num);
int end_extent_writepage(struct page *page, int err, u64 start, u64 end);
+int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
+ int mirror_num);
#endif
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index c7fb3a4247d..5d158d32023 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -25,10 +25,12 @@
#include "transaction.h"
#include "print-tree.h"
-#define MAX_CSUM_ITEMS(r, size) ((((BTRFS_LEAF_DATA_SIZE(r) - \
+#define __MAX_CSUM_ITEMS(r, size) ((((BTRFS_LEAF_DATA_SIZE(r) - \
sizeof(struct btrfs_item) * 2) / \
size) - 1))
+#define MAX_CSUM_ITEMS(r, size) (min(__MAX_CSUM_ITEMS(r, size), PAGE_CACHE_SIZE))
+
#define MAX_ORDERED_SUM_BYTES(r) ((PAGE_SIZE - \
sizeof(struct btrfs_ordered_sum)) / \
sizeof(struct btrfs_sector_sum) * \
@@ -59,7 +61,7 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
sizeof(*item));
if (ret < 0)
goto out;
- BUG_ON(ret);
+ BUG_ON(ret); /* Can't happen */
leaf = path->nodes[0];
item = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
@@ -284,6 +286,7 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
struct btrfs_ordered_sum *sums;
struct btrfs_sector_sum *sector_sum;
struct btrfs_csum_item *item;
+ LIST_HEAD(tmplist);
unsigned long offset;
int ret;
size_t size;
@@ -358,7 +361,10 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
MAX_ORDERED_SUM_BYTES(root));
sums = kzalloc(btrfs_ordered_sum_size(root, size),
GFP_NOFS);
- BUG_ON(!sums);
+ if (!sums) {
+ ret = -ENOMEM;
+ goto fail;
+ }
sector_sum = sums->sums;
sums->bytenr = start;
@@ -380,12 +386,19 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
offset += csum_size;
sector_sum++;
}
- list_add_tail(&sums->list, list);
+ list_add_tail(&sums->list, &tmplist);
}
path->slots[0]++;
}
ret = 0;
fail:
+ while (ret < 0 && !list_empty(&tmplist)) {
+ sums = list_entry(&tmplist, struct btrfs_ordered_sum, list);
+ list_del(&sums->list);
+ kfree(sums);
+ }
+ list_splice_tail(&tmplist, list);
+
btrfs_free_path(path);
return ret;
}
@@ -420,7 +433,7 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
offset = page_offset(bvec->bv_page) + bvec->bv_offset;
ordered = btrfs_lookup_ordered_extent(inode, offset);
- BUG_ON(!ordered);
+ BUG_ON(!ordered); /* Logic error */
sums->bytenr = ordered->start;
while (bio_index < bio->bi_vcnt) {
@@ -439,21 +452,21 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
sums = kzalloc(btrfs_ordered_sum_size(root, bytes_left),
GFP_NOFS);
- BUG_ON(!sums);
+ BUG_ON(!sums); /* -ENOMEM */
sector_sum = sums->sums;
sums->len = bytes_left;
ordered = btrfs_lookup_ordered_extent(inode, offset);
- BUG_ON(!ordered);
+ BUG_ON(!ordered); /* Logic error */
sums->bytenr = ordered->start;
}
- data = kmap_atomic(bvec->bv_page, KM_USER0);
+ data = kmap_atomic(bvec->bv_page);
sector_sum->sum = ~(u32)0;
sector_sum->sum = btrfs_csum_data(root,
data + bvec->bv_offset,
sector_sum->sum,
bvec->bv_len);
- kunmap_atomic(data, KM_USER0);
+ kunmap_atomic(data);
btrfs_csum_final(sector_sum->sum,
(char *)&sector_sum->sum);
sector_sum->bytenr = disk_bytenr;
@@ -483,18 +496,17 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
* This calls btrfs_truncate_item with the correct args based on the
* overlap, and fixes up the key as required.
*/
-static noinline int truncate_one_csum(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct btrfs_key *key,
- u64 bytenr, u64 len)
+static noinline void truncate_one_csum(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct btrfs_key *key,
+ u64 bytenr, u64 len)
{
struct extent_buffer *leaf;
u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
u64 csum_end;
u64 end_byte = bytenr + len;
u32 blocksize_bits = root->fs_info->sb->s_blocksize_bits;
- int ret;
leaf = path->nodes[0];
csum_end = btrfs_item_size_nr(leaf, path->slots[0]) / csum_size;
@@ -510,7 +522,7 @@ static noinline int truncate_one_csum(struct btrfs_trans_handle *trans,
*/
u32 new_size = (bytenr - key->offset) >> blocksize_bits;
new_size *= csum_size;
- ret = btrfs_truncate_item(trans, root, path, new_size, 1);
+ btrfs_truncate_item(trans, root, path, new_size, 1);
} else if (key->offset >= bytenr && csum_end > end_byte &&
end_byte > key->offset) {
/*
@@ -522,15 +534,13 @@ static noinline int truncate_one_csum(struct btrfs_trans_handle *trans,
u32 new_size = (csum_end - end_byte) >> blocksize_bits;
new_size *= csum_size;
- ret = btrfs_truncate_item(trans, root, path, new_size, 0);
+ btrfs_truncate_item(trans, root, path, new_size, 0);
key->offset = end_byte;
- ret = btrfs_set_item_key_safe(trans, root, path, key);
- BUG_ON(ret);
+ btrfs_set_item_key_safe(trans, root, path, key);
} else {
BUG();
}
- return 0;
}
/*
@@ -635,13 +645,14 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
* item changed size or key
*/
ret = btrfs_split_item(trans, root, path, &key, offset);
- BUG_ON(ret && ret != -EAGAIN);
+ if (ret && ret != -EAGAIN) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto out;
+ }
key.offset = end_byte - 1;
} else {
- ret = truncate_one_csum(trans, root, path,
- &key, bytenr, len);
- BUG_ON(ret);
+ truncate_one_csum(trans, root, path, &key, bytenr, len);
if (key.offset < bytenr)
break;
}
@@ -772,7 +783,7 @@ again:
if (diff != csum_size)
goto insert;
- ret = btrfs_extend_item(trans, root, path, diff);
+ btrfs_extend_item(trans, root, path, diff);
goto csum;
}
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index e8d06b6b919..53bf2d764bb 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -452,7 +452,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
split = alloc_extent_map();
if (!split2)
split2 = alloc_extent_map();
- BUG_ON(!split || !split2);
+ BUG_ON(!split || !split2); /* -ENOMEM */
write_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, start, len);
@@ -494,7 +494,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
split->flags = flags;
split->compress_type = em->compress_type;
ret = add_extent_mapping(em_tree, split);
- BUG_ON(ret);
+ BUG_ON(ret); /* Logic error */
free_extent_map(split);
split = split2;
split2 = NULL;
@@ -520,7 +520,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
}
ret = add_extent_mapping(em_tree, split);
- BUG_ON(ret);
+ BUG_ON(ret); /* Logic error */
free_extent_map(split);
split = NULL;
}
@@ -567,6 +567,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
int extent_type;
int recow;
int ret;
+ int modify_tree = -1;
if (drop_cache)
btrfs_drop_extent_cache(inode, start, end - 1, 0);
@@ -575,10 +576,13 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
if (!path)
return -ENOMEM;
+ if (start >= BTRFS_I(inode)->disk_i_size)
+ modify_tree = 0;
+
while (1) {
recow = 0;
ret = btrfs_lookup_file_extent(trans, root, path, ino,
- search_start, -1);
+ search_start, modify_tree);
if (ret < 0)
break;
if (ret > 0 && path->slots[0] > 0 && search_start == start) {
@@ -634,7 +638,8 @@ next_slot:
}
search_start = max(key.offset, start);
- if (recow) {
+ if (recow || !modify_tree) {
+ modify_tree = -1;
btrfs_release_path(path);
continue;
}
@@ -679,7 +684,7 @@ next_slot:
root->root_key.objectid,
new_key.objectid,
start - extent_offset, 0);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
*hint_byte = disk_bytenr;
}
key.offset = start;
@@ -754,7 +759,7 @@ next_slot:
root->root_key.objectid,
key.objectid, key.offset -
extent_offset, 0);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
inode_sub_bytes(inode,
extent_end - key.offset);
*hint_byte = disk_bytenr;
@@ -770,7 +775,10 @@ next_slot:
ret = btrfs_del_items(trans, root, path, del_slot,
del_nr);
- BUG_ON(ret);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto out;
+ }
del_nr = 0;
del_slot = 0;
@@ -782,11 +790,13 @@ next_slot:
BUG_ON(1);
}
- if (del_nr > 0) {
+ if (!ret && del_nr > 0) {
ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
- BUG_ON(ret);
+ if (ret)
+ btrfs_abort_transaction(trans, root, ret);
}
+out:
btrfs_free_path(path);
return ret;
}
@@ -944,7 +954,10 @@ again:
btrfs_release_path(path);
goto again;
}
- BUG_ON(ret < 0);
+ if (ret < 0) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto out;
+ }
leaf = path->nodes[0];
fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
@@ -963,7 +976,7 @@ again:
ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
root->root_key.objectid,
ino, orig_offset, 0);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
if (split == start) {
key.offset = start;
@@ -990,7 +1003,7 @@ again:
ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
0, root->root_key.objectid,
ino, orig_offset, 0);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
}
other_start = 0;
other_end = start;
@@ -1007,7 +1020,7 @@ again:
ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
0, root->root_key.objectid,
ino, orig_offset, 0);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
}
if (del_nr == 0) {
fi = btrfs_item_ptr(leaf, path->slots[0],
@@ -1025,7 +1038,10 @@ again:
btrfs_mark_buffer_dirty(leaf);
ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
- BUG_ON(ret);
+ if (ret < 0) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto out;
+ }
}
out:
btrfs_free_path(path);
@@ -1105,8 +1121,7 @@ again:
if (start_pos < inode->i_size) {
struct btrfs_ordered_extent *ordered;
lock_extent_bits(&BTRFS_I(inode)->io_tree,
- start_pos, last_pos - 1, 0, &cached_state,
- GFP_NOFS);
+ start_pos, last_pos - 1, 0, &cached_state);
ordered = btrfs_lookup_first_ordered_extent(inode,
last_pos - 1);
if (ordered &&
@@ -1638,7 +1653,7 @@ static long btrfs_fallocate(struct file *file, int mode,
* transaction
*/
lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start,
- locked_end, 0, &cached_state, GFP_NOFS);
+ locked_end, 0, &cached_state);
ordered = btrfs_lookup_first_ordered_extent(inode,
alloc_end - 1);
if (ordered &&
@@ -1667,7 +1682,13 @@ static long btrfs_fallocate(struct file *file, int mode,
em = btrfs_get_extent(inode, NULL, 0, cur_offset,
alloc_end - cur_offset, 0);
- BUG_ON(IS_ERR_OR_NULL(em));
+ if (IS_ERR_OR_NULL(em)) {
+ if (!em)
+ ret = -ENOMEM;
+ else
+ ret = PTR_ERR(em);
+ break;
+ }
last_byte = min(extent_map_end(em), alloc_end);
actual_end = min_t(u64, extent_map_end(em), offset + len);
last_byte = (last_byte + mask) & ~mask;
@@ -1737,7 +1758,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int origin)
return -ENXIO;
lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, 0,
- &cached_state, GFP_NOFS);
+ &cached_state);
/*
* Delalloc is such a pain. If we have a hole and we have pending
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 710ea380c7e..202008ec367 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -230,11 +230,13 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
if (ret) {
trans->block_rsv = rsv;
- WARN_ON(1);
+ btrfs_abort_transaction(trans, root, ret);
return ret;
}
ret = btrfs_update_inode(trans, root, inode);
+ if (ret)
+ btrfs_abort_transaction(trans, root, ret);
trans->block_rsv = rsv;
return ret;
@@ -746,13 +748,6 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
u64 used = btrfs_block_group_used(&block_group->item);
/*
- * If we're unmounting then just return, since this does a search on the
- * normal root and not the commit root and we could deadlock.
- */
- if (btrfs_fs_closing(fs_info))
- return 0;
-
- /*
* If this block group has been marked to be cleared for one reason or
* another then we can't trust the on disk cache, so just return.
*/
@@ -766,6 +761,8 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
path = btrfs_alloc_path();
if (!path)
return 0;
+ path->search_commit_root = 1;
+ path->skip_locking = 1;
inode = lookup_free_space_inode(root, block_group, path);
if (IS_ERR(inode)) {
@@ -869,7 +866,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
io_ctl_prepare_pages(&io_ctl, inode, 0);
lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
- 0, &cached_state, GFP_NOFS);
+ 0, &cached_state);
node = rb_first(&ctl->free_space_offset);
if (!node && cluster) {
@@ -1068,7 +1065,7 @@ int btrfs_write_out_cache(struct btrfs_root *root,
spin_unlock(&block_group->lock);
ret = 0;
#ifdef DEBUG
- printk(KERN_ERR "btrfs: failed to write free space cace "
+ printk(KERN_ERR "btrfs: failed to write free space cache "
"for block group %llu\n", block_group->key.objectid);
#endif
}
@@ -1948,14 +1945,14 @@ again:
*/
ret = btrfs_add_free_space(block_group, old_start,
offset - old_start);
- WARN_ON(ret);
+ WARN_ON(ret); /* -ENOMEM */
goto out;
}
ret = remove_from_bitmap(ctl, info, &offset, &bytes);
if (ret == -EAGAIN)
goto again;
- BUG_ON(ret);
+ BUG_ON(ret); /* logic error */
out_lock:
spin_unlock(&ctl->tree_lock);
out:
@@ -2346,7 +2343,7 @@ again:
rb_erase(&entry->offset_index, &ctl->free_space_offset);
ret = tree_insert_offset(&cluster->root, entry->offset,
&entry->offset_index, 1);
- BUG_ON(ret);
+ BUG_ON(ret); /* -EEXIST; Logic error */
trace_btrfs_setup_cluster(block_group, cluster,
total_found * block_group->sectorsize, 1);
@@ -2439,7 +2436,7 @@ setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
ret = tree_insert_offset(&cluster->root, entry->offset,
&entry->offset_index, 0);
total_size += entry->bytes;
- BUG_ON(ret);
+ BUG_ON(ret); /* -EEXIST; Logic error */
} while (node && entry != last);
cluster->max_size = max_extent;
@@ -2830,6 +2827,7 @@ u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
int ret;
ret = search_bitmap(ctl, entry, &offset, &count);
+ /* Logic error; Should be empty if it can't find anything */
BUG_ON(ret);
ino = offset;
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c
index baa74f3db69..a13cf1a96c7 100644
--- a/fs/btrfs/inode-item.c
+++ b/fs/btrfs/inode-item.c
@@ -19,6 +19,7 @@
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
+#include "print-tree.h"
static int find_name_in_backref(struct btrfs_path *path, const char *name,
int name_len, struct btrfs_inode_ref **ref_ret)
@@ -128,13 +129,14 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
item_start = btrfs_item_ptr_offset(leaf, path->slots[0]);
memmove_extent_buffer(leaf, ptr, ptr + sub_item_len,
item_size - (ptr + sub_item_len - item_start));
- ret = btrfs_truncate_item(trans, root, path,
+ btrfs_truncate_item(trans, root, path,
item_size - sub_item_len, 1);
out:
btrfs_free_path(path);
return ret;
}
+/* Will return 0, -ENOMEM, -EMLINK, or -EEXIST or anything from the CoW path */
int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
const char *name, int name_len,
@@ -165,7 +167,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
goto out;
old_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
- ret = btrfs_extend_item(trans, root, path, ins_len);
+ btrfs_extend_item(trans, root, path, ins_len);
ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_ref);
ref = (struct btrfs_inode_ref *)((unsigned long)ref + old_size);
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index ee15d88b33d..b1a1c929ba8 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -178,7 +178,7 @@ static void start_caching(struct btrfs_root *root)
tsk = kthread_run(caching_kthread, root, "btrfs-ino-cache-%llu\n",
root->root_key.objectid);
- BUG_ON(IS_ERR(tsk));
+ BUG_ON(IS_ERR(tsk)); /* -ENOMEM */
}
int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid)
@@ -271,7 +271,7 @@ void btrfs_unpin_free_ino(struct btrfs_root *root)
break;
info = rb_entry(n, struct btrfs_free_space, offset_index);
- BUG_ON(info->bitmap);
+ BUG_ON(info->bitmap); /* Logic error */
if (info->offset > root->cache_progress)
goto free;
@@ -439,17 +439,16 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
if (ret)
goto out;
trace_btrfs_space_reservation(root->fs_info, "ino_cache",
- (u64)(unsigned long)trans,
- trans->bytes_reserved, 1);
+ trans->transid, trans->bytes_reserved, 1);
again:
inode = lookup_free_ino_inode(root, path);
- if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
+ if (IS_ERR(inode) && (PTR_ERR(inode) != -ENOENT || retry)) {
ret = PTR_ERR(inode);
goto out_release;
}
if (IS_ERR(inode)) {
- BUG_ON(retry);
+ BUG_ON(retry); /* Logic error */
retry = true;
ret = create_free_ino_inode(root, trans, path);
@@ -460,12 +459,17 @@ again:
BTRFS_I(inode)->generation = 0;
ret = btrfs_update_inode(trans, root, inode);
- WARN_ON(ret);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto out_put;
+ }
if (i_size_read(inode) > 0) {
ret = btrfs_truncate_free_space_cache(root, trans, path, inode);
- if (ret)
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
goto out_put;
+ }
}
spin_lock(&root->cache_lock);
@@ -502,8 +506,7 @@ out_put:
iput(inode);
out_release:
trace_btrfs_space_reservation(root->fs_info, "ino_cache",
- (u64)(unsigned long)trans,
- trans->bytes_reserved, 0);
+ trans->transid, trans->bytes_reserved, 0);
btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved);
out:
trans->block_rsv = rsv;
@@ -532,7 +535,7 @@ static int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid)
ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
if (ret < 0)
goto error;
- BUG_ON(ret == 0);
+ BUG_ON(ret == 0); /* Corruption */
if (path->slots[0] > 0) {
slot = path->slots[0] - 1;
l = path->nodes[0];
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 892b34785cc..61b16c641ce 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -150,7 +150,6 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
inode_add_bytes(inode, size);
ret = btrfs_insert_empty_item(trans, root, path, &key,
datasize);
- BUG_ON(ret);
if (ret) {
err = ret;
goto fail;
@@ -173,9 +172,9 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
cur_size = min_t(unsigned long, compressed_size,
PAGE_CACHE_SIZE);
- kaddr = kmap_atomic(cpage, KM_USER0);
+ kaddr = kmap_atomic(cpage);
write_extent_buffer(leaf, kaddr, ptr, cur_size);
- kunmap_atomic(kaddr, KM_USER0);
+ kunmap_atomic(kaddr);
i++;
ptr += cur_size;
@@ -187,10 +186,10 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
page = find_get_page(inode->i_mapping,
start >> PAGE_CACHE_SHIFT);
btrfs_set_file_extent_compression(leaf, ei, 0);
- kaddr = kmap_atomic(page, KM_USER0);
+ kaddr = kmap_atomic(page);
offset = start & (PAGE_CACHE_SIZE - 1);
write_extent_buffer(leaf, kaddr + offset, ptr, size);
- kunmap_atomic(kaddr, KM_USER0);
+ kunmap_atomic(kaddr);
page_cache_release(page);
}
btrfs_mark_buffer_dirty(leaf);
@@ -206,9 +205,9 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
* could end up racing with unlink.
*/
BTRFS_I(inode)->disk_i_size = inode->i_size;
- btrfs_update_inode(trans, root, inode);
+ ret = btrfs_update_inode(trans, root, inode);
- return 0;
+ return ret;
fail:
btrfs_free_path(path);
return err;
@@ -250,14 +249,18 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
ret = btrfs_drop_extents(trans, inode, start, aligned_end,
&hint_byte, 1);
- BUG_ON(ret);
+ if (ret)
+ return ret;
if (isize > actual_end)
inline_len = min_t(u64, isize, actual_end);
ret = insert_inline_extent(trans, root, inode, start,
inline_len, compressed_size,
compress_type, compressed_pages);
- BUG_ON(ret);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ return ret;
+ }
btrfs_delalloc_release_metadata(inode, end + 1 - start);
btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
return 0;
@@ -293,7 +296,7 @@ static noinline int add_async_extent(struct async_cow *cow,
struct async_extent *async_extent;
async_extent = kmalloc(sizeof(*async_extent), GFP_NOFS);
- BUG_ON(!async_extent);
+ BUG_ON(!async_extent); /* -ENOMEM */
async_extent->start = start;
async_extent->ram_size = ram_size;
async_extent->compressed_size = compressed_size;
@@ -344,8 +347,9 @@ static noinline int compress_file_range(struct inode *inode,
int will_compress;
int compress_type = root->fs_info->compress_type;
- /* if this is a small write inside eof, kick off a defragbot */
- if (end <= BTRFS_I(inode)->disk_i_size && (end - start + 1) < 16 * 1024)
+ /* if this is a small write inside eof, kick off a defrag */
+ if ((end - start + 1) < 16 * 1024 &&
+ (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
btrfs_add_inode_defrag(NULL, inode);
actual_end = min_t(u64, isize, end + 1);
@@ -422,10 +426,10 @@ again:
* sending it down to disk
*/
if (offset) {
- kaddr = kmap_atomic(page, KM_USER0);
+ kaddr = kmap_atomic(page);
memset(kaddr + offset, 0,
PAGE_CACHE_SIZE - offset);
- kunmap_atomic(kaddr, KM_USER0);
+ kunmap_atomic(kaddr);
}
will_compress = 1;
}
@@ -433,7 +437,11 @@ again:
cont:
if (start == 0) {
trans = btrfs_join_transaction(root);
- BUG_ON(IS_ERR(trans));
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ trans = NULL;
+ goto cleanup_and_out;
+ }
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
/* lets try to make an inline extent */
@@ -450,11 +458,11 @@ cont:
total_compressed,
compress_type, pages);
}
- if (ret == 0) {
+ if (ret <= 0) {
/*
- * inline extent creation worked, we don't need
- * to create any more async work items. Unlock
- * and free up our temp pages.
+ * inline extent creation worked or returned error,
+ * we don't need to create any more async work items.
+ * Unlock and free up our temp pages.
*/
extent_clear_unlock_delalloc(inode,
&BTRFS_I(inode)->io_tree,
@@ -547,7 +555,7 @@ cleanup_and_bail_uncompressed:
}
out:
- return 0;
+ return ret;
free_pages_out:
for (i = 0; i < nr_pages_ret; i++) {
@@ -557,6 +565,20 @@ free_pages_out:
kfree(pages);
goto out;
+
+cleanup_and_out:
+ extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
+ start, end, NULL,
+ EXTENT_CLEAR_UNLOCK_PAGE |
+ EXTENT_CLEAR_DIRTY |
+ EXTENT_CLEAR_DELALLOC |
+ EXTENT_SET_WRITEBACK |
+ EXTENT_END_WRITEBACK);
+ if (!trans || IS_ERR(trans))
+ btrfs_error(root->fs_info, ret, "Failed to join transaction");
+ else
+ btrfs_abort_transaction(trans, root, ret);
+ goto free_pages_out;
}
/*
@@ -597,7 +619,7 @@ retry:
lock_extent(io_tree, async_extent->start,
async_extent->start +
- async_extent->ram_size - 1, GFP_NOFS);
+ async_extent->ram_size - 1);
/* allocate blocks */
ret = cow_file_range(inode, async_cow->locked_page,
@@ -606,6 +628,8 @@ retry:
async_extent->ram_size - 1,
&page_started, &nr_written, 0);
+ /* JDM XXX */
+
/*
* if page_started, cow_file_range inserted an
* inline extent and took care of all the unlocking
@@ -625,18 +649,21 @@ retry:
}
lock_extent(io_tree, async_extent->start,
- async_extent->start + async_extent->ram_size - 1,
- GFP_NOFS);
+ async_extent->start + async_extent->ram_size - 1);
trans = btrfs_join_transaction(root);
- BUG_ON(IS_ERR(trans));
- trans->block_rsv = &root->fs_info->delalloc_block_rsv;
- ret = btrfs_reserve_extent(trans, root,
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ } else {
+ trans->block_rsv = &root->fs_info->delalloc_block_rsv;
+ ret = btrfs_reserve_extent(trans, root,
async_extent->compressed_size,
async_extent->compressed_size,
- 0, alloc_hint,
- (u64)-1, &ins, 1);
- btrfs_end_transaction(trans, root);
+ 0, alloc_hint, &ins, 1);
+ if (ret)
+ btrfs_abort_transaction(trans, root, ret);
+ btrfs_end_transaction(trans, root);
+ }
if (ret) {
int i;
@@ -649,8 +676,10 @@ retry:
async_extent->pages = NULL;
unlock_extent(io_tree, async_extent->start,
async_extent->start +
- async_extent->ram_size - 1, GFP_NOFS);
- goto retry;
+ async_extent->ram_size - 1);
+ if (ret == -ENOSPC)
+ goto retry;
+ goto out_free; /* JDM: Requeue? */
}
/*
@@ -662,7 +691,7 @@ retry:
async_extent->ram_size - 1, 0);
em = alloc_extent_map();
- BUG_ON(!em);
+ BUG_ON(!em); /* -ENOMEM */
em->start = async_extent->start;
em->len = async_extent->ram_size;
em->orig_start = em->start;
@@ -694,7 +723,7 @@ retry:
ins.offset,
BTRFS_ORDERED_COMPRESSED,
async_extent->compress_type);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
/*
* clear dirty, set writeback and unlock the pages.
@@ -716,13 +745,17 @@ retry:
ins.offset, async_extent->pages,
async_extent->nr_pages);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
alloc_hint = ins.objectid + ins.offset;
kfree(async_extent);
cond_resched();
}
-
- return 0;
+ ret = 0;
+out:
+ return ret;
+out_free:
+ kfree(async_extent);
+ goto out;
}
static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
@@ -791,7 +824,18 @@ static noinline int cow_file_range(struct inode *inode,
BUG_ON(btrfs_is_free_space_inode(root, inode));
trans = btrfs_join_transaction(root);
- BUG_ON(IS_ERR(trans));
+ if (IS_ERR(trans)) {
+ extent_clear_unlock_delalloc(inode,
+ &BTRFS_I(inode)->io_tree,
+ start, end, NULL,
+ EXTENT_CLEAR_UNLOCK_PAGE |
+ EXTENT_CLEAR_UNLOCK |
+ EXTENT_CLEAR_DELALLOC |
+ EXTENT_CLEAR_DIRTY |
+ EXTENT_SET_WRITEBACK |
+ EXTENT_END_WRITEBACK);
+ return PTR_ERR(trans);
+ }
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
num_bytes = (end - start + blocksize) & ~(blocksize - 1);
@@ -800,7 +844,8 @@ static noinline int cow_file_range(struct inode *inode,
ret = 0;
/* if this is a small write inside eof, kick off defrag */
- if (end <= BTRFS_I(inode)->disk_i_size && num_bytes < 64 * 1024)
+ if (num_bytes < 64 * 1024 &&
+ (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
btrfs_add_inode_defrag(trans, inode);
if (start == 0) {
@@ -821,8 +866,10 @@ static noinline int cow_file_range(struct inode *inode,
*nr_written = *nr_written +
(end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE;
*page_started = 1;
- ret = 0;
goto out;
+ } else if (ret < 0) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto out_unlock;
}
}
@@ -838,11 +885,14 @@ static noinline int cow_file_range(struct inode *inode,
cur_alloc_size = disk_num_bytes;
ret = btrfs_reserve_extent(trans, root, cur_alloc_size,
root->sectorsize, 0, alloc_hint,
- (u64)-1, &ins, 1);
- BUG_ON(ret);
+ &ins, 1);
+ if (ret < 0) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto out_unlock;
+ }
em = alloc_extent_map();
- BUG_ON(!em);
+ BUG_ON(!em); /* -ENOMEM */
em->start = start;
em->orig_start = em->start;
ram_size = ins.offset;
@@ -868,13 +918,16 @@ static noinline int cow_file_range(struct inode *inode,
cur_alloc_size = ins.offset;
ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
ram_size, cur_alloc_size, 0);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
if (root->root_key.objectid ==
BTRFS_DATA_RELOC_TREE_OBJECTID) {
ret = btrfs_reloc_clone_csums(inode, start,
cur_alloc_size);
- BUG_ON(ret);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto out_unlock;
+ }
}
if (disk_num_bytes < cur_alloc_size)
@@ -899,11 +952,23 @@ static noinline int cow_file_range(struct inode *inode,
alloc_hint = ins.objectid + ins.offset;
start += cur_alloc_size;
}
-out:
ret = 0;
+out:
btrfs_end_transaction(trans, root);
return ret;
+out_unlock:
+ extent_clear_unlock_delalloc(inode,
+ &BTRFS_I(inode)->io_tree,
+ start, end, NULL,
+ EXTENT_CLEAR_UNLOCK_PAGE |
+ EXTENT_CLEAR_UNLOCK |
+ EXTENT_CLEAR_DELALLOC |
+ EXTENT_CLEAR_DIRTY |
+ EXTENT_SET_WRITEBACK |
+ EXTENT_END_WRITEBACK);
+
+ goto out;
}
/*
@@ -969,7 +1034,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
1, 0, NULL, GFP_NOFS);
while (start < end) {
async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);
- BUG_ON(!async_cow);
+ BUG_ON(!async_cow); /* -ENOMEM */
async_cow->inode = inode;
async_cow->root = root;
async_cow->locked_page = locked_page;
@@ -1060,7 +1125,7 @@ static noinline int run_delalloc_nocow(struct inode *inode,
u64 disk_bytenr;
u64 num_bytes;
int extent_type;
- int ret;
+ int ret, err;
int type;
int nocow;
int check_prev = 1;
@@ -1078,7 +1143,11 @@ static noinline int run_delalloc_nocow(struct inode *inode,
else
trans = btrfs_join_transaction(root);
- BUG_ON(IS_ERR(trans));
+ if (IS_ERR(trans)) {
+ btrfs_free_path(path);
+ return PTR_ERR(trans);
+ }
+
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
cow_start = (u64)-1;
@@ -1086,7 +1155,10 @@ static noinline int run_delalloc_nocow(struct inode *inode,
while (1) {
ret = btrfs_lookup_file_extent(trans, root, path, ino,
cur_offset, 0);
- BUG_ON(ret < 0);
+ if (ret < 0) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto error;
+ }
if (ret > 0 && path->slots[0] > 0 && check_prev) {
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &found_key,
@@ -1100,8 +1172,10 @@ next_slot:
leaf = path->nodes[0];
if (path->slots[0] >= btrfs_header_nritems(leaf)) {
ret = btrfs_next_leaf(root, path);
- if (ret < 0)
- BUG_ON(1);
+ if (ret < 0) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto error;
+ }
if (ret > 0)
break;
leaf = path->nodes[0];
@@ -1189,7 +1263,10 @@ out_check:
ret = cow_file_range(inode, locked_page, cow_start,
found_key.offset - 1, page_started,
nr_written, 1);
- BUG_ON(ret);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto error;
+ }
cow_start = (u64)-1;
}
@@ -1198,7 +1275,7 @@ out_check:
struct extent_map_tree *em_tree;
em_tree = &BTRFS_I(inode)->extent_tree;
em = alloc_extent_map();
- BUG_ON(!em);
+ BUG_ON(!em); /* -ENOMEM */
em->start = cur_offset;
em->orig_start = em->start;
em->len = num_bytes;
@@ -1224,13 +1301,16 @@ out_check:
ret = btrfs_add_ordered_extent(inode, cur_offset, disk_bytenr,
num_bytes, num_bytes, type);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
if (root->root_key.objectid ==
BTRFS_DATA_RELOC_TREE_OBJECTID) {
ret = btrfs_reloc_clone_csums(inode, cur_offset,
num_bytes);
- BUG_ON(ret);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto error;
+ }
}
extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
@@ -1249,18 +1329,23 @@ out_check:
if (cow_start != (u64)-1) {
ret = cow_file_range(inode, locked_page, cow_start, end,
page_started, nr_written, 1);
- BUG_ON(ret);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto error;
+ }
}
+error:
if (nolock) {
- ret = btrfs_end_transaction_nolock(trans, root);
- BUG_ON(ret);
+ err = btrfs_end_transaction_nolock(trans, root);
} else {
- ret = btrfs_end_transaction(trans, root);
- BUG_ON(ret);
+ err = btrfs_end_transaction(trans, root);
}
+ if (!ret)
+ ret = err;
+
btrfs_free_path(path);
- return 0;
+ return ret;
}
/*
@@ -1425,10 +1510,11 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
map_length = length;
ret = btrfs_map_block(map_tree, READ, logical,
&map_length, NULL, 0);
-
+ /* Will always return 0 or 1 with map_multi == NULL */
+ BUG_ON(ret < 0);
if (map_length < length + size)
return 1;
- return ret;
+ return 0;
}
/*
@@ -1448,7 +1534,7 @@ static int __btrfs_submit_bio_start(struct inode *inode, int rw,
int ret = 0;
ret = btrfs_csum_one_bio(root, inode, bio, 0, 0);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
return 0;
}
@@ -1479,14 +1565,16 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
struct btrfs_root *root = BTRFS_I(inode)->root;
int ret = 0;
int skip_sum;
+ int metadata = 0;
skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
if (btrfs_is_free_space_inode(root, inode))
- ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2);
- else
- ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
- BUG_ON(ret);
+ metadata = 2;
+
+ ret = btrfs_bio_wq_end_io(root->fs_info, bio, metadata);
+ if (ret)
+ return ret;
if (!(rw & REQ_WRITE)) {
if (bio_flags & EXTENT_BIO_COMPRESSED) {
@@ -1571,7 +1659,7 @@ again:
page_end = page_offset(page) + PAGE_CACHE_SIZE - 1;
lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, 0,
- &cached_state, GFP_NOFS);
+ &cached_state);
/* already ordered? We're done */
if (PagePrivate2(page))
@@ -1675,13 +1763,15 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
*/
ret = btrfs_drop_extents(trans, inode, file_pos, file_pos + num_bytes,
&hint, 0);
- BUG_ON(ret);
+ if (ret)
+ goto out;
ins.objectid = btrfs_ino(inode);
ins.offset = file_pos;
ins.type = BTRFS_EXTENT_DATA_KEY;
ret = btrfs_insert_empty_item(trans, root, path, &ins, sizeof(*fi));
- BUG_ON(ret);
+ if (ret)
+ goto out;
leaf = path->nodes[0];
fi = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
@@ -1709,10 +1799,10 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
ret = btrfs_alloc_reserved_file_extent(trans, root,
root->root_key.objectid,
btrfs_ino(inode), file_pos, &ins);
- BUG_ON(ret);
+out:
btrfs_free_path(path);
- return 0;
+ return ret;
}
/*
@@ -1740,35 +1830,41 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
end - start + 1);
if (!ret)
return 0;
- BUG_ON(!ordered_extent);
+ BUG_ON(!ordered_extent); /* Logic error */
nolock = btrfs_is_free_space_inode(root, inode);
if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
- BUG_ON(!list_empty(&ordered_extent->list));
+ BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */
ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
if (!ret) {
if (nolock)
trans = btrfs_join_transaction_nolock(root);
else
trans = btrfs_join_transaction(root);
- BUG_ON(IS_ERR(trans));
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
ret = btrfs_update_inode_fallback(trans, root, inode);
- BUG_ON(ret);
+ if (ret) /* -ENOMEM or corruption */
+ btrfs_abort_transaction(trans, root, ret);
}
goto out;
}
lock_extent_bits(io_tree, ordered_extent->file_offset,
ordered_extent->file_offset + ordered_extent->len - 1,
- 0, &cached_state, GFP_NOFS);
+ 0, &cached_state);
if (nolock)
trans = btrfs_join_transaction_nolock(root);
else
trans = btrfs_join_transaction(root);
- BUG_ON(IS_ERR(trans));
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ trans = NULL;
+ goto out_unlock;
+ }
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
@@ -1779,7 +1875,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
ordered_extent->file_offset,
ordered_extent->file_offset +
ordered_extent->len);
- BUG_ON(ret);
} else {
BUG_ON(root == root->fs_info->tree_root);
ret = insert_reserved_file_extent(trans, inode,
@@ -1793,11 +1888,14 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
ordered_extent->file_offset,
ordered_extent->len);
- BUG_ON(ret);
}
unlock_extent_cached(io_tree, ordered_extent->file_offset,
ordered_extent->file_offset +
ordered_extent->len - 1, &cached_state, GFP_NOFS);
+ if (ret < 0) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto out;
+ }
add_pending_csums(trans, inode, ordered_extent->file_offset,
&ordered_extent->list);
@@ -1805,7 +1903,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
ret = btrfs_update_inode_fallback(trans, root, inode);
- BUG_ON(ret);
+ if (ret) { /* -ENOMEM or corruption */
+ btrfs_abort_transaction(trans, root, ret);
+ goto out;
+ }
}
ret = 0;
out:
@@ -1824,6 +1925,11 @@ out:
btrfs_put_ordered_extent(ordered_extent);
return 0;
+out_unlock:
+ unlock_extent_cached(io_tree, ordered_extent->file_offset,
+ ordered_extent->file_offset +
+ ordered_extent->len - 1, &cached_state, GFP_NOFS);
+ goto out;
}
static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
@@ -1841,7 +1947,7 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
* extent_io.c will try to find good copies for us.
*/
static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
- struct extent_state *state)
+ struct extent_state *state, int mirror)
{
size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT);
struct inode *inode = page->mapping->host;
@@ -1873,7 +1979,7 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
} else {
ret = get_state_private(io_tree, start, &private);
}
- kaddr = kmap_atomic(page, KM_USER0);
+ kaddr = kmap_atomic(page);
if (ret)
goto zeroit;
@@ -1882,7 +1988,7 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
if (csum != private)
goto zeroit;
- kunmap_atomic(kaddr, KM_USER0);
+ kunmap_atomic(kaddr);
good:
return 0;
@@ -1894,7 +2000,7 @@ zeroit:
(unsigned long long)private);
memset(kaddr + offset, 1, end - start + 1);
flush_dcache_page(page);
- kunmap_atomic(kaddr, KM_USER0);
+ kunmap_atomic(kaddr);
if (private == 0)
return 0;
return -EIO;
@@ -1905,6 +2011,8 @@ struct delayed_iput {
struct inode *inode;
};
+/* JDM: If this is fs-wide, why can't we add a pointer to
+ * btrfs_inode instead and avoid the allocation? */
void btrfs_add_delayed_iput(struct inode *inode)
{
struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
@@ -2051,20 +2159,27 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
/* grab metadata reservation from transaction handle */
if (reserve) {
ret = btrfs_orphan_reserve_metadata(trans, inode);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOSPC in reservation; Logic error? JDM */
}
/* insert an orphan item to track this unlinked/truncated file */
if (insert >= 1) {
ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
- BUG_ON(ret && ret != -EEXIST);
+ if (ret && ret != -EEXIST) {
+ btrfs_abort_transaction(trans, root, ret);
+ return ret;
+ }
+ ret = 0;
}
/* insert an orphan item to track subvolume contains orphan files */
if (insert >= 2) {
ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root,
root->root_key.objectid);
- BUG_ON(ret);
+ if (ret && ret != -EEXIST) {
+ btrfs_abort_transaction(trans, root, ret);
+ return ret;
+ }
}
return 0;
}
@@ -2094,7 +2209,7 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode)
if (trans && delete_item) {
ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode));
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */
}
if (release_rsv)
@@ -2228,7 +2343,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
}
ret = btrfs_del_orphan_item(trans, root,
found_key.objectid);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */
btrfs_end_transaction(trans, root);
continue;
}
@@ -2610,16 +2725,22 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
printk(KERN_INFO "btrfs failed to delete reference to %.*s, "
"inode %llu parent %llu\n", name_len, name,
(unsigned long long)ino, (unsigned long long)dir_ino);
+ btrfs_abort_transaction(trans, root, ret);
goto err;
}
ret = btrfs_delete_delayed_dir_index(trans, root, dir, index);
- if (ret)
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
goto err;
+ }
ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len,
inode, dir_ino);
- BUG_ON(ret != 0 && ret != -ENOENT);
+ if (ret != 0 && ret != -ENOENT) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto err;
+ }
ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len,
dir, index);
@@ -2777,7 +2898,7 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
err = ret;
goto out;
}
- BUG_ON(ret == 0);
+ BUG_ON(ret == 0); /* Corruption */
if (check_path_shared(root, path))
goto out;
btrfs_release_path(path);
@@ -2810,7 +2931,7 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
err = PTR_ERR(ref);
goto out;
}
- BUG_ON(!ref);
+ BUG_ON(!ref); /* Logic error */
if (check_path_shared(root, path))
goto out;
index = btrfs_inode_ref_index(path->nodes[0], ref);
@@ -2917,23 +3038,42 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
name, name_len, -1);
- BUG_ON(IS_ERR_OR_NULL(di));
+ if (IS_ERR_OR_NULL(di)) {
+ if (!di)
+ ret = -ENOENT;
+ else
+ ret = PTR_ERR(di);
+ goto out;
+ }
leaf = path->nodes[0];
btrfs_dir_item_key_to_cpu(leaf, di, &key);
WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid);
ret = btrfs_delete_one_dir_name(trans, root, path, di);
- BUG_ON(ret);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto out;
+ }
btrfs_release_path(path);
ret = btrfs_del_root_ref(trans, root->fs_info->tree_root,
objectid, root->root_key.objectid,
dir_ino, &index, name, name_len);
if (ret < 0) {
- BUG_ON(ret != -ENOENT);
+ if (ret != -ENOENT) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto out;
+ }
di = btrfs_search_dir_index_item(root, path, dir_ino,
name, name_len);
- BUG_ON(IS_ERR_OR_NULL(di));
+ if (IS_ERR_OR_NULL(di)) {
+ if (!di)
+ ret = -ENOENT;
+ else
+ ret = PTR_ERR(di);
+ btrfs_abort_transaction(trans, root, ret);
+ goto out;
+ }
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
@@ -2943,15 +3083,19 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
btrfs_release_path(path);
ret = btrfs_delete_delayed_dir_index(trans, root, dir, index);
- BUG_ON(ret);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto out;
+ }
btrfs_i_size_write(dir, dir->i_size - name_len * 2);
dir->i_mtime = dir->i_ctime = CURRENT_TIME;
ret = btrfs_update_inode(trans, root, dir);
- BUG_ON(ret);
-
+ if (ret)
+ btrfs_abort_transaction(trans, root, ret);
+out:
btrfs_free_path(path);
- return 0;
+ return ret;
}
static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
@@ -3161,8 +3305,8 @@ search_again:
}
size =
btrfs_file_extent_calc_inline_size(size);
- ret = btrfs_truncate_item(trans, root, path,
- size, 1);
+ btrfs_truncate_item(trans, root, path,
+ size, 1);
} else if (root->ref_cows) {
inode_sub_bytes(inode, item_end + 1 -
found_key.offset);
@@ -3210,7 +3354,11 @@ delete:
ret = btrfs_del_items(trans, root, path,
pending_del_slot,
pending_del_nr);
- BUG_ON(ret);
+ if (ret) {
+ btrfs_abort_transaction(trans,
+ root, ret);
+ goto error;
+ }
pending_del_nr = 0;
}
btrfs_release_path(path);
@@ -3223,8 +3371,10 @@ out:
if (pending_del_nr) {
ret = btrfs_del_items(trans, root, path, pending_del_slot,
pending_del_nr);
- BUG_ON(ret);
+ if (ret)
+ btrfs_abort_transaction(trans, root, ret);
}
+error:
btrfs_free_path(path);
return err;
}
@@ -3282,8 +3432,7 @@ again:
}
wait_on_page_writeback(page);
- lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state,
- GFP_NOFS);
+ lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state);
set_page_extent_mapped(page);
ordered = btrfs_lookup_ordered_extent(inode, page_start);
@@ -3359,7 +3508,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
btrfs_wait_ordered_range(inode, hole_start,
block_end - hole_start);
lock_extent_bits(io_tree, hole_start, block_end - 1, 0,
- &cached_state, GFP_NOFS);
+ &cached_state);
ordered = btrfs_lookup_ordered_extent(inode, hole_start);
if (!ordered)
break;
@@ -3372,7 +3521,10 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
while (1) {
em = btrfs_get_extent(inode, NULL, 0, cur_offset,
block_end - cur_offset, 0);
- BUG_ON(IS_ERR_OR_NULL(em));
+ if (IS_ERR(em)) {
+ err = PTR_ERR(em);
+ break;
+ }
last_byte = min(extent_map_end(em), block_end);
last_byte = (last_byte + mask) & ~mask;
if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
@@ -3389,7 +3541,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
cur_offset + hole_size,
&hint_byte, 1);
if (err) {
- btrfs_update_inode(trans, root, inode);
+ btrfs_abort_transaction(trans, root, err);
btrfs_end_transaction(trans, root);
break;
}
@@ -3399,7 +3551,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
0, hole_size, 0, hole_size,
0, 0, 0);
if (err) {
- btrfs_update_inode(trans, root, inode);
+ btrfs_abort_transaction(trans, root, err);
btrfs_end_transaction(trans, root);
break;
}
@@ -3779,7 +3931,7 @@ static void inode_tree_del(struct inode *inode)
}
}
-int btrfs_invalidate_inodes(struct btrfs_root *root)
+void btrfs_invalidate_inodes(struct btrfs_root *root)
{
struct rb_node *node;
struct rb_node *prev;
@@ -3839,7 +3991,6 @@ again:
node = rb_next(node);
}
spin_unlock(&root->inode_lock);
- return 0;
}
static int btrfs_init_locked_inode(struct inode *inode, void *p)
@@ -3918,7 +4069,7 @@ static struct inode *new_simple_dir(struct super_block *s,
BTRFS_I(inode)->dummy_inode = 1;
inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID;
- inode->i_op = &simple_dir_inode_operations;
+ inode->i_op = &btrfs_dir_ro_inode_operations;
inode->i_fop = &simple_dir_operations;
inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
@@ -3989,14 +4140,18 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
static int btrfs_dentry_delete(const struct dentry *dentry)
{
struct btrfs_root *root;
+ struct inode *inode = dentry->d_inode;
- if (!dentry->d_inode && !IS_ROOT(dentry))
- dentry = dentry->d_parent;
+ if (!inode && !IS_ROOT(dentry))
+ inode = dentry->d_parent->d_inode;
- if (dentry->d_inode) {
- root = BTRFS_I(dentry->d_inode)->root;
+ if (inode) {
+ root = BTRFS_I(inode)->root;
if (btrfs_root_refs(&root->root_item) == 0)
return 1;
+
+ if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
+ return 1;
}
return 0;
}
@@ -4037,7 +4192,6 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
struct btrfs_path *path;
struct list_head ins_list;
struct list_head del_list;
- struct qstr q;
int ret;
struct extent_buffer *leaf;
int slot;
@@ -4128,7 +4282,6 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
while (di_cur < di_total) {
struct btrfs_key location;
- struct dentry *tmp;
if (verify_dir_item(root, leaf, di))
break;
@@ -4149,35 +4302,15 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
btrfs_dir_item_key_to_cpu(leaf, di, &location);
- q.name = name_ptr;
- q.len = name_len;
- q.hash = full_name_hash(q.name, q.len);
- tmp = d_lookup(filp->f_dentry, &q);
- if (!tmp) {
- struct btrfs_key *newkey;
-
- newkey = kzalloc(sizeof(struct btrfs_key),
- GFP_NOFS);
- if (!newkey)
- goto no_dentry;
- tmp = d_alloc(filp->f_dentry, &q);
- if (!tmp) {
- kfree(newkey);
- dput(tmp);
- goto no_dentry;
- }
- memcpy(newkey, &location,
- sizeof(struct btrfs_key));
- tmp->d_fsdata = newkey;
- tmp->d_flags |= DCACHE_NEED_LOOKUP;
- d_rehash(tmp);
- dput(tmp);
- } else {
- dput(tmp);
- }
-no_dentry:
+
/* is this a reference to our own snapshot? If so
- * skip it
+ * skip it.
+ *
+ * In contrast to old kernels, we insert the snapshot's
+ * dir item and dir index after it has been created, so
+ * we won't find a reference to our own snapshot. We
+ * still keep the following code for backward
+ * compatibility.
*/
if (location.type == BTRFS_ROOT_ITEM_KEY &&
location.objectid == root->root_key.objectid) {
@@ -4581,18 +4714,26 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
parent_ino, index);
}
- if (ret == 0) {
- ret = btrfs_insert_dir_item(trans, root, name, name_len,
- parent_inode, &key,
- btrfs_inode_type(inode), index);
- if (ret)
- goto fail_dir_item;
+ /* Nothing to clean up yet */
+ if (ret)
+ return ret;
- btrfs_i_size_write(parent_inode, parent_inode->i_size +
- name_len * 2);
- parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
- ret = btrfs_update_inode(trans, root, parent_inode);
+ ret = btrfs_insert_dir_item(trans, root, name, name_len,
+ parent_inode, &key,
+ btrfs_inode_type(inode), index);
+ if (ret == -EEXIST)
+ goto fail_dir_item;
+ else if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ return ret;
}
+
+ btrfs_i_size_write(parent_inode, parent_inode->i_size +
+ name_len * 2);
+ parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
+ ret = btrfs_update_inode(trans, root, parent_inode);
+ if (ret)
+ btrfs_abort_transaction(trans, root, ret);
return ret;
fail_dir_item:
@@ -4806,7 +4947,8 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
} else {
struct dentry *parent = dentry->d_parent;
err = btrfs_update_inode(trans, root, inode);
- BUG_ON(err);
+ if (err)
+ goto fail;
d_instantiate(dentry, inode);
btrfs_log_new_name(trans, inode, NULL, parent);
}
@@ -4937,12 +5079,12 @@ static noinline int uncompress_inline(struct btrfs_path *path,
ret = btrfs_decompress(compress_type, tmp, page,
extent_offset, inline_size, max_size);
if (ret) {
- char *kaddr = kmap_atomic(page, KM_USER0);
+ char *kaddr = kmap_atomic(page);
unsigned long copy_size = min_t(u64,
PAGE_CACHE_SIZE - pg_offset,
max_size - extent_offset);
memset(kaddr + pg_offset, 0, copy_size);
- kunmap_atomic(kaddr, KM_USER0);
+ kunmap_atomic(kaddr);
}
kfree(tmp);
return 0;
@@ -5137,7 +5279,7 @@ again:
ret = uncompress_inline(path, inode, page,
pg_offset,
extent_offset, item);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
} else {
map = kmap(page);
read_extent_buffer(leaf, map + pg_offset, ptr,
@@ -5252,6 +5394,7 @@ out:
free_extent_map(em);
return ERR_PTR(err);
}
+ BUG_ON(!em); /* Error is always set */
return em;
}
@@ -5414,7 +5557,7 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
alloc_hint = get_extent_allocation_hint(inode, start, len);
ret = btrfs_reserve_extent(trans, root, len, root->sectorsize, 0,
- alloc_hint, (u64)-1, &ins, 1);
+ alloc_hint, &ins, 1);
if (ret) {
em = ERR_PTR(ret);
goto out;
@@ -5602,7 +5745,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
free_extent_map(em);
/* DIO will do one hole at a time, so just unlock a sector */
unlock_extent(&BTRFS_I(inode)->io_tree, start,
- start + root->sectorsize - 1, GFP_NOFS);
+ start + root->sectorsize - 1);
return 0;
}
@@ -5719,11 +5862,11 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
unsigned long flags;
local_irq_save(flags);
- kaddr = kmap_atomic(page, KM_IRQ0);
+ kaddr = kmap_atomic(page);
csum = btrfs_csum_data(root, kaddr + bvec->bv_offset,
csum, bvec->bv_len);
btrfs_csum_final(csum, (char *)&csum);
- kunmap_atomic(kaddr, KM_IRQ0);
+ kunmap_atomic(kaddr);
local_irq_restore(flags);
flush_dcache_page(bvec->bv_page);
@@ -5743,7 +5886,7 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
} while (bvec <= bvec_end);
unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
- dip->logical_offset + dip->bytes - 1, GFP_NOFS);
+ dip->logical_offset + dip->bytes - 1);
bio->bi_private = dip->private;
kfree(dip->csums);
@@ -5794,7 +5937,7 @@ again:
lock_extent_bits(&BTRFS_I(inode)->io_tree, ordered->file_offset,
ordered->file_offset + ordered->len - 1, 0,
- &cached_state, GFP_NOFS);
+ &cached_state);
if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) {
ret = btrfs_mark_extent_written(trans, inode,
@@ -5868,7 +6011,7 @@ static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw,
int ret;
struct btrfs_root *root = BTRFS_I(inode)->root;
ret = btrfs_csum_one_bio(root, inode, bio, offset, 1);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
return 0;
}
@@ -6209,7 +6352,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
while (1) {
lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
- 0, &cached_state, GFP_NOFS);
+ 0, &cached_state);
/*
* We're concerned with the entire range that we're going to be
* doing DIO to, so we need to make sure theres no ordered
@@ -6233,7 +6376,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
if (writing) {
write_bits = EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING;
ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
- EXTENT_DELALLOC, 0, NULL, &cached_state,
+ EXTENT_DELALLOC, NULL, &cached_state,
GFP_NOFS);
if (ret) {
clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
@@ -6363,8 +6506,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
btrfs_releasepage(page, GFP_NOFS);
return;
}
- lock_extent_bits(tree, page_start, page_end, 0, &cached_state,
- GFP_NOFS);
+ lock_extent_bits(tree, page_start, page_end, 0, &cached_state);
ordered = btrfs_lookup_ordered_extent(page->mapping->host,
page_offset(page));
if (ordered) {
@@ -6386,8 +6528,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
}
btrfs_put_ordered_extent(ordered);
cached_state = NULL;
- lock_extent_bits(tree, page_start, page_end, 0, &cached_state,
- GFP_NOFS);
+ lock_extent_bits(tree, page_start, page_end, 0, &cached_state);
}
clear_extent_bit(tree, page_start, page_end,
EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
@@ -6462,8 +6603,7 @@ again:
}
wait_on_page_writeback(page);
- lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state,
- GFP_NOFS);
+ lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state);
set_page_extent_mapped(page);
/*
@@ -6737,10 +6877,9 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
btrfs_i_size_write(inode, 0);
err = btrfs_update_inode(trans, new_root, inode);
- BUG_ON(err);
iput(inode);
- return 0;
+ return err;
}
struct inode *btrfs_alloc_inode(struct super_block *sb)
@@ -6783,6 +6922,8 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
extent_map_tree_init(&ei->extent_tree);
extent_io_tree_init(&ei->io_tree, &inode->i_data);
extent_io_tree_init(&ei->io_failure_tree, &inode->i_data);
+ ei->io_tree.track_uptodate = 1;
+ ei->io_failure_tree.track_uptodate = 1;
mutex_init(&ei->log_mutex);
mutex_init(&ei->delalloc_mutex);
btrfs_ordered_inode_tree_init(&ei->ordered_tree);
@@ -7072,7 +7213,10 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (!ret)
ret = btrfs_update_inode(trans, root, old_inode);
}
- BUG_ON(ret);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto out_fail;
+ }
if (new_inode) {
new_inode->i_ctime = CURRENT_TIME;
@@ -7090,11 +7234,14 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
new_dentry->d_name.name,
new_dentry->d_name.len);
}
- BUG_ON(ret);
- if (new_inode->i_nlink == 0) {
+ if (!ret && new_inode->i_nlink == 0) {
ret = btrfs_orphan_add(trans, new_dentry->d_inode);
BUG_ON(ret);
}
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto out_fail;
+ }
}
fixup_inode_flags(new_dir, old_inode);
@@ -7102,7 +7249,10 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
ret = btrfs_add_link(trans, new_dir, old_inode,
new_dentry->d_name.name,
new_dentry->d_name.len, 0, index);
- BUG_ON(ret);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto out_fail;
+ }
if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
struct dentry *parent = new_dentry->d_parent;
@@ -7315,7 +7465,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
}
ret = btrfs_reserve_extent(trans, root, num_bytes, min_size,
- 0, *alloc_hint, (u64)-1, &ins, 1);
+ 0, *alloc_hint, &ins, 1);
if (ret) {
if (own_trans)
btrfs_end_transaction(trans, root);
@@ -7327,7 +7477,12 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
ins.offset, ins.offset,
ins.offset, 0, 0, 0,
BTRFS_FILE_EXTENT_PREALLOC);
- BUG_ON(ret);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ if (own_trans)
+ btrfs_end_transaction(trans, root);
+ break;
+ }
btrfs_drop_extent_cache(inode, cur_offset,
cur_offset + ins.offset -1, 0);
@@ -7349,7 +7504,13 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
}
ret = btrfs_update_inode(trans, root, inode);
- BUG_ON(ret);
+
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ if (own_trans)
+ btrfs_end_transaction(trans, root);
+ break;
+ }
if (own_trans)
btrfs_end_transaction(trans, root);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 1b36f1932ef..14f8e1faa46 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -425,22 +425,37 @@ static noinline int create_subvol(struct btrfs_root *root,
key.offset = (u64)-1;
new_root = btrfs_read_fs_root_no_name(root->fs_info, &key);
- BUG_ON(IS_ERR(new_root));
+ if (IS_ERR(new_root)) {
+ btrfs_abort_transaction(trans, root, PTR_ERR(new_root));
+ ret = PTR_ERR(new_root);
+ goto fail;
+ }
btrfs_record_root_in_trans(trans, new_root);
ret = btrfs_create_subvol_root(trans, new_root, new_dirid);
+ if (ret) {
+ /* We potentially lose an unused inode item here */
+ btrfs_abort_transaction(trans, root, ret);
+ goto fail;
+ }
+
/*
* insert the directory item
*/
ret = btrfs_set_inode_index(dir, &index);
- BUG_ON(ret);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto fail;
+ }
ret = btrfs_insert_dir_item(trans, root,
name, namelen, dir, &key,
BTRFS_FT_DIR, index);
- if (ret)
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
goto fail;
+ }
btrfs_i_size_write(dir, dir->i_size + namelen * 2);
ret = btrfs_update_inode(trans, root, dir);
@@ -769,6 +784,31 @@ none:
return -ENOENT;
}
+/*
+ * Validaty check of prev em and next em:
+ * 1) no prev/next em
+ * 2) prev/next em is an hole/inline extent
+ */
+static int check_adjacent_extents(struct inode *inode, struct extent_map *em)
+{
+ struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+ struct extent_map *prev = NULL, *next = NULL;
+ int ret = 0;
+
+ read_lock(&em_tree->lock);
+ prev = lookup_extent_mapping(em_tree, em->start - 1, (u64)-1);
+ next = lookup_extent_mapping(em_tree, em->start + em->len, (u64)-1);
+ read_unlock(&em_tree->lock);
+
+ if ((!prev || prev->block_start >= EXTENT_MAP_LAST_BYTE) &&
+ (!next || next->block_start >= EXTENT_MAP_LAST_BYTE))
+ ret = 1;
+ free_extent_map(prev);
+ free_extent_map(next);
+
+ return ret;
+}
+
static int should_defrag_range(struct inode *inode, u64 start, u64 len,
int thresh, u64 *last_len, u64 *skip,
u64 *defrag_end)
@@ -797,17 +837,25 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len,
if (!em) {
/* get the big lock and read metadata off disk */
- lock_extent(io_tree, start, start + len - 1, GFP_NOFS);
+ lock_extent(io_tree, start, start + len - 1);
em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
- unlock_extent(io_tree, start, start + len - 1, GFP_NOFS);
+ unlock_extent(io_tree, start, start + len - 1);
if (IS_ERR(em))
return 0;
}
/* this will cover holes, and inline extents */
- if (em->block_start >= EXTENT_MAP_LAST_BYTE)
+ if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
ret = 0;
+ goto out;
+ }
+
+ /* If we have nothing to merge with us, just skip. */
+ if (check_adjacent_extents(inode, em)) {
+ ret = 0;
+ goto out;
+ }
/*
* we hit a real extent, if it is big don't bother defragging it again
@@ -815,6 +863,7 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len,
if ((*last_len == 0 || *last_len >= thresh) && em->len >= thresh)
ret = 0;
+out:
/*
* last_len ends up being a counter of how many bytes we've defragged.
* every time we choose not to defrag an extent, we reset *last_len
@@ -856,6 +905,7 @@ static int cluster_pages_for_defrag(struct inode *inode,
u64 isize = i_size_read(inode);
u64 page_start;
u64 page_end;
+ u64 page_cnt;
int ret;
int i;
int i_done;
@@ -864,19 +914,21 @@ static int cluster_pages_for_defrag(struct inode *inode,
struct extent_io_tree *tree;
gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
- if (isize == 0)
- return 0;
file_end = (isize - 1) >> PAGE_CACHE_SHIFT;
+ if (!isize || start_index > file_end)
+ return 0;
+
+ page_cnt = min_t(u64, (u64)num_pages, (u64)file_end - start_index + 1);
ret = btrfs_delalloc_reserve_space(inode,
- num_pages << PAGE_CACHE_SHIFT);
+ page_cnt << PAGE_CACHE_SHIFT);
if (ret)
return ret;
i_done = 0;
tree = &BTRFS_I(inode)->io_tree;
/* step one, lock all the pages */
- for (i = 0; i < num_pages; i++) {
+ for (i = 0; i < page_cnt; i++) {
struct page *page;
again:
page = find_or_create_page(inode->i_mapping,
@@ -887,10 +939,10 @@ again:
page_start = page_offset(page);
page_end = page_start + PAGE_CACHE_SIZE - 1;
while (1) {
- lock_extent(tree, page_start, page_end, GFP_NOFS);
+ lock_extent(tree, page_start, page_end);
ordered = btrfs_lookup_ordered_extent(inode,
page_start);
- unlock_extent(tree, page_start, page_end, GFP_NOFS);
+ unlock_extent(tree, page_start, page_end);
if (!ordered)
break;
@@ -898,6 +950,15 @@ again:
btrfs_start_ordered_extent(inode, ordered, 1);
btrfs_put_ordered_extent(ordered);
lock_page(page);
+ /*
+ * we unlocked the page above, so we need check if
+ * it was released or not.
+ */
+ if (page->mapping != inode->i_mapping) {
+ unlock_page(page);
+ page_cache_release(page);
+ goto again;
+ }
}
if (!PageUptodate(page)) {
@@ -911,15 +972,6 @@ again:
}
}
- isize = i_size_read(inode);
- file_end = (isize - 1) >> PAGE_CACHE_SHIFT;
- if (!isize || page->index > file_end) {
- /* whoops, we blew past eof, skip this page */
- unlock_page(page);
- page_cache_release(page);
- break;
- }
-
if (page->mapping != inode->i_mapping) {
unlock_page(page);
page_cache_release(page);
@@ -946,19 +998,18 @@ again:
page_end = page_offset(pages[i_done - 1]) + PAGE_CACHE_SIZE;
lock_extent_bits(&BTRFS_I(inode)->io_tree,
- page_start, page_end - 1, 0, &cached_state,
- GFP_NOFS);
+ page_start, page_end - 1, 0, &cached_state);
clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start,
page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING, 0, 0, &cached_state,
GFP_NOFS);
- if (i_done != num_pages) {
+ if (i_done != page_cnt) {
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents++;
spin_unlock(&BTRFS_I(inode)->lock);
btrfs_delalloc_release_space(inode,
- (num_pages - i_done) << PAGE_CACHE_SHIFT);
+ (page_cnt - i_done) << PAGE_CACHE_SHIFT);
}
@@ -983,7 +1034,7 @@ out:
unlock_page(pages[i]);
page_cache_release(pages[i]);
}
- btrfs_delalloc_release_space(inode, num_pages << PAGE_CACHE_SHIFT);
+ btrfs_delalloc_release_space(inode, page_cnt << PAGE_CACHE_SHIFT);
return ret;
}
@@ -1089,12 +1140,9 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
if (!(inode->i_sb->s_flags & MS_ACTIVE))
break;
- if (!newer_than &&
- !should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT,
- PAGE_CACHE_SIZE,
- extent_thresh,
- &last_len, &skip,
- &defrag_end)) {
+ if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT,
+ PAGE_CACHE_SIZE, extent_thresh,
+ &last_len, &skip, &defrag_end)) {
unsigned long next;
/*
* the should_defrag function tells us how much to skip
@@ -1123,17 +1171,24 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
ra_index += max_cluster;
}
+ mutex_lock(&inode->i_mutex);
ret = cluster_pages_for_defrag(inode, pages, i, cluster);
- if (ret < 0)
+ if (ret < 0) {
+ mutex_unlock(&inode->i_mutex);
goto out_ra;
+ }
defrag_count += ret;
balance_dirty_pages_ratelimited_nr(inode->i_mapping, ret);
+ mutex_unlock(&inode->i_mutex);
if (newer_than) {
if (newer_off == (u64)-1)
break;
+ if (ret > 0)
+ i += ret;
+
newer_off = max(newer_off + 1,
(u64)i << PAGE_CACHE_SHIFT);
@@ -1966,7 +2021,11 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
dest->root_key.objectid,
dentry->d_name.name,
dentry->d_name.len);
- BUG_ON(ret);
+ if (ret) {
+ err = ret;
+ btrfs_abort_transaction(trans, root, ret);
+ goto out_end_trans;
+ }
btrfs_record_root_in_trans(trans, dest);
@@ -1979,11 +2038,16 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
ret = btrfs_insert_orphan_item(trans,
root->fs_info->tree_root,
dest->root_key.objectid);
- BUG_ON(ret);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ err = ret;
+ goto out_end_trans;
+ }
}
-
+out_end_trans:
ret = btrfs_end_transaction(trans, root);
- BUG_ON(ret);
+ if (ret && !err)
+ err = ret;
inode->i_flags |= S_DEAD;
out_up_write:
up_write(&root->fs_info->subvol_sem);
@@ -2198,7 +2262,10 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
di_args->bytes_used = dev->bytes_used;
di_args->total_bytes = dev->total_bytes;
memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid));
- strncpy(di_args->path, dev->name, sizeof(di_args->path));
+ if (dev->name)
+ strncpy(di_args->path, dev->name, sizeof(di_args->path));
+ else
+ di_args->path[0] = '\0';
out:
if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args)))
@@ -2326,13 +2393,13 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
another, and lock file content */
while (1) {
struct btrfs_ordered_extent *ordered;
- lock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS);
+ lock_extent(&BTRFS_I(src)->io_tree, off, off+len);
ordered = btrfs_lookup_first_ordered_extent(src, off+len);
if (!ordered &&
!test_range_bit(&BTRFS_I(src)->io_tree, off, off+len,
EXTENT_DELALLOC, 0, NULL))
break;
- unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS);
+ unlock_extent(&BTRFS_I(src)->io_tree, off, off+len);
if (ordered)
btrfs_put_ordered_extent(ordered);
btrfs_wait_ordered_range(src, off, len);
@@ -2447,11 +2514,21 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
new_key.offset,
new_key.offset + datal,
&hint_byte, 1);
- BUG_ON(ret);
+ if (ret) {
+ btrfs_abort_transaction(trans, root,
+ ret);
+ btrfs_end_transaction(trans, root);
+ goto out;
+ }
ret = btrfs_insert_empty_item(trans, root, path,
&new_key, size);
- BUG_ON(ret);
+ if (ret) {
+ btrfs_abort_transaction(trans, root,
+ ret);
+ btrfs_end_transaction(trans, root);
+ goto out;
+ }
leaf = path->nodes[0];
slot = path->slots[0];
@@ -2478,7 +2555,15 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
btrfs_ino(inode),
new_key.offset - datao,
0);
- BUG_ON(ret);
+ if (ret) {
+ btrfs_abort_transaction(trans,
+ root,
+ ret);
+ btrfs_end_transaction(trans,
+ root);
+ goto out;
+
+ }
}
} else if (type == BTRFS_FILE_EXTENT_INLINE) {
u64 skip = 0;
@@ -2503,11 +2588,21 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
new_key.offset,
new_key.offset + datal,
&hint_byte, 1);
- BUG_ON(ret);
+ if (ret) {
+ btrfs_abort_transaction(trans, root,
+ ret);
+ btrfs_end_transaction(trans, root);
+ goto out;
+ }
ret = btrfs_insert_empty_item(trans, root, path,
&new_key, size);
- BUG_ON(ret);
+ if (ret) {
+ btrfs_abort_transaction(trans, root,
+ ret);
+ btrfs_end_transaction(trans, root);
+ goto out;
+ }
if (skip) {
u32 start =
@@ -2541,8 +2636,12 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
btrfs_i_size_write(inode, endoff);
ret = btrfs_update_inode(trans, root, inode);
- BUG_ON(ret);
- btrfs_end_transaction(trans, root);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ btrfs_end_transaction(trans, root);
+ goto out;
+ }
+ ret = btrfs_end_transaction(trans, root);
}
next:
btrfs_release_path(path);
@@ -2551,7 +2650,7 @@ next:
ret = 0;
out:
btrfs_release_path(path);
- unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS);
+ unlock_extent(&BTRFS_I(src)->io_tree, off, off+len);
out_unlock:
mutex_unlock(&src->i_mutex);
mutex_unlock(&inode->i_mutex);
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index 4f69028a68c..086e6bdae1c 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -252,7 +252,7 @@ struct btrfs_data_container {
struct btrfs_ioctl_ino_path_args {
__u64 inum; /* in */
- __u32 size; /* in */
+ __u64 size; /* in */
__u64 reserved[4];
/* struct btrfs_data_container *fspath; out */
__u64 fspath; /* out */
@@ -260,7 +260,7 @@ struct btrfs_ioctl_ino_path_args {
struct btrfs_ioctl_logical_ino_args {
__u64 logical; /* in */
- __u32 size; /* in */
+ __u64 size; /* in */
__u64 reserved[4];
/* struct btrfs_data_container *inodes; out */
__u64 inodes;
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 5e178d8f716..272f911203f 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -208,7 +208,7 @@ void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
* take a spinning write lock. This will wait for both
* blocking readers or writers
*/
-int btrfs_tree_lock(struct extent_buffer *eb)
+void btrfs_tree_lock(struct extent_buffer *eb)
{
again:
wait_event(eb->read_lock_wq, atomic_read(&eb->blocking_readers) == 0);
@@ -230,13 +230,12 @@ again:
atomic_inc(&eb->spinning_writers);
atomic_inc(&eb->write_locks);
eb->lock_owner = current->pid;
- return 0;
}
/*
* drop a spinning or a blocking write lock.
*/
-int btrfs_tree_unlock(struct extent_buffer *eb)
+void btrfs_tree_unlock(struct extent_buffer *eb)
{
int blockers = atomic_read(&eb->blocking_writers);
@@ -255,7 +254,6 @@ int btrfs_tree_unlock(struct extent_buffer *eb)
atomic_dec(&eb->spinning_writers);
write_unlock(&eb->lock);
}
- return 0;
}
void btrfs_assert_tree_locked(struct extent_buffer *eb)
diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h
index 17247ddb81a..ca52681e5f4 100644
--- a/fs/btrfs/locking.h
+++ b/fs/btrfs/locking.h
@@ -24,8 +24,8 @@
#define BTRFS_WRITE_LOCK_BLOCKING 3
#define BTRFS_READ_LOCK_BLOCKING 4
-int btrfs_tree_lock(struct extent_buffer *eb);
-int btrfs_tree_unlock(struct extent_buffer *eb);
+void btrfs_tree_lock(struct extent_buffer *eb);
+void btrfs_tree_unlock(struct extent_buffer *eb);
int btrfs_try_spin_lock(struct extent_buffer *eb);
void btrfs_tree_read_lock(struct extent_buffer *eb);
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index a178f5ebea7..743b86fa4fc 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -411,9 +411,9 @@ static int lzo_decompress(struct list_head *ws, unsigned char *data_in,
bytes = min_t(unsigned long, destlen, out_len - start_byte);
- kaddr = kmap_atomic(dest_page, KM_USER0);
+ kaddr = kmap_atomic(dest_page);
memcpy(kaddr, workspace->buf + start_byte, bytes);
- kunmap_atomic(kaddr, KM_USER0);
+ kunmap_atomic(kaddr);
out:
return ret;
}
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index a1c94042530..bbf6d0d9aeb 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -59,6 +59,14 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 file_offset,
return NULL;
}
+static void ordered_data_tree_panic(struct inode *inode, int errno,
+ u64 offset)
+{
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ btrfs_panic(fs_info, errno, "Inconsistency in ordered tree at offset "
+ "%llu\n", (unsigned long long)offset);
+}
+
/*
* look for a given offset in the tree, and if it can't be found return the
* first lesser offset
@@ -207,7 +215,8 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
spin_lock(&tree->lock);
node = tree_insert(&tree->tree, file_offset,
&entry->rb_node);
- BUG_ON(node);
+ if (node)
+ ordered_data_tree_panic(inode, -EEXIST, file_offset);
spin_unlock(&tree->lock);
spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
@@ -215,7 +224,6 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
&BTRFS_I(inode)->root->fs_info->ordered_extents);
spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
- BUG_ON(node);
return 0;
}
@@ -249,9 +257,9 @@ int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset,
* when an ordered extent is finished. If the list covers more than one
* ordered extent, it is split across multiples.
*/
-int btrfs_add_ordered_sum(struct inode *inode,
- struct btrfs_ordered_extent *entry,
- struct btrfs_ordered_sum *sum)
+void btrfs_add_ordered_sum(struct inode *inode,
+ struct btrfs_ordered_extent *entry,
+ struct btrfs_ordered_sum *sum)
{
struct btrfs_ordered_inode_tree *tree;
@@ -259,7 +267,6 @@ int btrfs_add_ordered_sum(struct inode *inode,
spin_lock(&tree->lock);
list_add_tail(&sum->list, &entry->list);
spin_unlock(&tree->lock);
- return 0;
}
/*
@@ -384,7 +391,7 @@ out:
* used to drop a reference on an ordered extent. This will free
* the extent if the last reference is dropped
*/
-int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
+void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
{
struct list_head *cur;
struct btrfs_ordered_sum *sum;
@@ -400,7 +407,6 @@ int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
}
kfree(entry);
}
- return 0;
}
/*
@@ -408,8 +414,8 @@ int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
* and you must wake_up entry->wait. You must hold the tree lock
* while you call this function.
*/
-static int __btrfs_remove_ordered_extent(struct inode *inode,
- struct btrfs_ordered_extent *entry)
+static void __btrfs_remove_ordered_extent(struct inode *inode,
+ struct btrfs_ordered_extent *entry)
{
struct btrfs_ordered_inode_tree *tree;
struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -436,35 +442,30 @@ static int __btrfs_remove_ordered_extent(struct inode *inode,
list_del_init(&BTRFS_I(inode)->ordered_operations);
}
spin_unlock(&root->fs_info->ordered_extent_lock);
-
- return 0;
}
/*
* remove an ordered extent from the tree. No references are dropped
* but any waiters are woken.
*/
-int btrfs_remove_ordered_extent(struct inode *inode,
- struct btrfs_ordered_extent *entry)
+void btrfs_remove_ordered_extent(struct inode *inode,
+ struct btrfs_ordered_extent *entry)
{
struct btrfs_ordered_inode_tree *tree;
- int ret;
tree = &BTRFS_I(inode)->ordered_tree;
spin_lock(&tree->lock);
- ret = __btrfs_remove_ordered_extent(inode, entry);
+ __btrfs_remove_ordered_extent(inode, entry);
spin_unlock(&tree->lock);
wake_up(&entry->wait);
-
- return ret;
}
/*
* wait for all the ordered extents in a root. This is done when balancing
* space between drives.
*/
-int btrfs_wait_ordered_extents(struct btrfs_root *root,
- int nocow_only, int delay_iput)
+void btrfs_wait_ordered_extents(struct btrfs_root *root,
+ int nocow_only, int delay_iput)
{
struct list_head splice;
struct list_head *cur;
@@ -512,7 +513,6 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root,
spin_lock(&root->fs_info->ordered_extent_lock);
}
spin_unlock(&root->fs_info->ordered_extent_lock);
- return 0;
}
/*
@@ -525,7 +525,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root,
* extra check to make sure the ordered operation list really is empty
* before we return
*/
-int btrfs_run_ordered_operations(struct btrfs_root *root, int wait)
+void btrfs_run_ordered_operations(struct btrfs_root *root, int wait)
{
struct btrfs_inode *btrfs_inode;
struct inode *inode;
@@ -573,8 +573,6 @@ again:
spin_unlock(&root->fs_info->ordered_extent_lock);
mutex_unlock(&root->fs_info->ordered_operations_mutex);
-
- return 0;
}
/*
@@ -609,7 +607,7 @@ void btrfs_start_ordered_extent(struct inode *inode,
/*
* Used to wait on ordered extents across a large range of bytes.
*/
-int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
+void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
{
u64 end;
u64 orig_end;
@@ -664,7 +662,6 @@ again:
schedule_timeout(1);
goto again;
}
- return 0;
}
/*
@@ -948,9 +945,8 @@ out:
* If trans is not null, we'll do a friendly check for a transaction that
* is already flushing things and force the IO down ourselves.
*/
-int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct inode *inode)
+void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct inode *inode)
{
u64 last_mod;
@@ -961,7 +957,7 @@ int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
* commit, we can safely return without doing anything
*/
if (last_mod < root->fs_info->last_trans_committed)
- return 0;
+ return;
/*
* the transaction is already committing. Just start the IO and
@@ -969,7 +965,7 @@ int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
*/
if (trans && root->fs_info->running_transaction->blocked) {
btrfs_wait_ordered_range(inode, 0, (u64)-1);
- return 0;
+ return;
}
spin_lock(&root->fs_info->ordered_extent_lock);
@@ -978,6 +974,4 @@ int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
&root->fs_info->ordered_operations);
}
spin_unlock(&root->fs_info->ordered_extent_lock);
-
- return 0;
}
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index ff1f69aa188..c355ad4dc1a 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -138,8 +138,8 @@ btrfs_ordered_inode_tree_init(struct btrfs_ordered_inode_tree *t)
t->last = NULL;
}
-int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry);
-int btrfs_remove_ordered_extent(struct inode *inode,
+void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry);
+void btrfs_remove_ordered_extent(struct inode *inode,
struct btrfs_ordered_extent *entry);
int btrfs_dec_test_ordered_pending(struct inode *inode,
struct btrfs_ordered_extent **cached,
@@ -154,14 +154,14 @@ int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset,
u64 start, u64 len, u64 disk_len,
int type, int compress_type);
-int btrfs_add_ordered_sum(struct inode *inode,
- struct btrfs_ordered_extent *entry,
- struct btrfs_ordered_sum *sum);
+void btrfs_add_ordered_sum(struct inode *inode,
+ struct btrfs_ordered_extent *entry,
+ struct btrfs_ordered_sum *sum);
struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
u64 file_offset);
void btrfs_start_ordered_extent(struct inode *inode,
struct btrfs_ordered_extent *entry, int wait);
-int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len);
+void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len);
struct btrfs_ordered_extent *
btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset);
struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
@@ -170,10 +170,10 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
struct btrfs_ordered_extent *ordered);
int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum);
-int btrfs_run_ordered_operations(struct btrfs_root *root, int wait);
-int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct inode *inode);
-int btrfs_wait_ordered_extents(struct btrfs_root *root,
- int nocow_only, int delay_iput);
+void btrfs_run_ordered_operations(struct btrfs_root *root, int wait);
+void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct inode *inode);
+void btrfs_wait_ordered_extents(struct btrfs_root *root,
+ int nocow_only, int delay_iput);
#endif
diff --git a/fs/btrfs/orphan.c b/fs/btrfs/orphan.c
index f8be250963a..24cad1695af 100644
--- a/fs/btrfs/orphan.c
+++ b/fs/btrfs/orphan.c
@@ -58,7 +58,7 @@ int btrfs_del_orphan_item(struct btrfs_trans_handle *trans,
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
if (ret < 0)
goto out;
- if (ret) {
+ if (ret) { /* JDM: Really? */
ret = -ENOENT;
goto out;
}
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 22db04550f6..ac5d0108588 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -54,7 +54,6 @@
* than the 2 started one after another.
*/
-#define MAX_MIRRORS 2
#define MAX_IN_FLIGHT 6
struct reada_extctl {
@@ -71,7 +70,7 @@ struct reada_extent {
struct list_head extctl;
struct kref refcnt;
spinlock_t lock;
- struct reada_zone *zones[MAX_MIRRORS];
+ struct reada_zone *zones[BTRFS_MAX_MIRRORS];
int nzones;
struct btrfs_device *scheduled_for;
};
@@ -84,7 +83,8 @@ struct reada_zone {
spinlock_t lock;
int locked;
struct btrfs_device *device;
- struct btrfs_device *devs[MAX_MIRRORS]; /* full list, incl self */
+ struct btrfs_device *devs[BTRFS_MAX_MIRRORS]; /* full list, incl
+ * self */
int ndevs;
struct kref refcnt;
};
@@ -250,14 +250,12 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,
struct btrfs_bio *bbio)
{
int ret;
- int looped = 0;
struct reada_zone *zone;
struct btrfs_block_group_cache *cache = NULL;
u64 start;
u64 end;
int i;
-again:
zone = NULL;
spin_lock(&fs_info->reada_lock);
ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone,
@@ -274,9 +272,6 @@ again:
spin_unlock(&fs_info->reada_lock);
}
- if (looped)
- return NULL;
-
cache = btrfs_lookup_block_group(fs_info, logical);
if (!cache)
return NULL;
@@ -307,13 +302,15 @@ again:
ret = radix_tree_insert(&dev->reada_zones,
(unsigned long)(zone->end >> PAGE_CACHE_SHIFT),
zone);
- spin_unlock(&fs_info->reada_lock);
- if (ret) {
+ if (ret == -EEXIST) {
kfree(zone);
- looped = 1;
- goto again;
+ ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone,
+ logical >> PAGE_CACHE_SHIFT, 1);
+ if (ret == 1)
+ kref_get(&zone->refcnt);
}
+ spin_unlock(&fs_info->reada_lock);
return zone;
}
@@ -323,26 +320,26 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
struct btrfs_key *top, int level)
{
int ret;
- int looped = 0;
struct reada_extent *re = NULL;
+ struct reada_extent *re_exist = NULL;
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
struct btrfs_bio *bbio = NULL;
struct btrfs_device *dev;
+ struct btrfs_device *prev_dev;
u32 blocksize;
u64 length;
int nzones = 0;
int i;
unsigned long index = logical >> PAGE_CACHE_SHIFT;
-again:
spin_lock(&fs_info->reada_lock);
re = radix_tree_lookup(&fs_info->reada_tree, index);
if (re)
kref_get(&re->refcnt);
spin_unlock(&fs_info->reada_lock);
- if (re || looped)
+ if (re)
return re;
re = kzalloc(sizeof(*re), GFP_NOFS);
@@ -365,9 +362,9 @@ again:
if (ret || !bbio || length < blocksize)
goto error;
- if (bbio->num_stripes > MAX_MIRRORS) {
+ if (bbio->num_stripes > BTRFS_MAX_MIRRORS) {
printk(KERN_ERR "btrfs readahead: more than %d copies not "
- "supported", MAX_MIRRORS);
+ "supported", BTRFS_MAX_MIRRORS);
goto error;
}
@@ -398,16 +395,31 @@ again:
/* insert extent in reada_tree + all per-device trees, all or nothing */
spin_lock(&fs_info->reada_lock);
ret = radix_tree_insert(&fs_info->reada_tree, index, re);
+ if (ret == -EEXIST) {
+ re_exist = radix_tree_lookup(&fs_info->reada_tree, index);
+ BUG_ON(!re_exist);
+ kref_get(&re_exist->refcnt);
+ spin_unlock(&fs_info->reada_lock);
+ goto error;
+ }
if (ret) {
spin_unlock(&fs_info->reada_lock);
- if (ret != -ENOMEM) {
- /* someone inserted the extent in the meantime */
- looped = 1;
- }
goto error;
}
+ prev_dev = NULL;
for (i = 0; i < nzones; ++i) {
dev = bbio->stripes[i].dev;
+ if (dev == prev_dev) {
+ /*
+ * in case of DUP, just add the first zone. As both
+ * are on the same device, there's nothing to gain
+ * from adding both.
+ * Also, it wouldn't work, as the tree is per device
+ * and adding would fail with EEXIST
+ */
+ continue;
+ }
+ prev_dev = dev;
ret = radix_tree_insert(&dev->reada_extents, index, re);
if (ret) {
while (--i >= 0) {
@@ -450,9 +462,7 @@ error:
}
kfree(bbio);
kfree(re);
- if (looped)
- goto again;
- return NULL;
+ return re_exist;
}
static void reada_kref_dummy(struct kref *kr)
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 8c1aae2c845..646ee21bb03 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -326,6 +326,19 @@ static struct rb_node *tree_search(struct rb_root *root, u64 bytenr)
return NULL;
}
+void backref_tree_panic(struct rb_node *rb_node, int errno,
+ u64 bytenr)
+{
+
+ struct btrfs_fs_info *fs_info = NULL;
+ struct backref_node *bnode = rb_entry(rb_node, struct backref_node,
+ rb_node);
+ if (bnode->root)
+ fs_info = bnode->root->fs_info;
+ btrfs_panic(fs_info, errno, "Inconsistency in backref cache "
+ "found at offset %llu\n", (unsigned long long)bytenr);
+}
+
/*
* walk up backref nodes until reach node presents tree root
*/
@@ -452,7 +465,8 @@ static void update_backref_node(struct backref_cache *cache,
rb_erase(&node->rb_node, &cache->rb_root);
node->bytenr = bytenr;
rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node);
- BUG_ON(rb_node);
+ if (rb_node)
+ backref_tree_panic(rb_node, -EEXIST, bytenr);
}
/*
@@ -999,7 +1013,8 @@ next:
if (!cowonly) {
rb_node = tree_insert(&cache->rb_root, node->bytenr,
&node->rb_node);
- BUG_ON(rb_node);
+ if (rb_node)
+ backref_tree_panic(rb_node, -EEXIST, node->bytenr);
list_add_tail(&node->lower, &cache->leaves);
}
@@ -1034,7 +1049,9 @@ next:
if (!cowonly) {
rb_node = tree_insert(&cache->rb_root, upper->bytenr,
&upper->rb_node);
- BUG_ON(rb_node);
+ if (rb_node)
+ backref_tree_panic(rb_node, -EEXIST,
+ upper->bytenr);
}
list_add_tail(&edge->list[UPPER], &upper->lower);
@@ -1180,7 +1197,8 @@ static int clone_backref_node(struct btrfs_trans_handle *trans,
rb_node = tree_insert(&cache->rb_root, new_node->bytenr,
&new_node->rb_node);
- BUG_ON(rb_node);
+ if (rb_node)
+ backref_tree_panic(rb_node, -EEXIST, new_node->bytenr);
if (!new_node->lowest) {
list_for_each_entry(new_edge, &new_node->lower, list[UPPER]) {
@@ -1203,14 +1221,15 @@ fail:
/*
* helper to add 'address of tree root -> reloc tree' mapping
*/
-static int __add_reloc_root(struct btrfs_root *root)
+static int __must_check __add_reloc_root(struct btrfs_root *root)
{
struct rb_node *rb_node;
struct mapping_node *node;
struct reloc_control *rc = root->fs_info->reloc_ctl;
node = kmalloc(sizeof(*node), GFP_NOFS);
- BUG_ON(!node);
+ if (!node)
+ return -ENOMEM;
node->bytenr = root->node->start;
node->data = root;
@@ -1219,7 +1238,12 @@ static int __add_reloc_root(struct btrfs_root *root)
rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
node->bytenr, &node->rb_node);
spin_unlock(&rc->reloc_root_tree.lock);
- BUG_ON(rb_node);
+ if (rb_node) {
+ kfree(node);
+ btrfs_panic(root->fs_info, -EEXIST, "Duplicate root found "
+ "for start=%llu while inserting into relocation "
+ "tree\n");
+ }
list_add_tail(&root->root_list, &rc->reloc_roots);
return 0;
@@ -1252,9 +1276,12 @@ static int __update_reloc_root(struct btrfs_root *root, int del)
rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
node->bytenr, &node->rb_node);
spin_unlock(&rc->reloc_root_tree.lock);
- BUG_ON(rb_node);
+ if (rb_node)
+ backref_tree_panic(rb_node, -EEXIST, node->bytenr);
} else {
+ spin_lock(&root->fs_info->trans_lock);
list_del_init(&root->root_list);
+ spin_unlock(&root->fs_info->trans_lock);
kfree(node);
}
return 0;
@@ -1334,6 +1361,7 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
struct btrfs_root *reloc_root;
struct reloc_control *rc = root->fs_info->reloc_ctl;
int clear_rsv = 0;
+ int ret;
if (root->reloc_root) {
reloc_root = root->reloc_root;
@@ -1353,7 +1381,8 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
if (clear_rsv)
trans->block_rsv = NULL;
- __add_reloc_root(reloc_root);
+ ret = __add_reloc_root(reloc_root);
+ BUG_ON(ret < 0);
root->reloc_root = reloc_root;
return 0;
}
@@ -1577,15 +1606,14 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
WARN_ON(!IS_ALIGNED(end, root->sectorsize));
end--;
ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
- key.offset, end,
- GFP_NOFS);
+ key.offset, end);
if (!ret)
continue;
btrfs_drop_extent_cache(inode, key.offset, end,
1);
unlock_extent(&BTRFS_I(inode)->io_tree,
- key.offset, end, GFP_NOFS);
+ key.offset, end);
}
}
@@ -1956,9 +1984,9 @@ static int invalidate_extent_cache(struct btrfs_root *root,
}
/* the lock_extent waits for readpage to complete */
- lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
+ lock_extent(&BTRFS_I(inode)->io_tree, start, end);
btrfs_drop_extent_cache(inode, start, end, 1);
- unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
+ unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
}
return 0;
}
@@ -2246,7 +2274,8 @@ again:
} else {
list_del_init(&reloc_root->root_list);
}
- btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0, 1);
+ ret = btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0, 1);
+ BUG_ON(ret < 0);
}
if (found) {
@@ -2862,12 +2891,12 @@ int prealloc_file_extent_cluster(struct inode *inode,
else
end = cluster->end - offset;
- lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
+ lock_extent(&BTRFS_I(inode)->io_tree, start, end);
num_bytes = end + 1 - start;
ret = btrfs_prealloc_file_range(inode, 0, start,
num_bytes, num_bytes,
end + 1, &alloc_hint);
- unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
+ unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
if (ret)
break;
nr++;
@@ -2899,7 +2928,7 @@ int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
em->bdev = root->fs_info->fs_devices->latest_bdev;
set_bit(EXTENT_FLAG_PINNED, &em->flags);
- lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
+ lock_extent(&BTRFS_I(inode)->io_tree, start, end);
while (1) {
write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
@@ -2910,7 +2939,7 @@ int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
}
btrfs_drop_extent_cache(inode, start, end, 0);
}
- unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
+ unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
return ret;
}
@@ -2990,8 +3019,7 @@ static int relocate_file_extent_cluster(struct inode *inode,
page_start = (u64)page->index << PAGE_CACHE_SHIFT;
page_end = page_start + PAGE_CACHE_SIZE - 1;
- lock_extent(&BTRFS_I(inode)->io_tree,
- page_start, page_end, GFP_NOFS);
+ lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end);
set_page_extent_mapped(page);
@@ -3007,7 +3035,7 @@ static int relocate_file_extent_cluster(struct inode *inode,
set_page_dirty(page);
unlock_extent(&BTRFS_I(inode)->io_tree,
- page_start, page_end, GFP_NOFS);
+ page_start, page_end);
unlock_page(page);
page_cache_release(page);
@@ -3154,7 +3182,8 @@ static int add_tree_block(struct reloc_control *rc,
block->key_ready = 0;
rb_node = tree_insert(blocks, block->bytenr, &block->rb_node);
- BUG_ON(rb_node);
+ if (rb_node)
+ backref_tree_panic(rb_node, -EEXIST, block->bytenr);
return 0;
}
@@ -3426,7 +3455,9 @@ static int find_data_references(struct reloc_control *rc,
block->key_ready = 1;
rb_node = tree_insert(blocks, block->bytenr,
&block->rb_node);
- BUG_ON(rb_node);
+ if (rb_node)
+ backref_tree_panic(rb_node, -EEXIST,
+ block->bytenr);
}
if (counted)
added = 1;
@@ -3782,7 +3813,7 @@ restart:
ret = btrfs_block_rsv_check(rc->extent_root, rc->block_rsv, 5);
if (ret < 0) {
- if (ret != -EAGAIN) {
+ if (ret != -ENOSPC) {
err = ret;
WARN_ON(1);
break;
@@ -4073,10 +4104,11 @@ out:
static noinline_for_stack int mark_garbage_root(struct btrfs_root *root)
{
struct btrfs_trans_handle *trans;
- int ret;
+ int ret, err;
trans = btrfs_start_transaction(root->fs_info->tree_root, 0);
- BUG_ON(IS_ERR(trans));
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
memset(&root->root_item.drop_progress, 0,
sizeof(root->root_item.drop_progress));
@@ -4084,11 +4116,11 @@ static noinline_for_stack int mark_garbage_root(struct btrfs_root *root)
btrfs_set_root_refs(&root->root_item, 0);
ret = btrfs_update_root(trans, root->fs_info->tree_root,
&root->root_key, &root->root_item);
- BUG_ON(ret);
- ret = btrfs_end_transaction(trans, root->fs_info->tree_root);
- BUG_ON(ret);
- return 0;
+ err = btrfs_end_transaction(trans, root->fs_info->tree_root);
+ if (err)
+ return err;
+ return ret;
}
/*
@@ -4156,7 +4188,11 @@ int btrfs_recover_relocation(struct btrfs_root *root)
err = ret;
goto out;
}
- mark_garbage_root(reloc_root);
+ ret = mark_garbage_root(reloc_root);
+ if (ret < 0) {
+ err = ret;
+ goto out;
+ }
}
}
@@ -4202,13 +4238,19 @@ int btrfs_recover_relocation(struct btrfs_root *root)
fs_root = read_fs_root(root->fs_info,
reloc_root->root_key.offset);
- BUG_ON(IS_ERR(fs_root));
+ if (IS_ERR(fs_root)) {
+ err = PTR_ERR(fs_root);
+ goto out_free;
+ }
- __add_reloc_root(reloc_root);
+ err = __add_reloc_root(reloc_root);
+ BUG_ON(err < 0); /* -ENOMEM or logic error */
fs_root->reloc_root = reloc_root;
}
- btrfs_commit_transaction(trans, rc->extent_root);
+ err = btrfs_commit_transaction(trans, rc->extent_root);
+ if (err)
+ goto out_free;
merge_reloc_roots(rc);
@@ -4218,7 +4260,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)
if (IS_ERR(trans))
err = PTR_ERR(trans);
else
- btrfs_commit_transaction(trans, rc->extent_root);
+ err = btrfs_commit_transaction(trans, rc->extent_root);
out_free:
kfree(rc);
out:
@@ -4267,6 +4309,8 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt;
ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr,
disk_bytenr + len - 1, &list, 0);
+ if (ret)
+ goto out;
while (!list_empty(&list)) {
sums = list_entry(list.next, struct btrfs_ordered_sum, list);
@@ -4284,6 +4328,7 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
btrfs_add_ordered_sum(inode, ordered, sums);
}
+out:
btrfs_put_ordered_extent(ordered);
return ret;
}
@@ -4380,7 +4425,7 @@ void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans,
* called after snapshot is created. migrate block reservation
* and create reloc root for the newly created snapshot
*/
-void btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
+int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
struct btrfs_pending_snapshot *pending)
{
struct btrfs_root *root = pending->root;
@@ -4390,7 +4435,7 @@ void btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
int ret;
if (!root->reloc_root)
- return;
+ return 0;
rc = root->fs_info->reloc_ctl;
rc->merging_rsv_size += rc->nodes_relocated;
@@ -4399,18 +4444,21 @@ void btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
ret = btrfs_block_rsv_migrate(&pending->block_rsv,
rc->block_rsv,
rc->nodes_relocated);
- BUG_ON(ret);
+ if (ret)
+ return ret;
}
new_root = pending->snap;
reloc_root = create_reloc_root(trans, root->reloc_root,
new_root->root_key.objectid);
+ if (IS_ERR(reloc_root))
+ return PTR_ERR(reloc_root);
- __add_reloc_root(reloc_root);
+ ret = __add_reloc_root(reloc_root);
+ BUG_ON(ret < 0);
new_root->reloc_root = reloc_root;
- if (rc->create_reloc_tree) {
+ if (rc->create_reloc_tree)
ret = clone_backref_node(trans, rc, root, reloc_root);
- BUG_ON(ret);
- }
+ return ret;
}
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index f4099904565..24fb8ce4e07 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -93,10 +93,14 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
unsigned long ptr;
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path)
+ return -ENOMEM;
+
ret = btrfs_search_slot(trans, root, key, path, 0, 1);
- if (ret < 0)
+ if (ret < 0) {
+ btrfs_abort_transaction(trans, root, ret);
goto out;
+ }
if (ret != 0) {
btrfs_print_leaf(root, path->nodes[0]);
@@ -116,13 +120,10 @@ out:
return ret;
}
-int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root
- *root, struct btrfs_key *key, struct btrfs_root_item
- *item)
+int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ struct btrfs_key *key, struct btrfs_root_item *item)
{
- int ret;
- ret = btrfs_insert_item(trans, root, key, item, sizeof(*item));
- return ret;
+ return btrfs_insert_item(trans, root, key, item, sizeof(*item));
}
/*
@@ -384,6 +385,8 @@ int btrfs_find_root_ref(struct btrfs_root *tree_root,
*
* For a back ref the root_id is the id of the subvol or snapshot and
* ref_id is the id of the tree referencing it.
+ *
+ * Will return 0, -ENOMEM, or anything from the CoW path
*/
int btrfs_add_root_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *tree_root,
@@ -407,7 +410,11 @@ int btrfs_add_root_ref(struct btrfs_trans_handle *trans,
again:
ret = btrfs_insert_empty_item(trans, tree_root, path, &key,
sizeof(*ref) + name_len);
- BUG_ON(ret);
+ if (ret) {
+ btrfs_abort_transaction(trans, tree_root, ret);
+ btrfs_free_path(path);
+ return ret;
+ }
leaf = path->nodes[0];
ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index b9b84cdfc35..2f3d6f917fb 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -36,37 +36,30 @@
* Future enhancements:
* - In case an unrepairable extent is encountered, track which files are
* affected and report them
- * - In case of a read error on files with nodatasum, map the file and read
- * the extent to trigger a writeback of the good copy
* - track and record media errors, throw out bad devices
* - add a mode to also read unallocated space
*/
-struct scrub_bio;
-struct scrub_page;
+struct scrub_block;
struct scrub_dev;
-static void scrub_bio_end_io(struct bio *bio, int err);
-static void scrub_checksum(struct btrfs_work *work);
-static int scrub_checksum_data(struct scrub_dev *sdev,
- struct scrub_page *spag, void *buffer);
-static int scrub_checksum_tree_block(struct scrub_dev *sdev,
- struct scrub_page *spag, u64 logical,
- void *buffer);
-static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer);
-static int scrub_fixup_check(struct scrub_bio *sbio, int ix);
-static void scrub_fixup_end_io(struct bio *bio, int err);
-static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector,
- struct page *page);
-static void scrub_fixup(struct scrub_bio *sbio, int ix);
#define SCRUB_PAGES_PER_BIO 16 /* 64k per bio */
#define SCRUB_BIOS_PER_DEV 16 /* 1 MB per device in flight */
+#define SCRUB_MAX_PAGES_PER_BLOCK 16 /* 64k per node/leaf/sector */
struct scrub_page {
+ struct scrub_block *sblock;
+ struct page *page;
+ struct block_device *bdev;
u64 flags; /* extent flags */
u64 generation;
- int mirror_num;
- int have_csum;
+ u64 logical;
+ u64 physical;
+ struct {
+ unsigned int mirror_num:8;
+ unsigned int have_csum:1;
+ unsigned int io_error:1;
+ };
u8 csum[BTRFS_CSUM_SIZE];
};
@@ -77,12 +70,25 @@ struct scrub_bio {
int err;
u64 logical;
u64 physical;
- struct scrub_page spag[SCRUB_PAGES_PER_BIO];
- u64 count;
+ struct scrub_page *pagev[SCRUB_PAGES_PER_BIO];
+ int page_count;
int next_free;
struct btrfs_work work;
};
+struct scrub_block {
+ struct scrub_page pagev[SCRUB_MAX_PAGES_PER_BLOCK];
+ int page_count;
+ atomic_t outstanding_pages;
+ atomic_t ref_count; /* free mem on transition to zero */
+ struct scrub_dev *sdev;
+ struct {
+ unsigned int header_error:1;
+ unsigned int checksum_error:1;
+ unsigned int no_io_error_seen:1;
+ };
+};
+
struct scrub_dev {
struct scrub_bio *bios[SCRUB_BIOS_PER_DEV];
struct btrfs_device *dev;
@@ -96,6 +102,10 @@ struct scrub_dev {
struct list_head csum_list;
atomic_t cancel_req;
int readonly;
+ int pages_per_bio; /* <= SCRUB_PAGES_PER_BIO */
+ u32 sectorsize;
+ u32 nodesize;
+ u32 leafsize;
/*
* statistics
*/
@@ -124,6 +134,43 @@ struct scrub_warning {
int scratch_bufsize;
};
+
+static int scrub_handle_errored_block(struct scrub_block *sblock_to_check);
+static int scrub_setup_recheck_block(struct scrub_dev *sdev,
+ struct btrfs_mapping_tree *map_tree,
+ u64 length, u64 logical,
+ struct scrub_block *sblock);
+static int scrub_recheck_block(struct btrfs_fs_info *fs_info,
+ struct scrub_block *sblock, int is_metadata,
+ int have_csum, u8 *csum, u64 generation,
+ u16 csum_size);
+static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
+ struct scrub_block *sblock,
+ int is_metadata, int have_csum,
+ const u8 *csum, u64 generation,
+ u16 csum_size);
+static void scrub_complete_bio_end_io(struct bio *bio, int err);
+static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
+ struct scrub_block *sblock_good,
+ int force_write);
+static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
+ struct scrub_block *sblock_good,
+ int page_num, int force_write);
+static int scrub_checksum_data(struct scrub_block *sblock);
+static int scrub_checksum_tree_block(struct scrub_block *sblock);
+static int scrub_checksum_super(struct scrub_block *sblock);
+static void scrub_block_get(struct scrub_block *sblock);
+static void scrub_block_put(struct scrub_block *sblock);
+static int scrub_add_page_to_bio(struct scrub_dev *sdev,
+ struct scrub_page *spage);
+static int scrub_pages(struct scrub_dev *sdev, u64 logical, u64 len,
+ u64 physical, u64 flags, u64 gen, int mirror_num,
+ u8 *csum, int force);
+static void scrub_bio_end_io(struct bio *bio, int err);
+static void scrub_bio_end_io_worker(struct btrfs_work *work);
+static void scrub_block_complete(struct scrub_block *sblock);
+
+
static void scrub_free_csums(struct scrub_dev *sdev)
{
while (!list_empty(&sdev->csum_list)) {
@@ -135,23 +182,6 @@ static void scrub_free_csums(struct scrub_dev *sdev)
}
}
-static void scrub_free_bio(struct bio *bio)
-{
- int i;
- struct page *last_page = NULL;
-
- if (!bio)
- return;
-
- for (i = 0; i < bio->bi_vcnt; ++i) {
- if (bio->bi_io_vec[i].bv_page == last_page)
- continue;
- last_page = bio->bi_io_vec[i].bv_page;
- __free_page(last_page);
- }
- bio_put(bio);
-}
-
static noinline_for_stack void scrub_free_dev(struct scrub_dev *sdev)
{
int i;
@@ -159,13 +189,23 @@ static noinline_for_stack void scrub_free_dev(struct scrub_dev *sdev)
if (!sdev)
return;
+ /* this can happen when scrub is cancelled */
+ if (sdev->curr != -1) {
+ struct scrub_bio *sbio = sdev->bios[sdev->curr];
+
+ for (i = 0; i < sbio->page_count; i++) {
+ BUG_ON(!sbio->pagev[i]);
+ BUG_ON(!sbio->pagev[i]->page);
+ scrub_block_put(sbio->pagev[i]->sblock);
+ }
+ bio_put(sbio->bio);
+ }
+
for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) {
struct scrub_bio *sbio = sdev->bios[i];
if (!sbio)
break;
-
- scrub_free_bio(sbio->bio);
kfree(sbio);
}
@@ -179,11 +219,16 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev)
struct scrub_dev *sdev;
int i;
struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;
+ int pages_per_bio;
+ pages_per_bio = min_t(int, SCRUB_PAGES_PER_BIO,
+ bio_get_nr_vecs(dev->bdev));
sdev = kzalloc(sizeof(*sdev), GFP_NOFS);
if (!sdev)
goto nomem;
sdev->dev = dev;
+ sdev->pages_per_bio = pages_per_bio;
+ sdev->curr = -1;
for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) {
struct scrub_bio *sbio;
@@ -194,8 +239,8 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev)
sbio->index = i;
sbio->sdev = sdev;
- sbio->count = 0;
- sbio->work.func = scrub_checksum;
+ sbio->page_count = 0;
+ sbio->work.func = scrub_bio_end_io_worker;
if (i != SCRUB_BIOS_PER_DEV-1)
sdev->bios[i]->next_free = i + 1;
@@ -203,7 +248,9 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev)
sdev->bios[i]->next_free = -1;
}
sdev->first_free = 0;
- sdev->curr = -1;
+ sdev->nodesize = dev->dev_root->nodesize;
+ sdev->leafsize = dev->dev_root->leafsize;
+ sdev->sectorsize = dev->dev_root->sectorsize;
atomic_set(&sdev->in_flight, 0);
atomic_set(&sdev->fixup_cnt, 0);
atomic_set(&sdev->cancel_req, 0);
@@ -294,10 +341,9 @@ err:
return 0;
}
-static void scrub_print_warning(const char *errstr, struct scrub_bio *sbio,
- int ix)
+static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
{
- struct btrfs_device *dev = sbio->sdev->dev;
+ struct btrfs_device *dev = sblock->sdev->dev;
struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;
struct btrfs_path *path;
struct btrfs_key found_key;
@@ -316,8 +362,9 @@ static void scrub_print_warning(const char *errstr, struct scrub_bio *sbio,
swarn.scratch_buf = kmalloc(bufsize, GFP_NOFS);
swarn.msg_buf = kmalloc(bufsize, GFP_NOFS);
- swarn.sector = (sbio->physical + ix * PAGE_SIZE) >> 9;
- swarn.logical = sbio->logical + ix * PAGE_SIZE;
+ BUG_ON(sblock->page_count < 1);
+ swarn.sector = (sblock->pagev[0].physical) >> 9;
+ swarn.logical = sblock->pagev[0].logical;
swarn.errstr = errstr;
swarn.dev = dev;
swarn.msg_bufsize = bufsize;
@@ -342,7 +389,8 @@ static void scrub_print_warning(const char *errstr, struct scrub_bio *sbio,
do {
ret = tree_backref_for_extent(&ptr, eb, ei, item_size,
&ref_root, &ref_level);
- printk(KERN_WARNING "%s at logical %llu on dev %s, "
+ printk(KERN_WARNING
+ "btrfs: %s at logical %llu on dev %s, "
"sector %llu: metadata %s (level %d) in tree "
"%llu\n", errstr, swarn.logical, dev->name,
(unsigned long long)swarn.sector,
@@ -531,9 +579,9 @@ out:
spin_lock(&sdev->stat_lock);
++sdev->stat.uncorrectable_errors;
spin_unlock(&sdev->stat_lock);
- printk_ratelimited(KERN_ERR "btrfs: unable to fixup "
- "(nodatasum) error at logical %llu\n",
- fixup->logical);
+ printk_ratelimited(KERN_ERR
+ "btrfs: unable to fixup (nodatasum) error at logical %llu on dev %s\n",
+ (unsigned long long)fixup->logical, sdev->dev->name);
}
btrfs_free_path(path);
@@ -550,91 +598,168 @@ out:
}
/*
- * scrub_recheck_error gets called when either verification of the page
- * failed or the bio failed to read, e.g. with EIO. In the latter case,
- * recheck_error gets called for every page in the bio, even though only
- * one may be bad
+ * scrub_handle_errored_block gets called when either verification of the
+ * pages failed or the bio failed to read, e.g. with EIO. In the latter
+ * case, this function handles all pages in the bio, even though only one
+ * may be bad.
+ * The goal of this function is to repair the errored block by using the
+ * contents of one of the mirrors.
*/
-static int scrub_recheck_error(struct scrub_bio *sbio, int ix)
+static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
{
- struct scrub_dev *sdev = sbio->sdev;
- u64 sector = (sbio->physical + ix * PAGE_SIZE) >> 9;
+ struct scrub_dev *sdev = sblock_to_check->sdev;
+ struct btrfs_fs_info *fs_info;
+ u64 length;
+ u64 logical;
+ u64 generation;
+ unsigned int failed_mirror_index;
+ unsigned int is_metadata;
+ unsigned int have_csum;
+ u8 *csum;
+ struct scrub_block *sblocks_for_recheck; /* holds one for each mirror */
+ struct scrub_block *sblock_bad;
+ int ret;
+ int mirror_index;
+ int page_num;
+ int success;
static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
- DEFAULT_RATELIMIT_BURST);
+ DEFAULT_RATELIMIT_BURST);
+
+ BUG_ON(sblock_to_check->page_count < 1);
+ fs_info = sdev->dev->dev_root->fs_info;
+ length = sblock_to_check->page_count * PAGE_SIZE;
+ logical = sblock_to_check->pagev[0].logical;
+ generation = sblock_to_check->pagev[0].generation;
+ BUG_ON(sblock_to_check->pagev[0].mirror_num < 1);
+ failed_mirror_index = sblock_to_check->pagev[0].mirror_num - 1;
+ is_metadata = !(sblock_to_check->pagev[0].flags &
+ BTRFS_EXTENT_FLAG_DATA);
+ have_csum = sblock_to_check->pagev[0].have_csum;
+ csum = sblock_to_check->pagev[0].csum;
- if (sbio->err) {
- if (scrub_fixup_io(READ, sbio->sdev->dev->bdev, sector,
- sbio->bio->bi_io_vec[ix].bv_page) == 0) {
- if (scrub_fixup_check(sbio, ix) == 0)
- return 0;
- }
- if (__ratelimit(&_rs))
- scrub_print_warning("i/o error", sbio, ix);
- } else {
- if (__ratelimit(&_rs))
- scrub_print_warning("checksum error", sbio, ix);
+ /*
+ * read all mirrors one after the other. This includes to
+ * re-read the extent or metadata block that failed (that was
+ * the cause that this fixup code is called) another time,
+ * page by page this time in order to know which pages
+ * caused I/O errors and which ones are good (for all mirrors).
+ * It is the goal to handle the situation when more than one
+ * mirror contains I/O errors, but the errors do not
+ * overlap, i.e. the data can be repaired by selecting the
+ * pages from those mirrors without I/O error on the
+ * particular pages. One example (with blocks >= 2 * PAGE_SIZE)
+ * would be that mirror #1 has an I/O error on the first page,
+ * the second page is good, and mirror #2 has an I/O error on
+ * the second page, but the first page is good.
+ * Then the first page of the first mirror can be repaired by
+ * taking the first page of the second mirror, and the
+ * second page of the second mirror can be repaired by
+ * copying the contents of the 2nd page of the 1st mirror.
+ * One more note: if the pages of one mirror contain I/O
+ * errors, the checksum cannot be verified. In order to get
+ * the best data for repairing, the first attempt is to find
+ * a mirror without I/O errors and with a validated checksum.
+ * Only if this is not possible, the pages are picked from
+ * mirrors with I/O errors without considering the checksum.
+ * If the latter is the case, at the end, the checksum of the
+ * repaired area is verified in order to correctly maintain
+ * the statistics.
+ */
+
+ sblocks_for_recheck = kzalloc(BTRFS_MAX_MIRRORS *
+ sizeof(*sblocks_for_recheck),
+ GFP_NOFS);
+ if (!sblocks_for_recheck) {
+ spin_lock(&sdev->stat_lock);
+ sdev->stat.malloc_errors++;
+ sdev->stat.read_errors++;
+ sdev->stat.uncorrectable_errors++;
+ spin_unlock(&sdev->stat_lock);
+ goto out;
}
- spin_lock(&sdev->stat_lock);
- ++sdev->stat.read_errors;
- spin_unlock(&sdev->stat_lock);
+ /* setup the context, map the logical blocks and alloc the pages */
+ ret = scrub_setup_recheck_block(sdev, &fs_info->mapping_tree, length,
+ logical, sblocks_for_recheck);
+ if (ret) {
+ spin_lock(&sdev->stat_lock);
+ sdev->stat.read_errors++;
+ sdev->stat.uncorrectable_errors++;
+ spin_unlock(&sdev->stat_lock);
+ goto out;
+ }
+ BUG_ON(failed_mirror_index >= BTRFS_MAX_MIRRORS);
+ sblock_bad = sblocks_for_recheck + failed_mirror_index;
- scrub_fixup(sbio, ix);
- return 1;
-}
+ /* build and submit the bios for the failed mirror, check checksums */
+ ret = scrub_recheck_block(fs_info, sblock_bad, is_metadata, have_csum,
+ csum, generation, sdev->csum_size);
+ if (ret) {
+ spin_lock(&sdev->stat_lock);
+ sdev->stat.read_errors++;
+ sdev->stat.uncorrectable_errors++;
+ spin_unlock(&sdev->stat_lock);
+ goto out;
+ }
-static int scrub_fixup_check(struct scrub_bio *sbio, int ix)
-{
- int ret = 1;
- struct page *page;
- void *buffer;
- u64 flags = sbio->spag[ix].flags;
+ if (!sblock_bad->header_error && !sblock_bad->checksum_error &&
+ sblock_bad->no_io_error_seen) {
+ /*
+ * the error disappeared after reading page by page, or
+ * the area was part of a huge bio and other parts of the
+ * bio caused I/O errors, or the block layer merged several
+ * read requests into one and the error is caused by a
+ * different bio (usually one of the two latter cases is
+ * the cause)
+ */
+ spin_lock(&sdev->stat_lock);
+ sdev->stat.unverified_errors++;
+ spin_unlock(&sdev->stat_lock);
- page = sbio->bio->bi_io_vec[ix].bv_page;
- buffer = kmap_atomic(page, KM_USER0);
- if (flags & BTRFS_EXTENT_FLAG_DATA) {
- ret = scrub_checksum_data(sbio->sdev,
- sbio->spag + ix, buffer);
- } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
- ret = scrub_checksum_tree_block(sbio->sdev,
- sbio->spag + ix,
- sbio->logical + ix * PAGE_SIZE,
- buffer);
- } else {
- WARN_ON(1);
+ goto out;
}
- kunmap_atomic(buffer, KM_USER0);
- return ret;
-}
+ if (!sblock_bad->no_io_error_seen) {
+ spin_lock(&sdev->stat_lock);
+ sdev->stat.read_errors++;
+ spin_unlock(&sdev->stat_lock);
+ if (__ratelimit(&_rs))
+ scrub_print_warning("i/o error", sblock_to_check);
+ } else if (sblock_bad->checksum_error) {
+ spin_lock(&sdev->stat_lock);
+ sdev->stat.csum_errors++;
+ spin_unlock(&sdev->stat_lock);
+ if (__ratelimit(&_rs))
+ scrub_print_warning("checksum error", sblock_to_check);
+ } else if (sblock_bad->header_error) {
+ spin_lock(&sdev->stat_lock);
+ sdev->stat.verify_errors++;
+ spin_unlock(&sdev->stat_lock);
+ if (__ratelimit(&_rs))
+ scrub_print_warning("checksum/header error",
+ sblock_to_check);
+ }
-static void scrub_fixup_end_io(struct bio *bio, int err)
-{
- complete((struct completion *)bio->bi_private);
-}
+ if (sdev->readonly)
+ goto did_not_correct_error;
-static void scrub_fixup(struct scrub_bio *sbio, int ix)
-{
- struct scrub_dev *sdev = sbio->sdev;
- struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
- struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
- struct btrfs_bio *bbio = NULL;
- struct scrub_fixup_nodatasum *fixup;
- u64 logical = sbio->logical + ix * PAGE_SIZE;
- u64 length;
- int i;
- int ret;
- DECLARE_COMPLETION_ONSTACK(complete);
-
- if ((sbio->spag[ix].flags & BTRFS_EXTENT_FLAG_DATA) &&
- (sbio->spag[ix].have_csum == 0)) {
- fixup = kzalloc(sizeof(*fixup), GFP_NOFS);
- if (!fixup)
- goto uncorrectable;
- fixup->sdev = sdev;
- fixup->logical = logical;
- fixup->root = fs_info->extent_root;
- fixup->mirror_num = sbio->spag[ix].mirror_num;
+ if (!is_metadata && !have_csum) {
+ struct scrub_fixup_nodatasum *fixup_nodatasum;
+
+ /*
+ * !is_metadata and !have_csum, this means that the data
+ * might not be COW'ed, that it might be modified
+ * concurrently. The general strategy to work on the
+ * commit root does not help in the case when COW is not
+ * used.
+ */
+ fixup_nodatasum = kzalloc(sizeof(*fixup_nodatasum), GFP_NOFS);
+ if (!fixup_nodatasum)
+ goto did_not_correct_error;
+ fixup_nodatasum->sdev = sdev;
+ fixup_nodatasum->logical = logical;
+ fixup_nodatasum->root = fs_info->extent_root;
+ fixup_nodatasum->mirror_num = failed_mirror_index + 1;
/*
* increment scrubs_running to prevent cancel requests from
* completing as long as a fixup worker is running. we must also
@@ -649,235 +774,533 @@ static void scrub_fixup(struct scrub_bio *sbio, int ix)
atomic_inc(&fs_info->scrubs_paused);
mutex_unlock(&fs_info->scrub_lock);
atomic_inc(&sdev->fixup_cnt);
- fixup->work.func = scrub_fixup_nodatasum;
- btrfs_queue_worker(&fs_info->scrub_workers, &fixup->work);
- return;
+ fixup_nodatasum->work.func = scrub_fixup_nodatasum;
+ btrfs_queue_worker(&fs_info->scrub_workers,
+ &fixup_nodatasum->work);
+ goto out;
}
- length = PAGE_SIZE;
- ret = btrfs_map_block(map_tree, REQ_WRITE, logical, &length,
- &bbio, 0);
- if (ret || !bbio || length < PAGE_SIZE) {
- printk(KERN_ERR
- "scrub_fixup: btrfs_map_block failed us for %llu\n",
- (unsigned long long)logical);
- WARN_ON(1);
- kfree(bbio);
- return;
+ /*
+ * now build and submit the bios for the other mirrors, check
+ * checksums
+ */
+ for (mirror_index = 0;
+ mirror_index < BTRFS_MAX_MIRRORS &&
+ sblocks_for_recheck[mirror_index].page_count > 0;
+ mirror_index++) {
+ if (mirror_index == failed_mirror_index)
+ continue;
+
+ /* build and submit the bios, check checksums */
+ ret = scrub_recheck_block(fs_info,
+ sblocks_for_recheck + mirror_index,
+ is_metadata, have_csum, csum,
+ generation, sdev->csum_size);
+ if (ret)
+ goto did_not_correct_error;
}
- if (bbio->num_stripes == 1)
- /* there aren't any replicas */
- goto uncorrectable;
+ /*
+ * first try to pick the mirror which is completely without I/O
+ * errors and also does not have a checksum error.
+ * If one is found, and if a checksum is present, the full block
+ * that is known to contain an error is rewritten. Afterwards
+ * the block is known to be corrected.
+ * If a mirror is found which is completely correct, and no
+ * checksum is present, only those pages are rewritten that had
+ * an I/O error in the block to be repaired, since it cannot be
+ * determined, which copy of the other pages is better (and it
+ * could happen otherwise that a correct page would be
+ * overwritten by a bad one).
+ */
+ for (mirror_index = 0;
+ mirror_index < BTRFS_MAX_MIRRORS &&
+ sblocks_for_recheck[mirror_index].page_count > 0;
+ mirror_index++) {
+ struct scrub_block *sblock_other = sblocks_for_recheck +
+ mirror_index;
+
+ if (!sblock_other->header_error &&
+ !sblock_other->checksum_error &&
+ sblock_other->no_io_error_seen) {
+ int force_write = is_metadata || have_csum;
+
+ ret = scrub_repair_block_from_good_copy(sblock_bad,
+ sblock_other,
+ force_write);
+ if (0 == ret)
+ goto corrected_error;
+ }
+ }
/*
- * first find a good copy
+ * in case of I/O errors in the area that is supposed to be
+ * repaired, continue by picking good copies of those pages.
+ * Select the good pages from mirrors to rewrite bad pages from
+ * the area to fix. Afterwards verify the checksum of the block
+ * that is supposed to be repaired. This verification step is
+ * only done for the purpose of statistic counting and for the
+ * final scrub report, whether errors remain.
+ * A perfect algorithm could make use of the checksum and try
+ * all possible combinations of pages from the different mirrors
+ * until the checksum verification succeeds. For example, when
+ * the 2nd page of mirror #1 faces I/O errors, and the 2nd page
+ * of mirror #2 is readable but the final checksum test fails,
+ * then the 2nd page of mirror #3 could be tried, whether now
+ * the final checksum succeedes. But this would be a rare
+ * exception and is therefore not implemented. At least it is
+ * avoided that the good copy is overwritten.
+ * A more useful improvement would be to pick the sectors
+ * without I/O error based on sector sizes (512 bytes on legacy
+ * disks) instead of on PAGE_SIZE. Then maybe 512 byte of one
+ * mirror could be repaired by taking 512 byte of a different
+ * mirror, even if other 512 byte sectors in the same PAGE_SIZE
+ * area are unreadable.
*/
- for (i = 0; i < bbio->num_stripes; ++i) {
- if (i + 1 == sbio->spag[ix].mirror_num)
- continue;
- if (scrub_fixup_io(READ, bbio->stripes[i].dev->bdev,
- bbio->stripes[i].physical >> 9,
- sbio->bio->bi_io_vec[ix].bv_page)) {
- /* I/O-error, this is not a good copy */
+ /* can only fix I/O errors from here on */
+ if (sblock_bad->no_io_error_seen)
+ goto did_not_correct_error;
+
+ success = 1;
+ for (page_num = 0; page_num < sblock_bad->page_count; page_num++) {
+ struct scrub_page *page_bad = sblock_bad->pagev + page_num;
+
+ if (!page_bad->io_error)
continue;
+
+ for (mirror_index = 0;
+ mirror_index < BTRFS_MAX_MIRRORS &&
+ sblocks_for_recheck[mirror_index].page_count > 0;
+ mirror_index++) {
+ struct scrub_block *sblock_other = sblocks_for_recheck +
+ mirror_index;
+ struct scrub_page *page_other = sblock_other->pagev +
+ page_num;
+
+ if (!page_other->io_error) {
+ ret = scrub_repair_page_from_good_copy(
+ sblock_bad, sblock_other, page_num, 0);
+ if (0 == ret) {
+ page_bad->io_error = 0;
+ break; /* succeeded for this page */
+ }
+ }
}
- if (scrub_fixup_check(sbio, ix) == 0)
- break;
+ if (page_bad->io_error) {
+ /* did not find a mirror to copy the page from */
+ success = 0;
+ }
}
- if (i == bbio->num_stripes)
- goto uncorrectable;
- if (!sdev->readonly) {
- /*
- * bi_io_vec[ix].bv_page now contains good data, write it back
- */
- if (scrub_fixup_io(WRITE, sdev->dev->bdev,
- (sbio->physical + ix * PAGE_SIZE) >> 9,
- sbio->bio->bi_io_vec[ix].bv_page)) {
- /* I/O-error, writeback failed, give up */
- goto uncorrectable;
+ if (success) {
+ if (is_metadata || have_csum) {
+ /*
+ * need to verify the checksum now that all
+ * sectors on disk are repaired (the write
+ * request for data to be repaired is on its way).
+ * Just be lazy and use scrub_recheck_block()
+ * which re-reads the data before the checksum
+ * is verified, but most likely the data comes out
+ * of the page cache.
+ */
+ ret = scrub_recheck_block(fs_info, sblock_bad,
+ is_metadata, have_csum, csum,
+ generation, sdev->csum_size);
+ if (!ret && !sblock_bad->header_error &&
+ !sblock_bad->checksum_error &&
+ sblock_bad->no_io_error_seen)
+ goto corrected_error;
+ else
+ goto did_not_correct_error;
+ } else {
+corrected_error:
+ spin_lock(&sdev->stat_lock);
+ sdev->stat.corrected_errors++;
+ spin_unlock(&sdev->stat_lock);
+ printk_ratelimited(KERN_ERR
+ "btrfs: fixed up error at logical %llu on dev %s\n",
+ (unsigned long long)logical, sdev->dev->name);
}
+ } else {
+did_not_correct_error:
+ spin_lock(&sdev->stat_lock);
+ sdev->stat.uncorrectable_errors++;
+ spin_unlock(&sdev->stat_lock);
+ printk_ratelimited(KERN_ERR
+ "btrfs: unable to fixup (regular) error at logical %llu on dev %s\n",
+ (unsigned long long)logical, sdev->dev->name);
}
- kfree(bbio);
- spin_lock(&sdev->stat_lock);
- ++sdev->stat.corrected_errors;
- spin_unlock(&sdev->stat_lock);
+out:
+ if (sblocks_for_recheck) {
+ for (mirror_index = 0; mirror_index < BTRFS_MAX_MIRRORS;
+ mirror_index++) {
+ struct scrub_block *sblock = sblocks_for_recheck +
+ mirror_index;
+ int page_index;
+
+ for (page_index = 0; page_index < SCRUB_PAGES_PER_BIO;
+ page_index++)
+ if (sblock->pagev[page_index].page)
+ __free_page(
+ sblock->pagev[page_index].page);
+ }
+ kfree(sblocks_for_recheck);
+ }
- printk_ratelimited(KERN_ERR "btrfs: fixed up error at logical %llu\n",
- (unsigned long long)logical);
- return;
+ return 0;
+}
-uncorrectable:
- kfree(bbio);
- spin_lock(&sdev->stat_lock);
- ++sdev->stat.uncorrectable_errors;
- spin_unlock(&sdev->stat_lock);
+static int scrub_setup_recheck_block(struct scrub_dev *sdev,
+ struct btrfs_mapping_tree *map_tree,
+ u64 length, u64 logical,
+ struct scrub_block *sblocks_for_recheck)
+{
+ int page_index;
+ int mirror_index;
+ int ret;
+
+ /*
+ * note: the three members sdev, ref_count and outstanding_pages
+ * are not used (and not set) in the blocks that are used for
+ * the recheck procedure
+ */
+
+ page_index = 0;
+ while (length > 0) {
+ u64 sublen = min_t(u64, length, PAGE_SIZE);
+ u64 mapped_length = sublen;
+ struct btrfs_bio *bbio = NULL;
+
+ /*
+ * with a length of PAGE_SIZE, each returned stripe
+ * represents one mirror
+ */
+ ret = btrfs_map_block(map_tree, WRITE, logical, &mapped_length,
+ &bbio, 0);
+ if (ret || !bbio || mapped_length < sublen) {
+ kfree(bbio);
+ return -EIO;
+ }
- printk_ratelimited(KERN_ERR "btrfs: unable to fixup (regular) error at "
- "logical %llu\n", (unsigned long long)logical);
+ BUG_ON(page_index >= SCRUB_PAGES_PER_BIO);
+ for (mirror_index = 0; mirror_index < (int)bbio->num_stripes;
+ mirror_index++) {
+ struct scrub_block *sblock;
+ struct scrub_page *page;
+
+ if (mirror_index >= BTRFS_MAX_MIRRORS)
+ continue;
+
+ sblock = sblocks_for_recheck + mirror_index;
+ page = sblock->pagev + page_index;
+ page->logical = logical;
+ page->physical = bbio->stripes[mirror_index].physical;
+ /* for missing devices, bdev is NULL */
+ page->bdev = bbio->stripes[mirror_index].dev->bdev;
+ page->mirror_num = mirror_index + 1;
+ page->page = alloc_page(GFP_NOFS);
+ if (!page->page) {
+ spin_lock(&sdev->stat_lock);
+ sdev->stat.malloc_errors++;
+ spin_unlock(&sdev->stat_lock);
+ return -ENOMEM;
+ }
+ sblock->page_count++;
+ }
+ kfree(bbio);
+ length -= sublen;
+ logical += sublen;
+ page_index++;
+ }
+
+ return 0;
}
-static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector,
- struct page *page)
+/*
+ * this function will check the on disk data for checksum errors, header
+ * errors and read I/O errors. If any I/O errors happen, the exact pages
+ * which are errored are marked as being bad. The goal is to enable scrub
+ * to take those pages that are not errored from all the mirrors so that
+ * the pages that are errored in the just handled mirror can be repaired.
+ */
+static int scrub_recheck_block(struct btrfs_fs_info *fs_info,
+ struct scrub_block *sblock, int is_metadata,
+ int have_csum, u8 *csum, u64 generation,
+ u16 csum_size)
{
- struct bio *bio = NULL;
- int ret;
- DECLARE_COMPLETION_ONSTACK(complete);
+ int page_num;
- bio = bio_alloc(GFP_NOFS, 1);
- bio->bi_bdev = bdev;
- bio->bi_sector = sector;
- bio_add_page(bio, page, PAGE_SIZE, 0);
- bio->bi_end_io = scrub_fixup_end_io;
- bio->bi_private = &complete;
- btrfsic_submit_bio(rw, bio);
+ sblock->no_io_error_seen = 1;
+ sblock->header_error = 0;
+ sblock->checksum_error = 0;
- /* this will also unplug the queue */
- wait_for_completion(&complete);
+ for (page_num = 0; page_num < sblock->page_count; page_num++) {
+ struct bio *bio;
+ int ret;
+ struct scrub_page *page = sblock->pagev + page_num;
+ DECLARE_COMPLETION_ONSTACK(complete);
- ret = !test_bit(BIO_UPTODATE, &bio->bi_flags);
- bio_put(bio);
- return ret;
+ if (page->bdev == NULL) {
+ page->io_error = 1;
+ sblock->no_io_error_seen = 0;
+ continue;
+ }
+
+ BUG_ON(!page->page);
+ bio = bio_alloc(GFP_NOFS, 1);
+ if (!bio)
+ return -EIO;
+ bio->bi_bdev = page->bdev;
+ bio->bi_sector = page->physical >> 9;
+ bio->bi_end_io = scrub_complete_bio_end_io;
+ bio->bi_private = &complete;
+
+ ret = bio_add_page(bio, page->page, PAGE_SIZE, 0);
+ if (PAGE_SIZE != ret) {
+ bio_put(bio);
+ return -EIO;
+ }
+ btrfsic_submit_bio(READ, bio);
+
+ /* this will also unplug the queue */
+ wait_for_completion(&complete);
+
+ page->io_error = !test_bit(BIO_UPTODATE, &bio->bi_flags);
+ if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
+ sblock->no_io_error_seen = 0;
+ bio_put(bio);
+ }
+
+ if (sblock->no_io_error_seen)
+ scrub_recheck_block_checksum(fs_info, sblock, is_metadata,
+ have_csum, csum, generation,
+ csum_size);
+
+ return 0;
}
-static void scrub_bio_end_io(struct bio *bio, int err)
+static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
+ struct scrub_block *sblock,
+ int is_metadata, int have_csum,
+ const u8 *csum, u64 generation,
+ u16 csum_size)
{
- struct scrub_bio *sbio = bio->bi_private;
- struct scrub_dev *sdev = sbio->sdev;
- struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
+ int page_num;
+ u8 calculated_csum[BTRFS_CSUM_SIZE];
+ u32 crc = ~(u32)0;
+ struct btrfs_root *root = fs_info->extent_root;
+ void *mapped_buffer;
+
+ BUG_ON(!sblock->pagev[0].page);
+ if (is_metadata) {
+ struct btrfs_header *h;
+
+ mapped_buffer = kmap_atomic(sblock->pagev[0].page);
+ h = (struct btrfs_header *)mapped_buffer;
+
+ if (sblock->pagev[0].logical != le64_to_cpu(h->bytenr) ||
+ generation != le64_to_cpu(h->generation) ||
+ memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE) ||
+ memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
+ BTRFS_UUID_SIZE))
+ sblock->header_error = 1;
+ csum = h->csum;
+ } else {
+ if (!have_csum)
+ return;
- sbio->err = err;
- sbio->bio = bio;
+ mapped_buffer = kmap_atomic(sblock->pagev[0].page);
+ }
- btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work);
+ for (page_num = 0;;) {
+ if (page_num == 0 && is_metadata)
+ crc = btrfs_csum_data(root,
+ ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE,
+ crc, PAGE_SIZE - BTRFS_CSUM_SIZE);
+ else
+ crc = btrfs_csum_data(root, mapped_buffer, crc,
+ PAGE_SIZE);
+
+ kunmap_atomic(mapped_buffer);
+ page_num++;
+ if (page_num >= sblock->page_count)
+ break;
+ BUG_ON(!sblock->pagev[page_num].page);
+
+ mapped_buffer = kmap_atomic(sblock->pagev[page_num].page);
+ }
+
+ btrfs_csum_final(crc, calculated_csum);
+ if (memcmp(calculated_csum, csum, csum_size))
+ sblock->checksum_error = 1;
}
-static void scrub_checksum(struct btrfs_work *work)
+static void scrub_complete_bio_end_io(struct bio *bio, int err)
{
- struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
- struct scrub_dev *sdev = sbio->sdev;
- struct page *page;
- void *buffer;
- int i;
- u64 flags;
- u64 logical;
- int ret;
+ complete((struct completion *)bio->bi_private);
+}
- if (sbio->err) {
- ret = 0;
- for (i = 0; i < sbio->count; ++i)
- ret |= scrub_recheck_error(sbio, i);
- if (!ret) {
- spin_lock(&sdev->stat_lock);
- ++sdev->stat.unverified_errors;
- spin_unlock(&sdev->stat_lock);
- }
+static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
+ struct scrub_block *sblock_good,
+ int force_write)
+{
+ int page_num;
+ int ret = 0;
- sbio->bio->bi_flags &= ~(BIO_POOL_MASK - 1);
- sbio->bio->bi_flags |= 1 << BIO_UPTODATE;
- sbio->bio->bi_phys_segments = 0;
- sbio->bio->bi_idx = 0;
+ for (page_num = 0; page_num < sblock_bad->page_count; page_num++) {
+ int ret_sub;
- for (i = 0; i < sbio->count; i++) {
- struct bio_vec *bi;
- bi = &sbio->bio->bi_io_vec[i];
- bi->bv_offset = 0;
- bi->bv_len = PAGE_SIZE;
- }
- goto out;
+ ret_sub = scrub_repair_page_from_good_copy(sblock_bad,
+ sblock_good,
+ page_num,
+ force_write);
+ if (ret_sub)
+ ret = ret_sub;
}
- for (i = 0; i < sbio->count; ++i) {
- page = sbio->bio->bi_io_vec[i].bv_page;
- buffer = kmap_atomic(page, KM_USER0);
- flags = sbio->spag[i].flags;
- logical = sbio->logical + i * PAGE_SIZE;
- ret = 0;
- if (flags & BTRFS_EXTENT_FLAG_DATA) {
- ret = scrub_checksum_data(sdev, sbio->spag + i, buffer);
- } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
- ret = scrub_checksum_tree_block(sdev, sbio->spag + i,
- logical, buffer);
- } else if (flags & BTRFS_EXTENT_FLAG_SUPER) {
- BUG_ON(i);
- (void)scrub_checksum_super(sbio, buffer);
- } else {
- WARN_ON(1);
- }
- kunmap_atomic(buffer, KM_USER0);
- if (ret) {
- ret = scrub_recheck_error(sbio, i);
- if (!ret) {
- spin_lock(&sdev->stat_lock);
- ++sdev->stat.unverified_errors;
- spin_unlock(&sdev->stat_lock);
- }
+
+ return ret;
+}
+
+static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
+ struct scrub_block *sblock_good,
+ int page_num, int force_write)
+{
+ struct scrub_page *page_bad = sblock_bad->pagev + page_num;
+ struct scrub_page *page_good = sblock_good->pagev + page_num;
+
+ BUG_ON(sblock_bad->pagev[page_num].page == NULL);
+ BUG_ON(sblock_good->pagev[page_num].page == NULL);
+ if (force_write || sblock_bad->header_error ||
+ sblock_bad->checksum_error || page_bad->io_error) {
+ struct bio *bio;
+ int ret;
+ DECLARE_COMPLETION_ONSTACK(complete);
+
+ bio = bio_alloc(GFP_NOFS, 1);
+ if (!bio)
+ return -EIO;
+ bio->bi_bdev = page_bad->bdev;
+ bio->bi_sector = page_bad->physical >> 9;
+ bio->bi_end_io = scrub_complete_bio_end_io;
+ bio->bi_private = &complete;
+
+ ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0);
+ if (PAGE_SIZE != ret) {
+ bio_put(bio);
+ return -EIO;
}
+ btrfsic_submit_bio(WRITE, bio);
+
+ /* this will also unplug the queue */
+ wait_for_completion(&complete);
+ bio_put(bio);
}
-out:
- scrub_free_bio(sbio->bio);
- sbio->bio = NULL;
- spin_lock(&sdev->list_lock);
- sbio->next_free = sdev->first_free;
- sdev->first_free = sbio->index;
- spin_unlock(&sdev->list_lock);
- atomic_dec(&sdev->in_flight);
- wake_up(&sdev->list_wait);
+ return 0;
}
-static int scrub_checksum_data(struct scrub_dev *sdev,
- struct scrub_page *spag, void *buffer)
+static void scrub_checksum(struct scrub_block *sblock)
{
+ u64 flags;
+ int ret;
+
+ BUG_ON(sblock->page_count < 1);
+ flags = sblock->pagev[0].flags;
+ ret = 0;
+ if (flags & BTRFS_EXTENT_FLAG_DATA)
+ ret = scrub_checksum_data(sblock);
+ else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
+ ret = scrub_checksum_tree_block(sblock);
+ else if (flags & BTRFS_EXTENT_FLAG_SUPER)
+ (void)scrub_checksum_super(sblock);
+ else
+ WARN_ON(1);
+ if (ret)
+ scrub_handle_errored_block(sblock);
+}
+
+static int scrub_checksum_data(struct scrub_block *sblock)
+{
+ struct scrub_dev *sdev = sblock->sdev;
u8 csum[BTRFS_CSUM_SIZE];
+ u8 *on_disk_csum;
+ struct page *page;
+ void *buffer;
u32 crc = ~(u32)0;
int fail = 0;
struct btrfs_root *root = sdev->dev->dev_root;
+ u64 len;
+ int index;
- if (!spag->have_csum)
+ BUG_ON(sblock->page_count < 1);
+ if (!sblock->pagev[0].have_csum)
return 0;
- crc = btrfs_csum_data(root, buffer, crc, PAGE_SIZE);
+ on_disk_csum = sblock->pagev[0].csum;
+ page = sblock->pagev[0].page;
+ buffer = kmap_atomic(page);
+
+ len = sdev->sectorsize;
+ index = 0;
+ for (;;) {
+ u64 l = min_t(u64, len, PAGE_SIZE);
+
+ crc = btrfs_csum_data(root, buffer, crc, l);
+ kunmap_atomic(buffer);
+ len -= l;
+ if (len == 0)
+ break;
+ index++;
+ BUG_ON(index >= sblock->page_count);
+ BUG_ON(!sblock->pagev[index].page);
+ page = sblock->pagev[index].page;
+ buffer = kmap_atomic(page);
+ }
+
btrfs_csum_final(crc, csum);
- if (memcmp(csum, spag->csum, sdev->csum_size))
+ if (memcmp(csum, on_disk_csum, sdev->csum_size))
fail = 1;
- spin_lock(&sdev->stat_lock);
- ++sdev->stat.data_extents_scrubbed;
- sdev->stat.data_bytes_scrubbed += PAGE_SIZE;
- if (fail)
- ++sdev->stat.csum_errors;
- spin_unlock(&sdev->stat_lock);
-
return fail;
}
-static int scrub_checksum_tree_block(struct scrub_dev *sdev,
- struct scrub_page *spag, u64 logical,
- void *buffer)
+static int scrub_checksum_tree_block(struct scrub_block *sblock)
{
+ struct scrub_dev *sdev = sblock->sdev;
struct btrfs_header *h;
struct btrfs_root *root = sdev->dev->dev_root;
struct btrfs_fs_info *fs_info = root->fs_info;
- u8 csum[BTRFS_CSUM_SIZE];
+ u8 calculated_csum[BTRFS_CSUM_SIZE];
+ u8 on_disk_csum[BTRFS_CSUM_SIZE];
+ struct page *page;
+ void *mapped_buffer;
+ u64 mapped_size;
+ void *p;
u32 crc = ~(u32)0;
int fail = 0;
int crc_fail = 0;
+ u64 len;
+ int index;
+
+ BUG_ON(sblock->page_count < 1);
+ page = sblock->pagev[0].page;
+ mapped_buffer = kmap_atomic(page);
+ h = (struct btrfs_header *)mapped_buffer;
+ memcpy(on_disk_csum, h->csum, sdev->csum_size);
/*
* we don't use the getter functions here, as we
* a) don't have an extent buffer and
* b) the page is already kmapped
*/
- h = (struct btrfs_header *)buffer;
- if (logical != le64_to_cpu(h->bytenr))
+ if (sblock->pagev[0].logical != le64_to_cpu(h->bytenr))
++fail;
- if (spag->generation != le64_to_cpu(h->generation))
+ if (sblock->pagev[0].generation != le64_to_cpu(h->generation))
++fail;
if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
@@ -887,51 +1310,90 @@ static int scrub_checksum_tree_block(struct scrub_dev *sdev,
BTRFS_UUID_SIZE))
++fail;
- crc = btrfs_csum_data(root, buffer + BTRFS_CSUM_SIZE, crc,
- PAGE_SIZE - BTRFS_CSUM_SIZE);
- btrfs_csum_final(crc, csum);
- if (memcmp(csum, h->csum, sdev->csum_size))
- ++crc_fail;
+ BUG_ON(sdev->nodesize != sdev->leafsize);
+ len = sdev->nodesize - BTRFS_CSUM_SIZE;
+ mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
+ p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
+ index = 0;
+ for (;;) {
+ u64 l = min_t(u64, len, mapped_size);
- spin_lock(&sdev->stat_lock);
- ++sdev->stat.tree_extents_scrubbed;
- sdev->stat.tree_bytes_scrubbed += PAGE_SIZE;
- if (crc_fail)
- ++sdev->stat.csum_errors;
- if (fail)
- ++sdev->stat.verify_errors;
- spin_unlock(&sdev->stat_lock);
+ crc = btrfs_csum_data(root, p, crc, l);
+ kunmap_atomic(mapped_buffer);
+ len -= l;
+ if (len == 0)
+ break;
+ index++;
+ BUG_ON(index >= sblock->page_count);
+ BUG_ON(!sblock->pagev[index].page);
+ page = sblock->pagev[index].page;
+ mapped_buffer = kmap_atomic(page);
+ mapped_size = PAGE_SIZE;
+ p = mapped_buffer;
+ }
+
+ btrfs_csum_final(crc, calculated_csum);
+ if (memcmp(calculated_csum, on_disk_csum, sdev->csum_size))
+ ++crc_fail;
return fail || crc_fail;
}
-static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer)
+static int scrub_checksum_super(struct scrub_block *sblock)
{
struct btrfs_super_block *s;
- u64 logical;
- struct scrub_dev *sdev = sbio->sdev;
+ struct scrub_dev *sdev = sblock->sdev;
struct btrfs_root *root = sdev->dev->dev_root;
struct btrfs_fs_info *fs_info = root->fs_info;
- u8 csum[BTRFS_CSUM_SIZE];
+ u8 calculated_csum[BTRFS_CSUM_SIZE];
+ u8 on_disk_csum[BTRFS_CSUM_SIZE];
+ struct page *page;
+ void *mapped_buffer;
+ u64 mapped_size;
+ void *p;
u32 crc = ~(u32)0;
int fail = 0;
+ u64 len;
+ int index;
- s = (struct btrfs_super_block *)buffer;
- logical = sbio->logical;
+ BUG_ON(sblock->page_count < 1);
+ page = sblock->pagev[0].page;
+ mapped_buffer = kmap_atomic(page);
+ s = (struct btrfs_super_block *)mapped_buffer;
+ memcpy(on_disk_csum, s->csum, sdev->csum_size);
- if (logical != le64_to_cpu(s->bytenr))
+ if (sblock->pagev[0].logical != le64_to_cpu(s->bytenr))
++fail;
- if (sbio->spag[0].generation != le64_to_cpu(s->generation))
+ if (sblock->pagev[0].generation != le64_to_cpu(s->generation))
++fail;
if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
++fail;
- crc = btrfs_csum_data(root, buffer + BTRFS_CSUM_SIZE, crc,
- PAGE_SIZE - BTRFS_CSUM_SIZE);
- btrfs_csum_final(crc, csum);
- if (memcmp(csum, s->csum, sbio->sdev->csum_size))
+ len = BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE;
+ mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
+ p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
+ index = 0;
+ for (;;) {
+ u64 l = min_t(u64, len, mapped_size);
+
+ crc = btrfs_csum_data(root, p, crc, l);
+ kunmap_atomic(mapped_buffer);
+ len -= l;
+ if (len == 0)
+ break;
+ index++;
+ BUG_ON(index >= sblock->page_count);
+ BUG_ON(!sblock->pagev[index].page);
+ page = sblock->pagev[index].page;
+ mapped_buffer = kmap_atomic(page);
+ mapped_size = PAGE_SIZE;
+ p = mapped_buffer;
+ }
+
+ btrfs_csum_final(crc, calculated_csum);
+ if (memcmp(calculated_csum, on_disk_csum, sdev->csum_size))
++fail;
if (fail) {
@@ -948,29 +1410,42 @@ static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer)
return fail;
}
-static int scrub_submit(struct scrub_dev *sdev)
+static void scrub_block_get(struct scrub_block *sblock)
+{
+ atomic_inc(&sblock->ref_count);
+}
+
+static void scrub_block_put(struct scrub_block *sblock)
+{
+ if (atomic_dec_and_test(&sblock->ref_count)) {
+ int i;
+
+ for (i = 0; i < sblock->page_count; i++)
+ if (sblock->pagev[i].page)
+ __free_page(sblock->pagev[i].page);
+ kfree(sblock);
+ }
+}
+
+static void scrub_submit(struct scrub_dev *sdev)
{
struct scrub_bio *sbio;
if (sdev->curr == -1)
- return 0;
+ return;
sbio = sdev->bios[sdev->curr];
- sbio->err = 0;
sdev->curr = -1;
atomic_inc(&sdev->in_flight);
btrfsic_submit_bio(READ, sbio->bio);
-
- return 0;
}
-static int scrub_page(struct scrub_dev *sdev, u64 logical, u64 len,
- u64 physical, u64 flags, u64 gen, int mirror_num,
- u8 *csum, int force)
+static int scrub_add_page_to_bio(struct scrub_dev *sdev,
+ struct scrub_page *spage)
{
+ struct scrub_block *sblock = spage->sblock;
struct scrub_bio *sbio;
- struct page *page;
int ret;
again:
@@ -983,7 +1458,7 @@ again:
if (sdev->curr != -1) {
sdev->first_free = sdev->bios[sdev->curr]->next_free;
sdev->bios[sdev->curr]->next_free = -1;
- sdev->bios[sdev->curr]->count = 0;
+ sdev->bios[sdev->curr]->page_count = 0;
spin_unlock(&sdev->list_lock);
} else {
spin_unlock(&sdev->list_lock);
@@ -991,62 +1466,200 @@ again:
}
}
sbio = sdev->bios[sdev->curr];
- if (sbio->count == 0) {
+ if (sbio->page_count == 0) {
struct bio *bio;
- sbio->physical = physical;
- sbio->logical = logical;
- bio = bio_alloc(GFP_NOFS, SCRUB_PAGES_PER_BIO);
- if (!bio)
- return -ENOMEM;
+ sbio->physical = spage->physical;
+ sbio->logical = spage->logical;
+ bio = sbio->bio;
+ if (!bio) {
+ bio = bio_alloc(GFP_NOFS, sdev->pages_per_bio);
+ if (!bio)
+ return -ENOMEM;
+ sbio->bio = bio;
+ }
bio->bi_private = sbio;
bio->bi_end_io = scrub_bio_end_io;
bio->bi_bdev = sdev->dev->bdev;
- bio->bi_sector = sbio->physical >> 9;
+ bio->bi_sector = spage->physical >> 9;
sbio->err = 0;
- sbio->bio = bio;
- } else if (sbio->physical + sbio->count * PAGE_SIZE != physical ||
- sbio->logical + sbio->count * PAGE_SIZE != logical) {
- ret = scrub_submit(sdev);
- if (ret)
- return ret;
+ } else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
+ spage->physical ||
+ sbio->logical + sbio->page_count * PAGE_SIZE !=
+ spage->logical) {
+ scrub_submit(sdev);
goto again;
}
- sbio->spag[sbio->count].flags = flags;
- sbio->spag[sbio->count].generation = gen;
- sbio->spag[sbio->count].have_csum = 0;
- sbio->spag[sbio->count].mirror_num = mirror_num;
-
- page = alloc_page(GFP_NOFS);
- if (!page)
- return -ENOMEM;
- ret = bio_add_page(sbio->bio, page, PAGE_SIZE, 0);
- if (!ret) {
- __free_page(page);
- ret = scrub_submit(sdev);
- if (ret)
- return ret;
+ sbio->pagev[sbio->page_count] = spage;
+ ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0);
+ if (ret != PAGE_SIZE) {
+ if (sbio->page_count < 1) {
+ bio_put(sbio->bio);
+ sbio->bio = NULL;
+ return -EIO;
+ }
+ scrub_submit(sdev);
goto again;
}
- if (csum) {
- sbio->spag[sbio->count].have_csum = 1;
- memcpy(sbio->spag[sbio->count].csum, csum, sdev->csum_size);
+ scrub_block_get(sblock); /* one for the added page */
+ atomic_inc(&sblock->outstanding_pages);
+ sbio->page_count++;
+ if (sbio->page_count == sdev->pages_per_bio)
+ scrub_submit(sdev);
+
+ return 0;
+}
+
+static int scrub_pages(struct scrub_dev *sdev, u64 logical, u64 len,
+ u64 physical, u64 flags, u64 gen, int mirror_num,
+ u8 *csum, int force)
+{
+ struct scrub_block *sblock;
+ int index;
+
+ sblock = kzalloc(sizeof(*sblock), GFP_NOFS);
+ if (!sblock) {
+ spin_lock(&sdev->stat_lock);
+ sdev->stat.malloc_errors++;
+ spin_unlock(&sdev->stat_lock);
+ return -ENOMEM;
}
- ++sbio->count;
- if (sbio->count == SCRUB_PAGES_PER_BIO || force) {
+
+ /* one ref inside this function, plus one for each page later on */
+ atomic_set(&sblock->ref_count, 1);
+ sblock->sdev = sdev;
+ sblock->no_io_error_seen = 1;
+
+ for (index = 0; len > 0; index++) {
+ struct scrub_page *spage = sblock->pagev + index;
+ u64 l = min_t(u64, len, PAGE_SIZE);
+
+ BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
+ spage->page = alloc_page(GFP_NOFS);
+ if (!spage->page) {
+ spin_lock(&sdev->stat_lock);
+ sdev->stat.malloc_errors++;
+ spin_unlock(&sdev->stat_lock);
+ while (index > 0) {
+ index--;
+ __free_page(sblock->pagev[index].page);
+ }
+ kfree(sblock);
+ return -ENOMEM;
+ }
+ spage->sblock = sblock;
+ spage->bdev = sdev->dev->bdev;
+ spage->flags = flags;
+ spage->generation = gen;
+ spage->logical = logical;
+ spage->physical = physical;
+ spage->mirror_num = mirror_num;
+ if (csum) {
+ spage->have_csum = 1;
+ memcpy(spage->csum, csum, sdev->csum_size);
+ } else {
+ spage->have_csum = 0;
+ }
+ sblock->page_count++;
+ len -= l;
+ logical += l;
+ physical += l;
+ }
+
+ BUG_ON(sblock->page_count == 0);
+ for (index = 0; index < sblock->page_count; index++) {
+ struct scrub_page *spage = sblock->pagev + index;
int ret;
- ret = scrub_submit(sdev);
- if (ret)
+ ret = scrub_add_page_to_bio(sdev, spage);
+ if (ret) {
+ scrub_block_put(sblock);
return ret;
+ }
}
+ if (force)
+ scrub_submit(sdev);
+
+ /* last one frees, either here or in bio completion for last page */
+ scrub_block_put(sblock);
return 0;
}
+static void scrub_bio_end_io(struct bio *bio, int err)
+{
+ struct scrub_bio *sbio = bio->bi_private;
+ struct scrub_dev *sdev = sbio->sdev;
+ struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
+
+ sbio->err = err;
+ sbio->bio = bio;
+
+ btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work);
+}
+
+static void scrub_bio_end_io_worker(struct btrfs_work *work)
+{
+ struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
+ struct scrub_dev *sdev = sbio->sdev;
+ int i;
+
+ BUG_ON(sbio->page_count > SCRUB_PAGES_PER_BIO);
+ if (sbio->err) {
+ for (i = 0; i < sbio->page_count; i++) {
+ struct scrub_page *spage = sbio->pagev[i];
+
+ spage->io_error = 1;
+ spage->sblock->no_io_error_seen = 0;
+ }
+ }
+
+ /* now complete the scrub_block items that have all pages completed */
+ for (i = 0; i < sbio->page_count; i++) {
+ struct scrub_page *spage = sbio->pagev[i];
+ struct scrub_block *sblock = spage->sblock;
+
+ if (atomic_dec_and_test(&sblock->outstanding_pages))
+ scrub_block_complete(sblock);
+ scrub_block_put(sblock);
+ }
+
+ if (sbio->err) {
+ /* what is this good for??? */
+ sbio->bio->bi_flags &= ~(BIO_POOL_MASK - 1);
+ sbio->bio->bi_flags |= 1 << BIO_UPTODATE;
+ sbio->bio->bi_phys_segments = 0;
+ sbio->bio->bi_idx = 0;
+
+ for (i = 0; i < sbio->page_count; i++) {
+ struct bio_vec *bi;
+ bi = &sbio->bio->bi_io_vec[i];
+ bi->bv_offset = 0;
+ bi->bv_len = PAGE_SIZE;
+ }
+ }
+
+ bio_put(sbio->bio);
+ sbio->bio = NULL;
+ spin_lock(&sdev->list_lock);
+ sbio->next_free = sdev->first_free;
+ sdev->first_free = sbio->index;
+ spin_unlock(&sdev->list_lock);
+ atomic_dec(&sdev->in_flight);
+ wake_up(&sdev->list_wait);
+}
+
+static void scrub_block_complete(struct scrub_block *sblock)
+{
+ if (!sblock->no_io_error_seen)
+ scrub_handle_errored_block(sblock);
+ else
+ scrub_checksum(sblock);
+}
+
static int scrub_find_csum(struct scrub_dev *sdev, u64 logical, u64 len,
u8 *csum)
{
@@ -1054,7 +1667,6 @@ static int scrub_find_csum(struct scrub_dev *sdev, u64 logical, u64 len,
int ret = 0;
unsigned long i;
unsigned long num_sectors;
- u32 sectorsize = sdev->dev->dev_root->sectorsize;
while (!list_empty(&sdev->csum_list)) {
sum = list_first_entry(&sdev->csum_list,
@@ -1072,7 +1684,7 @@ static int scrub_find_csum(struct scrub_dev *sdev, u64 logical, u64 len,
if (!sum)
return 0;
- num_sectors = sum->len / sectorsize;
+ num_sectors = sum->len / sdev->sectorsize;
for (i = 0; i < num_sectors; ++i) {
if (sum->sums[i].bytenr == logical) {
memcpy(csum, &sum->sums[i].sum, sdev->csum_size);
@@ -1093,9 +1705,28 @@ static int scrub_extent(struct scrub_dev *sdev, u64 logical, u64 len,
{
int ret;
u8 csum[BTRFS_CSUM_SIZE];
+ u32 blocksize;
+
+ if (flags & BTRFS_EXTENT_FLAG_DATA) {
+ blocksize = sdev->sectorsize;
+ spin_lock(&sdev->stat_lock);
+ sdev->stat.data_extents_scrubbed++;
+ sdev->stat.data_bytes_scrubbed += len;
+ spin_unlock(&sdev->stat_lock);
+ } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
+ BUG_ON(sdev->nodesize != sdev->leafsize);
+ blocksize = sdev->nodesize;
+ spin_lock(&sdev->stat_lock);
+ sdev->stat.tree_extents_scrubbed++;
+ sdev->stat.tree_bytes_scrubbed += len;
+ spin_unlock(&sdev->stat_lock);
+ } else {
+ blocksize = sdev->sectorsize;
+ BUG_ON(1);
+ }
while (len) {
- u64 l = min_t(u64, len, PAGE_SIZE);
+ u64 l = min_t(u64, len, blocksize);
int have_csum = 0;
if (flags & BTRFS_EXTENT_FLAG_DATA) {
@@ -1104,8 +1735,8 @@ static int scrub_extent(struct scrub_dev *sdev, u64 logical, u64 len,
if (have_csum == 0)
++sdev->stat.no_csum;
}
- ret = scrub_page(sdev, logical, l, physical, flags, gen,
- mirror_num, have_csum ? csum : NULL, 0);
+ ret = scrub_pages(sdev, logical, l, physical, flags, gen,
+ mirror_num, have_csum ? csum : NULL, 0);
if (ret)
return ret;
len -= l;
@@ -1170,6 +1801,11 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
if (!path)
return -ENOMEM;
+ /*
+ * work on commit root. The related disk blocks are static as
+ * long as COW is applied. This means, it is save to rewrite
+ * them to repair disk errors without any race conditions
+ */
path->search_commit_root = 1;
path->skip_locking = 1;
@@ -1516,15 +2152,18 @@ static noinline_for_stack int scrub_supers(struct scrub_dev *sdev)
struct btrfs_device *device = sdev->dev;
struct btrfs_root *root = device->dev_root;
+ if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
+ return -EIO;
+
gen = root->fs_info->last_trans_committed;
for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
bytenr = btrfs_sb_offset(i);
- if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes)
+ if (bytenr + BTRFS_SUPER_INFO_SIZE > device->total_bytes)
break;
- ret = scrub_page(sdev, bytenr, PAGE_SIZE, bytenr,
- BTRFS_EXTENT_FLAG_SUPER, gen, i, NULL, 1);
+ ret = scrub_pages(sdev, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr,
+ BTRFS_EXTENT_FLAG_SUPER, gen, i, NULL, 1);
if (ret)
return ret;
}
@@ -1583,10 +2222,30 @@ int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end,
/*
* check some assumptions
*/
- if (root->sectorsize != PAGE_SIZE ||
- root->sectorsize != root->leafsize ||
- root->sectorsize != root->nodesize) {
- printk(KERN_ERR "btrfs_scrub: size assumptions fail\n");
+ if (root->nodesize != root->leafsize) {
+ printk(KERN_ERR
+ "btrfs_scrub: size assumption nodesize == leafsize (%d == %d) fails\n",
+ root->nodesize, root->leafsize);
+ return -EINVAL;
+ }
+
+ if (root->nodesize > BTRFS_STRIPE_LEN) {
+ /*
+ * in this case scrub is unable to calculate the checksum
+ * the way scrub is implemented. Do not handle this
+ * situation at all because it won't ever happen.
+ */
+ printk(KERN_ERR
+ "btrfs_scrub: size assumption nodesize <= BTRFS_STRIPE_LEN (%d <= %d) fails\n",
+ root->nodesize, BTRFS_STRIPE_LEN);
+ return -EINVAL;
+ }
+
+ if (root->sectorsize != PAGE_SIZE) {
+ /* not supported for data w/o checksums */
+ printk(KERN_ERR
+ "btrfs_scrub: size assumption sectorsize != PAGE_SIZE (%d != %lld) fails\n",
+ root->sectorsize, (unsigned long long)PAGE_SIZE);
return -EINVAL;
}
@@ -1656,7 +2315,7 @@ int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end,
return ret;
}
-int btrfs_scrub_pause(struct btrfs_root *root)
+void btrfs_scrub_pause(struct btrfs_root *root)
{
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -1671,34 +2330,28 @@ int btrfs_scrub_pause(struct btrfs_root *root)
mutex_lock(&fs_info->scrub_lock);
}
mutex_unlock(&fs_info->scrub_lock);
-
- return 0;
}
-int btrfs_scrub_continue(struct btrfs_root *root)
+void btrfs_scrub_continue(struct btrfs_root *root)
{
struct btrfs_fs_info *fs_info = root->fs_info;
atomic_dec(&fs_info->scrub_pause_req);
wake_up(&fs_info->scrub_pause_wait);
- return 0;
}
-int btrfs_scrub_pause_super(struct btrfs_root *root)
+void btrfs_scrub_pause_super(struct btrfs_root *root)
{
down_write(&root->fs_info->scrub_super_lock);
- return 0;
}
-int btrfs_scrub_continue_super(struct btrfs_root *root)
+void btrfs_scrub_continue_super(struct btrfs_root *root)
{
up_write(&root->fs_info->scrub_super_lock);
- return 0;
}
-int btrfs_scrub_cancel(struct btrfs_root *root)
+int __btrfs_scrub_cancel(struct btrfs_fs_info *fs_info)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
mutex_lock(&fs_info->scrub_lock);
if (!atomic_read(&fs_info->scrubs_running)) {
@@ -1719,6 +2372,11 @@ int btrfs_scrub_cancel(struct btrfs_root *root)
return 0;
}
+int btrfs_scrub_cancel(struct btrfs_root *root)
+{
+ return __btrfs_scrub_cancel(root->fs_info);
+}
+