summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2008-08-14 12:19:59 +0200
committerIngo Molnar <mingo@elte.hu>2008-08-14 12:19:59 +0200
commit8d7ccaa545490cdffdfaff0842436a8dd85cf47b (patch)
tree8129b5907161bc6ae26deb3645ce1e280c5e1f51 /fs
parentb2139aa0eec330c711c5a279db361e5ef1178e78 (diff)
parent30a2f3c60a84092c8084dfe788b710f8d0768cd4 (diff)
downloadkernel-crypto-8d7ccaa545490cdffdfaff0842436a8dd85cf47b.tar.gz
kernel-crypto-8d7ccaa545490cdffdfaff0842436a8dd85cf47b.tar.xz
kernel-crypto-8d7ccaa545490cdffdfaff0842436a8dd85cf47b.zip
Merge commit 'v2.6.27-rc3' into x86/prototypes
Conflicts: include/asm-x86/dma-mapping.h Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig87
-rw-r--r--fs/Kconfig.binfmt2
-rw-r--r--fs/Makefile1
-rw-r--r--fs/adfs/super.c2
-rw-r--r--fs/affs/affs.h3
-rw-r--r--fs/affs/bitmap.c18
-rw-r--r--fs/affs/file.c4
-rw-r--r--fs/affs/super.c4
-rw-r--r--fs/afs/internal.h4
-rw-r--r--fs/afs/mntpt.c2
-rw-r--r--fs/afs/security.c2
-rw-r--r--fs/afs/super.c4
-rw-r--r--fs/afs/write.c2
-rw-r--r--fs/aio.c8
-rw-r--r--fs/anon_inodes.c11
-rw-r--r--fs/attr.c7
-rw-r--r--fs/autofs4/autofs_i.h28
-rw-r--r--fs/autofs4/expire.c91
-rw-r--r--fs/autofs4/inode.c33
-rw-r--r--fs/autofs4/root.c589
-rw-r--r--fs/autofs4/waitq.c267
-rw-r--r--fs/bad_inode.c3
-rw-r--r--fs/befs/linuxvfs.c2
-rw-r--r--fs/bfs/bfs.h5
-rw-r--r--fs/bfs/dir.c46
-rw-r--r--fs/bfs/file.c4
-rw-r--r--fs/bfs/inode.c29
-rw-r--r--fs/binfmt_aout.c6
-rw-r--r--fs/binfmt_elf.c106
-rw-r--r--fs/binfmt_elf_fdpic.c78
-rw-r--r--fs/binfmt_flat.c3
-rw-r--r--fs/binfmt_misc.c20
-rw-r--r--fs/binfmt_som.c2
-rw-r--r--fs/bio-integrity.c1
-rw-r--r--fs/bio.c17
-rw-r--r--fs/block_dev.c7
-rw-r--r--fs/buffer.c61
-rw-r--r--fs/cifs/CHANGES8
-rw-r--r--fs/cifs/asn1.c260
-rw-r--r--fs/cifs/cifs_debug.c696
-rw-r--r--fs/cifs/cifs_dfs_ref.c2
-rw-r--r--fs/cifs/cifs_spnego.c18
-rw-r--r--fs/cifs/cifsacl.c41
-rw-r--r--fs/cifs/cifsencrypt.c3
-rw-r--r--fs/cifs/cifsfs.c75
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsglob.h6
-rw-r--r--fs/cifs/cifspdu.h10
-rw-r--r--fs/cifs/cifsproto.h24
-rw-r--r--fs/cifs/cifssmb.c56
-rw-r--r--fs/cifs/connect.c210
-rw-r--r--fs/cifs/dir.c67
-rw-r--r--fs/cifs/file.c21
-rw-r--r--fs/cifs/inode.c570
-rw-r--r--fs/cifs/readdir.c1
-rw-r--r--fs/cifs/transport.c1
-rw-r--r--fs/coda/coda_linux.c6
-rw-r--r--fs/coda/dir.c4
-rw-r--r--fs/coda/inode.c2
-rw-r--r--fs/coda/pioctl.c20
-rw-r--r--fs/coda/psdev.c9
-rw-r--r--fs/coda/upcall.c15
-rw-r--r--fs/compat.c42
-rw-r--r--fs/compat_ioctl.c3
-rw-r--r--fs/configfs/configfs_internal.h3
-rw-r--r--fs/configfs/dir.c210
-rw-r--r--fs/configfs/symlink.c26
-rw-r--r--fs/dcache.c438
-rw-r--r--fs/debugfs/inode.c114
-rw-r--r--fs/devpts/inode.c16
-rw-r--r--fs/direct-io.c10
-rw-r--r--fs/dlm/lock.c4
-rw-r--r--fs/dlm/lowcomms.c4
-rw-r--r--fs/dlm/plock.c2
-rw-r--r--fs/dlm/user.c2
-rw-r--r--fs/dquot.c162
-rw-r--r--fs/ecryptfs/Makefile2
-rw-r--r--fs/ecryptfs/crypto.c67
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h23
-rw-r--r--fs/ecryptfs/file.c17
-rw-r--r--fs/ecryptfs/inode.c52
-rw-r--r--fs/ecryptfs/keystore.c9
-rw-r--r--fs/ecryptfs/kthread.c203
-rw-r--r--fs/ecryptfs/main.c83
-rw-r--r--fs/ecryptfs/miscdev.c59
-rw-r--r--fs/ecryptfs/mmap.c11
-rw-r--r--fs/efs/super.c2
-rw-r--r--fs/eventfd.c17
-rw-r--r--fs/eventpoll.c35
-rw-r--r--fs/exec.c235
-rw-r--r--fs/ext2/acl.c2
-rw-r--r--fs/ext2/acl.h2
-rw-r--r--fs/ext2/inode.c1
-rw-r--r--fs/ext2/super.c3
-rw-r--r--fs/ext2/xattr_security.c2
-rw-r--r--fs/ext2/xattr_trusted.c4
-rw-r--r--fs/ext2/xattr_user.c4
-rw-r--r--fs/ext3/acl.c2
-rw-r--r--fs/ext3/acl.h2
-rw-r--r--fs/ext3/dir.c14
-rw-r--r--fs/ext3/ialloc.c9
-rw-r--r--fs/ext3/inode.c113
-rw-r--r--fs/ext3/namei.c26
-rw-r--r--fs/ext3/super.c83
-rw-r--r--fs/ext3/xattr_security.c2
-rw-r--r--fs/ext3/xattr_trusted.c4
-rw-r--r--fs/ext3/xattr_user.c4
-rw-r--r--fs/ext4/acl.c190
-rw-r--r--fs/ext4/acl.h2
-rw-r--r--fs/ext4/balloc.c11
-rw-r--r--fs/ext4/ext4.h1
-rw-r--r--fs/ext4/extents.c55
-rw-r--r--fs/ext4/ialloc.c58
-rw-r--r--fs/ext4/inode.c256
-rw-r--r--fs/ext4/mballoc.c254
-rw-r--r--fs/ext4/mballoc.h10
-rw-r--r--fs/ext4/resize.c79
-rw-r--r--fs/ext4/super.c321
-rw-r--r--fs/ext4/xattr.c2
-rw-r--r--fs/fat/cache.c2
-rw-r--r--fs/fat/dir.c229
-rw-r--r--fs/fat/file.c21
-rw-r--r--fs/fat/inode.c36
-rw-r--r--fs/fat/misc.c10
-rw-r--r--fs/fcntl.c184
-rw-r--r--fs/fifo.c8
-rw-r--r--fs/file.c70
-rw-r--r--fs/file_table.c10
-rw-r--r--fs/fuse/dir.c145
-rw-r--r--fs/fuse/file.c13
-rw-r--r--fs/fuse/fuse_i.h10
-rw-r--r--fs/fuse/inode.c179
-rw-r--r--fs/gfs2/inode.c6
-rw-r--r--fs/gfs2/inode.h2
-rw-r--r--fs/gfs2/main.c4
-rw-r--r--fs/gfs2/ops_export.c2
-rw-r--r--fs/gfs2/ops_inode.c16
-rw-r--r--fs/gfs2/super.c2
-rw-r--r--fs/hfs/bitmap.c8
-rw-r--r--fs/hfs/btree.c2
-rw-r--r--fs/hfs/extent.c14
-rw-r--r--fs/hfs/hfs_fs.h5
-rw-r--r--fs/hfs/inode.c11
-rw-r--r--fs/hfs/super.c4
-rw-r--r--fs/hfsplus/extents.c14
-rw-r--r--fs/hfsplus/hfsplus_fs.h3
-rw-r--r--fs/hfsplus/inode.c10
-rw-r--r--fs/hfsplus/super.c4
-rw-r--r--fs/hostfs/hostfs_kern.c2
-rw-r--r--fs/hpfs/namei.c2
-rw-r--r--fs/hpfs/super.c2
-rw-r--r--fs/hppfs/hppfs.c7
-rw-r--r--fs/hugetlbfs/inode.c103
-rw-r--r--fs/inode.c4
-rw-r--r--fs/inotify_user.c40
-rw-r--r--fs/isofs/inode.c2
-rw-r--r--fs/isofs/rock.c22
-rw-r--r--fs/jbd/commit.c68
-rw-r--r--fs/jbd/journal.c8
-rw-r--r--fs/jbd/revoke.c163
-rw-r--r--fs/jbd/transaction.c61
-rw-r--r--fs/jbd2/commit.c26
-rw-r--r--fs/jbd2/journal.c1
-rw-r--r--fs/jbd2/transaction.c4
-rw-r--r--fs/jffs2/acl.c2
-rw-r--r--fs/jffs2/acl.h2
-rw-r--r--fs/jffs2/dir.c2
-rw-r--r--fs/jffs2/file.c2
-rw-r--r--fs/jffs2/ioctl.c3
-rw-r--r--fs/jffs2/os-linux.h2
-rw-r--r--fs/jffs2/summary.c40
-rw-r--r--fs/jffs2/summary.h6
-rw-r--r--fs/jffs2/super.c2
-rw-r--r--fs/jfs/acl.c2
-rw-r--r--fs/jfs/jfs_acl.h2
-rw-r--r--fs/jfs/jfs_metapage.c2
-rw-r--r--fs/jfs/super.c3
-rw-r--r--fs/libfs.c4
-rw-r--r--fs/lockd/clntproc.c10
-rw-r--r--fs/lockd/svc4proc.c4
-rw-r--r--fs/lockd/svclock.c13
-rw-r--r--fs/lockd/svcproc.c4
-rw-r--r--fs/locks.c92
-rw-r--r--fs/minix/inode.c5
-rw-r--r--fs/minix/minix.h6
-rw-r--r--fs/minix/namei.c24
-rw-r--r--fs/msdos/namei.c21
-rw-r--r--fs/namei.c369
-rw-r--r--fs/namespace.c125
-rw-r--r--fs/ncpfs/dir.c4
-rw-r--r--fs/ncpfs/inode.c2
-rw-r--r--fs/nfs/dir.c11
-rw-r--r--fs/nfs/inode.c2
-rw-r--r--fs/nfs/namespace.c2
-rw-r--r--fs/nfs/super.c6
-rw-r--r--fs/nfs/unlink.c3
-rw-r--r--fs/nfsd/export.c6
-rw-r--r--fs/nfsd/lockd.c13
-rw-r--r--fs/nfsd/nfs4proc.c5
-rw-r--r--fs/nfsd/nfsctl.c1
-rw-r--r--fs/nfsd/nfsfh.c2
-rw-r--r--fs/nfsd/vfs.c14
-rw-r--r--fs/ntfs/aops.c2
-rw-r--r--fs/ntfs/compress.c2
-rw-r--r--fs/ntfs/file.c2
-rw-r--r--fs/ntfs/mft.c4
-rw-r--r--fs/ntfs/super.c2
-rw-r--r--fs/ocfs2/aops.c29
-rw-r--r--fs/ocfs2/dlm/dlmfs.c3
-rw-r--r--fs/ocfs2/file.c4
-rw-r--r--fs/ocfs2/file.h3
-rw-r--r--fs/ocfs2/journal.c173
-rw-r--r--fs/ocfs2/journal.h3
-rw-r--r--fs/ocfs2/ocfs2.h2
-rw-r--r--fs/ocfs2/ocfs2_fs.h5
-rw-r--r--fs/ocfs2/super.c14
-rw-r--r--fs/omfs/Makefile4
-rw-r--r--fs/omfs/bitmap.c192
-rw-r--r--fs/omfs/dir.c504
-rw-r--r--fs/omfs/file.c346
-rw-r--r--fs/omfs/inode.c554
-rw-r--r--fs/omfs/omfs.h67
-rw-r--r--fs/omfs/omfs_fs.h80
-rw-r--r--fs/open.c238
-rw-r--r--fs/openpromfs/inode.c2
-rw-r--r--fs/partitions/check.c40
-rw-r--r--fs/partitions/efi.c42
-rw-r--r--fs/partitions/ldm.c70
-rw-r--r--fs/partitions/ldm.h5
-rw-r--r--fs/pipe.c86
-rw-r--r--fs/proc/Kconfig59
-rw-r--r--fs/proc/array.c9
-rw-r--r--fs/proc/base.c92
-rw-r--r--fs/proc/generic.c46
-rw-r--r--fs/proc/inode.c88
-rw-r--r--fs/proc/internal.h8
-rw-r--r--fs/proc/kcore.c10
-rw-r--r--fs/proc/kmsg.c2
-rw-r--r--fs/proc/proc_misc.c19
-rw-r--r--fs/proc/proc_net.c11
-rw-r--r--fs/proc/proc_sysctl.c429
-rw-r--r--fs/proc/task_mmu.c2
-rw-r--r--fs/qnx4/inode.c2
-rw-r--r--fs/quota.c18
-rw-r--r--fs/quota_v1.c1
-rw-r--r--fs/quota_v2.c1
-rw-r--r--fs/reiserfs/inode.c2
-rw-r--r--fs/reiserfs/journal.c48
-rw-r--r--fs/reiserfs/super.c139
-rw-r--r--fs/reiserfs/xattr.c2
-rw-r--r--fs/reiserfs/xattr_security.c2
-rw-r--r--fs/reiserfs/xattr_trusted.c2
-rw-r--r--fs/reiserfs/xattr_user.c2
-rw-r--r--fs/romfs/inode.c39
-rw-r--r--fs/seq_file.c14
-rw-r--r--fs/signalfd.c19
-rw-r--r--fs/smbfs/cache.c1
-rw-r--r--fs/smbfs/file.c4
-rw-r--r--fs/smbfs/inode.c2
-rw-r--r--fs/smbfs/proc.c1
-rw-r--r--fs/splice.c47
-rw-r--r--fs/stat.c32
-rw-r--r--fs/super.c1
-rw-r--r--fs/sync.c3
-rw-r--r--fs/sysfs/dir.c34
-rw-r--r--fs/sysfs/file.c8
-rw-r--r--fs/sysfs/group.c3
-rw-r--r--fs/sysfs/symlink.c41
-rw-r--r--fs/sysfs/sysfs.h1
-rw-r--r--fs/sysv/inode.c2
-rw-r--r--fs/timerfd.c9
-rw-r--r--fs/ubifs/file.c1
-rw-r--r--fs/ubifs/super.c2
-rw-r--r--fs/udf/super.c2
-rw-r--r--fs/ufs/super.c3
-rw-r--r--fs/utimes.c139
-rw-r--r--fs/vfat/namei.c2
-rw-r--r--fs/xattr.c98
-rw-r--r--fs/xfs/Makefile3
-rw-r--r--fs/xfs/linux-2.6/kmem.c6
-rw-r--r--fs/xfs/linux-2.6/kmem.h6
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c11
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c14
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c390
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c348
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h8
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_stats.c15
-rw-r--r--fs/xfs/linux-2.6/xfs_stats.h12
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c939
-rw-r--r--fs/xfs/linux-2.6/xfs_super.h4
-rw-r--r--fs/xfs/linux-2.6/xfs_sysctl.c8
-rw-r--r--fs/xfs/linux-2.6/xfs_sysctl.h4
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.c50
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.h89
-rw-r--r--fs/xfs/linux-2.6/xfs_xattr.c330
-rw-r--r--fs/xfs/quota/xfs_dquot.c3
-rw-r--r--fs/xfs/quota/xfs_dquot.h2
-rw-r--r--fs/xfs/quota/xfs_dquot_item.c4
-rw-r--r--fs/xfs/quota/xfs_qm.c24
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c12
-rw-r--r--fs/xfs/quota/xfs_quota_priv.h3
-rw-r--r--fs/xfs/support/ktrace.c4
-rw-r--r--fs/xfs/support/uuid.c8
-rw-r--r--fs/xfs/support/uuid.h1
-rw-r--r--fs/xfs/xfs_acl.c21
-rw-r--r--fs/xfs/xfs_acl.h4
-rw-r--r--fs/xfs/xfs_attr.c608
-rw-r--r--fs/xfs/xfs_attr.h90
-rw-r--r--fs/xfs/xfs_attr_leaf.c99
-rw-r--r--fs/xfs/xfs_attr_leaf.h29
-rw-r--r--fs/xfs/xfs_attr_sf.h10
-rw-r--r--fs/xfs/xfs_bmap.c118
-rw-r--r--fs/xfs/xfs_bmap.h13
-rw-r--r--fs/xfs/xfs_bmap_btree.c76
-rw-r--r--fs/xfs/xfs_buf_item.c8
-rw-r--r--fs/xfs/xfs_clnt.h1
-rw-r--r--fs/xfs/xfs_da_btree.c48
-rw-r--r--fs/xfs/xfs_da_btree.h36
-rw-r--r--fs/xfs/xfs_dfrag.c4
-rw-r--r--fs/xfs/xfs_dir2.c125
-rw-r--r--fs/xfs/xfs_dir2.h6
-rw-r--r--fs/xfs/xfs_dir2_block.c56
-rw-r--r--fs/xfs/xfs_dir2_data.c5
-rw-r--r--fs/xfs/xfs_dir2_leaf.c93
-rw-r--r--fs/xfs/xfs_dir2_node.c402
-rw-r--r--fs/xfs/xfs_dir2_sf.c83
-rw-r--r--fs/xfs/xfs_dir2_sf.h6
-rw-r--r--fs/xfs/xfs_dir2_trace.c20
-rw-r--r--fs/xfs/xfs_dmapi.h2
-rw-r--r--fs/xfs/xfs_error.c13
-rw-r--r--fs/xfs/xfs_error.h1
-rw-r--r--fs/xfs/xfs_extfree_item.c6
-rw-r--r--fs/xfs/xfs_filestream.c4
-rw-r--r--fs/xfs/xfs_fs.h4
-rw-r--r--fs/xfs/xfs_fsops.c4
-rw-r--r--fs/xfs/xfs_inode.c165
-rw-r--r--fs/xfs/xfs_inode.h3
-rw-r--r--fs/xfs/xfs_inode_item.c7
-rw-r--r--fs/xfs/xfs_iomap.c10
-rw-r--r--fs/xfs/xfs_itable.c6
-rw-r--r--fs/xfs/xfs_log.c49
-rw-r--r--fs/xfs/xfs_log_priv.h6
-rw-r--r--fs/xfs/xfs_log_recover.c21
-rw-r--r--fs/xfs/xfs_mount.c118
-rw-r--r--fs/xfs/xfs_mount.h17
-rw-r--r--fs/xfs/xfs_mru_cache.c21
-rw-r--r--fs/xfs/xfs_rename.c22
-rw-r--r--fs/xfs/xfs_rtalloc.c2
-rw-r--r--fs/xfs/xfs_sb.h17
-rw-r--r--fs/xfs/xfs_trans.c4
-rw-r--r--fs/xfs/xfs_trans_inode.c2
-rw-r--r--fs/xfs/xfs_trans_item.c8
-rw-r--r--fs/xfs/xfs_vfsops.c610
-rw-r--r--fs/xfs/xfs_vfsops.h5
-rw-r--r--fs/xfs/xfs_vnodeops.c722
-rw-r--r--fs/xfs/xfs_vnodeops.h12
360 files changed, 11432 insertions, 8086 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 37db79a2ff9..d3873583360 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -902,65 +902,7 @@ endif # BLOCK
menu "Pseudo filesystems"
-config PROC_FS
- bool "/proc file system support" if EMBEDDED
- default y
- help
- This is a virtual file system providing information about the status
- of the system. "Virtual" means that it doesn't take up any space on
- your hard disk: the files are created on the fly by the kernel when
- you try to access them. Also, you cannot read the files with older
- version of the program less: you need to use more or cat.
-
- It's totally cool; for example, "cat /proc/interrupts" gives
- information about what the different IRQs are used for at the moment
- (there is a small number of Interrupt ReQuest lines in your computer
- that are used by the attached devices to gain the CPU's attention --
- often a source of trouble if two devices are mistakenly configured
- to use the same IRQ). The program procinfo to display some
- information about your system gathered from the /proc file system.
-
- Before you can use the /proc file system, it has to be mounted,
- meaning it has to be given a location in the directory hierarchy.
- That location should be /proc. A command such as "mount -t proc proc
- /proc" or the equivalent line in /etc/fstab does the job.
-
- The /proc file system is explained in the file
- <file:Documentation/filesystems/proc.txt> and on the proc(5) manpage
- ("man 5 proc").
-
- This option will enlarge your kernel by about 67 KB. Several
- programs depend on this, so everyone should say Y here.
-
-config PROC_KCORE
- bool "/proc/kcore support" if !ARM
- depends on PROC_FS && MMU
-
-config PROC_VMCORE
- bool "/proc/vmcore support (EXPERIMENTAL)"
- depends on PROC_FS && CRASH_DUMP
- default y
- help
- Exports the dump image of crashed kernel in ELF format.
-
-config PROC_SYSCTL
- bool "Sysctl support (/proc/sys)" if EMBEDDED
- depends on PROC_FS
- select SYSCTL
- default y
- ---help---
- The sysctl interface provides a means of dynamically changing
- certain kernel parameters and variables on the fly without requiring
- a recompile of the kernel or reboot of the system. The primary
- interface is through /proc/sys. If you say Y here a tree of
- modifiable sysctl entries will be generated beneath the
- /proc/sys directory. They are explained in the files
- in <file:Documentation/sysctl/>. Note that enabling this
- option will enlarge the kernel by at least 8 KB.
-
- As it is generally a good thing, you should say Y here unless
- building a kernel for install/rescue disks or your system is very
- limited in memory.
+source "fs/proc/Kconfig"
config SYSFS
bool "sysfs file system support" if EMBEDDED
@@ -1441,6 +1383,19 @@ config MINIX_FS
partition (the one containing the directory /) cannot be compiled as
a module.
+config OMFS_FS
+ tristate "SonicBlue Optimized MPEG File System support"
+ depends on BLOCK
+ select CRC_ITU_T
+ help
+ This is the proprietary file system used by the Rio Karma music
+ player and ReplayTV DVR. Despite the name, this filesystem is not
+ more efficient than a standard FS for MPEG files, in fact likely
+ the opposite is true. Say Y if you have either of these devices
+ and wish to mount its disk.
+
+ To compile this file system support as a module, choose M here: the
+ module will be called omfs. If unsure, say N.
config HPFS_FS
tristate "OS/2 HPFS file system support"
@@ -2093,20 +2048,6 @@ config CODA_FS
To compile the coda client support as a module, choose M here: the
module will be called coda.
-config CODA_FS_OLD_API
- bool "Use 96-bit Coda file identifiers"
- depends on CODA_FS
- help
- A new kernel-userspace API had to be introduced for Coda v6.0
- to support larger 128-bit file identifiers as needed by the
- new realms implementation.
-
- However this new API is not backward compatible with older
- clients. If you really need to run the old Coda userspace
- cache manager then say Y.
-
- For most cases you probably want to say N.
-
config AFS_FS
tristate "Andrew File System support (AFS) (EXPERIMENTAL)"
depends on INET && EXPERIMENTAL
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 3263084eef9..4a551af6f3f 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -30,7 +30,7 @@ config COMPAT_BINFMT_ELF
config BINFMT_ELF_FDPIC
bool "Kernel support for FDPIC ELF binaries"
default y
- depends on (FRV || BLACKFIN)
+ depends on (FRV || BLACKFIN || (SUPERH32 && !MMU))
help
ELF FDPIC binaries are based on ELF, but allow the individual load
segments of a binary to be located in memory independently of each
diff --git a/fs/Makefile b/fs/Makefile
index 3b2178b4bb6..a1482a5eff1 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -111,6 +111,7 @@ obj-$(CONFIG_ADFS_FS) += adfs/
obj-$(CONFIG_FUSE_FS) += fuse/
obj-$(CONFIG_UDF_FS) += udf/
obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/
+obj-$(CONFIG_OMFS_FS) += omfs/
obj-$(CONFIG_JFS_FS) += jfs/
obj-$(CONFIG_XFS_FS) += xfs/
obj-$(CONFIG_9P_FS) += 9p/
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 9e421eeb672..26f3b43726b 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -249,7 +249,7 @@ static void adfs_destroy_inode(struct inode *inode)
kmem_cache_free(adfs_inode_cachep, ADFS_I(inode));
}
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
{
struct adfs_inode_info *ei = (struct adfs_inode_info *) foo;
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 223b1917093..e9ec915f755 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -2,6 +2,7 @@
#include <linux/fs.h>
#include <linux/buffer_head.h>
#include <linux/amigaffs.h>
+#include <linux/mutex.h>
/* AmigaOS allows file names with up to 30 characters length.
* Names longer than that will be silently truncated. If you
@@ -98,7 +99,7 @@ struct affs_sb_info {
gid_t s_gid; /* gid to override */
umode_t s_mode; /* mode to override */
struct buffer_head *s_root_bh; /* Cached root block. */
- struct semaphore s_bmlock; /* Protects bitmap access. */
+ struct mutex s_bmlock; /* Protects bitmap access. */
struct affs_bm_info *s_bitmap; /* Bitmap infos. */
u32 s_bmap_count; /* # of bitmap blocks. */
u32 s_bmap_bits; /* # of bits in one bitmap blocks */
diff --git a/fs/affs/bitmap.c b/fs/affs/bitmap.c
index c4a5ad09ddf..dc5ef14bdc1 100644
--- a/fs/affs/bitmap.c
+++ b/fs/affs/bitmap.c
@@ -45,14 +45,14 @@ affs_count_free_blocks(struct super_block *sb)
if (sb->s_flags & MS_RDONLY)
return 0;
- down(&AFFS_SB(sb)->s_bmlock);
+ mutex_lock(&AFFS_SB(sb)->s_bmlock);
bm = AFFS_SB(sb)->s_bitmap;
free = 0;
for (i = AFFS_SB(sb)->s_bmap_count; i > 0; bm++, i--)
free += bm->bm_free;
- up(&AFFS_SB(sb)->s_bmlock);
+ mutex_unlock(&AFFS_SB(sb)->s_bmlock);
return free;
}
@@ -76,7 +76,7 @@ affs_free_block(struct super_block *sb, u32 block)
bit = blk % sbi->s_bmap_bits;
bm = &sbi->s_bitmap[bmap];
- down(&sbi->s_bmlock);
+ mutex_lock(&sbi->s_bmlock);
bh = sbi->s_bmap_bh;
if (sbi->s_last_bmap != bmap) {
@@ -105,19 +105,19 @@ affs_free_block(struct super_block *sb, u32 block)
sb->s_dirt = 1;
bm->bm_free++;
- up(&sbi->s_bmlock);
+ mutex_unlock(&sbi->s_bmlock);
return;
err_free:
affs_warning(sb,"affs_free_block","Trying to free block %u which is already free", block);
- up(&sbi->s_bmlock);
+ mutex_unlock(&sbi->s_bmlock);
return;
err_bh_read:
affs_error(sb,"affs_free_block","Cannot read bitmap block %u", bm->bm_key);
sbi->s_bmap_bh = NULL;
sbi->s_last_bmap = ~0;
- up(&sbi->s_bmlock);
+ mutex_unlock(&sbi->s_bmlock);
return;
err_range:
@@ -168,7 +168,7 @@ affs_alloc_block(struct inode *inode, u32 goal)
bmap = blk / sbi->s_bmap_bits;
bm = &sbi->s_bitmap[bmap];
- down(&sbi->s_bmlock);
+ mutex_lock(&sbi->s_bmlock);
if (bm->bm_free)
goto find_bmap_bit;
@@ -249,7 +249,7 @@ find_bit:
mark_buffer_dirty(bh);
sb->s_dirt = 1;
- up(&sbi->s_bmlock);
+ mutex_unlock(&sbi->s_bmlock);
pr_debug("%d\n", blk);
return blk;
@@ -259,7 +259,7 @@ err_bh_read:
sbi->s_bmap_bh = NULL;
sbi->s_last_bmap = ~0;
err_full:
- up(&sbi->s_bmlock);
+ mutex_unlock(&sbi->s_bmlock);
pr_debug("failed\n");
return 0;
}
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 6eac7bdeec9..1377b1240b6 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -46,8 +46,6 @@ const struct inode_operations affs_file_inode_operations = {
static int
affs_file_open(struct inode *inode, struct file *filp)
{
- if (atomic_read(&filp->f_count) != 1)
- return 0;
pr_debug("AFFS: open(%lu,%d)\n",
inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt));
atomic_inc(&AFFS_I(inode)->i_opencnt);
@@ -57,8 +55,6 @@ affs_file_open(struct inode *inode, struct file *filp)
static int
affs_file_release(struct inode *inode, struct file *filp)
{
- if (atomic_read(&filp->f_count) != 0)
- return 0;
pr_debug("AFFS: release(%lu, %d)\n",
inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt));
diff --git a/fs/affs/super.c b/fs/affs/super.c
index d214837d5e4..3a89094f93d 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -90,7 +90,7 @@ static void affs_destroy_inode(struct inode *inode)
kmem_cache_free(affs_inode_cachep, AFFS_I(inode));
}
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
{
struct affs_inode_info *ei = (struct affs_inode_info *) foo;
@@ -290,7 +290,7 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent)
if (!sbi)
return -ENOMEM;
sb->s_fs_info = sbi;
- init_MUTEX(&sbi->s_bmlock);
+ mutex_init(&sbi->s_bmlock);
if (!parse_options(data,&uid,&gid,&i,&reserved,&root_block,
&blocksize,&sbi->s_prefix,
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 7102824ba84..3cb6920ff30 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -469,8 +469,6 @@ extern bool afs_cm_incoming_call(struct afs_call *);
extern const struct inode_operations afs_dir_inode_operations;
extern const struct file_operations afs_dir_file_operations;
-extern int afs_permission(struct inode *, int, struct nameidata *);
-
/*
* file.c
*/
@@ -605,7 +603,7 @@ extern void afs_clear_permits(struct afs_vnode *);
extern void afs_cache_permit(struct afs_vnode *, struct key *, long);
extern void afs_zap_permits(struct rcu_head *);
extern struct key *afs_request_key(struct afs_cell *);
-extern int afs_permission(struct inode *, int, struct nameidata *);
+extern int afs_permission(struct inode *, int);
/*
* server.c
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 2f5503902c3..78db4953a80 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -232,7 +232,7 @@ static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd)
}
mntget(newmnt);
- err = do_add_mount(newmnt, nd, MNT_SHRINKABLE, &afs_vfsmounts);
+ err = do_add_mount(newmnt, &nd->path, MNT_SHRINKABLE, &afs_vfsmounts);
switch (err) {
case 0:
path_put(&nd->path);
diff --git a/fs/afs/security.c b/fs/afs/security.c
index 3bcbeceba1b..3ef50437003 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -284,7 +284,7 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key,
* - AFS ACLs are attached to directories only, and a file is controlled by its
* parent directory's ACL
*/
-int afs_permission(struct inode *inode, int mask, struct nameidata *nd)
+int afs_permission(struct inode *inode, int mask)
{
struct afs_vnode *vnode = AFS_FS_I(inode);
afs_access_t uninitialized_var(access);
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 7e3faeef681..250d8c4d66e 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -27,7 +27,7 @@
#define AFS_FS_MAGIC 0x6B414653 /* 'kAFS' */
-static void afs_i_init_once(struct kmem_cache *cachep, void *foo);
+static void afs_i_init_once(void *foo);
static int afs_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name,
void *data, struct vfsmount *mnt);
@@ -449,7 +449,7 @@ static void afs_put_super(struct super_block *sb)
/*
* initialise an inode cache slab element prior to any use
*/
-static void afs_i_init_once(struct kmem_cache *cachep, void *_vnode)
+static void afs_i_init_once(void *_vnode)
{
struct afs_vnode *vnode = _vnode;
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 9a849ad3c48..065b4e10681 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -404,7 +404,7 @@ static int afs_write_back_from_locked_page(struct afs_writeback *wb,
page = pages[loop];
if (page->index > wb->last)
break;
- if (TestSetPageLocked(page))
+ if (!trylock_page(page))
break;
if (!PageDirty(page) ||
page_private(page) != (unsigned long) wb) {
diff --git a/fs/aio.c b/fs/aio.c
index 0fb3117ddd9..f658441d566 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -512,8 +512,8 @@ static void aio_fput_routine(struct work_struct *data)
*/
static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
{
- dprintk(KERN_DEBUG "aio_put(%p): f_count=%d\n",
- req, atomic_read(&req->ki_filp->f_count));
+ dprintk(KERN_DEBUG "aio_put(%p): f_count=%ld\n",
+ req, atomic_long_read(&req->ki_filp->f_count));
assert_spin_locked(&ctx->ctx_lock);
@@ -528,7 +528,7 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
/* Must be done under the lock to serialise against cancellation.
* Call this aio_fput as it duplicates fput via the fput_work.
*/
- if (unlikely(atomic_dec_and_test(&req->ki_filp->f_count))) {
+ if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count))) {
get_ioctx(ctx);
spin_lock(&fput_lock);
list_add(&req->ki_list, &fput_head);
@@ -586,7 +586,6 @@ static void use_mm(struct mm_struct *mm)
struct task_struct *tsk = current;
task_lock(tsk);
- tsk->flags |= PF_BORROWED_MM;
active_mm = tsk->active_mm;
atomic_inc(&mm->mm_count);
tsk->mm = mm;
@@ -610,7 +609,6 @@ static void unuse_mm(struct mm_struct *mm)
struct task_struct *tsk = current;
task_lock(tsk);
- tsk->flags &= ~PF_BORROWED_MM;
tsk->mm = NULL;
/* active_mm is still 'mm' */
enter_lazy_tlb(mm, tsk);
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 977ef208c05..3662dd44896 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -58,8 +58,9 @@ static struct dentry_operations anon_inodefs_dentry_operations = {
* of the file
*
* @name: [in] name of the "class" of the new file
- * @fops [in] file operations for the new file
- * @priv [in] private data for the new file (will be file's private_data)
+ * @fops: [in] file operations for the new file
+ * @priv: [in] private data for the new file (will be file's private_data)
+ * @flags: [in] flags
*
* Creates a new file by hooking it on a single inode. This is useful for files
* that do not need to have a full-fledged inode in order to operate correctly.
@@ -68,7 +69,7 @@ static struct dentry_operations anon_inodefs_dentry_operations = {
* setup. Returns new descriptor or -error.
*/
int anon_inode_getfd(const char *name, const struct file_operations *fops,
- void *priv)
+ void *priv, int flags)
{
struct qstr this;
struct dentry *dentry;
@@ -78,7 +79,7 @@ int anon_inode_getfd(const char *name, const struct file_operations *fops,
if (IS_ERR(anon_inode_inode))
return -ENODEV;
- error = get_unused_fd();
+ error = get_unused_fd_flags(flags);
if (error < 0)
return error;
fd = error;
@@ -115,7 +116,7 @@ int anon_inode_getfd(const char *name, const struct file_operations *fops,
file->f_mapping = anon_inode_inode->i_mapping;
file->f_pos = 0;
- file->f_flags = O_RDWR;
+ file->f_flags = O_RDWR | (flags & O_NONBLOCK);
file->f_version = 0;
file->private_data = priv;
diff --git a/fs/attr.c b/fs/attr.c
index 966b73e25f8..26c71ba1eed 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -51,7 +51,7 @@ int inode_change_ok(struct inode *inode, struct iattr *attr)
}
/* Check for setting the inode time. */
- if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET)) {
+ if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)) {
if (!is_owner_or_cap(inode))
goto error;
}
@@ -108,6 +108,11 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
struct timespec now;
unsigned int ia_valid = attr->ia_valid;
+ if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_TIMES_SET)) {
+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+ return -EPERM;
+ }
+
now = current_fs_time(inode->i_sb);
attr->ia_ctime = now;
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index c3d352d7fa9..69a2f5c9231 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -52,7 +52,10 @@ struct autofs_info {
int flags;
- struct list_head rehash;
+ struct completion expire_complete;
+
+ struct list_head active;
+ struct list_head expiring;
struct autofs_sb_info *sbi;
unsigned long last_used;
@@ -68,15 +71,14 @@ struct autofs_info {
};
#define AUTOFS_INF_EXPIRING (1<<0) /* dentry is in the process of expiring */
+#define AUTOFS_INF_MOUNTPOINT (1<<1) /* mountpoint status for direct expire */
struct autofs_wait_queue {
wait_queue_head_t queue;
struct autofs_wait_queue *next;
autofs_wqt_t wait_queue_token;
/* We use the following to see what we are waiting for */
- unsigned int hash;
- unsigned int len;
- char *name;
+ struct qstr name;
u32 dev;
u64 ino;
uid_t uid;
@@ -85,7 +87,7 @@ struct autofs_wait_queue {
pid_t tgid;
/* This is for status reporting upon return */
int status;
- atomic_t wait_ctr;
+ unsigned int wait_ctr;
};
#define AUTOFS_SBI_MAGIC 0x6d4a556d
@@ -112,8 +114,9 @@ struct autofs_sb_info {
struct mutex wq_mutex;
spinlock_t fs_lock;
struct autofs_wait_queue *queues; /* Wait queue pointer */
- spinlock_t rehash_lock;
- struct list_head rehash_list;
+ spinlock_t lookup_lock;
+ struct list_head active_list;
+ struct list_head expiring_list;
};
static inline struct autofs_sb_info *autofs4_sbi(struct super_block *sb)
@@ -138,18 +141,14 @@ static inline int autofs4_oz_mode(struct autofs_sb_info *sbi) {
static inline int autofs4_ispending(struct dentry *dentry)
{
struct autofs_info *inf = autofs4_dentry_ino(dentry);
- int pending = 0;
if (dentry->d_flags & DCACHE_AUTOFS_PENDING)
return 1;
- if (inf) {
- spin_lock(&inf->sbi->fs_lock);
- pending = inf->flags & AUTOFS_INF_EXPIRING;
- spin_unlock(&inf->sbi->fs_lock);
- }
+ if (inf->flags & AUTOFS_INF_EXPIRING)
+ return 1;
- return pending;
+ return 0;
}
static inline void autofs4_copy_atime(struct file *src, struct file *dst)
@@ -164,6 +163,7 @@ void autofs4_free_ino(struct autofs_info *);
/* Expiration */
int is_autofs4_dentry(struct dentry *);
+int autofs4_expire_wait(struct dentry *dentry);
int autofs4_expire_run(struct super_block *, struct vfsmount *,
struct autofs_sb_info *,
struct autofs_packet_expire __user *);
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 894fee54d4d..cdabb796ff0 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -259,13 +259,15 @@ static struct dentry *autofs4_expire_direct(struct super_block *sb,
now = jiffies;
timeout = sbi->exp_timeout;
- /* Lock the tree as we must expire as a whole */
spin_lock(&sbi->fs_lock);
if (!autofs4_direct_busy(mnt, root, timeout, do_now)) {
struct autofs_info *ino = autofs4_dentry_ino(root);
-
- /* Set this flag early to catch sys_chdir and the like */
+ if (d_mountpoint(root)) {
+ ino->flags |= AUTOFS_INF_MOUNTPOINT;
+ root->d_mounted--;
+ }
ino->flags |= AUTOFS_INF_EXPIRING;
+ init_completion(&ino->expire_complete);
spin_unlock(&sbi->fs_lock);
return root;
}
@@ -292,6 +294,8 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
struct list_head *next;
int do_now = how & AUTOFS_EXP_IMMEDIATE;
int exp_leaves = how & AUTOFS_EXP_LEAVES;
+ struct autofs_info *ino;
+ unsigned int ino_count;
if (!root)
return NULL;
@@ -316,6 +320,9 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
dentry = dget(dentry);
spin_unlock(&dcache_lock);
+ spin_lock(&sbi->fs_lock);
+ ino = autofs4_dentry_ino(dentry);
+
/*
* Case 1: (i) indirect mount or top level pseudo direct mount
* (autofs-4.1).
@@ -326,6 +333,11 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
DPRINTK("checking mountpoint %p %.*s",
dentry, (int)dentry->d_name.len, dentry->d_name.name);
+ /* Path walk currently on this dentry? */
+ ino_count = atomic_read(&ino->count) + 2;
+ if (atomic_read(&dentry->d_count) > ino_count)
+ goto next;
+
/* Can we umount this guy */
if (autofs4_mount_busy(mnt, dentry))
goto next;
@@ -343,23 +355,25 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
/* Case 2: tree mount, expire iff entire tree is not busy */
if (!exp_leaves) {
- /* Lock the tree as we must expire as a whole */
- spin_lock(&sbi->fs_lock);
- if (!autofs4_tree_busy(mnt, dentry, timeout, do_now)) {
- struct autofs_info *inf = autofs4_dentry_ino(dentry);
+ /* Path walk currently on this dentry? */
+ ino_count = atomic_read(&ino->count) + 1;
+ if (atomic_read(&dentry->d_count) > ino_count)
+ goto next;
- /* Set this flag early to catch sys_chdir and the like */
- inf->flags |= AUTOFS_INF_EXPIRING;
- spin_unlock(&sbi->fs_lock);
+ if (!autofs4_tree_busy(mnt, dentry, timeout, do_now)) {
expired = dentry;
goto found;
}
- spin_unlock(&sbi->fs_lock);
/*
* Case 3: pseudo direct mount, expire individual leaves
* (autofs-4.1).
*/
} else {
+ /* Path walk currently on this dentry? */
+ ino_count = atomic_read(&ino->count) + 1;
+ if (atomic_read(&dentry->d_count) > ino_count)
+ goto next;
+
expired = autofs4_check_leaves(mnt, dentry, timeout, do_now);
if (expired) {
dput(dentry);
@@ -367,6 +381,7 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
}
}
next:
+ spin_unlock(&sbi->fs_lock);
dput(dentry);
spin_lock(&dcache_lock);
next = next->next;
@@ -377,12 +392,45 @@ next:
found:
DPRINTK("returning %p %.*s",
expired, (int)expired->d_name.len, expired->d_name.name);
+ ino = autofs4_dentry_ino(expired);
+ ino->flags |= AUTOFS_INF_EXPIRING;
+ init_completion(&ino->expire_complete);
+ spin_unlock(&sbi->fs_lock);
spin_lock(&dcache_lock);
list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
spin_unlock(&dcache_lock);
return expired;
}
+int autofs4_expire_wait(struct dentry *dentry)
+{
+ struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
+ struct autofs_info *ino = autofs4_dentry_ino(dentry);
+ int status;
+
+ /* Block on any pending expire */
+ spin_lock(&sbi->fs_lock);
+ if (ino->flags & AUTOFS_INF_EXPIRING) {
+ spin_unlock(&sbi->fs_lock);
+
+ DPRINTK("waiting for expire %p name=%.*s",
+ dentry, dentry->d_name.len, dentry->d_name.name);
+
+ status = autofs4_wait(sbi, dentry, NFY_NONE);
+ wait_for_completion(&ino->expire_complete);
+
+ DPRINTK("expire done status=%d", status);
+
+ if (d_unhashed(dentry))
+ return -EAGAIN;
+
+ return status;
+ }
+ spin_unlock(&sbi->fs_lock);
+
+ return 0;
+}
+
/* Perform an expiry operation */
int autofs4_expire_run(struct super_block *sb,
struct vfsmount *mnt,
@@ -390,7 +438,9 @@ int autofs4_expire_run(struct super_block *sb,
struct autofs_packet_expire __user *pkt_p)
{
struct autofs_packet_expire pkt;
+ struct autofs_info *ino;
struct dentry *dentry;
+ int ret = 0;
memset(&pkt,0,sizeof pkt);
@@ -406,9 +456,15 @@ int autofs4_expire_run(struct super_block *sb,
dput(dentry);
if ( copy_to_user(pkt_p, &pkt, sizeof(struct autofs_packet_expire)) )
- return -EFAULT;
+ ret = -EFAULT;
- return 0;
+ spin_lock(&sbi->fs_lock);
+ ino = autofs4_dentry_ino(dentry);
+ ino->flags &= ~AUTOFS_INF_EXPIRING;
+ complete_all(&ino->expire_complete);
+ spin_unlock(&sbi->fs_lock);
+
+ return ret;
}
/* Call repeatedly until it returns -EAGAIN, meaning there's nothing
@@ -433,9 +489,16 @@ int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt,
/* This is synchronous because it makes the daemon a
little easier */
- ino->flags |= AUTOFS_INF_EXPIRING;
ret = autofs4_wait(sbi, dentry, NFY_EXPIRE);
+
+ spin_lock(&sbi->fs_lock);
+ if (ino->flags & AUTOFS_INF_MOUNTPOINT) {
+ sb->s_root->d_mounted++;
+ ino->flags &= ~AUTOFS_INF_MOUNTPOINT;
+ }
ino->flags &= ~AUTOFS_INF_EXPIRING;
+ complete_all(&ino->expire_complete);
+ spin_unlock(&sbi->fs_lock);
dput(dentry);
}
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 2fdcf5e1d23..7bb3e5ba053 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -24,8 +24,10 @@
static void ino_lnkfree(struct autofs_info *ino)
{
- kfree(ino->u.symlink);
- ino->u.symlink = NULL;
+ if (ino->u.symlink) {
+ kfree(ino->u.symlink);
+ ino->u.symlink = NULL;
+ }
}
struct autofs_info *autofs4_init_ino(struct autofs_info *ino,
@@ -41,16 +43,18 @@ struct autofs_info *autofs4_init_ino(struct autofs_info *ino,
if (ino == NULL)
return NULL;
- ino->flags = 0;
- ino->mode = mode;
- ino->inode = NULL;
- ino->dentry = NULL;
- ino->size = 0;
-
- INIT_LIST_HEAD(&ino->rehash);
+ if (!reinit) {
+ ino->flags = 0;
+ ino->inode = NULL;
+ ino->dentry = NULL;
+ ino->size = 0;
+ INIT_LIST_HEAD(&ino->active);
+ INIT_LIST_HEAD(&ino->expiring);
+ atomic_set(&ino->count, 0);
+ }
+ ino->mode = mode;
ino->last_used = jiffies;
- atomic_set(&ino->count, 0);
ino->sbi = sbi;
@@ -159,8 +163,8 @@ void autofs4_kill_sb(struct super_block *sb)
if (!sbi)
goto out_kill_sb;
- if (!sbi->catatonic)
- autofs4_catatonic_mode(sbi); /* Free wait queues, close pipe */
+ /* Free wait queues, close pipe */
+ autofs4_catatonic_mode(sbi);
/* Clean up and release dangling references */
autofs4_force_release(sbi);
@@ -338,8 +342,9 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
mutex_init(&sbi->wq_mutex);
spin_lock_init(&sbi->fs_lock);
sbi->queues = NULL;
- spin_lock_init(&sbi->rehash_lock);
- INIT_LIST_HEAD(&sbi->rehash_list);
+ spin_lock_init(&sbi->lookup_lock);
+ INIT_LIST_HEAD(&sbi->active_list);
+ INIT_LIST_HEAD(&sbi->expiring_list);
s->s_blocksize = 1024;
s->s_blocksize_bits = 10;
s->s_magic = AUTOFS_SUPER_MAGIC;
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index edf5b6bddb5..bcfb2dc0a61 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -25,25 +25,25 @@ static int autofs4_dir_rmdir(struct inode *,struct dentry *);
static int autofs4_dir_mkdir(struct inode *,struct dentry *,int);
static int autofs4_root_ioctl(struct inode *, struct file *,unsigned int,unsigned long);
static int autofs4_dir_open(struct inode *inode, struct file *file);
-static int autofs4_dir_close(struct inode *inode, struct file *file);
-static int autofs4_dir_readdir(struct file * filp, void * dirent, filldir_t filldir);
-static int autofs4_root_readdir(struct file * filp, void * dirent, filldir_t filldir);
static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *);
static void *autofs4_follow_link(struct dentry *, struct nameidata *);
+#define TRIGGER_FLAGS (LOOKUP_CONTINUE | LOOKUP_DIRECTORY)
+#define TRIGGER_INTENTS (LOOKUP_OPEN | LOOKUP_CREATE)
+
const struct file_operations autofs4_root_operations = {
.open = dcache_dir_open,
.release = dcache_dir_close,
.read = generic_read_dir,
- .readdir = autofs4_root_readdir,
+ .readdir = dcache_readdir,
.ioctl = autofs4_root_ioctl,
};
const struct file_operations autofs4_dir_operations = {
.open = autofs4_dir_open,
- .release = autofs4_dir_close,
+ .release = dcache_dir_close,
.read = generic_read_dir,
- .readdir = autofs4_dir_readdir,
+ .readdir = dcache_readdir,
};
const struct inode_operations autofs4_indirect_root_inode_operations = {
@@ -70,42 +70,10 @@ const struct inode_operations autofs4_dir_inode_operations = {
.rmdir = autofs4_dir_rmdir,
};
-static int autofs4_root_readdir(struct file *file, void *dirent,
- filldir_t filldir)
-{
- struct autofs_sb_info *sbi = autofs4_sbi(file->f_path.dentry->d_sb);
- int oz_mode = autofs4_oz_mode(sbi);
-
- DPRINTK("called, filp->f_pos = %lld", file->f_pos);
-
- /*
- * Don't set reghost flag if:
- * 1) f_pos is larger than zero -- we've already been here.
- * 2) we haven't even enabled reghosting in the 1st place.
- * 3) this is the daemon doing a readdir
- */
- if (oz_mode && file->f_pos == 0 && sbi->reghost_enabled)
- sbi->needs_reghost = 1;
-
- DPRINTK("needs_reghost = %d", sbi->needs_reghost);
-
- return dcache_readdir(file, dirent, filldir);
-}
-
static int autofs4_dir_open(struct inode *inode, struct file *file)
{
struct dentry *dentry = file->f_path.dentry;
- struct vfsmount *mnt = file->f_path.mnt;
struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
- struct dentry *cursor;
- int status;
-
- status = dcache_dir_open(inode, file);
- if (status)
- goto out;
-
- cursor = file->private_data;
- cursor->d_fsdata = NULL;
DPRINTK("file=%p dentry=%p %.*s",
file, dentry, dentry->d_name.len, dentry->d_name.name);
@@ -113,159 +81,32 @@ static int autofs4_dir_open(struct inode *inode, struct file *file)
if (autofs4_oz_mode(sbi))
goto out;
- if (autofs4_ispending(dentry)) {
- DPRINTK("dentry busy");
- dcache_dir_close(inode, file);
- status = -EBUSY;
- goto out;
- }
-
- status = -ENOENT;
- if (!d_mountpoint(dentry) && dentry->d_op && dentry->d_op->d_revalidate) {
- struct nameidata nd;
- int empty, ret;
-
- /* In case there are stale directory dentrys from a failed mount */
- spin_lock(&dcache_lock);
- empty = list_empty(&dentry->d_subdirs);
+ /*
+ * An empty directory in an autofs file system is always a
+ * mount point. The daemon must have failed to mount this
+ * during lookup so it doesn't exist. This can happen, for
+ * example, if user space returns an incorrect status for a
+ * mount request. Otherwise we're doing a readdir on the
+ * autofs file system so just let the libfs routines handle
+ * it.
+ */
+ spin_lock(&dcache_lock);
+ if (!d_mountpoint(dentry) && __simple_empty(dentry)) {
spin_unlock(&dcache_lock);
-
- if (!empty)
- d_invalidate(dentry);
-
- nd.flags = LOOKUP_DIRECTORY;
- ret = (dentry->d_op->d_revalidate)(dentry, &nd);
-
- if (ret <= 0) {
- if (ret < 0)
- status = ret;
- dcache_dir_close(inode, file);
- goto out;
- }
+ return -ENOENT;
}
+ spin_unlock(&dcache_lock);
- if (d_mountpoint(dentry)) {
- struct file *fp = NULL;
- struct path fp_path = { .dentry = dentry, .mnt = mnt };
-
- path_get(&fp_path);
-
- if (!autofs4_follow_mount(&fp_path.mnt, &fp_path.dentry)) {
- path_put(&fp_path);
- dcache_dir_close(inode, file);
- goto out;
- }
-
- fp = dentry_open(fp_path.dentry, fp_path.mnt, file->f_flags);
- status = PTR_ERR(fp);
- if (IS_ERR(fp)) {
- dcache_dir_close(inode, file);
- goto out;
- }
- cursor->d_fsdata = fp;
- }
- return 0;
-out:
- return status;
-}
-
-static int autofs4_dir_close(struct inode *inode, struct file *file)
-{
- struct dentry *dentry = file->f_path.dentry;
- struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
- struct dentry *cursor = file->private_data;
- int status = 0;
-
- DPRINTK("file=%p dentry=%p %.*s",
- file, dentry, dentry->d_name.len, dentry->d_name.name);
-
- if (autofs4_oz_mode(sbi))
- goto out;
-
- if (autofs4_ispending(dentry)) {
- DPRINTK("dentry busy");
- status = -EBUSY;
- goto out;
- }
-
- if (d_mountpoint(dentry)) {
- struct file *fp = cursor->d_fsdata;
- if (!fp) {
- status = -ENOENT;
- goto out;
- }
- filp_close(fp, current->files);
- }
-out:
- dcache_dir_close(inode, file);
- return status;
-}
-
-static int autofs4_dir_readdir(struct file *file, void *dirent, filldir_t filldir)
-{
- struct dentry *dentry = file->f_path.dentry;
- struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
- struct dentry *cursor = file->private_data;
- int status;
-
- DPRINTK("file=%p dentry=%p %.*s",
- file, dentry, dentry->d_name.len, dentry->d_name.name);
-
- if (autofs4_oz_mode(sbi))
- goto out;
-
- if (autofs4_ispending(dentry)) {
- DPRINTK("dentry busy");
- return -EBUSY;
- }
-
- if (d_mountpoint(dentry)) {
- struct file *fp = cursor->d_fsdata;
-
- if (!fp)
- return -ENOENT;
-
- if (!fp->f_op || !fp->f_op->readdir)
- goto out;
-
- status = vfs_readdir(fp, filldir, dirent);
- file->f_pos = fp->f_pos;
- if (status)
- autofs4_copy_atime(file, fp);
- return status;
- }
out:
- return dcache_readdir(file, dirent, filldir);
+ return dcache_dir_open(inode, file);
}
static int try_to_fill_dentry(struct dentry *dentry, int flags)
{
struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
struct autofs_info *ino = autofs4_dentry_ino(dentry);
- struct dentry *new;
int status;
- /* Block on any pending expiry here; invalidate the dentry
- when expiration is done to trigger mount request with a new
- dentry */
- if (ino && (ino->flags & AUTOFS_INF_EXPIRING)) {
- DPRINTK("waiting for expire %p name=%.*s",
- dentry, dentry->d_name.len, dentry->d_name.name);
-
- status = autofs4_wait(sbi, dentry, NFY_NONE);
-
- DPRINTK("expire done status=%d", status);
-
- /*
- * If the directory still exists the mount request must
- * continue otherwise it can't be followed at the right
- * time during the walk.
- */
- status = d_invalidate(dentry);
- if (status != -EBUSY)
- return -EAGAIN;
- }
-
DPRINTK("dentry=%p %.*s ino=%p",
dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
@@ -292,7 +133,8 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
return status;
}
/* Trigger mount for path component or follow link */
- } else if (flags & (LOOKUP_CONTINUE | LOOKUP_DIRECTORY) ||
+ } else if (dentry->d_flags & DCACHE_AUTOFS_PENDING ||
+ flags & (TRIGGER_FLAGS | TRIGGER_INTENTS) ||
current->link_count) {
DPRINTK("waiting for mount name=%.*s",
dentry->d_name.len, dentry->d_name.name);
@@ -320,26 +162,6 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
spin_unlock(&dentry->d_lock);
- /*
- * The dentry that is passed in from lookup may not be the one
- * we end up using, as mkdir can create a new one. If this
- * happens, and another process tries the lookup at the same time,
- * it will set the PENDING flag on this new dentry, but add itself
- * to our waitq. Then, if after the lookup succeeds, the first
- * process that requested the mount performs another lookup of the
- * same directory, it will show up as still pending! So, we need
- * to redo the lookup here and clear pending on that dentry.
- */
- if (d_unhashed(dentry)) {
- new = d_lookup(dentry->d_parent, &dentry->d_name);
- if (new) {
- spin_lock(&new->d_lock);
- new->d_flags &= ~DCACHE_AUTOFS_PENDING;
- spin_unlock(&new->d_lock);
- dput(new);
- }
- }
-
return 0;
}
@@ -355,51 +177,63 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
DPRINTK("dentry=%p %.*s oz_mode=%d nd->flags=%d",
dentry, dentry->d_name.len, dentry->d_name.name, oz_mode,
nd->flags);
-
- /* If it's our master or we shouldn't trigger a mount we're done */
- lookup_type = nd->flags & (LOOKUP_CONTINUE | LOOKUP_DIRECTORY);
- if (oz_mode || !lookup_type)
+ /*
+ * For an expire of a covered direct or offset mount we need
+ * to break out of follow_down() at the autofs mount trigger
+ * (d_mounted--), so we can see the expiring flag, and manage
+ * the blocking and following here until the expire is completed.
+ */
+ if (oz_mode) {
+ spin_lock(&sbi->fs_lock);
+ if (ino->flags & AUTOFS_INF_EXPIRING) {
+ spin_unlock(&sbi->fs_lock);
+ /* Follow down to our covering mount. */
+ if (!follow_down(&nd->path.mnt, &nd->path.dentry))
+ goto done;
+ goto follow;
+ }
+ spin_unlock(&sbi->fs_lock);
goto done;
+ }
- /* If an expire request is pending wait for it. */
- if (ino && (ino->flags & AUTOFS_INF_EXPIRING)) {
- DPRINTK("waiting for active request %p name=%.*s",
- dentry, dentry->d_name.len, dentry->d_name.name);
-
- status = autofs4_wait(sbi, dentry, NFY_NONE);
+ /* If an expire request is pending everyone must wait. */
+ autofs4_expire_wait(dentry);
- DPRINTK("request done status=%d", status);
- }
+ /* We trigger a mount for almost all flags */
+ lookup_type = nd->flags & (TRIGGER_FLAGS | TRIGGER_INTENTS);
+ if (!(lookup_type || dentry->d_flags & DCACHE_AUTOFS_PENDING))
+ goto follow;
/*
- * If the dentry contains directories then it is an
- * autofs multi-mount with no root mount offset. So
- * don't try to mount it again.
+ * If the dentry contains directories then it is an autofs
+ * multi-mount with no root mount offset. So don't try to
+ * mount it again.
*/
spin_lock(&dcache_lock);
- if (!d_mountpoint(dentry) && __simple_empty(dentry)) {
+ if (dentry->d_flags & DCACHE_AUTOFS_PENDING ||
+ (!d_mountpoint(dentry) && __simple_empty(dentry))) {
spin_unlock(&dcache_lock);
status = try_to_fill_dentry(dentry, 0);
if (status)
goto out_error;
- /*
- * The mount succeeded but if there is no root mount
- * it must be an autofs multi-mount with no root offset
- * so we don't need to follow the mount.
- */
- if (d_mountpoint(dentry)) {
- if (!autofs4_follow_mount(&nd->path.mnt,
- &nd->path.dentry)) {
- status = -ENOENT;
- goto out_error;
- }
- }
-
- goto done;
+ goto follow;
}
spin_unlock(&dcache_lock);
+follow:
+ /*
+ * If there is no root mount it must be an autofs
+ * multi-mount with no root offset so we don't need
+ * to follow it.
+ */
+ if (d_mountpoint(dentry)) {
+ if (!autofs4_follow_mount(&nd->path.mnt,
+ &nd->path.dentry)) {
+ status = -ENOENT;
+ goto out_error;
+ }
+ }
done:
return NULL;
@@ -424,12 +258,23 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
int status = 1;
/* Pending dentry */
+ spin_lock(&sbi->fs_lock);
if (autofs4_ispending(dentry)) {
/* The daemon never causes a mount to trigger */
+ spin_unlock(&sbi->fs_lock);
+
if (oz_mode)
return 1;
/*
+ * If the directory has gone away due to an expire
+ * we have been called as ->d_revalidate() and so
+ * we need to return false and proceed to ->lookup().
+ */
+ if (autofs4_expire_wait(dentry) == -EAGAIN)
+ return 0;
+
+ /*
* A zero status is success otherwise we have a
* negative error code.
*/
@@ -437,17 +282,9 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
if (status == 0)
return 1;
- /*
- * A status of EAGAIN here means that the dentry has gone
- * away while waiting for an expire to complete. If we are
- * racing with expire lookup will wait for it so this must
- * be a revalidate and we need to send it to lookup.
- */
- if (status == -EAGAIN)
- return 0;
-
return status;
}
+ spin_unlock(&sbi->fs_lock);
/* Negative dentry.. invalidate if "old" */
if (dentry->d_inode == NULL)
@@ -461,6 +298,7 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
DPRINTK("dentry=%p %.*s, emptydir",
dentry, dentry->d_name.len, dentry->d_name.name);
spin_unlock(&dcache_lock);
+
/* The daemon never causes a mount to trigger */
if (oz_mode)
return 1;
@@ -493,10 +331,12 @@ void autofs4_dentry_release(struct dentry *de)
struct autofs_sb_info *sbi = autofs4_sbi(de->d_sb);
if (sbi) {
- spin_lock(&sbi->rehash_lock);
- if (!list_empty(&inf->rehash))
- list_del(&inf->rehash);
- spin_unlock(&sbi->rehash_lock);
+ spin_lock(&sbi->lookup_lock);
+ if (!list_empty(&inf->active))
+ list_del(&inf->active);
+ if (!list_empty(&inf->expiring))
+ list_del(&inf->expiring);
+ spin_unlock(&sbi->lookup_lock);
}
inf->dentry = NULL;
@@ -518,7 +358,7 @@ static struct dentry_operations autofs4_dentry_operations = {
.d_release = autofs4_dentry_release,
};
-static struct dentry *autofs4_lookup_unhashed(struct autofs_sb_info *sbi, struct dentry *parent, struct qstr *name)
+static struct dentry *autofs4_lookup_active(struct autofs_sb_info *sbi, struct dentry *parent, struct qstr *name)
{
unsigned int len = name->len;
unsigned int hash = name->hash;
@@ -526,14 +366,66 @@ static struct dentry *autofs4_lookup_unhashed(struct autofs_sb_info *sbi, struct
struct list_head *p, *head;
spin_lock(&dcache_lock);
- spin_lock(&sbi->rehash_lock);
- head = &sbi->rehash_list;
+ spin_lock(&sbi->lookup_lock);
+ head = &sbi->active_list;
list_for_each(p, head) {
struct autofs_info *ino;
struct dentry *dentry;
struct qstr *qstr;
- ino = list_entry(p, struct autofs_info, rehash);
+ ino = list_entry(p, struct autofs_info, active);
+ dentry = ino->dentry;
+
+ spin_lock(&dentry->d_lock);
+
+ /* Already gone? */
+ if (atomic_read(&dentry->d_count) == 0)
+ goto next;
+
+ qstr = &dentry->d_name;
+
+ if (dentry->d_name.hash != hash)
+ goto next;
+ if (dentry->d_parent != parent)
+ goto next;
+
+ if (qstr->len != len)
+ goto next;
+ if (memcmp(qstr->name, str, len))
+ goto next;
+
+ if (d_unhashed(dentry)) {
+ dget(dentry);
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&sbi->lookup_lock);
+ spin_unlock(&dcache_lock);
+ return dentry;
+ }
+next:
+ spin_unlock(&dentry->d_lock);
+ }
+ spin_unlock(&sbi->lookup_lock);
+ spin_unlock(&dcache_lock);
+
+ return NULL;
+}
+
+static struct dentry *autofs4_lookup_expiring(struct autofs_sb_info *sbi, struct dentry *parent, struct qstr *name)
+{
+ unsigned int len = name->len;
+ unsigned int hash = name->hash;
+ const unsigned char *str = name->name;
+ struct list_head *p, *head;
+
+ spin_lock(&dcache_lock);
+ spin_lock(&sbi->lookup_lock);
+ head = &sbi->expiring_list;
+ list_for_each(p, head) {
+ struct autofs_info *ino;
+ struct dentry *dentry;
+ struct qstr *qstr;
+
+ ino = list_entry(p, struct autofs_info, expiring);
dentry = ino->dentry;
spin_lock(&dentry->d_lock);
@@ -555,33 +447,16 @@ static struct dentry *autofs4_lookup_unhashed(struct autofs_sb_info *sbi, struct
goto next;
if (d_unhashed(dentry)) {
- struct inode *inode = dentry->d_inode;
-
- ino = autofs4_dentry_ino(dentry);
- list_del_init(&ino->rehash);
dget(dentry);
- /*
- * Make the rehashed dentry negative so the VFS
- * behaves as it should.
- */
- if (inode) {
- dentry->d_inode = NULL;
- list_del_init(&dentry->d_alias);
- spin_unlock(&dentry->d_lock);
- spin_unlock(&sbi->rehash_lock);
- spin_unlock(&dcache_lock);
- iput(inode);
- return dentry;
- }
spin_unlock(&dentry->d_lock);
- spin_unlock(&sbi->rehash_lock);
+ spin_unlock(&sbi->lookup_lock);
spin_unlock(&dcache_lock);
return dentry;
}
next:
spin_unlock(&dentry->d_lock);
}
- spin_unlock(&sbi->rehash_lock);
+ spin_unlock(&sbi->lookup_lock);
spin_unlock(&dcache_lock);
return NULL;
@@ -591,7 +466,8 @@ next:
static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
{
struct autofs_sb_info *sbi;
- struct dentry *unhashed;
+ struct autofs_info *ino;
+ struct dentry *expiring, *unhashed;
int oz_mode;
DPRINTK("name = %.*s",
@@ -607,8 +483,26 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
DPRINTK("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d",
current->pid, task_pgrp_nr(current), sbi->catatonic, oz_mode);
- unhashed = autofs4_lookup_unhashed(sbi, dentry->d_parent, &dentry->d_name);
- if (!unhashed) {
+ expiring = autofs4_lookup_expiring(sbi, dentry->d_parent, &dentry->d_name);
+ if (expiring) {
+ /*
+ * If we are racing with expire the request might not
+ * be quite complete but the directory has been removed
+ * so it must have been successful, so just wait for it.
+ */
+ ino = autofs4_dentry_ino(expiring);
+ autofs4_expire_wait(expiring);
+ spin_lock(&sbi->lookup_lock);
+ if (!list_empty(&ino->expiring))
+ list_del_init(&ino->expiring);
+ spin_unlock(&sbi->lookup_lock);
+ dput(expiring);
+ }
+
+ unhashed = autofs4_lookup_active(sbi, dentry->d_parent, &dentry->d_name);
+ if (unhashed)
+ dentry = unhashed;
+ else {
/*
* Mark the dentry incomplete but don't hash it. We do this
* to serialize our inode creation operations (symlink and
@@ -622,39 +516,34 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
*/
dentry->d_op = &autofs4_root_dentry_operations;
- dentry->d_fsdata = NULL;
- d_instantiate(dentry, NULL);
- } else {
- struct autofs_info *ino = autofs4_dentry_ino(unhashed);
- DPRINTK("rehash %p with %p", dentry, unhashed);
/*
- * If we are racing with expire the request might not
- * be quite complete but the directory has been removed
- * so it must have been successful, so just wait for it.
- * We need to ensure the AUTOFS_INF_EXPIRING flag is clear
- * before continuing as revalidate may fail when calling
- * try_to_fill_dentry (returning EAGAIN) if we don't.
+ * And we need to ensure that the same dentry is used for
+ * all following lookup calls until it is hashed so that
+ * the dentry flags are persistent throughout the request.
*/
- while (ino && (ino->flags & AUTOFS_INF_EXPIRING)) {
- DPRINTK("wait for incomplete expire %p name=%.*s",
- unhashed, unhashed->d_name.len,
- unhashed->d_name.name);
- autofs4_wait(sbi, unhashed, NFY_NONE);
- DPRINTK("request completed");
- }
- dentry = unhashed;
+ ino = autofs4_init_ino(NULL, sbi, 0555);
+ if (!ino)
+ return ERR_PTR(-ENOMEM);
+
+ dentry->d_fsdata = ino;
+ ino->dentry = dentry;
+
+ spin_lock(&sbi->lookup_lock);
+ list_add(&ino->active, &sbi->active_list);
+ spin_unlock(&sbi->lookup_lock);
+
+ d_instantiate(dentry, NULL);
}
if (!oz_mode) {
spin_lock(&dentry->d_lock);
dentry->d_flags |= DCACHE_AUTOFS_PENDING;
spin_unlock(&dentry->d_lock);
- }
-
- if (dentry->d_op && dentry->d_op->d_revalidate) {
- mutex_unlock(&dir->i_mutex);
- (dentry->d_op->d_revalidate)(dentry, nd);
- mutex_lock(&dir->i_mutex);
+ if (dentry->d_op && dentry->d_op->d_revalidate) {
+ mutex_unlock(&dir->i_mutex);
+ (dentry->d_op->d_revalidate)(dentry, nd);
+ mutex_lock(&dir->i_mutex);
+ }
}
/*
@@ -673,9 +562,11 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
return ERR_PTR(-ERESTARTNOINTR);
}
}
- spin_lock(&dentry->d_lock);
- dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
- spin_unlock(&dentry->d_lock);
+ if (!oz_mode) {
+ spin_lock(&dentry->d_lock);
+ dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
+ spin_unlock(&dentry->d_lock);
+ }
}
/*
@@ -706,7 +597,7 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
}
if (unhashed)
- return dentry;
+ return unhashed;
return NULL;
}
@@ -728,20 +619,31 @@ static int autofs4_dir_symlink(struct inode *dir,
return -EACCES;
ino = autofs4_init_ino(ino, sbi, S_IFLNK | 0555);
- if (ino == NULL)
- return -ENOSPC;
+ if (!ino)
+ return -ENOMEM;
- ino->size = strlen(symname);
- ino->u.symlink = cp = kmalloc(ino->size + 1, GFP_KERNEL);
+ spin_lock(&sbi->lookup_lock);
+ if (!list_empty(&ino->active))
+ list_del_init(&ino->active);
+ spin_unlock(&sbi->lookup_lock);
- if (cp == NULL) {
- kfree(ino);
- return -ENOSPC;
+ ino->size = strlen(symname);
+ cp = kmalloc(ino->size + 1, GFP_KERNEL);
+ if (!cp) {
+ if (!dentry->d_fsdata)
+ kfree(ino);
+ return -ENOMEM;
}
strcpy(cp, symname);
inode = autofs4_get_inode(dir->i_sb, ino);
+ if (!inode) {
+ kfree(cp);
+ if (!dentry->d_fsdata)
+ kfree(ino);
+ return -ENOMEM;
+ }
d_add(dentry, inode);
if (dir == dir->i_sb->s_root->d_inode)
@@ -757,6 +659,7 @@ static int autofs4_dir_symlink(struct inode *dir,
atomic_inc(&p_ino->count);
ino->inode = inode;
+ ino->u.symlink = cp;
dir->i_mtime = CURRENT_TIME;
return 0;
@@ -769,9 +672,8 @@ static int autofs4_dir_symlink(struct inode *dir,
* that the file no longer exists. However, doing that means that the
* VFS layer can turn the dentry into a negative dentry. We don't want
* this, because the unlink is probably the result of an expire.
- * We simply d_drop it and add it to a rehash candidates list in the
- * super block, which allows the dentry lookup to reuse it retaining
- * the flags, such as expire in progress, in case we're racing with expire.
+ * We simply d_drop it and add it to an expiring list in the super block,
+ * which allows the dentry lookup to check for an incomplete expire.
*
* If a process is blocked on the dentry waiting for the expire to finish,
* it will invalidate the dentry and try to mount with a new one.
@@ -801,9 +703,10 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry)
dir->i_mtime = CURRENT_TIME;
spin_lock(&dcache_lock);
- spin_lock(&sbi->rehash_lock);
- list_add(&ino->rehash, &sbi->rehash_list);
- spin_unlock(&sbi->rehash_lock);
+ spin_lock(&sbi->lookup_lock);
+ if (list_empty(&ino->expiring))
+ list_add(&ino->expiring, &sbi->expiring_list);
+ spin_unlock(&sbi->lookup_lock);
spin_lock(&dentry->d_lock);
__d_drop(dentry);
spin_unlock(&dentry->d_lock);
@@ -829,9 +732,10 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
spin_unlock(&dcache_lock);
return -ENOTEMPTY;
}
- spin_lock(&sbi->rehash_lock);
- list_add(&ino->rehash, &sbi->rehash_list);
- spin_unlock(&sbi->rehash_lock);
+ spin_lock(&sbi->lookup_lock);
+ if (list_empty(&ino->expiring))
+ list_add(&ino->expiring, &sbi->expiring_list);
+ spin_unlock(&sbi->lookup_lock);
spin_lock(&dentry->d_lock);
__d_drop(dentry);
spin_unlock(&dentry->d_lock);
@@ -866,10 +770,20 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode)
dentry, dentry->d_name.len, dentry->d_name.name);
ino = autofs4_init_ino(ino, sbi, S_IFDIR | 0555);
- if (ino == NULL)
- return -ENOSPC;
+ if (!ino)
+ return -ENOMEM;
+
+ spin_lock(&sbi->lookup_lock);
+ if (!list_empty(&ino->active))
+ list_del_init(&ino->active);
+ spin_unlock(&sbi->lookup_lock);
inode = autofs4_get_inode(dir->i_sb, ino);
+ if (!inode) {
+ if (!dentry->d_fsdata)
+ kfree(ino);
+ return -ENOMEM;
+ }
d_add(dentry, inode);
if (dir == dir->i_sb->s_root->d_inode)
@@ -922,44 +836,6 @@ static inline int autofs4_get_protosubver(struct autofs_sb_info *sbi, int __user
}
/*
- * Tells the daemon whether we need to reghost or not. Also, clears
- * the reghost_needed flag.
- */
-static inline int autofs4_ask_reghost(struct autofs_sb_info *sbi, int __user *p)
-{
- int status;
-
- DPRINTK("returning %d", sbi->needs_reghost);
-
- status = put_user(sbi->needs_reghost, p);
- if (status)
- return status;
-
- sbi->needs_reghost = 0;
- return 0;
-}
-
-/*
- * Enable / Disable reghosting ioctl() operation
- */
-static inline int autofs4_toggle_reghost(struct autofs_sb_info *sbi, int __user *p)
-{
- int status;
- int val;
-
- status = get_user(val, p);
-
- DPRINTK("reghost = %d", val);
-
- if (status)
- return status;
-
- /* turn on/off reghosting, with the val */
- sbi->reghost_enabled = val;
- return 0;
-}
-
-/*
* Tells the daemon whether it can umount the autofs mount.
*/
static inline int autofs4_ask_umount(struct vfsmount *mnt, int __user *p)
@@ -1023,11 +899,6 @@ static int autofs4_root_ioctl(struct inode *inode, struct file *filp,
case AUTOFS_IOC_SETTIMEOUT:
return autofs4_get_set_timeout(sbi, p);
- case AUTOFS_IOC_TOGGLEREGHOST:
- return autofs4_toggle_reghost(sbi, p);
- case AUTOFS_IOC_ASKREGHOST:
- return autofs4_ask_reghost(sbi, p);
-
case AUTOFS_IOC_ASKUMOUNT:
return autofs4_ask_umount(filp->f_path.mnt, p);
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 75e5955c3f6..35216d18d8b 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -28,6 +28,12 @@ void autofs4_catatonic_mode(struct autofs_sb_info *sbi)
{
struct autofs_wait_queue *wq, *nwq;
+ mutex_lock(&sbi->wq_mutex);
+ if (sbi->catatonic) {
+ mutex_unlock(&sbi->wq_mutex);
+ return;
+ }
+
DPRINTK("entering catatonic mode");
sbi->catatonic = 1;
@@ -36,13 +42,18 @@ void autofs4_catatonic_mode(struct autofs_sb_info *sbi)
while (wq) {
nwq = wq->next;
wq->status = -ENOENT; /* Magic is gone - report failure */
- kfree(wq->name);
- wq->name = NULL;
+ if (wq->name.name) {
+ kfree(wq->name.name);
+ wq->name.name = NULL;
+ }
+ wq->wait_ctr--;
wake_up_interruptible(&wq->queue);
wq = nwq;
}
fput(sbi->pipe); /* Close the pipe */
sbi->pipe = NULL;
+ sbi->pipefd = -1;
+ mutex_unlock(&sbi->wq_mutex);
}
static int autofs4_write(struct file *file, const void *addr, int bytes)
@@ -89,10 +100,11 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
union autofs_packet_union v4_pkt;
union autofs_v5_packet_union v5_pkt;
} pkt;
+ struct file *pipe = NULL;
size_t pktsz;
DPRINTK("wait id = 0x%08lx, name = %.*s, type=%d",
- wq->wait_queue_token, wq->len, wq->name, type);
+ wq->wait_queue_token, wq->name.len, wq->name.name, type);
memset(&pkt,0,sizeof pkt); /* For security reasons */
@@ -107,9 +119,9 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
pktsz = sizeof(*mp);
mp->wait_queue_token = wq->wait_queue_token;
- mp->len = wq->len;
- memcpy(mp->name, wq->name, wq->len);
- mp->name[wq->len] = '\0';
+ mp->len = wq->name.len;
+ memcpy(mp->name, wq->name.name, wq->name.len);
+ mp->name[wq->name.len] = '\0';
break;
}
case autofs_ptype_expire_multi:
@@ -119,9 +131,9 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
pktsz = sizeof(*ep);
ep->wait_queue_token = wq->wait_queue_token;
- ep->len = wq->len;
- memcpy(ep->name, wq->name, wq->len);
- ep->name[wq->len] = '\0';
+ ep->len = wq->name.len;
+ memcpy(ep->name, wq->name.name, wq->name.len);
+ ep->name[wq->name.len] = '\0';
break;
}
/*
@@ -138,9 +150,9 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
pktsz = sizeof(*packet);
packet->wait_queue_token = wq->wait_queue_token;
- packet->len = wq->len;
- memcpy(packet->name, wq->name, wq->len);
- packet->name[wq->len] = '\0';
+ packet->len = wq->name.len;
+ memcpy(packet->name, wq->name.name, wq->name.len);
+ packet->name[wq->name.len] = '\0';
packet->dev = wq->dev;
packet->ino = wq->ino;
packet->uid = wq->uid;
@@ -154,8 +166,19 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
return;
}
- if (autofs4_write(sbi->pipe, &pkt, pktsz))
- autofs4_catatonic_mode(sbi);
+ /* Check if we have become catatonic */
+ mutex_lock(&sbi->wq_mutex);
+ if (!sbi->catatonic) {
+ pipe = sbi->pipe;
+ get_file(pipe);
+ }
+ mutex_unlock(&sbi->wq_mutex);
+
+ if (pipe) {
+ if (autofs4_write(pipe, &pkt, pktsz))
+ autofs4_catatonic_mode(sbi);
+ fput(pipe);
+ }
}
static int autofs4_getpath(struct autofs_sb_info *sbi,
@@ -191,58 +214,55 @@ static int autofs4_getpath(struct autofs_sb_info *sbi,
}
static struct autofs_wait_queue *
-autofs4_find_wait(struct autofs_sb_info *sbi,
- char *name, unsigned int hash, unsigned int len)
+autofs4_find_wait(struct autofs_sb_info *sbi, struct qstr *qstr)
{
struct autofs_wait_queue *wq;
for (wq = sbi->queues; wq; wq = wq->next) {
- if (wq->hash == hash &&
- wq->len == len &&
- wq->name && !memcmp(wq->name, name, len))
+ if (wq->name.hash == qstr->hash &&
+ wq->name.len == qstr->len &&
+ wq->name.name &&
+ !memcmp(wq->name.name, qstr->name, qstr->len))
break;
}
return wq;
}
-int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
- enum autofs_notify notify)
+/*
+ * Check if we have a valid request.
+ * Returns
+ * 1 if the request should continue.
+ * In this case we can return an autofs_wait_queue entry if one is
+ * found or NULL to indicate a new wait needs to be created.
+ * 0 or a negative errno if the request shouldn't continue.
+ */
+static int validate_request(struct autofs_wait_queue **wait,
+ struct autofs_sb_info *sbi,
+ struct qstr *qstr,
+ struct dentry*dentry, enum autofs_notify notify)
{
- struct autofs_info *ino;
struct autofs_wait_queue *wq;
- char *name;
- unsigned int len = 0;
- unsigned int hash = 0;
- int status, type;
-
- /* In catatonic mode, we don't wait for nobody */
- if (sbi->catatonic)
- return -ENOENT;
-
- name = kmalloc(NAME_MAX + 1, GFP_KERNEL);
- if (!name)
- return -ENOMEM;
+ struct autofs_info *ino;
- /* If this is a direct mount request create a dummy name */
- if (IS_ROOT(dentry) && (sbi->type & AUTOFS_TYPE_DIRECT))
- len = sprintf(name, "%p", dentry);
- else {
- len = autofs4_getpath(sbi, dentry, &name);
- if (!len) {
- kfree(name);
- return -ENOENT;
- }
+ /* Wait in progress, continue; */
+ wq = autofs4_find_wait(sbi, qstr);
+ if (wq) {
+ *wait = wq;
+ return 1;
}
- hash = full_name_hash(name, len);
- if (mutex_lock_interruptible(&sbi->wq_mutex)) {
- kfree(name);
- return -EINTR;
- }
+ *wait = NULL;
- wq = autofs4_find_wait(sbi, name, hash, len);
+ /* If we don't yet have any info this is a new request */
ino = autofs4_dentry_ino(dentry);
- if (!wq && ino && notify == NFY_NONE) {
+ if (!ino)
+ return 1;
+
+ /*
+ * If we've been asked to wait on an existing expire (NFY_NONE)
+ * but there is no wait in the queue ...
+ */
+ if (notify == NFY_NONE) {
/*
* Either we've betean the pending expire to post it's
* wait or it finished while we waited on the mutex.
@@ -253,13 +273,14 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
while (ino->flags & AUTOFS_INF_EXPIRING) {
mutex_unlock(&sbi->wq_mutex);
schedule_timeout_interruptible(HZ/10);
- if (mutex_lock_interruptible(&sbi->wq_mutex)) {
- kfree(name);
+ if (mutex_lock_interruptible(&sbi->wq_mutex))
return -EINTR;
+
+ wq = autofs4_find_wait(sbi, qstr);
+ if (wq) {
+ *wait = wq;
+ return 1;
}
- wq = autofs4_find_wait(sbi, name, hash, len);
- if (wq)
- break;
}
/*
@@ -267,18 +288,96 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
* cases where we wait on NFY_NONE neither depend on the
* return status of the wait.
*/
- if (!wq) {
+ return 0;
+ }
+
+ /*
+ * If we've been asked to trigger a mount and the request
+ * completed while we waited on the mutex ...
+ */
+ if (notify == NFY_MOUNT) {
+ /*
+ * If the dentry isn't hashed just go ahead and try the
+ * mount again with a new wait (not much else we can do).
+ */
+ if (!d_unhashed(dentry)) {
+ /*
+ * But if the dentry is hashed, that means that we
+ * got here through the revalidate path. Thus, we
+ * need to check if the dentry has been mounted
+ * while we waited on the wq_mutex. If it has,
+ * simply return success.
+ */
+ if (d_mountpoint(dentry))
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
+ enum autofs_notify notify)
+{
+ struct autofs_wait_queue *wq;
+ struct qstr qstr;
+ char *name;
+ int status, ret, type;
+
+ /* In catatonic mode, we don't wait for nobody */
+ if (sbi->catatonic)
+ return -ENOENT;
+
+ if (!dentry->d_inode) {
+ /*
+ * A wait for a negative dentry is invalid for certain
+ * cases. A direct or offset mount "always" has its mount
+ * point directory created and so the request dentry must
+ * be positive or the map key doesn't exist. The situation
+ * is very similar for indirect mounts except only dentries
+ * in the root of the autofs file system may be negative.
+ */
+ if (sbi->type & (AUTOFS_TYPE_DIRECT|AUTOFS_TYPE_OFFSET))
+ return -ENOENT;
+ else if (!IS_ROOT(dentry->d_parent))
+ return -ENOENT;
+ }
+
+ name = kmalloc(NAME_MAX + 1, GFP_KERNEL);
+ if (!name)
+ return -ENOMEM;
+
+ /* If this is a direct mount request create a dummy name */
+ if (IS_ROOT(dentry) && (sbi->type & AUTOFS_TYPE_DIRECT))
+ qstr.len = sprintf(name, "%p", dentry);
+ else {
+ qstr.len = autofs4_getpath(sbi, dentry, &name);
+ if (!qstr.len) {
kfree(name);
- mutex_unlock(&sbi->wq_mutex);
- return 0;
+ return -ENOENT;
}
}
+ qstr.name = name;
+ qstr.hash = full_name_hash(name, qstr.len);
+
+ if (mutex_lock_interruptible(&sbi->wq_mutex)) {
+ kfree(qstr.name);
+ return -EINTR;
+ }
+
+ ret = validate_request(&wq, sbi, &qstr, dentry, notify);
+ if (ret <= 0) {
+ if (ret == 0)
+ mutex_unlock(&sbi->wq_mutex);
+ kfree(qstr.name);
+ return ret;
+ }
if (!wq) {
/* Create a new wait queue */
wq = kmalloc(sizeof(struct autofs_wait_queue),GFP_KERNEL);
if (!wq) {
- kfree(name);
+ kfree(qstr.name);
mutex_unlock(&sbi->wq_mutex);
return -ENOMEM;
}
@@ -289,9 +388,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
wq->next = sbi->queues;
sbi->queues = wq;
init_waitqueue_head(&wq->queue);
- wq->hash = hash;
- wq->name = name;
- wq->len = len;
+ memcpy(&wq->name, &qstr, sizeof(struct qstr));
wq->dev = autofs4_get_dev(sbi);
wq->ino = autofs4_get_ino(sbi);
wq->uid = current->uid;
@@ -299,7 +396,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
wq->pid = current->pid;
wq->tgid = current->tgid;
wq->status = -EINTR; /* Status return if interrupted */
- atomic_set(&wq->wait_ctr, 2);
+ wq->wait_ctr = 2;
mutex_unlock(&sbi->wq_mutex);
if (sbi->version < 5) {
@@ -319,28 +416,25 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
}
DPRINTK("new wait id = 0x%08lx, name = %.*s, nfy=%d\n",
- (unsigned long) wq->wait_queue_token, wq->len, wq->name, notify);
+ (unsigned long) wq->wait_queue_token, wq->name.len,
+ wq->name.name, notify);
/* autofs4_notify_daemon() may block */
autofs4_notify_daemon(sbi, wq, type);
} else {
- atomic_inc(&wq->wait_ctr);
+ wq->wait_ctr++;
mutex_unlock(&sbi->wq_mutex);
- kfree(name);
+ kfree(qstr.name);
DPRINTK("existing wait id = 0x%08lx, name = %.*s, nfy=%d",
- (unsigned long) wq->wait_queue_token, wq->len, wq->name, notify);
- }
-
- /* wq->name is NULL if and only if the lock is already released */
-
- if (sbi->catatonic) {
- /* We might have slept, so check again for catatonic mode */
- wq->status = -ENOENT;
- kfree(wq->name);
- wq->name = NULL;
+ (unsigned long) wq->wait_queue_token, wq->name.len,
+ wq->name.name, notify);
}
- if (wq->name) {
+ /*
+ * wq->name.name is NULL iff the lock is already released
+ * or the mount has been made catatonic.
+ */
+ if (wq->name.name) {
/* Block all but "shutdown" signals while waiting */
sigset_t oldset;
unsigned long irqflags;
@@ -351,7 +445,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
recalc_sigpending();
spin_unlock_irqrestore(&current->sighand->siglock, irqflags);
- wait_event_interruptible(wq->queue, wq->name == NULL);
+ wait_event_interruptible(wq->queue, wq->name.name == NULL);
spin_lock_irqsave(&current->sighand->siglock, irqflags);
current->blocked = oldset;
@@ -364,8 +458,10 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
status = wq->status;
/* Are we the last process to need status? */
- if (atomic_dec_and_test(&wq->wait_ctr))
+ mutex_lock(&sbi->wq_mutex);
+ if (!--wq->wait_ctr)
kfree(wq);
+ mutex_unlock(&sbi->wq_mutex);
return status;
}
@@ -387,16 +483,13 @@ int autofs4_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_tok
}
*wql = wq->next; /* Unlink from chain */
- mutex_unlock(&sbi->wq_mutex);
- kfree(wq->name);
- wq->name = NULL; /* Do not wait on this queue */
-
+ kfree(wq->name.name);
+ wq->name.name = NULL; /* Do not wait on this queue */
wq->status = status;
-
- if (atomic_dec_and_test(&wq->wait_ctr)) /* Is anyone still waiting for this guy? */
+ wake_up_interruptible(&wq->queue);
+ if (!--wq->wait_ctr)
kfree(wq);
- else
- wake_up_interruptible(&wq->queue);
+ mutex_unlock(&sbi->wq_mutex);
return 0;
}
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index f1c2ea8342f..5f1538c03b1 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -243,8 +243,7 @@ static int bad_inode_readlink(struct dentry *dentry, char __user *buffer,
return -EIO;
}
-static int bad_inode_permission(struct inode *inode, int mask,
- struct nameidata *nd)
+static int bad_inode_permission(struct inode *inode, int mask)
{
return -EIO;
}
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index e8717de3bab..02c6e62b72f 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -289,7 +289,7 @@ befs_destroy_inode(struct inode *inode)
kmem_cache_free(befs_inode_cachep, BEFS_I(inode));
}
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
{
struct befs_inode_info *bi = (struct befs_inode_info *) foo;
diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h
index 70f5d3a8eed..7109e451abf 100644
--- a/fs/bfs/bfs.h
+++ b/fs/bfs/bfs.h
@@ -16,8 +16,9 @@ struct bfs_sb_info {
unsigned long si_freei;
unsigned long si_lf_eblk;
unsigned long si_lasti;
- unsigned long * si_imap;
- struct buffer_head * si_sbh; /* buffer header w/superblock */
+ unsigned long *si_imap;
+ struct buffer_head *si_sbh; /* buffer header w/superblock */
+ struct mutex bfs_lock;
};
/*
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 034950cb3cb..87ee5ccee34 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -32,16 +32,17 @@ static int bfs_readdir(struct file *f, void *dirent, filldir_t filldir)
struct inode *dir = f->f_path.dentry->d_inode;
struct buffer_head *bh;
struct bfs_dirent *de;
+ struct bfs_sb_info *info = BFS_SB(dir->i_sb);
unsigned int offset;
int block;
- lock_kernel();
+ mutex_lock(&info->bfs_lock);
if (f->f_pos & (BFS_DIRENT_SIZE - 1)) {
printf("Bad f_pos=%08lx for %s:%08lx\n",
(unsigned long)f->f_pos,
dir->i_sb->s_id, dir->i_ino);
- unlock_kernel();
+ mutex_unlock(&info->bfs_lock);
return -EBADF;
}
@@ -61,7 +62,7 @@ static int bfs_readdir(struct file *f, void *dirent, filldir_t filldir)
le16_to_cpu(de->ino),
DT_UNKNOWN) < 0) {
brelse(bh);
- unlock_kernel();
+ mutex_unlock(&info->bfs_lock);
return 0;
}
}
@@ -71,7 +72,7 @@ static int bfs_readdir(struct file *f, void *dirent, filldir_t filldir)
brelse(bh);
}
- unlock_kernel();
+ mutex_unlock(&info->bfs_lock);
return 0;
}
@@ -95,10 +96,10 @@ static int bfs_create(struct inode *dir, struct dentry *dentry, int mode,
inode = new_inode(s);
if (!inode)
return -ENOSPC;
- lock_kernel();
+ mutex_lock(&info->bfs_lock);
ino = find_first_zero_bit(info->si_imap, info->si_lasti);
if (ino > info->si_lasti) {
- unlock_kernel();
+ mutex_unlock(&info->bfs_lock);
iput(inode);
return -ENOSPC;
}
@@ -125,10 +126,10 @@ static int bfs_create(struct inode *dir, struct dentry *dentry, int mode,
if (err) {
inode_dec_link_count(inode);
iput(inode);
- unlock_kernel();
+ mutex_unlock(&info->bfs_lock);
return err;
}
- unlock_kernel();
+ mutex_unlock(&info->bfs_lock);
d_instantiate(dentry, inode);
return 0;
}
@@ -139,22 +140,23 @@ static struct dentry *bfs_lookup(struct inode *dir, struct dentry *dentry,
struct inode *inode = NULL;
struct buffer_head *bh;
struct bfs_dirent *de;
+ struct bfs_sb_info *info = BFS_SB(dir->i_sb);
if (dentry->d_name.len > BFS_NAMELEN)
return ERR_PTR(-ENAMETOOLONG);
- lock_kernel();
+ mutex_lock(&info->bfs_lock);
bh = bfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &de);
if (bh) {
unsigned long ino = (unsigned long)le16_to_cpu(de->ino);
brelse(bh);
inode = bfs_iget(dir->i_sb, ino);
if (IS_ERR(inode)) {
- unlock_kernel();
+ mutex_unlock(&info->bfs_lock);
return ERR_CAST(inode);
}
}
- unlock_kernel();
+ mutex_unlock(&info->bfs_lock);
d_add(dentry, inode);
return NULL;
}
@@ -163,13 +165,14 @@ static int bfs_link(struct dentry *old, struct inode *dir,
struct dentry *new)
{
struct inode *inode = old->d_inode;
+ struct bfs_sb_info *info = BFS_SB(inode->i_sb);
int err;
- lock_kernel();
+ mutex_lock(&info->bfs_lock);
err = bfs_add_entry(dir, new->d_name.name, new->d_name.len,
inode->i_ino);
if (err) {
- unlock_kernel();
+ mutex_unlock(&info->bfs_lock);
return err;
}
inc_nlink(inode);
@@ -177,19 +180,19 @@ static int bfs_link(struct dentry *old, struct inode *dir,
mark_inode_dirty(inode);
atomic_inc(&inode->i_count);
d_instantiate(new, inode);
- unlock_kernel();
+ mutex_unlock(&info->bfs_lock);
return 0;
}
static int bfs_unlink(struct inode *dir, struct dentry *dentry)
{
int error = -ENOENT;
- struct inode *inode;
+ struct inode *inode = dentry->d_inode;
struct buffer_head *bh;
struct bfs_dirent *de;
+ struct bfs_sb_info *info = BFS_SB(inode->i_sb);
- inode = dentry->d_inode;
- lock_kernel();
+ mutex_lock(&info->bfs_lock);
bh = bfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &de);
if (!bh || (le16_to_cpu(de->ino) != inode->i_ino))
goto out_brelse;
@@ -210,7 +213,7 @@ static int bfs_unlink(struct inode *dir, struct dentry *dentry)
out_brelse:
brelse(bh);
- unlock_kernel();
+ mutex_unlock(&info->bfs_lock);
return error;
}
@@ -220,6 +223,7 @@ static int bfs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *old_inode, *new_inode;
struct buffer_head *old_bh, *new_bh;
struct bfs_dirent *old_de, *new_de;
+ struct bfs_sb_info *info;
int error = -ENOENT;
old_bh = new_bh = NULL;
@@ -227,7 +231,9 @@ static int bfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (S_ISDIR(old_inode->i_mode))
return -EINVAL;
- lock_kernel();
+ info = BFS_SB(old_inode->i_sb);
+
+ mutex_lock(&info->bfs_lock);
old_bh = bfs_find_entry(old_dir,
old_dentry->d_name.name,
old_dentry->d_name.len, &old_de);
@@ -264,7 +270,7 @@ static int bfs_rename(struct inode *old_dir, struct dentry *old_dentry,
error = 0;
end_rename:
- unlock_kernel();
+ mutex_unlock(&info->bfs_lock);
brelse(old_bh);
brelse(new_bh);
return error;
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index b11e63e8fbc..6a021265f01 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -99,7 +99,7 @@ static int bfs_get_block(struct inode *inode, sector_t block,
return -ENOSPC;
/* The rest has to be protected against itself. */
- lock_kernel();
+ mutex_lock(&info->bfs_lock);
/*
* If the last data block for this file is the last allocated
@@ -151,7 +151,7 @@ static int bfs_get_block(struct inode *inode, sector_t block,
mark_buffer_dirty(sbh);
map_bh(bh_result, sb, phys);
out:
- unlock_kernel();
+ mutex_unlock(&info->bfs_lock);
return err;
}
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 8db623838b5..0ed57b5ee01 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -104,6 +104,7 @@ static int bfs_write_inode(struct inode *inode, int unused)
struct bfs_inode *di;
struct buffer_head *bh;
int block, off;
+ struct bfs_sb_info *info = BFS_SB(inode->i_sb);
dprintf("ino=%08x\n", ino);
@@ -112,13 +113,13 @@ static int bfs_write_inode(struct inode *inode, int unused)
return -EIO;
}
- lock_kernel();
+ mutex_lock(&info->bfs_lock);
block = (ino - BFS_ROOT_INO) / BFS_INODES_PER_BLOCK + 1;
bh = sb_bread(inode->i_sb, block);
if (!bh) {
printf("Unable to read inode %s:%08x\n",
inode->i_sb->s_id, ino);
- unlock_kernel();
+ mutex_unlock(&info->bfs_lock);
return -EIO;
}
@@ -145,7 +146,7 @@ static int bfs_write_inode(struct inode *inode, int unused)
mark_buffer_dirty(bh);
brelse(bh);
- unlock_kernel();
+ mutex_unlock(&info->bfs_lock);
return 0;
}
@@ -170,7 +171,7 @@ static void bfs_delete_inode(struct inode *inode)
inode->i_size = 0;
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
- lock_kernel();
+ mutex_lock(&info->bfs_lock);
mark_inode_dirty(inode);
block = (ino - BFS_ROOT_INO) / BFS_INODES_PER_BLOCK + 1;
@@ -178,7 +179,7 @@ static void bfs_delete_inode(struct inode *inode)
if (!bh) {
printf("Unable to read inode %s:%08lx\n",
inode->i_sb->s_id, ino);
- unlock_kernel();
+ mutex_unlock(&info->bfs_lock);
return;
}
off = (ino - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK;
@@ -204,14 +205,16 @@ static void bfs_delete_inode(struct inode *inode)
info->si_lf_eblk = bi->i_sblock - 1;
mark_buffer_dirty(info->si_sbh);
}
- unlock_kernel();
+ mutex_unlock(&info->bfs_lock);
clear_inode(inode);
}
static void bfs_put_super(struct super_block *s)
{
struct bfs_sb_info *info = BFS_SB(s);
+
brelse(info->si_sbh);
+ mutex_destroy(&info->bfs_lock);
kfree(info->si_imap);
kfree(info);
s->s_fs_info = NULL;
@@ -236,11 +239,13 @@ static int bfs_statfs(struct dentry *dentry, struct kstatfs *buf)
static void bfs_write_super(struct super_block *s)
{
- lock_kernel();
+ struct bfs_sb_info *info = BFS_SB(s);
+
+ mutex_lock(&info->bfs_lock);
if (!(s->s_flags & MS_RDONLY))
- mark_buffer_dirty(BFS_SB(s)->si_sbh);
+ mark_buffer_dirty(info->si_sbh);
s->s_dirt = 0;
- unlock_kernel();
+ mutex_unlock(&info->bfs_lock);
}
static struct kmem_cache *bfs_inode_cachep;
@@ -259,7 +264,7 @@ static void bfs_destroy_inode(struct inode *inode)
kmem_cache_free(bfs_inode_cachep, BFS_I(inode));
}
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
{
struct bfs_inode_info *bi = foo;
@@ -380,7 +385,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
struct bfs_inode *di;
int block = (i - BFS_ROOT_INO) / BFS_INODES_PER_BLOCK + 1;
int off = (i - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK;
- unsigned long sblock, eblock;
+ unsigned long eblock;
if (!off) {
brelse(bh);
@@ -399,7 +404,6 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
set_bit(i, info->si_imap);
info->si_freeb -= BFS_FILEBLOCKS(di);
- sblock = le32_to_cpu(di->i_sblock);
eblock = le32_to_cpu(di->i_eblock);
if (eblock > info->si_lf_eblk)
info->si_lf_eblk = eblock;
@@ -410,6 +414,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
s->s_dirt = 1;
}
dump_imap("read_super", s);
+ mutex_init(&info->bfs_lock);
return 0;
out:
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index ba4cddb92f1..204cfd1d767 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -444,12 +444,6 @@ beyond_if:
regs->gp = ex.a_gpvalue;
#endif
start_thread(regs, ex.a_entry, current->mm->start_stack);
- if (unlikely(current->ptrace & PT_PTRACED)) {
- if (current->ptrace & PT_TRACE_EXEC)
- ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
- else
- send_sig(SIGTRAP, current, 0);
- }
return 0;
}
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index d48ff5f370f..655ed8d30a8 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -131,6 +131,15 @@ static int padzero(unsigned long elf_bss)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif
+#ifndef ELF_BASE_PLATFORM
+/*
+ * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
+ * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
+ * will be copied to the user stack in the same manner as AT_PLATFORM.
+ */
+#define ELF_BASE_PLATFORM NULL
+#endif
+
static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
unsigned long load_addr, unsigned long interp_load_addr)
@@ -142,7 +151,9 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
elf_addr_t __user *envp;
elf_addr_t __user *sp;
elf_addr_t __user *u_platform;
+ elf_addr_t __user *u_base_platform;
const char *k_platform = ELF_PLATFORM;
+ const char *k_base_platform = ELF_BASE_PLATFORM;
int items;
elf_addr_t *elf_info;
int ei_index = 0;
@@ -172,6 +183,19 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
return -EFAULT;
}
+ /*
+ * If this architecture has a "base" platform capability
+ * string, copy it to userspace.
+ */
+ u_base_platform = NULL;
+ if (k_base_platform) {
+ size_t len = strlen(k_base_platform) + 1;
+
+ u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
+ if (__copy_to_user(u_base_platform, k_base_platform, len))
+ return -EFAULT;
+ }
+
/* Create the ELF interpreter info */
elf_info = (elf_addr_t *)current->mm->saved_auxv;
/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
@@ -204,10 +228,15 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
NEW_AUX_ENT(AT_GID, tsk->gid);
NEW_AUX_ENT(AT_EGID, tsk->egid);
NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
+ NEW_AUX_ENT(AT_EXECFN, bprm->exec);
if (k_platform) {
NEW_AUX_ENT(AT_PLATFORM,
(elf_addr_t)(unsigned long)u_platform);
}
+ if (k_base_platform) {
+ NEW_AUX_ENT(AT_BASE_PLATFORM,
+ (elf_addr_t)(unsigned long)u_base_platform);
+ }
if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
}
@@ -974,12 +1003,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
#endif
start_thread(regs, elf_entry, bprm->p);
- if (unlikely(current->ptrace & PT_PTRACED)) {
- if (current->ptrace & PT_TRACE_EXEC)
- ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
- else
- send_sig(SIGTRAP, current, 0);
- }
retval = 0;
out:
kfree(loc);
@@ -1477,7 +1500,7 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
const struct user_regset_view *view = task_user_regset_view(dump_task);
struct elf_thread_core_info *t;
struct elf_prpsinfo *psinfo;
- struct task_struct *g, *p;
+ struct core_thread *ct;
unsigned int i;
info->size = 0;
@@ -1516,31 +1539,26 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
/*
* Allocate a structure for each thread.
*/
- rcu_read_lock();
- do_each_thread(g, p)
- if (p->mm == dump_task->mm) {
- t = kzalloc(offsetof(struct elf_thread_core_info,
- notes[info->thread_notes]),
- GFP_ATOMIC);
- if (unlikely(!t)) {
- rcu_read_unlock();
- return 0;
- }
- t->task = p;
- if (p == dump_task || !info->thread) {
- t->next = info->thread;
- info->thread = t;
- } else {
- /*
- * Make sure to keep the original task at
- * the head of the list.
- */
- t->next = info->thread->next;
- info->thread->next = t;
- }
+ for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
+ t = kzalloc(offsetof(struct elf_thread_core_info,
+ notes[info->thread_notes]),
+ GFP_KERNEL);
+ if (unlikely(!t))
+ return 0;
+
+ t->task = ct->task;
+ if (ct->task == dump_task || !info->thread) {
+ t->next = info->thread;
+ info->thread = t;
+ } else {
+ /*
+ * Make sure to keep the original task at
+ * the head of the list.
+ */
+ t->next = info->thread->next;
+ info->thread->next = t;
}
- while_each_thread(g, p);
- rcu_read_unlock();
+ }
/*
* Now fill in each thread's information.
@@ -1687,7 +1705,6 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
{
#define NUM_NOTES 6
struct list_head *t;
- struct task_struct *g, *p;
info->notes = NULL;
info->prstatus = NULL;
@@ -1719,20 +1736,19 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
info->thread_status_size = 0;
if (signr) {
+ struct core_thread *ct;
struct elf_thread_status *ets;
- rcu_read_lock();
- do_each_thread(g, p)
- if (current->mm == p->mm && current != p) {
- ets = kzalloc(sizeof(*ets), GFP_ATOMIC);
- if (!ets) {
- rcu_read_unlock();
- return 0;
- }
- ets->thread = p;
- list_add(&ets->list, &info->thread_list);
- }
- while_each_thread(g, p);
- rcu_read_unlock();
+
+ for (ct = current->mm->core_state->dumper.next;
+ ct; ct = ct->next) {
+ ets = kzalloc(sizeof(*ets), GFP_KERNEL);
+ if (!ets)
+ return 0;
+
+ ets->thread = ct->task;
+ list_add(&ets->list, &info->thread_list);
+ }
+
list_for_each(t, &info->thread_list) {
int sz;
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index d051a32e627..80c1f952ef7 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -433,13 +433,6 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
entryaddr = interp_params.entry_addr ?: exec_params.entry_addr;
start_thread(regs, entryaddr, current->mm->start_stack);
- if (unlikely(current->ptrace & PT_PTRACED)) {
- if (current->ptrace & PT_TRACE_EXEC)
- ptrace_notify((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
- else
- send_sig(SIGTRAP, current, 0);
- }
-
retval = 0;
error:
@@ -477,6 +470,7 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
char __user *u_platform, *p;
long hwcap;
int loop;
+ int nr; /* reset for each csp adjustment */
/* we're going to shovel a whole load of stuff onto the stack */
#ifdef CONFIG_MMU
@@ -549,10 +543,7 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
/* force 16 byte _final_ alignment here for generality */
#define DLINFO_ITEMS 13
- nitems = 1 + DLINFO_ITEMS + (k_platform ? 1 : 0);
-#ifdef DLINFO_ARCH_ITEMS
- nitems += DLINFO_ARCH_ITEMS;
-#endif
+ nitems = 1 + DLINFO_ITEMS + (k_platform ? 1 : 0) + AT_VECTOR_SIZE_ARCH;
csp = sp;
sp -= nitems * 2 * sizeof(unsigned long);
@@ -564,39 +555,46 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
sp -= sp & 15UL;
/* put the ELF interpreter info on the stack */
-#define NEW_AUX_ENT(nr, id, val) \
+#define NEW_AUX_ENT(id, val) \
do { \
struct { unsigned long _id, _val; } __user *ent; \
\
ent = (void __user *) csp; \
__put_user((id), &ent[nr]._id); \
__put_user((val), &ent[nr]._val); \
+ nr++; \
} while (0)
+ nr = 0;
csp -= 2 * sizeof(unsigned long);
- NEW_AUX_ENT(0, AT_NULL, 0);
+ NEW_AUX_ENT(AT_NULL, 0);
if (k_platform) {
+ nr = 0;
csp -= 2 * sizeof(unsigned long);
- NEW_AUX_ENT(0, AT_PLATFORM,
+ NEW_AUX_ENT(AT_PLATFORM,
(elf_addr_t) (unsigned long) u_platform);
}
+ nr = 0;
csp -= DLINFO_ITEMS * 2 * sizeof(unsigned long);
- NEW_AUX_ENT( 0, AT_HWCAP, hwcap);
- NEW_AUX_ENT( 1, AT_PAGESZ, PAGE_SIZE);
- NEW_AUX_ENT( 2, AT_CLKTCK, CLOCKS_PER_SEC);
- NEW_AUX_ENT( 3, AT_PHDR, exec_params->ph_addr);
- NEW_AUX_ENT( 4, AT_PHENT, sizeof(struct elf_phdr));
- NEW_AUX_ENT( 5, AT_PHNUM, exec_params->hdr.e_phnum);
- NEW_AUX_ENT( 6, AT_BASE, interp_params->elfhdr_addr);
- NEW_AUX_ENT( 7, AT_FLAGS, 0);
- NEW_AUX_ENT( 8, AT_ENTRY, exec_params->entry_addr);
- NEW_AUX_ENT( 9, AT_UID, (elf_addr_t) current->uid);
- NEW_AUX_ENT(10, AT_EUID, (elf_addr_t) current->euid);
- NEW_AUX_ENT(11, AT_GID, (elf_addr_t) current->gid);
- NEW_AUX_ENT(12, AT_EGID, (elf_addr_t) current->egid);
+ NEW_AUX_ENT(AT_HWCAP, hwcap);
+ NEW_AUX_ENT(AT_PAGESZ, PAGE_SIZE);
+ NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
+ NEW_AUX_ENT(AT_PHDR, exec_params->ph_addr);
+ NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
+ NEW_AUX_ENT(AT_PHNUM, exec_params->hdr.e_phnum);
+ NEW_AUX_ENT(AT_BASE, interp_params->elfhdr_addr);
+ NEW_AUX_ENT(AT_FLAGS, 0);
+ NEW_AUX_ENT(AT_ENTRY, exec_params->entry_addr);
+ NEW_AUX_ENT(AT_UID, (elf_addr_t) current->uid);
+ NEW_AUX_ENT(AT_EUID, (elf_addr_t) current->euid);
+ NEW_AUX_ENT(AT_GID, (elf_addr_t) current->gid);
+ NEW_AUX_ENT(AT_EGID, (elf_addr_t) current->egid);
#ifdef ARCH_DLINFO
+ nr = 0;
+ csp -= AT_VECTOR_SIZE_ARCH * 2 * sizeof(unsigned long);
+
/* ARCH_DLINFO must come last so platform specific code can enforce
* special alignment requirements on the AUXV if necessary (eg. PPC).
*/
@@ -1573,7 +1571,6 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
struct memelfnote *notes = NULL;
struct elf_prstatus *prstatus = NULL; /* NT_PRSTATUS */
struct elf_prpsinfo *psinfo = NULL; /* NT_PRPSINFO */
- struct task_struct *g, *p;
LIST_HEAD(thread_list);
struct list_head *t;
elf_fpregset_t *fpu = NULL;
@@ -1622,20 +1619,19 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
#endif
if (signr) {
+ struct core_thread *ct;
struct elf_thread_status *tmp;
- rcu_read_lock();
- do_each_thread(g,p)
- if (current->mm == p->mm && current != p) {
- tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
- if (!tmp) {
- rcu_read_unlock();
- goto cleanup;
- }
- tmp->thread = p;
- list_add(&tmp->list, &thread_list);
- }
- while_each_thread(g,p);
- rcu_read_unlock();
+
+ for (ct = current->mm->core_state->dumper.next;
+ ct; ct = ct->next) {
+ tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
+ if (!tmp)
+ goto cleanup;
+
+ tmp->thread = ct->task;
+ list_add(&tmp->list, &thread_list);
+ }
+
list_for_each(t, &thread_list) {
struct elf_thread_status *tmp;
int sz;
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 2cb1acda3a8..56372ecf169 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -920,9 +920,6 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs)
start_thread(regs, start_addr, current->mm->start_stack);
- if (current->ptrace & PT_PTRACED)
- send_sig(SIGTRAP, current, 0);
-
return 0;
}
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 7191306367c..756205314c2 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -27,6 +27,7 @@
#include <linux/namei.h>
#include <linux/mount.h>
#include <linux/syscalls.h>
+#include <linux/fs.h>
#include <asm/uaccess.h>
@@ -535,31 +536,16 @@ static ssize_t
bm_entry_read(struct file * file, char __user * buf, size_t nbytes, loff_t *ppos)
{
Node *e = file->f_path.dentry->d_inode->i_private;
- loff_t pos = *ppos;
ssize_t res;
char *page;
- int len;
if (!(page = (char*) __get_free_page(GFP_KERNEL)))
return -ENOMEM;
entry_status(e, page);
- len = strlen(page);
- res = -EINVAL;
- if (pos < 0)
- goto out;
- res = 0;
- if (pos >= len)
- goto out;
- if (len < pos + nbytes)
- nbytes = len - pos;
- res = -EFAULT;
- if (copy_to_user(buf, page + pos, nbytes))
- goto out;
- *ppos = pos + nbytes;
- res = nbytes;
-out:
+ res = simple_read_from_buffer(buf, nbytes, ppos, page, strlen(page));
+
free_page((unsigned long) page);
return res;
}
diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c
index fdc36bfd6a7..68be580ba28 100644
--- a/fs/binfmt_som.c
+++ b/fs/binfmt_som.c
@@ -274,8 +274,6 @@ load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs)
map_hpux_gateway_page(current,current->mm);
start_thread_som(regs, som_entry, bprm->p);
- if (current->ptrace & PT_PTRACED)
- send_sig(SIGTRAP, current, 0);
return 0;
/* error cleanup */
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index 63e2ee63058..c3e174b35fe 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -705,7 +705,6 @@ void __init bio_integrity_init_slab(void)
bio_integrity_slab = KMEM_CACHE(bio_integrity_payload,
SLAB_HWCACHE_ALIGN|SLAB_PANIC);
}
-EXPORT_SYMBOL(bio_integrity_init_slab);
static int __init integrity_init(void)
{
diff --git a/fs/bio.c b/fs/bio.c
index 88322b066ac..8000e2fa16c 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -77,11 +77,8 @@ struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct
*/
bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask);
- if (bvl) {
- struct biovec_slab *bp = bvec_slabs + *idx;
-
- memset(bvl, 0, bp->nr_vecs * sizeof(struct bio_vec));
- }
+ if (bvl)
+ memset(bvl, 0, bvec_nr_vecs(*idx) * sizeof(struct bio_vec));
return bvl;
}
@@ -149,7 +146,7 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
goto out;
}
bio->bi_flags |= idx << BIO_POOL_OFFSET;
- bio->bi_max_vecs = bvec_slabs[idx].nr_vecs;
+ bio->bi_max_vecs = bvec_nr_vecs(idx);
}
bio->bi_io_vec = bvl;
}
@@ -721,12 +718,8 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
const int local_nr_pages = end - start;
const int page_limit = cur_page + local_nr_pages;
- down_read(&current->mm->mmap_sem);
- ret = get_user_pages(current, current->mm, uaddr,
- local_nr_pages,
- write_to_vm, 0, &pages[cur_page], NULL);
- up_read(&current->mm->mmap_sem);
-
+ ret = get_user_pages_fast(uaddr, local_nr_pages,
+ write_to_vm, &pages[cur_page]);
if (ret < local_nr_pages) {
ret = -EFAULT;
goto out_unmap;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 10d8a0aa871..aff54219e04 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -271,7 +271,7 @@ static void bdev_destroy_inode(struct inode *inode)
kmem_cache_free(bdev_cachep, bdi);
}
-static void init_once(struct kmem_cache * cachep, void *foo)
+static void init_once(void *foo)
{
struct bdev_inode *ei = (struct bdev_inode *) foo;
struct block_device *bdev = &ei->bdev;
@@ -941,8 +941,10 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part)
* hooks: /n/, see "layering violations".
*/
ret = devcgroup_inode_permission(bdev->bd_inode, perm);
- if (ret != 0)
+ if (ret != 0) {
+ bdput(bdev);
return ret;
+ }
ret = -ENXIO;
file->f_mapping = bdev->bd_inode->i_mapping;
@@ -1234,6 +1236,7 @@ fail:
bdev = ERR_PTR(error);
goto out;
}
+EXPORT_SYMBOL(lookup_bdev);
/**
* open_bdev_excl - open a block device by name and set it up for use
diff --git a/fs/buffer.c b/fs/buffer.c
index d48caee12e2..38653e36e22 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -580,7 +580,7 @@ EXPORT_SYMBOL(mark_buffer_async_write);
/*
* The buffer's backing address_space's private_lock must be held
*/
-static inline void __remove_assoc_queue(struct buffer_head *bh)
+static void __remove_assoc_queue(struct buffer_head *bh)
{
list_del_init(&bh->b_assoc_buffers);
WARN_ON(!bh->b_assoc_map);
@@ -706,7 +706,7 @@ static int __set_page_dirty(struct page *page,
if (TestSetPageDirty(page))
return 0;
- write_lock_irq(&mapping->tree_lock);
+ spin_lock_irq(&mapping->tree_lock);
if (page->mapping) { /* Race with truncate? */
WARN_ON_ONCE(warn && !PageUptodate(page));
@@ -719,7 +719,7 @@ static int __set_page_dirty(struct page *page,
radix_tree_tag_set(&mapping->page_tree,
page_index(page), PAGECACHE_TAG_DIRTY);
}
- write_unlock_irq(&mapping->tree_lock);
+ spin_unlock_irq(&mapping->tree_lock);
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
return 1;
@@ -1214,8 +1214,7 @@ void __brelse(struct buffer_head * buf)
put_bh(buf);
return;
}
- printk(KERN_ERR "VFS: brelse: Trying to free free buffer\n");
- WARN_ON(1);
+ WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer\n");
}
/*
@@ -1721,7 +1720,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
*/
if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
lock_buffer(bh);
- } else if (test_set_buffer_locked(bh)) {
+ } else if (!trylock_buffer(bh)) {
redirty_page_for_writepage(wbc, page);
continue;
}
@@ -2097,6 +2096,52 @@ int generic_write_end(struct file *file, struct address_space *mapping,
EXPORT_SYMBOL(generic_write_end);
/*
+ * block_is_partially_uptodate checks whether buffers within a page are
+ * uptodate or not.
+ *
+ * Returns true if all buffers which correspond to a file portion
+ * we want to read are uptodate.
+ */
+int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
+ unsigned long from)
+{
+ struct inode *inode = page->mapping->host;
+ unsigned block_start, block_end, blocksize;
+ unsigned to;
+ struct buffer_head *bh, *head;
+ int ret = 1;
+
+ if (!page_has_buffers(page))
+ return 0;
+
+ blocksize = 1 << inode->i_blkbits;
+ to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count);
+ to = from + to;
+ if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize)
+ return 0;
+
+ head = page_buffers(page);
+ bh = head;
+ block_start = 0;
+ do {
+ block_end = block_start + blocksize;
+ if (block_end > from && block_start < to) {
+ if (!buffer_uptodate(bh)) {
+ ret = 0;
+ break;
+ }
+ if (block_end >= to)
+ break;
+ }
+ block_start = block_end;
+ bh = bh->b_this_page;
+ } while (bh != head);
+
+ return ret;
+}
+EXPORT_SYMBOL(block_is_partially_uptodate);
+
+/*
* Generic "read page" function for block devices that have the normal
* get_block functionality. This is most of the block device filesystems.
* Reads the page asynchronously --- the unlock_buffer() and
@@ -2955,7 +3000,7 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
if (rw == SWRITE || rw == SWRITE_SYNC)
lock_buffer(bh);
- else if (test_set_buffer_locked(bh))
+ else if (!trylock_buffer(bh))
continue;
if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC) {
@@ -3272,7 +3317,7 @@ int bh_submit_read(struct buffer_head *bh)
EXPORT_SYMBOL(bh_submit_read);
static void
-init_buffer_head(struct kmem_cache *cachep, void *data)
+init_buffer_head(void *data)
{
struct buffer_head *bh = data;
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 1f3465201fd..f5d0083e09f 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,3 +1,11 @@
+Version 1.54
+------------
+Fix premature write failure on congested networks (we would give up
+on EAGAIN from the socket too quickly on large writes).
+Cifs_mkdir and cifs_create now respect the setgid bit on parent dir.
+Fix endian problems in acl (mode from/to cifs acl) on bigendian
+architectures.
+
Version 1.53
------------
DFS support added (Microsoft Distributed File System client support needed
diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c
index f58e41d3ba4..5fabd2caf93 100644
--- a/fs/cifs/asn1.c
+++ b/fs/cifs/asn1.c
@@ -400,7 +400,7 @@ asn1_oid_decode(struct asn1_ctx *ctx,
size = eoc - ctx->pointer + 1;
/* first subid actually encodes first two subids */
- if (size < 2 || size > ULONG_MAX/sizeof(unsigned long))
+ if (size < 2 || size > UINT_MAX/sizeof(unsigned long))
return 0;
*oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC);
@@ -483,6 +483,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
asn1_open(&ctx, security_blob, length);
+ /* GSSAPI header */
if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
cFYI(1, ("Error decoding negTokenInit header"));
return 0;
@@ -490,153 +491,142 @@ decode_negTokenInit(unsigned char *security_blob, int length,
|| (tag != ASN1_EOC)) {
cFYI(1, ("cls = %d con = %d tag = %d", cls, con, tag));
return 0;
- } else {
- /* remember to free obj->oid */
- rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag);
- if (rc) {
- if ((tag == ASN1_OJI) && (cls == ASN1_PRI)) {
- rc = asn1_oid_decode(&ctx, end, &oid, &oidlen);
- if (rc) {
- rc = compare_oid(oid, oidlen,
- SPNEGO_OID,
- SPNEGO_OID_LEN);
- kfree(oid);
- }
- } else
- rc = 0;
- }
+ }
- if (!rc) {
- cFYI(1, ("Error decoding negTokenInit header"));
- return 0;
- }
+ /* Check for SPNEGO OID -- remember to free obj->oid */
+ rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag);
+ if (rc) {
+ if ((tag == ASN1_OJI) && (con == ASN1_PRI) &&
+ (cls == ASN1_UNI)) {
+ rc = asn1_oid_decode(&ctx, end, &oid, &oidlen);
+ if (rc) {
+ rc = compare_oid(oid, oidlen, SPNEGO_OID,
+ SPNEGO_OID_LEN);
+ kfree(oid);
+ }
+ } else
+ rc = 0;
+ }
- if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
- cFYI(1, ("Error decoding negTokenInit"));
- return 0;
- } else if ((cls != ASN1_CTX) || (con != ASN1_CON)
- || (tag != ASN1_EOC)) {
- cFYI(1,
- ("cls = %d con = %d tag = %d end = %p (%d) exit 0",
- cls, con, tag, end, *end));
- return 0;
- }
+ /* SPNEGO OID not present or garbled -- bail out */
+ if (!rc) {
+ cFYI(1, ("Error decoding negTokenInit header"));
+ return 0;
+ }
- if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
- cFYI(1, ("Error decoding negTokenInit"));
- return 0;
- } else if ((cls != ASN1_UNI) || (con != ASN1_CON)
- || (tag != ASN1_SEQ)) {
- cFYI(1,
- ("cls = %d con = %d tag = %d end = %p (%d) exit 1",
- cls, con, tag, end, *end));
- return 0;
- }
+ if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
+ cFYI(1, ("Error decoding negTokenInit"));
+ return 0;
+ } else if ((cls != ASN1_CTX) || (con != ASN1_CON)
+ || (tag != ASN1_EOC)) {
+ cFYI(1,
+ ("cls = %d con = %d tag = %d end = %p (%d) exit 0",
+ cls, con, tag, end, *end));
+ return 0;
+ }
- if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
- cFYI(1, ("Error decoding 2nd part of negTokenInit"));
- return 0;
- } else if ((cls != ASN1_CTX) || (con != ASN1_CON)
- || (tag != ASN1_EOC)) {
- cFYI(1,
- ("cls = %d con = %d tag = %d end = %p (%d) exit 0",
- cls, con, tag, end, *end));
- return 0;
- }
+ if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
+ cFYI(1, ("Error decoding negTokenInit"));
+ return 0;
+ } else if ((cls != ASN1_UNI) || (con != ASN1_CON)
+ || (tag != ASN1_SEQ)) {
+ cFYI(1,
+ ("cls = %d con = %d tag = %d end = %p (%d) exit 1",
+ cls, con, tag, end, *end));
+ return 0;
+ }
- if (asn1_header_decode
- (&ctx, &sequence_end, &cls, &con, &tag) == 0) {
- cFYI(1, ("Error decoding 2nd part of negTokenInit"));
- return 0;
- } else if ((cls != ASN1_UNI) || (con != ASN1_CON)
- || (tag != ASN1_SEQ)) {
- cFYI(1,
- ("cls = %d con = %d tag = %d end = %p (%d) exit 1",
- cls, con, tag, end, *end));
- return 0;
- }
+ if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
+ cFYI(1, ("Error decoding 2nd part of negTokenInit"));
+ return 0;
+ } else if ((cls != ASN1_CTX) || (con != ASN1_CON)
+ || (tag != ASN1_EOC)) {
+ cFYI(1,
+ ("cls = %d con = %d tag = %d end = %p (%d) exit 0",
+ cls, con, tag, end, *end));
+ return 0;
+ }
- while (!asn1_eoc_decode(&ctx, sequence_end)) {
- rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag);
- if (!rc) {
- cFYI(1,
- ("Error decoding negTokenInit hdr exit2"));
- return 0;
- }
- if ((tag == ASN1_OJI) && (con == ASN1_PRI)) {
- if (asn1_oid_decode(&ctx, end, &oid, &oidlen)) {
-
- cFYI(1,
- ("OID len = %d oid = 0x%lx 0x%lx "
- "0x%lx 0x%lx",
- oidlen, *oid, *(oid + 1),
- *(oid + 2), *(oid + 3)));
-
- if (compare_oid(oid, oidlen,
- MSKRB5_OID,
- MSKRB5_OID_LEN))
- use_kerberos = true;
- else if (compare_oid(oid, oidlen,
- KRB5_OID,
- KRB5_OID_LEN))
- use_kerberos = true;
- else if (compare_oid(oid, oidlen,
- NTLMSSP_OID,
- NTLMSSP_OID_LEN))
- use_ntlmssp = true;
-
- kfree(oid);
- }
- } else {
- cFYI(1, ("Should be an oid what is going on?"));
- }
- }
+ if (asn1_header_decode
+ (&ctx, &sequence_end, &cls, &con, &tag) == 0) {
+ cFYI(1, ("Error decoding 2nd part of negTokenInit"));
+ return 0;
+ } else if ((cls != ASN1_UNI) || (con != ASN1_CON)
+ || (tag != ASN1_SEQ)) {
+ cFYI(1,
+ ("cls = %d con = %d tag = %d end = %p (%d) exit 1",
+ cls, con, tag, end, *end));
+ return 0;
+ }
- if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
- cFYI(1,
- ("Error decoding last part negTokenInit exit3"));
- return 0;
- } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) {
- /* tag = 3 indicating mechListMIC */
+ while (!asn1_eoc_decode(&ctx, sequence_end)) {
+ rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag);
+ if (!rc) {
cFYI(1,
- ("Exit 4 cls = %d con = %d tag = %d end = %p (%d)",
- cls, con, tag, end, *end));
+ ("Error decoding negTokenInit hdr exit2"));
return 0;
}
- if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
- cFYI(1,
- ("Error decoding last part negTokenInit exit5"));
- return 0;
- } else if ((cls != ASN1_UNI) || (con != ASN1_CON)
- || (tag != ASN1_SEQ)) {
- cFYI(1, ("cls = %d con = %d tag = %d end = %p (%d)",
- cls, con, tag, end, *end));
+ if ((tag == ASN1_OJI) && (con == ASN1_PRI)) {
+ if (asn1_oid_decode(&ctx, end, &oid, &oidlen)) {
+
+ cFYI(1, ("OID len = %d oid = 0x%lx 0x%lx "
+ "0x%lx 0x%lx", oidlen, *oid,
+ *(oid + 1), *(oid + 2), *(oid + 3)));
+
+ if (compare_oid(oid, oidlen, MSKRB5_OID,
+ MSKRB5_OID_LEN))
+ use_kerberos = true;
+ else if (compare_oid(oid, oidlen, KRB5_OID,
+ KRB5_OID_LEN))
+ use_kerberos = true;
+ else if (compare_oid(oid, oidlen, NTLMSSP_OID,
+ NTLMSSP_OID_LEN))
+ use_ntlmssp = true;
+
+ kfree(oid);
+ }
+ } else {
+ cFYI(1, ("Should be an oid what is going on?"));
}
+ }
- if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
- cFYI(1,
- ("Error decoding last part negTokenInit exit 7"));
- return 0;
- } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) {
- cFYI(1,
- ("Exit 8 cls = %d con = %d tag = %d end = %p (%d)",
- cls, con, tag, end, *end));
- return 0;
- }
- if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
- cFYI(1,
- ("Error decoding last part negTokenInit exit9"));
- return 0;
- } else if ((cls != ASN1_UNI) || (con != ASN1_PRI)
- || (tag != ASN1_GENSTR)) {
- cFYI(1,
- ("Exit10 cls = %d con = %d tag = %d end = %p (%d)",
- cls, con, tag, end, *end));
- return 0;
- }
- cFYI(1, ("Need to call asn1_octets_decode() function for %s",
- ctx.pointer)); /* is this UTF-8 or ASCII? */
+ if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
+ cFYI(1, ("Error decoding last part negTokenInit exit3"));
+ return 0;
+ } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) {
+ /* tag = 3 indicating mechListMIC */
+ cFYI(1, ("Exit 4 cls = %d con = %d tag = %d end = %p (%d)",
+ cls, con, tag, end, *end));
+ return 0;
+ }
+ if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
+ cFYI(1, ("Error decoding last part negTokenInit exit5"));
+ return 0;
+ } else if ((cls != ASN1_UNI) || (con != ASN1_CON)
+ || (tag != ASN1_SEQ)) {
+ cFYI(1, ("cls = %d con = %d tag = %d end = %p (%d)",
+ cls, con, tag, end, *end));
+ }
+
+ if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
+ cFYI(1, ("Error decoding last part negTokenInit exit 7"));
+ return 0;
+ } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) {
+ cFYI(1, ("Exit 8 cls = %d con = %d tag = %d end = %p (%d)",
+ cls, con, tag, end, *end));
+ return 0;
+ }
+ if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
+ cFYI(1, ("Error decoding last part negTokenInit exit9"));
+ return 0;
+ } else if ((cls != ASN1_UNI) || (con != ASN1_PRI)
+ || (tag != ASN1_GENSTR)) {
+ cFYI(1, ("Exit10 cls = %d con = %d tag = %d end = %p (%d)",
+ cls, con, tag, end, *end));
+ return 0;
}
+ cFYI(1, ("Need to call asn1_octets_decode() function for %s",
+ ctx.pointer)); /* is this UTF-8 or ASCII? */
if (use_kerberos)
*secType = Kerberos;
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index cc950f69e51..69a12aae91d 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -79,27 +79,25 @@ void cifs_dump_mids(struct TCP_Server_Info *server)
spin_lock(&GlobalMid_Lock);
list_for_each(tmp, &server->pending_mid_q) {
mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
- if (mid_entry) {
- cERROR(1, ("State: %d Cmd: %d Pid: %d Tsk: %p Mid %d",
- mid_entry->midState,
- (int)mid_entry->command,
- mid_entry->pid,
- mid_entry->tsk,
- mid_entry->mid));
+ cERROR(1, ("State: %d Cmd: %d Pid: %d Tsk: %p Mid %d",
+ mid_entry->midState,
+ (int)mid_entry->command,
+ mid_entry->pid,
+ mid_entry->tsk,
+ mid_entry->mid));
#ifdef CONFIG_CIFS_STATS2
- cERROR(1, ("IsLarge: %d buf: %p time rcv: %ld now: %ld",
- mid_entry->largeBuf,
- mid_entry->resp_buf,
- mid_entry->when_received,
- jiffies));
+ cERROR(1, ("IsLarge: %d buf: %p time rcv: %ld now: %ld",
+ mid_entry->largeBuf,
+ mid_entry->resp_buf,
+ mid_entry->when_received,
+ jiffies));
#endif /* STATS2 */
- cERROR(1, ("IsMult: %d IsEnd: %d", mid_entry->multiRsp,
- mid_entry->multiEnd));
- if (mid_entry->resp_buf) {
- cifs_dump_detail(mid_entry->resp_buf);
- cifs_dump_mem("existing buf: ",
- mid_entry->resp_buf, 62);
- }
+ cERROR(1, ("IsMult: %d IsEnd: %d", mid_entry->multiRsp,
+ mid_entry->multiEnd));
+ if (mid_entry->resp_buf) {
+ cifs_dump_detail(mid_entry->resp_buf);
+ cifs_dump_mem("existing buf: ",
+ mid_entry->resp_buf, 62);
}
}
spin_unlock(&GlobalMid_Lock);
@@ -107,9 +105,7 @@ void cifs_dump_mids(struct TCP_Server_Info *server)
#endif /* CONFIG_CIFS_DEBUG2 */
#ifdef CONFIG_PROC_FS
-static int
-cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
- int count, int *eof, void *data)
+static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
{
struct list_head *tmp;
struct list_head *tmp1;
@@ -117,23 +113,13 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
struct cifsSesInfo *ses;
struct cifsTconInfo *tcon;
int i;
- int length = 0;
- char *original_buf = buf;
-
- *beginBuffer = buf + offset;
- length =
- sprintf(buf,
+ seq_puts(m,
"Display Internal CIFS Data Structures for Debugging\n"
"---------------------------------------------------\n");
- buf += length;
- length = sprintf(buf, "CIFS Version %s\n", CIFS_VERSION);
- buf += length;
- length = sprintf(buf,
- "Active VFS Requests: %d\n", GlobalTotalActiveXid);
- buf += length;
- length = sprintf(buf, "Servers:");
- buf += length;
+ seq_printf(m, "CIFS Version %s\n", CIFS_VERSION);
+ seq_printf(m, "Active VFS Requests: %d\n", GlobalTotalActiveXid);
+ seq_printf(m, "Servers:");
i = 0;
read_lock(&GlobalSMBSeslock);
@@ -142,11 +128,10 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
ses = list_entry(tmp, struct cifsSesInfo, cifsSessionList);
if ((ses->serverDomain == NULL) || (ses->serverOS == NULL) ||
(ses->serverNOS == NULL)) {
- buf += sprintf(buf, "\nentry for %s not fully "
+ seq_printf(m, "\nentry for %s not fully "
"displayed\n\t", ses->serverName);
} else {
- length =
- sprintf(buf,
+ seq_printf(m,
"\n%d) Name: %s Domain: %s Mounts: %d OS:"
" %s \n\tNOS: %s\tCapability: 0x%x\n\tSMB"
" session status: %d\t",
@@ -154,10 +139,9 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
atomic_read(&ses->inUse),
ses->serverOS, ses->serverNOS,
ses->capabilities, ses->status);
- buf += length;
}
if (ses->server) {
- buf += sprintf(buf, "TCP status: %d\n\tLocal Users To "
+ seq_printf(m, "TCP status: %d\n\tLocal Users To "
"Server: %d SecMode: 0x%x Req On Wire: %d",
ses->server->tcpStatus,
atomic_read(&ses->server->socketUseCount),
@@ -165,41 +149,34 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
atomic_read(&ses->server->inFlight));
#ifdef CONFIG_CIFS_STATS2
- buf += sprintf(buf, " In Send: %d In MaxReq Wait: %d",
+ seq_printf(m, " In Send: %d In MaxReq Wait: %d",
atomic_read(&ses->server->inSend),
atomic_read(&ses->server->num_waiters));
#endif
- length = sprintf(buf, "\nMIDs:\n");
- buf += length;
+ seq_puts(m, "\nMIDs:\n");
spin_lock(&GlobalMid_Lock);
list_for_each(tmp1, &ses->server->pending_mid_q) {
mid_entry = list_entry(tmp1, struct
mid_q_entry,
qhead);
- if (mid_entry) {
- length = sprintf(buf,
- "State: %d com: %d pid:"
- " %d tsk: %p mid %d\n",
- mid_entry->midState,
- (int)mid_entry->command,
- mid_entry->pid,
- mid_entry->tsk,
- mid_entry->mid);
- buf += length;
- }
+ seq_printf(m, "State: %d com: %d pid:"
+ " %d tsk: %p mid %d\n",
+ mid_entry->midState,
+ (int)mid_entry->command,
+ mid_entry->pid,
+ mid_entry->tsk,
+ mid_entry->mid);
}
spin_unlock(&GlobalMid_Lock);
}
}
read_unlock(&GlobalSMBSeslock);
- sprintf(buf, "\n");
- buf++;
+ seq_putc(m, '\n');
- length = sprintf(buf, "Shares:");
- buf += length;
+ seq_puts(m, "Shares:");
i = 0;
read_lock(&GlobalSMBSeslock);
@@ -208,62 +185,52 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
i++;
tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList);
dev_type = le32_to_cpu(tcon->fsDevInfo.DeviceType);
- length = sprintf(buf, "\n%d) %s Uses: %d ", i,
+ seq_printf(m, "\n%d) %s Uses: %d ", i,
tcon->treeName, atomic_read(&tcon->useCount));
- buf += length;
if (tcon->nativeFileSystem) {
- length = sprintf(buf, "Type: %s ",
+ seq_printf(m, "Type: %s ",
tcon->nativeFileSystem);
- buf += length;
}
- length = sprintf(buf, "DevInfo: 0x%x Attributes: 0x%x"
+ seq_printf(m, "DevInfo: 0x%x Attributes: 0x%x"
"\nPathComponentMax: %d Status: %d",
le32_to_cpu(tcon->fsDevInfo.DeviceCharacteristics),
le32_to_cpu(tcon->fsAttrInfo.Attributes),
le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength),
tcon->tidStatus);
- buf += length;
if (dev_type == FILE_DEVICE_DISK)
- length = sprintf(buf, " type: DISK ");
+ seq_puts(m, " type: DISK ");
else if (dev_type == FILE_DEVICE_CD_ROM)
- length = sprintf(buf, " type: CDROM ");
+ seq_puts(m, " type: CDROM ");
else
- length =
- sprintf(buf, " type: %d ", dev_type);
- buf += length;
- if (tcon->tidStatus == CifsNeedReconnect) {
- buf += sprintf(buf, "\tDISCONNECTED ");
- length += 14;
- }
+ seq_printf(m, " type: %d ", dev_type);
+
+ if (tcon->tidStatus == CifsNeedReconnect)
+ seq_puts(m, "\tDISCONNECTED ");
}
read_unlock(&GlobalSMBSeslock);
- length = sprintf(buf, "\n");
- buf += length;
+ seq_putc(m, '\n');
/* BB add code to dump additional info such as TCP session info now */
- /* Now calculate total size of returned data */
- length = buf - original_buf;
-
- if (offset + count >= length)
- *eof = 1;
- if (length < offset) {
- *eof = 1;
- return 0;
- } else {
- length = length - offset;
- }
- if (length > count)
- length = count;
+ return 0;
+}
- return length;
+static int cifs_debug_data_proc_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, cifs_debug_data_proc_show, NULL);
}
-#ifdef CONFIG_CIFS_STATS
+static const struct file_operations cifs_debug_data_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = cifs_debug_data_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
-static int
-cifs_stats_write(struct file *file, const char __user *buffer,
- unsigned long count, void *data)
+#ifdef CONFIG_CIFS_STATS
+static ssize_t cifs_stats_proc_write(struct file *file,
+ const char __user *buffer, size_t count, loff_t *ppos)
{
char c;
int rc;
@@ -307,236 +274,132 @@ cifs_stats_write(struct file *file, const char __user *buffer,
return count;
}
-static int
-cifs_stats_read(char *buf, char **beginBuffer, off_t offset,
- int count, int *eof, void *data)
+static int cifs_stats_proc_show(struct seq_file *m, void *v)
{
- int item_length, i, length;
+ int i;
struct list_head *tmp;
struct cifsTconInfo *tcon;
- *beginBuffer = buf + offset;
-
- length = sprintf(buf,
+ seq_printf(m,
"Resources in use\nCIFS Session: %d\n",
sesInfoAllocCount.counter);
- buf += length;
- item_length =
- sprintf(buf, "Share (unique mount targets): %d\n",
+ seq_printf(m, "Share (unique mount targets): %d\n",
tconInfoAllocCount.counter);
- length += item_length;
- buf += item_length;
- item_length =
- sprintf(buf, "SMB Request/Response Buffer: %d Pool size: %d\n",
+ seq_printf(m, "SMB Request/Response Buffer: %d Pool size: %d\n",
bufAllocCount.counter,
cifs_min_rcv + tcpSesAllocCount.counter);
- length += item_length;
- buf += item_length;
- item_length =
- sprintf(buf, "SMB Small Req/Resp Buffer: %d Pool size: %d\n",
+ seq_printf(m, "SMB Small Req/Resp Buffer: %d Pool size: %d\n",
smBufAllocCount.counter, cifs_min_small);
- length += item_length;
- buf += item_length;
#ifdef CONFIG_CIFS_STATS2
- item_length = sprintf(buf, "Total Large %d Small %d Allocations\n",
+ seq_printf(m, "Total Large %d Small %d Allocations\n",
atomic_read(&totBufAllocCount),
atomic_read(&totSmBufAllocCount));
- length += item_length;
- buf += item_length;
#endif /* CONFIG_CIFS_STATS2 */
- item_length =
- sprintf(buf, "Operations (MIDs): %d\n",
- midCount.counter);
- length += item_length;
- buf += item_length;
- item_length = sprintf(buf,
+ seq_printf(m, "Operations (MIDs): %d\n", midCount.counter);
+ seq_printf(m,
"\n%d session %d share reconnects\n",
tcpSesReconnectCount.counter, tconInfoReconnectCount.counter);
- length += item_length;
- buf += item_length;
- item_length = sprintf(buf,
+ seq_printf(m,
"Total vfs operations: %d maximum at one time: %d\n",
GlobalCurrentXid, GlobalMaxActiveXid);
- length += item_length;
- buf += item_length;
i = 0;
read_lock(&GlobalSMBSeslock);
list_for_each(tmp, &GlobalTreeConnectionList) {
i++;
tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList);
- item_length = sprintf(buf, "\n%d) %s", i, tcon->treeName);
- buf += item_length;
- length += item_length;
- if (tcon->tidStatus == CifsNeedReconnect) {
- buf += sprintf(buf, "\tDISCONNECTED ");
- length += 14;
- }
- item_length = sprintf(buf, "\nSMBs: %d Oplock Breaks: %d",
+ seq_printf(m, "\n%d) %s", i, tcon->treeName);
+ if (tcon->tidStatus == CifsNeedReconnect)
+ seq_puts(m, "\tDISCONNECTED ");
+ seq_printf(m, "\nSMBs: %d Oplock Breaks: %d",
atomic_read(&tcon->num_smbs_sent),
atomic_read(&tcon->num_oplock_brks));
- buf += item_length;
- length += item_length;
- item_length = sprintf(buf, "\nReads: %d Bytes: %lld",
+ seq_printf(m, "\nReads: %d Bytes: %lld",
atomic_read(&tcon->num_reads),
(long long)(tcon->bytes_read));
- buf += item_length;
- length += item_length;
- item_length = sprintf(buf, "\nWrites: %d Bytes: %lld",
+ seq_printf(m, "\nWrites: %d Bytes: %lld",
atomic_read(&tcon->num_writes),
(long long)(tcon->bytes_written));
- buf += item_length;
- length += item_length;
- item_length = sprintf(buf,
+ seq_printf(m,
"\nLocks: %d HardLinks: %d Symlinks: %d",
atomic_read(&tcon->num_locks),
atomic_read(&tcon->num_hardlinks),
atomic_read(&tcon->num_symlinks));
- buf += item_length;
- length += item_length;
- item_length = sprintf(buf, "\nOpens: %d Closes: %d Deletes: %d",
+ seq_printf(m, "\nOpens: %d Closes: %d Deletes: %d",
atomic_read(&tcon->num_opens),
atomic_read(&tcon->num_closes),
atomic_read(&tcon->num_deletes));
- buf += item_length;
- length += item_length;
- item_length = sprintf(buf, "\nMkdirs: %d Rmdirs: %d",
+ seq_printf(m, "\nMkdirs: %d Rmdirs: %d",
atomic_read(&tcon->num_mkdirs),
atomic_read(&tcon->num_rmdirs));
- buf += item_length;
- length += item_length;
- item_length = sprintf(buf, "\nRenames: %d T2 Renames %d",
+ seq_printf(m, "\nRenames: %d T2 Renames %d",
atomic_read(&tcon->num_renames),
atomic_read(&tcon->num_t2renames));
- buf += item_length;
- length += item_length;
- item_length = sprintf(buf, "\nFindFirst: %d FNext %d FClose %d",
+ seq_printf(m, "\nFindFirst: %d FNext %d FClose %d",
atomic_read(&tcon->num_ffirst),
atomic_read(&tcon->num_fnext),
atomic_read(&tcon->num_fclose));
- buf += item_length;
- length += item_length;
}
read_unlock(&GlobalSMBSeslock);
- buf += sprintf(buf, "\n");
- length++;
-
- if (offset + count >= length)
- *eof = 1;
- if (length < offset) {
- *eof = 1;
- return 0;
- } else {
- length = length - offset;
- }
- if (length > count)
- length = count;
+ seq_putc(m, '\n');
+ return 0;
+}
- return length;
+static int cifs_stats_proc_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, cifs_stats_proc_show, NULL);
}
+
+static const struct file_operations cifs_stats_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = cifs_stats_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+ .write = cifs_stats_proc_write,
+};
#endif /* STATS */
static struct proc_dir_entry *proc_fs_cifs;
-read_proc_t cifs_txanchor_read;
-static read_proc_t cifsFYI_read;
-static write_proc_t cifsFYI_write;
-static read_proc_t oplockEnabled_read;
-static write_proc_t oplockEnabled_write;
-static read_proc_t lookupFlag_read;
-static write_proc_t lookupFlag_write;
-static read_proc_t traceSMB_read;
-static write_proc_t traceSMB_write;
-static read_proc_t multiuser_mount_read;
-static write_proc_t multiuser_mount_write;
-static read_proc_t security_flags_read;
-static write_proc_t security_flags_write;
-/* static read_proc_t ntlmv2_enabled_read;
-static write_proc_t ntlmv2_enabled_write;
-static read_proc_t packet_signing_enabled_read;
-static write_proc_t packet_signing_enabled_write;*/
-static read_proc_t experimEnabled_read;
-static write_proc_t experimEnabled_write;
-static read_proc_t linuxExtensionsEnabled_read;
-static write_proc_t linuxExtensionsEnabled_write;
+static const struct file_operations cifsFYI_proc_fops;
+static const struct file_operations cifs_oplock_proc_fops;
+static const struct file_operations cifs_lookup_cache_proc_fops;
+static const struct file_operations traceSMB_proc_fops;
+static const struct file_operations cifs_multiuser_mount_proc_fops;
+static const struct file_operations cifs_security_flags_proc_fops;
+static const struct file_operations cifs_experimental_proc_fops;
+static const struct file_operations cifs_linux_ext_proc_fops;
void
cifs_proc_init(void)
{
- struct proc_dir_entry *pde;
-
proc_fs_cifs = proc_mkdir("fs/cifs", NULL);
if (proc_fs_cifs == NULL)
return;
proc_fs_cifs->owner = THIS_MODULE;
- create_proc_read_entry("DebugData", 0, proc_fs_cifs,
- cifs_debug_data_read, NULL);
+ proc_create("DebugData", 0, proc_fs_cifs, &cifs_debug_data_proc_fops);
#ifdef CONFIG_CIFS_STATS
- pde = create_proc_read_entry("Stats", 0, proc_fs_cifs,
- cifs_stats_read, NULL);
- if (pde)
- pde->write_proc = cifs_stats_write;
+ proc_create("Stats", 0, proc_fs_cifs, &cifs_stats_proc_fops);
#endif /* STATS */
- pde = create_proc_read_entry("cifsFYI", 0, proc_fs_cifs,
- cifsFYI_read, NULL);
- if (pde)
- pde->write_proc = cifsFYI_write;
-
- pde =
- create_proc_read_entry("traceSMB", 0, proc_fs_cifs,
- traceSMB_read, NULL);
- if (pde)
- pde->write_proc = traceSMB_write;
-
- pde = create_proc_read_entry("OplockEnabled", 0, proc_fs_cifs,
- oplockEnabled_read, NULL);
- if (pde)
- pde->write_proc = oplockEnabled_write;
-
- pde = create_proc_read_entry("Experimental", 0, proc_fs_cifs,
- experimEnabled_read, NULL);
- if (pde)
- pde->write_proc = experimEnabled_write;
-
- pde = create_proc_read_entry("LinuxExtensionsEnabled", 0, proc_fs_cifs,
- linuxExtensionsEnabled_read, NULL);
- if (pde)
- pde->write_proc = linuxExtensionsEnabled_write;
-
- pde =
- create_proc_read_entry("MultiuserMount", 0, proc_fs_cifs,
- multiuser_mount_read, NULL);
- if (pde)
- pde->write_proc = multiuser_mount_write;
-
- pde =
- create_proc_read_entry("SecurityFlags", 0, proc_fs_cifs,
- security_flags_read, NULL);
- if (pde)
- pde->write_proc = security_flags_write;
-
- pde =
- create_proc_read_entry("LookupCacheEnabled", 0, proc_fs_cifs,
- lookupFlag_read, NULL);
- if (pde)
- pde->write_proc = lookupFlag_write;
-
-/* pde =
- create_proc_read_entry("NTLMV2Enabled", 0, proc_fs_cifs,
- ntlmv2_enabled_read, NULL);
- if (pde)
- pde->write_proc = ntlmv2_enabled_write;
-
- pde =
- create_proc_read_entry("PacketSigningEnabled", 0, proc_fs_cifs,
- packet_signing_enabled_read, NULL);
- if (pde)
- pde->write_proc = packet_signing_enabled_write;*/
+ proc_create("cifsFYI", 0, proc_fs_cifs, &cifsFYI_proc_fops);
+ proc_create("traceSMB", 0, proc_fs_cifs, &traceSMB_proc_fops);
+ proc_create("OplockEnabled", 0, proc_fs_cifs, &cifs_oplock_proc_fops);
+ proc_create("Experimental", 0, proc_fs_cifs,
+ &cifs_experimental_proc_fops);
+ proc_create("LinuxExtensionsEnabled", 0, proc_fs_cifs,
+ &cifs_linux_ext_proc_fops);
+ proc_create("MultiuserMount", 0, proc_fs_cifs,
+ &cifs_multiuser_mount_proc_fops);
+ proc_create("SecurityFlags", 0, proc_fs_cifs,
+ &cifs_security_flags_proc_fops);
+ proc_create("LookupCacheEnabled", 0, proc_fs_cifs,
+ &cifs_lookup_cache_proc_fops);
}
void
@@ -553,39 +416,26 @@ cifs_proc_clean(void)
#endif
remove_proc_entry("MultiuserMount", proc_fs_cifs);
remove_proc_entry("OplockEnabled", proc_fs_cifs);
-/* remove_proc_entry("NTLMV2Enabled",proc_fs_cifs); */
remove_proc_entry("SecurityFlags", proc_fs_cifs);
-/* remove_proc_entry("PacketSigningEnabled", proc_fs_cifs); */
remove_proc_entry("LinuxExtensionsEnabled", proc_fs_cifs);
remove_proc_entry("Experimental", proc_fs_cifs);
remove_proc_entry("LookupCacheEnabled", proc_fs_cifs);
remove_proc_entry("fs/cifs", NULL);
}
-static int
-cifsFYI_read(char *page, char **start, off_t off, int count,
- int *eof, void *data)
+static int cifsFYI_proc_show(struct seq_file *m, void *v)
{
- int len;
-
- len = sprintf(page, "%d\n", cifsFYI);
-
- len -= off;
- *start = page + off;
-
- if (len > count)
- len = count;
- else
- *eof = 1;
-
- if (len < 0)
- len = 0;
+ seq_printf(m, "%d\n", cifsFYI);
+ return 0;
+}
- return len;
+static int cifsFYI_proc_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, cifsFYI_proc_show, NULL);
}
-static int
-cifsFYI_write(struct file *file, const char __user *buffer,
- unsigned long count, void *data)
+
+static ssize_t cifsFYI_proc_write(struct file *file, const char __user *buffer,
+ size_t count, loff_t *ppos)
{
char c;
int rc;
@@ -603,30 +453,28 @@ cifsFYI_write(struct file *file, const char __user *buffer,
return count;
}
-static int
-oplockEnabled_read(char *page, char **start, off_t off,
- int count, int *eof, void *data)
-{
- int len;
-
- len = sprintf(page, "%d\n", oplockEnabled);
-
- len -= off;
- *start = page + off;
+static const struct file_operations cifsFYI_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = cifsFYI_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+ .write = cifsFYI_proc_write,
+};
- if (len > count)
- len = count;
- else
- *eof = 1;
-
- if (len < 0)
- len = 0;
+static int cifs_oplock_proc_show(struct seq_file *m, void *v)
+{
+ seq_printf(m, "%d\n", oplockEnabled);
+ return 0;
+}
- return len;
+static int cifs_oplock_proc_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, cifs_oplock_proc_show, NULL);
}
-static int
-oplockEnabled_write(struct file *file, const char __user *buffer,
- unsigned long count, void *data)
+
+static ssize_t cifs_oplock_proc_write(struct file *file,
+ const char __user *buffer, size_t count, loff_t *ppos)
{
char c;
int rc;
@@ -642,30 +490,28 @@ oplockEnabled_write(struct file *file, const char __user *buffer,
return count;
}
-static int
-experimEnabled_read(char *page, char **start, off_t off,
- int count, int *eof, void *data)
-{
- int len;
-
- len = sprintf(page, "%d\n", experimEnabled);
+static const struct file_operations cifs_oplock_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = cifs_oplock_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+ .write = cifs_oplock_proc_write,
+};
- len -= off;
- *start = page + off;
-
- if (len > count)
- len = count;
- else
- *eof = 1;
-
- if (len < 0)
- len = 0;
+static int cifs_experimental_proc_show(struct seq_file *m, void *v)
+{
+ seq_printf(m, "%d\n", experimEnabled);
+ return 0;
+}
- return len;
+static int cifs_experimental_proc_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, cifs_experimental_proc_show, NULL);
}
-static int
-experimEnabled_write(struct file *file, const char __user *buffer,
- unsigned long count, void *data)
+
+static ssize_t cifs_experimental_proc_write(struct file *file,
+ const char __user *buffer, size_t count, loff_t *ppos)
{
char c;
int rc;
@@ -683,29 +529,28 @@ experimEnabled_write(struct file *file, const char __user *buffer,
return count;
}
-static int
-linuxExtensionsEnabled_read(char *page, char **start, off_t off,
- int count, int *eof, void *data)
-{
- int len;
-
- len = sprintf(page, "%d\n", linuxExtEnabled);
- len -= off;
- *start = page + off;
+static const struct file_operations cifs_experimental_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = cifs_experimental_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+ .write = cifs_experimental_proc_write,
+};
- if (len > count)
- len = count;
- else
- *eof = 1;
-
- if (len < 0)
- len = 0;
+static int cifs_linux_ext_proc_show(struct seq_file *m, void *v)
+{
+ seq_printf(m, "%d\n", linuxExtEnabled);
+ return 0;
+}
- return len;
+static int cifs_linux_ext_proc_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, cifs_linux_ext_proc_show, NULL);
}
-static int
-linuxExtensionsEnabled_write(struct file *file, const char __user *buffer,
- unsigned long count, void *data)
+
+static ssize_t cifs_linux_ext_proc_write(struct file *file,
+ const char __user *buffer, size_t count, loff_t *ppos)
{
char c;
int rc;
@@ -721,31 +566,28 @@ linuxExtensionsEnabled_write(struct file *file, const char __user *buffer,
return count;
}
+static const struct file_operations cifs_linux_ext_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = cifs_linux_ext_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+ .write = cifs_linux_ext_proc_write,
+};
-static int
-lookupFlag_read(char *page, char **start, off_t off,
- int count, int *eof, void *data)
+static int cifs_lookup_cache_proc_show(struct seq_file *m, void *v)
{
- int len;
-
- len = sprintf(page, "%d\n", lookupCacheEnabled);
-
- len -= off;
- *start = page + off;
-
- if (len > count)
- len = count;
- else
- *eof = 1;
-
- if (len < 0)
- len = 0;
+ seq_printf(m, "%d\n", lookupCacheEnabled);
+ return 0;
+}
- return len;
+static int cifs_lookup_cache_proc_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, cifs_lookup_cache_proc_show, NULL);
}
-static int
-lookupFlag_write(struct file *file, const char __user *buffer,
- unsigned long count, void *data)
+
+static ssize_t cifs_lookup_cache_proc_write(struct file *file,
+ const char __user *buffer, size_t count, loff_t *ppos)
{
char c;
int rc;
@@ -760,30 +602,29 @@ lookupFlag_write(struct file *file, const char __user *buffer,
return count;
}
-static int
-traceSMB_read(char *page, char **start, off_t off, int count,
- int *eof, void *data)
-{
- int len;
-
- len = sprintf(page, "%d\n", traceSMB);
-
- len -= off;
- *start = page + off;
- if (len > count)
- len = count;
- else
- *eof = 1;
+static const struct file_operations cifs_lookup_cache_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = cifs_lookup_cache_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+ .write = cifs_lookup_cache_proc_write,
+};
- if (len < 0)
- len = 0;
+static int traceSMB_proc_show(struct seq_file *m, void *v)
+{
+ seq_printf(m, "%d\n", traceSMB);
+ return 0;
+}
- return len;
+static int traceSMB_proc_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, traceSMB_proc_show, NULL);
}
-static int
-traceSMB_write(struct file *file, const char __user *buffer,
- unsigned long count, void *data)
+
+static ssize_t traceSMB_proc_write(struct file *file, const char __user *buffer,
+ size_t count, loff_t *ppos)
{
char c;
int rc;
@@ -799,30 +640,28 @@ traceSMB_write(struct file *file, const char __user *buffer,
return count;
}
-static int
-multiuser_mount_read(char *page, char **start, off_t off,
- int count, int *eof, void *data)
-{
- int len;
-
- len = sprintf(page, "%d\n", multiuser_mount);
-
- len -= off;
- *start = page + off;
+static const struct file_operations traceSMB_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = traceSMB_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+ .write = traceSMB_proc_write,
+};
- if (len > count)
- len = count;
- else
- *eof = 1;
-
- if (len < 0)
- len = 0;
+static int cifs_multiuser_mount_proc_show(struct seq_file *m, void *v)
+{
+ seq_printf(m, "%d\n", multiuser_mount);
+ return 0;
+}
- return len;
+static int cifs_multiuser_mount_proc_open(struct inode *inode, struct file *fh)
+{
+ return single_open(fh, cifs_multiuser_mount_proc_show, NULL);
}
-static int
-multiuser_mount_write(struct file *file, const char __user *buffer,
- unsigned long count, void *data)
+
+static ssize_t cifs_multiuser_mount_proc_write(struct file *file,
+ const char __user *buffer, size_t count, loff_t *ppos)
{
char c;
int rc;
@@ -838,30 +677,28 @@ multiuser_mount_write(struct file *file, const char __user *buffer,
return count;
}
-static int
-security_flags_read(char *page, char **start, off_t off,
- int count, int *eof, void *data)
-{
- int len;
-
- len = sprintf(page, "0x%x\n", extended_security);
-
- len -= off;
- *start = page + off;
+static const struct file_operations cifs_multiuser_mount_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = cifs_multiuser_mount_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+ .write = cifs_multiuser_mount_proc_write,
+};
- if (len > count)
- len = count;
- else
- *eof = 1;
-
- if (len < 0)
- len = 0;
+static int cifs_security_flags_proc_show(struct seq_file *m, void *v)
+{
+ seq_printf(m, "0x%x\n", extended_security);
+ return 0;
+}
- return len;
+static int cifs_security_flags_proc_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, cifs_security_flags_proc_show, NULL);
}
-static int
-security_flags_write(struct file *file, const char __user *buffer,
- unsigned long count, void *data)
+
+static ssize_t cifs_security_flags_proc_write(struct file *file,
+ const char __user *buffer, size_t count, loff_t *ppos)
{
unsigned int flags;
char flags_string[12];
@@ -917,6 +754,15 @@ security_flags_write(struct file *file, const char __user *buffer,
/* BB should we turn on MAY flags for other MUST options? */
return count;
}
+
+static const struct file_operations cifs_security_flags_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = cifs_security_flags_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+ .write = cifs_security_flags_proc_write,
+};
#else
inline void cifs_proc_init(void)
{
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index d82374c9e32..d2c8eef84f3 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -226,7 +226,7 @@ static int add_mount_helper(struct vfsmount *newmnt, struct nameidata *nd,
int err;
mntget(newmnt);
- err = do_add_mount(newmnt, nd, nd->path.mnt->mnt_flags, mntlist);
+ err = do_add_mount(newmnt, &nd->path, nd->path.mnt->mnt_flags, mntlist);
switch (err) {
case 0:
path_put(&nd->path);
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 7013aaff6ae..2434ab0e879 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -66,8 +66,8 @@ struct key_type cifs_spnego_key_type = {
.describe = user_describe,
};
-#define MAX_VER_STR_LEN 9 /* length of longest version string e.g.
- strlen(";ver=0xFF") */
+#define MAX_VER_STR_LEN 8 /* length of longest version string e.g.
+ strlen("ver=0xFF") */
#define MAX_MECH_STR_LEN 13 /* length of longest security mechanism name, eg
in future could have strlen(";sec=ntlmsspi") */
#define MAX_IPV6_ADDR_LEN 42 /* eg FEDC:BA98:7654:3210:FEDC:BA98:7654:3210/60 */
@@ -81,11 +81,15 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
struct key *spnego_key;
const char *hostname = server->hostname;
- /* BB: come up with better scheme for determining length */
- /* length of fields (with semicolons): ver=0xyz ipv4= ipaddress host=
- hostname sec=mechanism uid=0x uid */
- desc_len = MAX_VER_STR_LEN + 5 + MAX_IPV6_ADDR_LEN + 1 + 6 +
- strlen(hostname) + MAX_MECH_STR_LEN + 8 + (sizeof(uid_t) * 2);
+ /* length of fields (with semicolons): ver=0xyz ip4=ipaddress
+ host=hostname sec=mechanism uid=0xFF user=username */
+ desc_len = MAX_VER_STR_LEN +
+ 6 /* len of ";host=" */ + strlen(hostname) +
+ 5 /* len of ";ip4=" */ + MAX_IPV6_ADDR_LEN +
+ MAX_MECH_STR_LEN +
+ 7 /* len of ";uid=0x" */ + (sizeof(uid_t) * 2) +
+ 6 /* len of ";user=" */ + strlen(sesInfo->userName) + 1;
+
spnego_key = ERR_PTR(-ENOMEM);
description = kzalloc(desc_len, GFP_KERNEL);
if (description == NULL)
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 0e9fc2ba90e..57ecdc83c26 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -56,7 +56,7 @@ int match_sid(struct cifs_sid *ctsid)
struct cifs_sid *cwsid;
if (!ctsid)
- return (-1);
+ return -1;
for (i = 0; i < NUM_WK_SIDS; ++i) {
cwsid = &(wksidarr[i].cifssid);
@@ -87,11 +87,11 @@ int match_sid(struct cifs_sid *ctsid)
}
cFYI(1, ("matching sid: %s\n", wksidarr[i].sidname));
- return (0); /* sids compare/match */
+ return 0; /* sids compare/match */
}
cFYI(1, ("No matching sid"));
- return (-1);
+ return -1;
}
/* if the two SIDs (roughly equivalent to a UUID for a user or group) are
@@ -102,16 +102,16 @@ int compare_sids(const struct cifs_sid *ctsid, const struct cifs_sid *cwsid)
int num_subauth, num_sat, num_saw;
if ((!ctsid) || (!cwsid))
- return (0);
+ return 0;
/* compare the revision */
if (ctsid->revision != cwsid->revision)
- return (0);
+ return 0;
/* compare all of the six auth values */
for (i = 0; i < 6; ++i) {
if (ctsid->authority[i] != cwsid->authority[i])
- return (0);
+ return 0;
}
/* compare all of the subauth values if any */
@@ -121,11 +121,11 @@ int compare_sids(const struct cifs_sid *ctsid, const struct cifs_sid *cwsid)
if (num_subauth) {
for (i = 0; i < num_subauth; ++i) {
if (ctsid->sub_auth[i] != cwsid->sub_auth[i])
- return (0);
+ return 0;
}
}
- return (1); /* sids compare/match */
+ return 1; /* sids compare/match */
}
@@ -169,8 +169,7 @@ static void copy_sec_desc(const struct cifs_ntsd *pntsd,
for (i = 0; i < 6; i++)
ngroup_sid_ptr->authority[i] = group_sid_ptr->authority[i];
for (i = 0; i < 5; i++)
- ngroup_sid_ptr->sub_auth[i] =
- cpu_to_le32(group_sid_ptr->sub_auth[i]);
+ ngroup_sid_ptr->sub_auth[i] = group_sid_ptr->sub_auth[i];
return;
}
@@ -285,7 +284,7 @@ static __u16 fill_ace_for_sid(struct cifs_ace *pntace,
size = 1 + 1 + 2 + 4 + 1 + 1 + 6 + (psid->num_subauth * 4);
pntace->size = cpu_to_le16(size);
- return (size);
+ return size;
}
@@ -426,7 +425,7 @@ static int set_chmod_dacl(struct cifs_acl *pndacl, struct cifs_sid *pownersid,
pndacl->size = cpu_to_le16(size + sizeof(struct cifs_acl));
pndacl->num_aces = cpu_to_le32(3);
- return (0);
+ return 0;
}
@@ -510,7 +509,7 @@ static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len,
sizeof(struct cifs_sid)); */
- return (0);
+ return 0;
}
@@ -527,7 +526,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
struct cifs_acl *ndacl_ptr = NULL; /* no need for SACL ptr */
if ((inode == NULL) || (pntsd == NULL) || (pnntsd == NULL))
- return (-EIO);
+ return -EIO;
owner_sid_ptr = (struct cifs_sid *)((char *)pntsd +
le32_to_cpu(pntsd->osidoffset));
@@ -550,7 +549,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
/* copy security descriptor control portion and owner and group sid */
copy_sec_desc(pntsd, pnntsd, sidsoffset);
- return (rc);
+ return rc;
}
@@ -629,11 +628,11 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
cFYI(DBG2, ("set ACL for %s from mode 0x%x", path, inode->i_mode));
if (!inode)
- return (rc);
+ return rc;
sb = inode->i_sb;
if (sb == NULL)
- return (rc);
+ return rc;
cifs_sb = CIFS_SB(sb);
xid = GetXid();
@@ -652,7 +651,7 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
if (rc != 0) {
cERROR(1, ("Unable to open file to set ACL"));
FreeXid(xid);
- return (rc);
+ return rc;
}
}
@@ -665,7 +664,7 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
FreeXid(xid);
- return (rc);
+ return rc;
}
/* Translate the CIFS ACL (simlar to NTFS ACL) for a file into mode bits */
@@ -715,7 +714,7 @@ int mode_to_acl(struct inode *inode, const char *path, __u64 nmode)
if (!pnntsd) {
cERROR(1, ("Unable to allocate security descriptor"));
kfree(pntsd);
- return (-ENOMEM);
+ return -ENOMEM;
}
rc = build_sec_desc(pntsd, pnntsd, inode, nmode);
@@ -732,6 +731,6 @@ int mode_to_acl(struct inode *inode, const char *path, __u64 nmode)
kfree(pntsd);
}
- return (rc);
+ return rc;
}
#endif /* CONFIG_CIFS_EXPERIMENTAL */
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 4ff8939c6cc..83fd40dc1ef 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -310,9 +310,8 @@ void calc_lanman_hash(struct cifsSesInfo *ses, char *lnm_session_key)
utf8 and other multibyte codepages each need their own strupper
function since a byte at a time will ont work. */
- for (i = 0; i < CIFS_ENCPWD_SIZE; i++) {
+ for (i = 0; i < CIFS_ENCPWD_SIZE; i++)
password_with_pad[i] = toupper(password_with_pad[i]);
- }
SMBencrypt(password_with_pad, ses->server->cryptKey, lnm_session_key);
/* clear password before we return/free memory */
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 22857c639df..e8da4ee761b 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -267,7 +267,7 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
return 0;
}
-static int cifs_permission(struct inode *inode, int mask, struct nameidata *nd)
+static int cifs_permission(struct inode *inode, int mask)
{
struct cifs_sb_info *cifs_sb;
@@ -766,7 +766,7 @@ const struct file_operations cifs_dir_ops = {
};
static void
-cifs_init_once(struct kmem_cache *cachep, void *inode)
+cifs_init_once(void *inode)
{
struct cifsInodeInfo *cifsi = inode;
@@ -930,36 +930,34 @@ static int cifs_oplock_thread(void *dummyarg)
schedule_timeout(39*HZ);
} else {
oplock_item = list_entry(GlobalOplock_Q.next,
- struct oplock_q_entry, qhead);
- if (oplock_item) {
- cFYI(1, ("found oplock item to write out"));
- pTcon = oplock_item->tcon;
- inode = oplock_item->pinode;
- netfid = oplock_item->netfid;
- spin_unlock(&GlobalMid_Lock);
- DeleteOplockQEntry(oplock_item);
- /* can not grab inode sem here since it would
+ struct oplock_q_entry, qhead);
+ cFYI(1, ("found oplock item to write out"));
+ pTcon = oplock_item->tcon;
+ inode = oplock_item->pinode;
+ netfid = oplock_item->netfid;
+ spin_unlock(&GlobalMid_Lock);
+ DeleteOplockQEntry(oplock_item);
+ /* can not grab inode sem here since it would
deadlock when oplock received on delete
since vfs_unlink holds the i_mutex across
the call */
- /* mutex_lock(&inode->i_mutex);*/
- if (S_ISREG(inode->i_mode)) {
- rc =
- filemap_fdatawrite(inode->i_mapping);
- if (CIFS_I(inode)->clientCanCacheRead
- == 0) {
- waitrc = filemap_fdatawait(inode->i_mapping);
- invalidate_remote_inode(inode);
- }
- if (rc == 0)
- rc = waitrc;
- } else
- rc = 0;
- /* mutex_unlock(&inode->i_mutex);*/
- if (rc)
- CIFS_I(inode)->write_behind_rc = rc;
- cFYI(1, ("Oplock flush inode %p rc %d",
- inode, rc));
+ /* mutex_lock(&inode->i_mutex);*/
+ if (S_ISREG(inode->i_mode)) {
+ rc = filemap_fdatawrite(inode->i_mapping);
+ if (CIFS_I(inode)->clientCanCacheRead == 0) {
+ waitrc = filemap_fdatawait(
+ inode->i_mapping);
+ invalidate_remote_inode(inode);
+ }
+ if (rc == 0)
+ rc = waitrc;
+ } else
+ rc = 0;
+ /* mutex_unlock(&inode->i_mutex);*/
+ if (rc)
+ CIFS_I(inode)->write_behind_rc = rc;
+ cFYI(1, ("Oplock flush inode %p rc %d",
+ inode, rc));
/* releasing stale oplock after recent reconnect
of smb session using a now incorrect file
@@ -967,15 +965,13 @@ static int cifs_oplock_thread(void *dummyarg)
not bother sending an oplock release if session
to server still is disconnected since oplock
already released by the server in that case */
- if (pTcon->tidStatus != CifsNeedReconnect) {
- rc = CIFSSMBLock(0, pTcon, netfid,
- 0 /* len */ , 0 /* offset */, 0,
- 0, LOCKING_ANDX_OPLOCK_RELEASE,
- false /* wait flag */);
- cFYI(1, ("Oplock release rc = %d", rc));
- }
- } else
- spin_unlock(&GlobalMid_Lock);
+ if (pTcon->tidStatus != CifsNeedReconnect) {
+ rc = CIFSSMBLock(0, pTcon, netfid,
+ 0 /* len */ , 0 /* offset */, 0,
+ 0, LOCKING_ANDX_OPLOCK_RELEASE,
+ false /* wait flag */);
+ cFYI(1, ("Oplock release rc = %d", rc));
+ }
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(1); /* yield in case q were corrupt */
}
@@ -1001,8 +997,7 @@ static int cifs_dnotify_thread(void *dummyarg)
list_for_each(tmp, &GlobalSMBSessionList) {
ses = list_entry(tmp, struct cifsSesInfo,
cifsSessionList);
- if (ses && ses->server &&
- atomic_read(&ses->server->inFlight))
+ if (ses->server && atomic_read(&ses->server->inFlight))
wake_up_all(&ses->server->response_q);
}
read_unlock(&GlobalSMBSeslock);
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 25a6cbd1552..135c965c413 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -101,5 +101,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
extern const struct export_operations cifs_export_ops;
#endif /* EXPERIMENTAL */
-#define CIFS_VERSION "1.53"
+#define CIFS_VERSION "1.54"
#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 9cfcf326ead..7e1cf262eff 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -27,7 +27,7 @@
#define MAX_SES_INFO 2
#define MAX_TCON_INFO 4
-#define MAX_TREE_SIZE 2 + MAX_SERVER_SIZE + 1 + MAX_SHARE_SIZE + 1
+#define MAX_TREE_SIZE (2 + MAX_SERVER_SIZE + 1 + MAX_SHARE_SIZE + 1)
#define MAX_SERVER_SIZE 15
#define MAX_SHARE_SIZE 64 /* used to be 20, this should still be enough */
#define MAX_USERNAME_SIZE 32 /* 32 is to allow for 15 char names + null
@@ -537,8 +537,8 @@ require use of the stronger protocol */
#endif /* WEAK_PW_HASH */
#define CIFSSEC_MUST_SEAL 0x40040 /* not supported yet */
-#define CIFSSEC_DEF CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2
-#define CIFSSEC_MAX CIFSSEC_MUST_SIGN | CIFSSEC_MUST_NTLMV2
+#define CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2)
+#define CIFSSEC_MAX (CIFSSEC_MUST_SIGN | CIFSSEC_MUST_NTLMV2)
#define CIFSSEC_AUTH_MASK (CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_LANMAN | CIFSSEC_MAY_PLNTXT | CIFSSEC_MAY_KRB5)
/*
*****************************************************************
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 0f327c224da..d2a073edd1b 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -31,7 +31,7 @@
#else
#define CIFS_PROT 0
#endif
-#define POSIX_PROT CIFS_PROT+1
+#define POSIX_PROT (CIFS_PROT+1)
#define BAD_PROT 0xFFFF
/* SMB command codes */
@@ -262,7 +262,7 @@
*/
#define CIFS_NO_HANDLE 0xFFFF
-#define NO_CHANGE_64 cpu_to_le64(0xFFFFFFFFFFFFFFFFULL)
+#define NO_CHANGE_64 0xFFFFFFFFFFFFFFFFULL
#define NO_CHANGE_32 0xFFFFFFFFUL
/* IPC$ in ASCII */
@@ -341,7 +341,7 @@
#define CREATE_COMPLETE_IF_OPLK 0x00000100 /* should be zero */
#define CREATE_NO_EA_KNOWLEDGE 0x00000200
#define CREATE_EIGHT_DOT_THREE 0x00000400 /* doc says this is obsolete
- "open for recovery" flag - should
+ "open for recovery" flag should
be zero in any case */
#define CREATE_OPEN_FOR_RECOVERY 0x00000400
#define CREATE_RANDOM_ACCESS 0x00000800
@@ -414,8 +414,8 @@ struct smb_hdr {
__u8 WordCount;
} __attribute__((packed));
/* given a pointer to an smb_hdr retrieve the value of byte count */
-#define BCC(smb_var) ( *(__u16 *)((char *)smb_var + sizeof(struct smb_hdr) + (2 * smb_var->WordCount)))
-#define BCC_LE(smb_var) ( *(__le16 *)((char *)smb_var + sizeof(struct smb_hdr) + (2 * smb_var->WordCount)))
+#define BCC(smb_var) (*(__u16 *)((char *)smb_var + sizeof(struct smb_hdr) + (2 * smb_var->WordCount)))
+#define BCC_LE(smb_var) (*(__le16 *)((char *)smb_var + sizeof(struct smb_hdr) + (2 * smb_var->WordCount)))
/* given a pointer to an smb_hdr retrieve the pointer to the byte area */
#define pByteArea(smb_var) ((unsigned char *)smb_var + sizeof(struct smb_hdr) + (2 * smb_var->WordCount) + 2)
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index b9f5e935f82..a729d083e6f 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -172,12 +172,13 @@ extern int CIFSSMBQFSUnixInfo(const int xid, struct cifsTconInfo *tcon);
extern int CIFSSMBQFSPosixInfo(const int xid, struct cifsTconInfo *tcon,
struct kstatfs *FSData);
-extern int CIFSSMBSetTimes(const int xid, struct cifsTconInfo *tcon,
+extern int CIFSSMBSetPathInfo(const int xid, struct cifsTconInfo *tcon,
const char *fileName, const FILE_BASIC_INFO *data,
const struct nls_table *nls_codepage,
int remap_special_chars);
-extern int CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon,
- const FILE_BASIC_INFO *data, __u16 fid);
+extern int CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon,
+ const FILE_BASIC_INFO *data, __u16 fid,
+ __u32 pid_of_opener);
#if 0
extern int CIFSSMBSetAttrLegacy(int xid, struct cifsTconInfo *tcon,
char *fileName, __u16 dos_attributes,
@@ -191,9 +192,20 @@ extern int CIFSSMBSetEOF(const int xid, struct cifsTconInfo *tcon,
extern int CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon,
__u64 size, __u16 fileHandle, __u32 opener_pid,
bool AllocSizeFlag);
-extern int CIFSSMBUnixSetPerms(const int xid, struct cifsTconInfo *pTcon,
- char *full_path, __u64 mode, __u64 uid,
- __u64 gid, dev_t dev,
+
+struct cifs_unix_set_info_args {
+ __u64 ctime;
+ __u64 atime;
+ __u64 mtime;
+ __u64 mode;
+ __u64 uid;
+ __u64 gid;
+ dev_t device;
+};
+
+extern int CIFSSMBUnixSetInfo(const int xid, struct cifsTconInfo *pTcon,
+ char *fileName,
+ const struct cifs_unix_set_info_args *args,
const struct nls_table *nls_codepage,
int remap_special_chars);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 4511b708f0f..994de7c9047 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -128,8 +128,7 @@ static void mark_open_files_invalid(struct cifsTconInfo *pTcon)
write_lock(&GlobalSMBSeslock);
list_for_each_safe(tmp, tmp1, &pTcon->openFileList) {
open_file = list_entry(tmp, struct cifsFileInfo, tlist);
- if (open_file)
- open_file->invalidHandle = true;
+ open_file->invalidHandle = true;
}
write_unlock(&GlobalSMBSeslock);
/* BB Add call to invalidate_inodes(sb) for all superblocks mounted
@@ -686,11 +685,10 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
SecurityBlob,
count - 16,
&server->secType);
- if (rc == 1) {
+ if (rc == 1)
rc = 0;
- } else {
+ else
rc = -EINVAL;
- }
}
} else
server->capabilities &= ~CAP_EXTENDED_SECURITY;
@@ -3914,7 +3912,10 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr,
bool is_unicode;
struct dfs_referral_level_3 *ref;
- is_unicode = pSMBr->hdr.Flags2 & SMBFLG2_UNICODE;
+ if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE)
+ is_unicode = true;
+ else
+ is_unicode = false;
*num_of_nodes = le16_to_cpu(pSMBr->NumberOfReferrals);
if (*num_of_nodes < 1) {
@@ -4814,8 +4815,8 @@ CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size,
time and resort to the original setpathinfo level which takes the ancient
DOS time format with 2 second granularity */
int
-CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon,
- const FILE_BASIC_INFO *data, __u16 fid)
+CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon,
+ const FILE_BASIC_INFO *data, __u16 fid, __u32 pid_of_opener)
{
struct smb_com_transaction2_sfi_req *pSMB = NULL;
char *data_offset;
@@ -4828,11 +4829,8 @@ CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon,
if (rc)
return rc;
- /* At this point there is no need to override the current pid
- with the pid of the opener, but that could change if we someday
- use an existing handle (rather than opening one on the fly) */
- /* pSMB->hdr.Pid = cpu_to_le16((__u16)pid_of_opener);
- pSMB->hdr.PidHigh = cpu_to_le16((__u16)(pid_of_opener >> 16));*/
+ pSMB->hdr.Pid = cpu_to_le16((__u16)pid_of_opener);
+ pSMB->hdr.PidHigh = cpu_to_le16((__u16)(pid_of_opener >> 16));
params = 6;
pSMB->MaxSetupCount = 0;
@@ -4880,9 +4878,9 @@ CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon,
int
-CIFSSMBSetTimes(const int xid, struct cifsTconInfo *tcon, const char *fileName,
- const FILE_BASIC_INFO *data,
- const struct nls_table *nls_codepage, int remap)
+CIFSSMBSetPathInfo(const int xid, struct cifsTconInfo *tcon,
+ const char *fileName, const FILE_BASIC_INFO *data,
+ const struct nls_table *nls_codepage, int remap)
{
TRANSACTION2_SPI_REQ *pSMB = NULL;
TRANSACTION2_SPI_RSP *pSMBr = NULL;
@@ -5011,10 +5009,9 @@ SetAttrLgcyRetry:
#endif /* temporarily unneeded SetAttr legacy function */
int
-CIFSSMBUnixSetPerms(const int xid, struct cifsTconInfo *tcon,
- char *fileName, __u64 mode, __u64 uid, __u64 gid,
- dev_t device, const struct nls_table *nls_codepage,
- int remap)
+CIFSSMBUnixSetInfo(const int xid, struct cifsTconInfo *tcon, char *fileName,
+ const struct cifs_unix_set_info_args *args,
+ const struct nls_table *nls_codepage, int remap)
{
TRANSACTION2_SPI_REQ *pSMB = NULL;
TRANSACTION2_SPI_RSP *pSMBr = NULL;
@@ -5023,6 +5020,7 @@ CIFSSMBUnixSetPerms(const int xid, struct cifsTconInfo *tcon,
int bytes_returned = 0;
FILE_UNIX_BASIC_INFO *data_offset;
__u16 params, param_offset, offset, count, byte_count;
+ __u64 mode = args->mode;
cFYI(1, ("In SetUID/GID/Mode"));
setPermsRetry:
@@ -5078,16 +5076,16 @@ setPermsRetry:
set file size and do not want to truncate file size to zero
accidently as happened on one Samba server beta by putting
zero instead of -1 here */
- data_offset->EndOfFile = NO_CHANGE_64;
- data_offset->NumOfBytes = NO_CHANGE_64;
- data_offset->LastStatusChange = NO_CHANGE_64;
- data_offset->LastAccessTime = NO_CHANGE_64;
- data_offset->LastModificationTime = NO_CHANGE_64;
- data_offset->Uid = cpu_to_le64(uid);
- data_offset->Gid = cpu_to_le64(gid);
+ data_offset->EndOfFile = cpu_to_le64(NO_CHANGE_64);
+ data_offset->NumOfBytes = cpu_to_le64(NO_CHANGE_64);
+ data_offset->LastStatusChange = cpu_to_le64(args->ctime);
+ data_offset->LastAccessTime = cpu_to_le64(args->atime);
+ data_offset->LastModificationTime = cpu_to_le64(args->mtime);
+ data_offset->Uid = cpu_to_le64(args->uid);
+ data_offset->Gid = cpu_to_le64(args->gid);
/* better to leave device as zero when it is */
- data_offset->DevMajor = cpu_to_le64(MAJOR(device));
- data_offset->DevMinor = cpu_to_le64(MINOR(device));
+ data_offset->DevMajor = cpu_to_le64(MAJOR(args->device));
+ data_offset->DevMinor = cpu_to_le64(MINOR(args->device));
data_offset->Permissions = cpu_to_le64(mode);
if (S_ISREG(mode))
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index e8fa46c7cff..0711db65afe 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -151,7 +151,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
}
list_for_each(tmp, &GlobalTreeConnectionList) {
tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList);
- if ((tcon) && (tcon->ses) && (tcon->ses->server == server))
+ if ((tcon->ses) && (tcon->ses->server == server))
tcon->tidStatus = CifsNeedReconnect;
}
read_unlock(&GlobalSMBSeslock);
@@ -173,14 +173,12 @@ cifs_reconnect(struct TCP_Server_Info *server)
mid_entry = list_entry(tmp, struct
mid_q_entry,
qhead);
- if (mid_entry) {
- if (mid_entry->midState == MID_REQUEST_SUBMITTED) {
+ if (mid_entry->midState == MID_REQUEST_SUBMITTED) {
/* Mark other intransit requests as needing
retry so we do not immediately mark the
session bad again (ie after we reconnect
below) as they timeout too */
- mid_entry->midState = MID_RETRY_NEEDED;
- }
+ mid_entry->midState = MID_RETRY_NEEDED;
}
}
spin_unlock(&GlobalMid_Lock);
@@ -351,11 +349,9 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
current->flags |= PF_MEMALLOC;
cFYI(1, ("Demultiplex PID: %d", task_pid_nr(current)));
- write_lock(&GlobalSMBSeslock);
- atomic_inc(&tcpSesAllocCount);
- length = tcpSesAllocCount.counter;
- write_unlock(&GlobalSMBSeslock);
- if (length > 1)
+
+ length = atomic_inc_return(&tcpSesAllocCount);
+ if (length > 1)
mempool_resize(cifs_req_poolp, length + cifs_min_rcv,
GFP_KERNEL);
@@ -455,7 +451,7 @@ incomplete_rcv:
/* Note that FC 1001 length is big endian on the wire,
but we convert it here so it is always manipulated
as host byte order */
- pdu_length = ntohl(smb_buffer->smb_buf_length);
+ pdu_length = be32_to_cpu((__force __be32)smb_buffer->smb_buf_length);
smb_buffer->smb_buf_length = pdu_length;
cFYI(1, ("rfc1002 length 0x%x", pdu_length+4));
@@ -745,14 +741,11 @@ multi_t2_fnd:
coming home not much else we can do but free the memory */
}
- write_lock(&GlobalSMBSeslock);
- atomic_dec(&tcpSesAllocCount);
- length = tcpSesAllocCount.counter;
-
/* last chance to mark ses pointers invalid
if there are any pointing to this (e.g
if a crazy root user tried to kill cifsd
kernel thread explicitly this might happen) */
+ write_lock(&GlobalSMBSeslock);
list_for_each(tmp, &GlobalSMBSessionList) {
ses = list_entry(tmp, struct cifsSesInfo,
cifsSessionList);
@@ -763,6 +756,8 @@ multi_t2_fnd:
kfree(server->hostname);
kfree(server);
+
+ length = atomic_dec_return(&tcpSesAllocCount);
if (length > 0)
mempool_resize(cifs_req_poolp, length + cifs_min_rcv,
GFP_KERNEL);
@@ -1461,6 +1456,39 @@ get_dfs_path(int xid, struct cifsSesInfo *pSesInfo, const char *old_path,
return rc;
}
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+static struct lock_class_key cifs_key[2];
+static struct lock_class_key cifs_slock_key[2];
+
+static inline void
+cifs_reclassify_socket4(struct socket *sock)
+{
+ struct sock *sk = sock->sk;
+ BUG_ON(sock_owned_by_user(sk));
+ sock_lock_init_class_and_name(sk, "slock-AF_INET-CIFS",
+ &cifs_slock_key[0], "sk_lock-AF_INET-CIFS", &cifs_key[0]);
+}
+
+static inline void
+cifs_reclassify_socket6(struct socket *sock)
+{
+ struct sock *sk = sock->sk;
+ BUG_ON(sock_owned_by_user(sk));
+ sock_lock_init_class_and_name(sk, "slock-AF_INET6-CIFS",
+ &cifs_slock_key[1], "sk_lock-AF_INET6-CIFS", &cifs_key[1]);
+}
+#else
+static inline void
+cifs_reclassify_socket4(struct socket *sock)
+{
+}
+
+static inline void
+cifs_reclassify_socket6(struct socket *sock)
+{
+}
+#endif
+
/* See RFC1001 section 14 on representation of Netbios names */
static void rfc1002mangle(char *target, char *source, unsigned int length)
{
@@ -1495,6 +1523,7 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket,
/* BB other socket options to set KEEPALIVE, NODELAY? */
cFYI(1, ("Socket created"));
(*csocket)->sk->sk_allocation = GFP_NOFS;
+ cifs_reclassify_socket4(*csocket);
}
}
@@ -1627,6 +1656,7 @@ ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket)
/* BB other socket options to set KEEPALIVE, NODELAY? */
cFYI(1, ("ipv6 Socket created"));
(*csocket)->sk->sk_allocation = GFP_NOFS;
+ cifs_reclassify_socket6(*csocket);
}
}
@@ -3588,97 +3618,91 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo,
}
first_time = 1;
}
- if (!rc) {
- pSesInfo->flags = 0;
- pSesInfo->capabilities = pSesInfo->server->capabilities;
- if (linuxExtEnabled == 0)
- pSesInfo->capabilities &= (~CAP_UNIX);
+
+ if (rc)
+ goto ss_err_exit;
+
+ pSesInfo->flags = 0;
+ pSesInfo->capabilities = pSesInfo->server->capabilities;
+ if (linuxExtEnabled == 0)
+ pSesInfo->capabilities &= (~CAP_UNIX);
/* pSesInfo->sequence_number = 0;*/
- cFYI(1,
- ("Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d",
- pSesInfo->server->secMode,
- pSesInfo->server->capabilities,
- pSesInfo->server->timeAdj));
- if (experimEnabled < 2)
- rc = CIFS_SessSetup(xid, pSesInfo,
- first_time, nls_info);
- else if (extended_security
- && (pSesInfo->capabilities
- & CAP_EXTENDED_SECURITY)
- && (pSesInfo->server->secType == NTLMSSP)) {
- rc = -EOPNOTSUPP;
- } else if (extended_security
- && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY)
- && (pSesInfo->server->secType == RawNTLMSSP)) {
- cFYI(1, ("NTLMSSP sesssetup"));
- rc = CIFSNTLMSSPNegotiateSessSetup(xid,
- pSesInfo,
- &ntlmv2_flag,
- nls_info);
- if (!rc) {
- if (ntlmv2_flag) {
- char *v2_response;
- cFYI(1, ("more secure NTLM ver2 hash"));
- if (CalcNTLMv2_partial_mac_key(pSesInfo,
- nls_info)) {
- rc = -ENOMEM;
- goto ss_err_exit;
- } else
- v2_response = kmalloc(16 + 64 /* blob */, GFP_KERNEL);
- if (v2_response) {
- CalcNTLMv2_response(pSesInfo,
- v2_response);
- /* if (first_time)
- cifs_calculate_ntlmv2_mac_key(
- pSesInfo->server->mac_signing_key,
- response, ntlm_session_key,*/
- kfree(v2_response);
+ cFYI(1, ("Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d",
+ pSesInfo->server->secMode,
+ pSesInfo->server->capabilities,
+ pSesInfo->server->timeAdj));
+ if (experimEnabled < 2)
+ rc = CIFS_SessSetup(xid, pSesInfo, first_time, nls_info);
+ else if (extended_security
+ && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY)
+ && (pSesInfo->server->secType == NTLMSSP)) {
+ rc = -EOPNOTSUPP;
+ } else if (extended_security
+ && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY)
+ && (pSesInfo->server->secType == RawNTLMSSP)) {
+ cFYI(1, ("NTLMSSP sesssetup"));
+ rc = CIFSNTLMSSPNegotiateSessSetup(xid, pSesInfo, &ntlmv2_flag,
+ nls_info);
+ if (!rc) {
+ if (ntlmv2_flag) {
+ char *v2_response;
+ cFYI(1, ("more secure NTLM ver2 hash"));
+ if (CalcNTLMv2_partial_mac_key(pSesInfo,
+ nls_info)) {
+ rc = -ENOMEM;
+ goto ss_err_exit;
+ } else
+ v2_response = kmalloc(16 + 64 /* blob*/,
+ GFP_KERNEL);
+ if (v2_response) {
+ CalcNTLMv2_response(pSesInfo,
+ v2_response);
+ /* if (first_time)
+ cifs_calculate_ntlmv2_mac_key */
+ kfree(v2_response);
/* BB Put dummy sig in SessSetup PDU? */
- } else {
- rc = -ENOMEM;
- goto ss_err_exit;
- }
-
} else {
- SMBNTencrypt(pSesInfo->password,
- pSesInfo->server->cryptKey,
- ntlm_session_key);
-
- if (first_time)
- cifs_calculate_mac_key(
- &pSesInfo->server->mac_signing_key,
- ntlm_session_key,
- pSesInfo->password);
+ rc = -ENOMEM;
+ goto ss_err_exit;
}
+
+ } else {
+ SMBNTencrypt(pSesInfo->password,
+ pSesInfo->server->cryptKey,
+ ntlm_session_key);
+
+ if (first_time)
+ cifs_calculate_mac_key(
+ &pSesInfo->server->mac_signing_key,
+ ntlm_session_key,
+ pSesInfo->password);
+ }
/* for better security the weaker lanman hash not sent
in AuthSessSetup so we no longer calculate it */
- rc = CIFSNTLMSSPAuthSessSetup(xid,
- pSesInfo,
- ntlm_session_key,
- ntlmv2_flag,
- nls_info);
- }
- } else { /* old style NTLM 0.12 session setup */
- SMBNTencrypt(pSesInfo->password,
- pSesInfo->server->cryptKey,
- ntlm_session_key);
+ rc = CIFSNTLMSSPAuthSessSetup(xid, pSesInfo,
+ ntlm_session_key,
+ ntlmv2_flag,
+ nls_info);
+ }
+ } else { /* old style NTLM 0.12 session setup */
+ SMBNTencrypt(pSesInfo->password, pSesInfo->server->cryptKey,
+ ntlm_session_key);
- if (first_time)
- cifs_calculate_mac_key(
+ if (first_time)
+ cifs_calculate_mac_key(
&pSesInfo->server->mac_signing_key,
ntlm_session_key, pSesInfo->password);
- rc = CIFSSessSetup(xid, pSesInfo,
- ntlm_session_key, nls_info);
- }
- if (rc) {
- cERROR(1, ("Send error in SessSetup = %d", rc));
- } else {
- cFYI(1, ("CIFS Session Established successfully"));
+ rc = CIFSSessSetup(xid, pSesInfo, ntlm_session_key, nls_info);
+ }
+ if (rc) {
+ cERROR(1, ("Send error in SessSetup = %d", rc));
+ } else {
+ cFYI(1, ("CIFS Session Established successfully"));
pSesInfo->status = CifsGood;
- }
}
+
ss_err_exit:
return rc;
}
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index fb69c1fa85c..e962e75e6f7 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -226,23 +226,28 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
/* If Open reported that we actually created a file
then we now have to set the mode if possible */
if ((pTcon->unix_ext) && (oplock & CIFS_CREATE_ACTION)) {
+ struct cifs_unix_set_info_args args = {
+ .mode = mode,
+ .ctime = NO_CHANGE_64,
+ .atime = NO_CHANGE_64,
+ .mtime = NO_CHANGE_64,
+ .device = 0,
+ };
+
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
- CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode,
- (__u64)current->fsuid,
- (__u64)current->fsgid,
- 0 /* dev */,
- cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
+ args.uid = (__u64) current->fsuid;
+ if (inode->i_mode & S_ISGID)
+ args.gid = (__u64) inode->i_gid;
+ else
+ args.gid = (__u64) current->fsgid;
} else {
- CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode,
- (__u64)-1,
- (__u64)-1,
- 0 /* dev */,
- cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
+ args.uid = NO_CHANGE_64;
+ args.gid = NO_CHANGE_64;
}
+ CIFSSMBUnixSetInfo(xid, pTcon, full_path, &args,
+ cifs_sb->local_nls,
+ cifs_sb->mnt_cifs_flags &
+ CIFS_MOUNT_MAP_SPECIAL_CHR);
} else {
/* BB implement mode setting via Windows security
descriptors e.g. */
@@ -267,7 +272,12 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
(cifs_sb->mnt_cifs_flags &
CIFS_MOUNT_SET_UID)) {
newinode->i_uid = current->fsuid;
- newinode->i_gid = current->fsgid;
+ if (inode->i_mode & S_ISGID)
+ newinode->i_gid =
+ inode->i_gid;
+ else
+ newinode->i_gid =
+ current->fsgid;
}
}
}
@@ -357,21 +367,24 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
if (full_path == NULL)
rc = -ENOMEM;
else if (pTcon->unix_ext) {
- mode &= ~current->fs->umask;
+ struct cifs_unix_set_info_args args = {
+ .mode = mode & ~current->fs->umask,
+ .ctime = NO_CHANGE_64,
+ .atime = NO_CHANGE_64,
+ .mtime = NO_CHANGE_64,
+ .device = device_number,
+ };
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
- rc = CIFSSMBUnixSetPerms(xid, pTcon, full_path,
- mode, (__u64)current->fsuid,
- (__u64)current->fsgid,
- device_number, cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
+ args.uid = (__u64) current->fsuid;
+ args.gid = (__u64) current->fsgid;
} else {
- rc = CIFSSMBUnixSetPerms(xid, pTcon,
- full_path, mode, (__u64)-1, (__u64)-1,
- device_number, cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
+ args.uid = NO_CHANGE_64;
+ args.gid = NO_CHANGE_64;
}
+ rc = CIFSSMBUnixSetInfo(xid, pTcon, full_path,
+ &args, cifs_sb->local_nls,
+ cifs_sb->mnt_cifs_flags &
+ CIFS_MOUNT_MAP_SPECIAL_CHR);
if (!rc) {
rc = cifs_get_inode_info_unix(&newinode, full_path,
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 0aac824371a..ff14d14903a 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -310,18 +310,19 @@ int cifs_open(struct inode *inode, struct file *file)
/* time to set mode which we can not set earlier due to
problems creating new read-only files */
if (pTcon->unix_ext) {
- CIFSSMBUnixSetPerms(xid, pTcon, full_path,
- inode->i_mode,
- (__u64)-1, (__u64)-1, 0 /* dev */,
+ struct cifs_unix_set_info_args args = {
+ .mode = inode->i_mode,
+ .uid = NO_CHANGE_64,
+ .gid = NO_CHANGE_64,
+ .ctime = NO_CHANGE_64,
+ .atime = NO_CHANGE_64,
+ .mtime = NO_CHANGE_64,
+ .device = 0,
+ };
+ CIFSSMBUnixSetInfo(xid, pTcon, full_path, &args,
cifs_sb->local_nls,
cifs_sb->mnt_cifs_flags &
CIFS_MOUNT_MAP_SPECIAL_CHR);
- } else {
- /* BB implement via Windows security descriptors eg
- CIFSSMBWinSetPerms(xid, pTcon, full_path, mode,
- -1, -1, local_nls);
- in the meantime could set r/o dos attribute when
- perms are eg: mode & 0222 == 0 */
}
}
@@ -1280,7 +1281,7 @@ retry:
if (first < 0)
lock_page(page);
- else if (TestSetPageLocked(page))
+ else if (!trylock_page(page))
break;
if (unlikely(page->mapping != mapping)) {
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 2e904bd111c..28a22092d45 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -737,7 +737,7 @@ psx_del_no_retry:
/* ATTRS set to normal clears r/o bit */
pinfo_buf->Attributes = cpu_to_le32(ATTR_NORMAL);
if (!(pTcon->ses->flags & CIFS_SES_NT4))
- rc = CIFSSMBSetTimes(xid, pTcon, full_path,
+ rc = CIFSSMBSetPathInfo(xid, pTcon, full_path,
pinfo_buf,
cifs_sb->local_nls,
cifs_sb->mnt_cifs_flags &
@@ -767,9 +767,10 @@ psx_del_no_retry:
cifs_sb->mnt_cifs_flags &
CIFS_MOUNT_MAP_SPECIAL_CHR);
if (rc == 0) {
- rc = CIFSSMBSetFileTimes(xid, pTcon,
- pinfo_buf,
- netfid);
+ rc = CIFSSMBSetFileInfo(xid, pTcon,
+ pinfo_buf,
+ netfid,
+ current->tgid);
CIFSSMBClose(xid, pTcon, netfid);
}
}
@@ -984,32 +985,41 @@ mkdir_get_info:
* failed to get it from the server or was set bogus */
if ((direntry->d_inode) && (direntry->d_inode->i_nlink < 2))
direntry->d_inode->i_nlink = 2;
+
mode &= ~current->fs->umask;
+ /* must turn on setgid bit if parent dir has it */
+ if (inode->i_mode & S_ISGID)
+ mode |= S_ISGID;
+
if (pTcon->unix_ext) {
+ struct cifs_unix_set_info_args args = {
+ .mode = mode,
+ .ctime = NO_CHANGE_64,
+ .atime = NO_CHANGE_64,
+ .mtime = NO_CHANGE_64,
+ .device = 0,
+ };
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
- CIFSSMBUnixSetPerms(xid, pTcon, full_path,
- mode,
- (__u64)current->fsuid,
- (__u64)current->fsgid,
- 0 /* dev_t */,
- cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
+ args.uid = (__u64)current->fsuid;
+ if (inode->i_mode & S_ISGID)
+ args.gid = (__u64)inode->i_gid;
+ else
+ args.gid = (__u64)current->fsgid;
} else {
- CIFSSMBUnixSetPerms(xid, pTcon, full_path,
- mode, (__u64)-1,
- (__u64)-1, 0 /* dev_t */,
- cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
+ args.uid = NO_CHANGE_64;
+ args.gid = NO_CHANGE_64;
}
+ CIFSSMBUnixSetInfo(xid, pTcon, full_path, &args,
+ cifs_sb->local_nls,
+ cifs_sb->mnt_cifs_flags &
+ CIFS_MOUNT_MAP_SPECIAL_CHR);
} else {
if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) &&
(mode & S_IWUGO) == 0) {
FILE_BASIC_INFO pInfo;
memset(&pInfo, 0, sizeof(pInfo));
pInfo.Attributes = cpu_to_le32(ATTR_READONLY);
- CIFSSMBSetTimes(xid, pTcon, full_path,
+ CIFSSMBSetPathInfo(xid, pTcon, full_path,
&pInfo, cifs_sb->local_nls,
cifs_sb->mnt_cifs_flags &
CIFS_MOUNT_MAP_SPECIAL_CHR);
@@ -1024,8 +1034,12 @@ mkdir_get_info:
CIFS_MOUNT_SET_UID) {
direntry->d_inode->i_uid =
current->fsuid;
- direntry->d_inode->i_gid =
- current->fsgid;
+ if (inode->i_mode & S_ISGID)
+ direntry->d_inode->i_gid =
+ inode->i_gid;
+ else
+ direntry->d_inode->i_gid =
+ current->fsgid;
}
}
}
@@ -1310,10 +1324,11 @@ int cifs_revalidate(struct dentry *direntry)
/* if (S_ISDIR(direntry->d_inode->i_mode))
shrink_dcache_parent(direntry); */
if (S_ISREG(direntry->d_inode->i_mode)) {
- if (direntry->d_inode->i_mapping)
+ if (direntry->d_inode->i_mapping) {
wbrc = filemap_fdatawait(direntry->d_inode->i_mapping);
if (wbrc)
CIFS_I(direntry->d_inode)->write_behind_rc = wbrc;
+ }
/* may eventually have to do this for open files too */
if (list_empty(&(cifsInode->openFileList))) {
/* changed on server - flush read ahead pages */
@@ -1413,31 +1428,304 @@ out_busy:
return -ETXTBSY;
}
-int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
+static int
+cifs_set_file_size(struct inode *inode, struct iattr *attrs,
+ int xid, char *full_path)
{
+ int rc;
+ struct cifsFileInfo *open_file;
+ struct cifsInodeInfo *cifsInode = CIFS_I(inode);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ struct cifsTconInfo *pTcon = cifs_sb->tcon;
+
+ /*
+ * To avoid spurious oplock breaks from server, in the case of
+ * inodes that we already have open, avoid doing path based
+ * setting of file size if we can do it by handle.
+ * This keeps our caching token (oplock) and avoids timeouts
+ * when the local oplock break takes longer to flush
+ * writebehind data than the SMB timeout for the SetPathInfo
+ * request would allow
+ */
+ open_file = find_writable_file(cifsInode);
+ if (open_file) {
+ __u16 nfid = open_file->netfid;
+ __u32 npid = open_file->pid;
+ rc = CIFSSMBSetFileSize(xid, pTcon, attrs->ia_size, nfid,
+ npid, false);
+ atomic_dec(&open_file->wrtPending);
+ cFYI(1, ("SetFSize for attrs rc = %d", rc));
+ if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
+ unsigned int bytes_written;
+ rc = CIFSSMBWrite(xid, pTcon, nfid, 0, attrs->ia_size,
+ &bytes_written, NULL, NULL, 1);
+ cFYI(1, ("Wrt seteof rc %d", rc));
+ }
+ } else
+ rc = -EINVAL;
+
+ if (rc != 0) {
+ /* Set file size by pathname rather than by handle
+ either because no valid, writeable file handle for
+ it was found or because there was an error setting
+ it by handle */
+ rc = CIFSSMBSetEOF(xid, pTcon, full_path, attrs->ia_size,
+ false, cifs_sb->local_nls,
+ cifs_sb->mnt_cifs_flags &
+ CIFS_MOUNT_MAP_SPECIAL_CHR);
+ cFYI(1, ("SetEOF by path (setattrs) rc = %d", rc));
+ if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
+ __u16 netfid;
+ int oplock = 0;
+
+ rc = SMBLegacyOpen(xid, pTcon, full_path,
+ FILE_OPEN, GENERIC_WRITE,
+ CREATE_NOT_DIR, &netfid, &oplock, NULL,
+ cifs_sb->local_nls,
+ cifs_sb->mnt_cifs_flags &
+ CIFS_MOUNT_MAP_SPECIAL_CHR);
+ if (rc == 0) {
+ unsigned int bytes_written;
+ rc = CIFSSMBWrite(xid, pTcon, netfid, 0,
+ attrs->ia_size,
+ &bytes_written, NULL,
+ NULL, 1);
+ cFYI(1, ("wrt seteof rc %d", rc));
+ CIFSSMBClose(xid, pTcon, netfid);
+ }
+ }
+ }
+
+ if (rc == 0) {
+ rc = cifs_vmtruncate(inode, attrs->ia_size);
+ cifs_truncate_page(inode->i_mapping, inode->i_size);
+ }
+
+ return rc;
+}
+
+static int
+cifs_set_file_info(struct inode *inode, struct iattr *attrs, int xid,
+ char *full_path, __u32 dosattr)
+{
+ int rc;
+ int oplock = 0;
+ __u16 netfid;
+ __u32 netpid;
+ bool set_time = false;
+ struct cifsFileInfo *open_file;
+ struct cifsInodeInfo *cifsInode = CIFS_I(inode);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ struct cifsTconInfo *pTcon = cifs_sb->tcon;
+ FILE_BASIC_INFO info_buf;
+
+ if (attrs->ia_valid & ATTR_ATIME) {
+ set_time = true;
+ info_buf.LastAccessTime =
+ cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_atime));
+ } else
+ info_buf.LastAccessTime = 0;
+
+ if (attrs->ia_valid & ATTR_MTIME) {
+ set_time = true;
+ info_buf.LastWriteTime =
+ cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_mtime));
+ } else
+ info_buf.LastWriteTime = 0;
+
+ /*
+ * Samba throws this field away, but windows may actually use it.
+ * Do not set ctime unless other time stamps are changed explicitly
+ * (i.e. by utimes()) since we would then have a mix of client and
+ * server times.
+ */
+ if (set_time && (attrs->ia_valid & ATTR_CTIME)) {
+ cFYI(1, ("CIFS - CTIME changed"));
+ info_buf.ChangeTime =
+ cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_ctime));
+ } else
+ info_buf.ChangeTime = 0;
+
+ info_buf.CreationTime = 0; /* don't change */
+ info_buf.Attributes = cpu_to_le32(dosattr);
+
+ /*
+ * If the file is already open for write, just use that fileid
+ */
+ open_file = find_writable_file(cifsInode);
+ if (open_file) {
+ netfid = open_file->netfid;
+ netpid = open_file->pid;
+ goto set_via_filehandle;
+ }
+
+ /*
+ * NT4 apparently returns success on this call, but it doesn't
+ * really work.
+ */
+ if (!(pTcon->ses->flags & CIFS_SES_NT4)) {
+ rc = CIFSSMBSetPathInfo(xid, pTcon, full_path,
+ &info_buf, cifs_sb->local_nls,
+ cifs_sb->mnt_cifs_flags &
+ CIFS_MOUNT_MAP_SPECIAL_CHR);
+ if (rc != -EOPNOTSUPP && rc != -EINVAL)
+ goto out;
+ }
+
+ cFYI(1, ("calling SetFileInfo since SetPathInfo for "
+ "times not supported by this server"));
+ rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN,
+ SYNCHRONIZE | FILE_WRITE_ATTRIBUTES,
+ CREATE_NOT_DIR, &netfid, &oplock,
+ NULL, cifs_sb->local_nls,
+ cifs_sb->mnt_cifs_flags &
+ CIFS_MOUNT_MAP_SPECIAL_CHR);
+
+ if (rc != 0) {
+ if (rc == -EIO)
+ rc = -EINVAL;
+ goto out;
+ }
+
+ netpid = current->tgid;
+
+set_via_filehandle:
+ rc = CIFSSMBSetFileInfo(xid, pTcon, &info_buf, netfid, netpid);
+ if (open_file == NULL)
+ CIFSSMBClose(xid, pTcon, netfid);
+ else
+ atomic_dec(&open_file->wrtPending);
+out:
+ return rc;
+}
+
+static int
+cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
+{
+ int rc;
int xid;
- struct cifs_sb_info *cifs_sb;
- struct cifsTconInfo *pTcon;
char *full_path = NULL;
- int rc = -EACCES;
- struct cifsFileInfo *open_file = NULL;
- FILE_BASIC_INFO time_buf;
- bool set_time = false;
- bool set_dosattr = false;
- __u64 mode = 0xFFFFFFFFFFFFFFFFULL;
- __u64 uid = 0xFFFFFFFFFFFFFFFFULL;
- __u64 gid = 0xFFFFFFFFFFFFFFFFULL;
- struct cifsInodeInfo *cifsInode;
struct inode *inode = direntry->d_inode;
+ struct cifsInodeInfo *cifsInode = CIFS_I(inode);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ struct cifsTconInfo *pTcon = cifs_sb->tcon;
+ struct cifs_unix_set_info_args *args = NULL;
+
+ cFYI(1, ("setattr_unix on file %s attrs->ia_valid=0x%x",
+ direntry->d_name.name, attrs->ia_valid));
+
+ xid = GetXid();
+
+ if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) == 0) {
+ /* check if we have permission to change attrs */
+ rc = inode_change_ok(inode, attrs);
+ if (rc < 0)
+ goto out;
+ else
+ rc = 0;
+ }
+
+ full_path = build_path_from_dentry(direntry);
+ if (full_path == NULL) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ if ((attrs->ia_valid & ATTR_MTIME) || (attrs->ia_valid & ATTR_SIZE)) {
+ /*
+ Flush data before changing file size or changing the last
+ write time of the file on the server. If the
+ flush returns error, store it to report later and continue.
+ BB: This should be smarter. Why bother flushing pages that
+ will be truncated anyway? Also, should we error out here if
+ the flush returns error?
+ */
+ rc = filemap_write_and_wait(inode->i_mapping);
+ if (rc != 0) {
+ cifsInode->write_behind_rc = rc;
+ rc = 0;
+ }
+ }
+
+ if (attrs->ia_valid & ATTR_SIZE) {
+ rc = cifs_set_file_size(inode, attrs, xid, full_path);
+ if (rc != 0)
+ goto out;
+ }
+
+ /* skip mode change if it's just for clearing setuid/setgid */
+ if (attrs->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
+ attrs->ia_valid &= ~ATTR_MODE;
+
+ args = kmalloc(sizeof(*args), GFP_KERNEL);
+ if (args == NULL) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ /* set up the struct */
+ if (attrs->ia_valid & ATTR_MODE)
+ args->mode = attrs->ia_mode;
+ else
+ args->mode = NO_CHANGE_64;
+
+ if (attrs->ia_valid & ATTR_UID)
+ args->uid = attrs->ia_uid;
+ else
+ args->uid = NO_CHANGE_64;
+
+ if (attrs->ia_valid & ATTR_GID)
+ args->gid = attrs->ia_gid;
+ else
+ args->gid = NO_CHANGE_64;
+
+ if (attrs->ia_valid & ATTR_ATIME)
+ args->atime = cifs_UnixTimeToNT(attrs->ia_atime);
+ else
+ args->atime = NO_CHANGE_64;
+
+ if (attrs->ia_valid & ATTR_MTIME)
+ args->mtime = cifs_UnixTimeToNT(attrs->ia_mtime);
+ else
+ args->mtime = NO_CHANGE_64;
+
+ if (attrs->ia_valid & ATTR_CTIME)
+ args->ctime = cifs_UnixTimeToNT(attrs->ia_ctime);
+ else
+ args->ctime = NO_CHANGE_64;
+
+ args->device = 0;
+ rc = CIFSSMBUnixSetInfo(xid, pTcon, full_path, args,
+ cifs_sb->local_nls,
+ cifs_sb->mnt_cifs_flags &
+ CIFS_MOUNT_MAP_SPECIAL_CHR);
+
+ if (!rc)
+ rc = inode_setattr(inode, attrs);
+out:
+ kfree(args);
+ kfree(full_path);
+ FreeXid(xid);
+ return rc;
+}
+
+static int
+cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
+{
+ int xid;
+ struct inode *inode = direntry->d_inode;
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ struct cifsInodeInfo *cifsInode = CIFS_I(inode);
+ char *full_path = NULL;
+ int rc = -EACCES;
+ __u32 dosattr = 0;
+ __u64 mode = NO_CHANGE_64;
xid = GetXid();
cFYI(1, ("setattr on file %s attrs->iavalid 0x%x",
direntry->d_name.name, attrs->ia_valid));
- cifs_sb = CIFS_SB(inode->i_sb);
- pTcon = cifs_sb->tcon;
-
if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) == 0) {
/* check if we have permission to change attrs */
rc = inode_change_ok(inode, attrs);
@@ -1453,7 +1741,6 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
FreeXid(xid);
return -ENOMEM;
}
- cifsInode = CIFS_I(inode);
if ((attrs->ia_valid & ATTR_MTIME) || (attrs->ia_valid & ATTR_SIZE)) {
/*
@@ -1472,78 +1759,8 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
}
if (attrs->ia_valid & ATTR_SIZE) {
- /* To avoid spurious oplock breaks from server, in the case of
- inodes that we already have open, avoid doing path based
- setting of file size if we can do it by handle.
- This keeps our caching token (oplock) and avoids timeouts
- when the local oplock break takes longer to flush
- writebehind data than the SMB timeout for the SetPathInfo
- request would allow */
-
- open_file = find_writable_file(cifsInode);
- if (open_file) {
- __u16 nfid = open_file->netfid;
- __u32 npid = open_file->pid;
- rc = CIFSSMBSetFileSize(xid, pTcon, attrs->ia_size,
- nfid, npid, false);
- atomic_dec(&open_file->wrtPending);
- cFYI(1, ("SetFSize for attrs rc = %d", rc));
- if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
- unsigned int bytes_written;
- rc = CIFSSMBWrite(xid, pTcon,
- nfid, 0, attrs->ia_size,
- &bytes_written, NULL, NULL,
- 1 /* 45 seconds */);
- cFYI(1, ("Wrt seteof rc %d", rc));
- }
- } else
- rc = -EINVAL;
-
- if (rc != 0) {
- /* Set file size by pathname rather than by handle
- either because no valid, writeable file handle for
- it was found or because there was an error setting
- it by handle */
- rc = CIFSSMBSetEOF(xid, pTcon, full_path,
- attrs->ia_size, false,
- cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
- cFYI(1, ("SetEOF by path (setattrs) rc = %d", rc));
- if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
- __u16 netfid;
- int oplock = 0;
-
- rc = SMBLegacyOpen(xid, pTcon, full_path,
- FILE_OPEN, GENERIC_WRITE,
- CREATE_NOT_DIR, &netfid, &oplock,
- NULL, cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
- if (rc == 0) {
- unsigned int bytes_written;
- rc = CIFSSMBWrite(xid, pTcon,
- netfid, 0,
- attrs->ia_size,
- &bytes_written, NULL,
- NULL, 1 /* 45 sec */);
- cFYI(1, ("wrt seteof rc %d", rc));
- CIFSSMBClose(xid, pTcon, netfid);
- }
-
- }
- }
-
- /* Server is ok setting allocation size implicitly - no need
- to call:
- CIFSSMBSetEOF(xid, pTcon, full_path, attrs->ia_size, true,
- cifs_sb->local_nls);
- */
-
- if (rc == 0) {
- rc = cifs_vmtruncate(inode, attrs->ia_size);
- cifs_truncate_page(inode->i_mapping, inode->i_size);
- } else
+ rc = cifs_set_file_size(inode, attrs, xid, full_path);
+ if (rc != 0)
goto cifs_setattr_exit;
}
@@ -1554,21 +1771,8 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
* CIFSACL support + proper Windows to Unix idmapping, we may be
* able to support this in the future.
*/
- if (!pTcon->unix_ext &&
- !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)) {
+ if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID))
attrs->ia_valid &= ~(ATTR_UID | ATTR_GID);
- } else {
- if (attrs->ia_valid & ATTR_UID) {
- cFYI(1, ("UID changed to %d", attrs->ia_uid));
- uid = attrs->ia_uid;
- }
- if (attrs->ia_valid & ATTR_GID) {
- cFYI(1, ("GID changed to %d", attrs->ia_gid));
- gid = attrs->ia_gid;
- }
- }
-
- time_buf.Attributes = 0;
/* skip mode change if it's just for clearing setuid/setgid */
if (attrs->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
@@ -1579,13 +1783,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
mode = attrs->ia_mode;
}
- if ((pTcon->unix_ext)
- && (attrs->ia_valid & (ATTR_MODE | ATTR_GID | ATTR_UID)))
- rc = CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode, uid, gid,
- 0 /* dev_t */, cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
- else if (attrs->ia_valid & ATTR_MODE) {
+ if (attrs->ia_valid & ATTR_MODE) {
rc = 0;
#ifdef CONFIG_CIFS_EXPERIMENTAL
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL)
@@ -1594,24 +1792,19 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
#endif
if (((mode & S_IWUGO) == 0) &&
(cifsInode->cifsAttrs & ATTR_READONLY) == 0) {
- set_dosattr = true;
- time_buf.Attributes = cpu_to_le32(cifsInode->cifsAttrs |
- ATTR_READONLY);
+
+ dosattr = cifsInode->cifsAttrs | ATTR_READONLY;
+
/* fix up mode if we're not using dynperm */
if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) == 0)
attrs->ia_mode = inode->i_mode & ~S_IWUGO;
} else if ((mode & S_IWUGO) &&
(cifsInode->cifsAttrs & ATTR_READONLY)) {
- /* If file is readonly on server, we would
- not be able to write to it - so if any write
- bit is enabled for user or group or other we
- need to at least try to remove r/o dos attr */
- set_dosattr = true;
- time_buf.Attributes = cpu_to_le32(cifsInode->cifsAttrs &
- (~ATTR_READONLY));
- /* Windows ignores set to zero */
- if (time_buf.Attributes == 0)
- time_buf.Attributes |= cpu_to_le32(ATTR_NORMAL);
+
+ dosattr = cifsInode->cifsAttrs & ~ATTR_READONLY;
+ /* Attributes of 0 are ignored */
+ if (dosattr == 0)
+ dosattr |= ATTR_NORMAL;
/* reset local inode permissions to normal */
if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)) {
@@ -1629,82 +1822,18 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
}
}
- if (attrs->ia_valid & ATTR_ATIME) {
- set_time = true;
- time_buf.LastAccessTime =
- cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_atime));
- } else
- time_buf.LastAccessTime = 0;
-
- if (attrs->ia_valid & ATTR_MTIME) {
- set_time = true;
- time_buf.LastWriteTime =
- cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_mtime));
- } else
- time_buf.LastWriteTime = 0;
- /* Do not set ctime explicitly unless other time
- stamps are changed explicitly (i.e. by utime()
- since we would then have a mix of client and
- server times */
-
- if (set_time && (attrs->ia_valid & ATTR_CTIME)) {
- set_time = true;
- /* Although Samba throws this field away
- it may be useful to Windows - but we do
- not want to set ctime unless some other
- timestamp is changing */
- cFYI(1, ("CIFS - CTIME changed"));
- time_buf.ChangeTime =
- cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_ctime));
- } else
- time_buf.ChangeTime = 0;
-
- if (set_time || set_dosattr) {
- time_buf.CreationTime = 0; /* do not change */
- /* In the future we should experiment - try setting timestamps
- via Handle (SetFileInfo) instead of by path */
- if (!(pTcon->ses->flags & CIFS_SES_NT4))
- rc = CIFSSMBSetTimes(xid, pTcon, full_path, &time_buf,
- cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
- else
- rc = -EOPNOTSUPP;
+ if (attrs->ia_valid & (ATTR_MTIME|ATTR_ATIME|ATTR_CTIME) ||
+ ((attrs->ia_valid & ATTR_MODE) && dosattr)) {
+ rc = cifs_set_file_info(inode, attrs, xid, full_path, dosattr);
+ /* BB: check for rc = -EOPNOTSUPP and switch to legacy mode */
- if (rc == -EOPNOTSUPP) {
- int oplock = 0;
- __u16 netfid;
-
- cFYI(1, ("calling SetFileInfo since SetPathInfo for "
- "times not supported by this server"));
- /* BB we could scan to see if we already have it open
- and pass in pid of opener to function */
- rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN,
- SYNCHRONIZE | FILE_WRITE_ATTRIBUTES,
- CREATE_NOT_DIR, &netfid, &oplock,
- NULL, cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
- if (rc == 0) {
- rc = CIFSSMBSetFileTimes(xid, pTcon, &time_buf,
- netfid);
- CIFSSMBClose(xid, pTcon, netfid);
- } else {
- /* BB For even older servers we could convert time_buf
- into old DOS style which uses two second
- granularity */
-
- /* rc = CIFSSMBSetTimesLegacy(xid, pTcon, full_path,
- &time_buf, cifs_sb->local_nls); */
- }
- }
/* Even if error on time set, no sense failing the call if
the server would set the time to a reasonable value anyway,
and this check ensures that we are not being called from
sys_utimes in which case we ought to fail the call back to
the user when the server rejects the call */
if ((rc) && (attrs->ia_valid &
- (ATTR_MODE | ATTR_GID | ATTR_UID | ATTR_SIZE)))
+ (ATTR_MODE | ATTR_GID | ATTR_UID | ATTR_SIZE)))
rc = 0;
}
@@ -1718,6 +1847,21 @@ cifs_setattr_exit:
return rc;
}
+int
+cifs_setattr(struct dentry *direntry, struct iattr *attrs)
+{
+ struct inode *inode = direntry->d_inode;
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ struct cifsTconInfo *pTcon = cifs_sb->tcon;
+
+ if (pTcon->unix_ext)
+ return cifs_setattr_unix(direntry, attrs);
+
+ return cifs_setattr_nounix(direntry, attrs);
+
+ /* BB: add cifs_setattr_legacy for really old servers */
+}
+
#if 0
void cifs_delete_inode(struct inode *inode)
{
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 83f30695488..5f40ed3473f 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -690,6 +690,7 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
else
cifs_buf_release(cifsFile->srch_inf.
ntwrk_buf_start);
+ cifsFile->srch_inf.ntwrk_buf_start = NULL;
}
rc = initiate_cifs_search(xid, file);
if (rc) {
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 000ac509c98..e286db9f5ee 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -265,6 +265,7 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec,
cFYI(1, ("Sending smb: total_len %d", total_len));
dump_smb(smb_buffer, len);
+ i = 0;
while (total_len) {
rc = kernel_sendmsg(ssocket, &smb_msg, &iov[first_vec],
n_vec - first_vec, total_len);
diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c
index e1c854890f9..bf4a3fd3c8e 100644
--- a/fs/coda/coda_linux.c
+++ b/fs/coda/coda_linux.c
@@ -28,11 +28,9 @@ int coda_fake_statfs;
char * coda_f2s(struct CodaFid *f)
{
static char s[60];
-#ifdef CONFIG_CODA_FS_OLD_API
- sprintf(s, "(%08x.%08x.%08x)", f->opaque[0], f->opaque[1], f->opaque[2]);
-#else
+
sprintf(s, "(%08x.%08x.%08x.%08x)", f->opaque[0], f->opaque[1], f->opaque[2], f->opaque[3]);
-#endif
+
return s;
}
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 3d2580e00a3..c5916228243 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -137,9 +137,11 @@ exit:
}
-int coda_permission(struct inode *inode, int mask, struct nameidata *nd)
+int coda_permission(struct inode *inode, int mask)
{
int error = 0;
+
+ mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
if (!mask)
return 0;
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 2f58dfc7008..830f51abb97 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -58,7 +58,7 @@ static void coda_destroy_inode(struct inode *inode)
kmem_cache_free(coda_inode_cachep, ITOC(inode));
}
-static void init_once(struct kmem_cache * cachep, void *foo)
+static void init_once(void *foo)
{
struct coda_inode_info *ei = (struct coda_inode_info *) foo;
diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c
index c21a1f552a6..c51365422aa 100644
--- a/fs/coda/pioctl.c
+++ b/fs/coda/pioctl.c
@@ -24,8 +24,7 @@
#include <linux/coda_psdev.h>
/* pioctl ops */
-static int coda_ioctl_permission(struct inode *inode, int mask,
- struct nameidata *nd);
+static int coda_ioctl_permission(struct inode *inode, int mask);
static int coda_pioctl(struct inode * inode, struct file * filp,
unsigned int cmd, unsigned long user_data);
@@ -42,8 +41,7 @@ const struct file_operations coda_ioctl_operations = {
};
/* the coda pioctl inode ops */
-static int coda_ioctl_permission(struct inode *inode, int mask,
- struct nameidata *nd)
+static int coda_ioctl_permission(struct inode *inode, int mask)
{
return 0;
}
@@ -51,7 +49,7 @@ static int coda_ioctl_permission(struct inode *inode, int mask,
static int coda_pioctl(struct inode * inode, struct file * filp,
unsigned int cmd, unsigned long user_data)
{
- struct nameidata nd;
+ struct path path;
int error;
struct PioctlData data;
struct inode *target_inode = NULL;
@@ -66,21 +64,21 @@ static int coda_pioctl(struct inode * inode, struct file * filp,
* Look up the pathname. Note that the pathname is in
* user memory, and namei takes care of this
*/
- if ( data.follow ) {
- error = user_path_walk(data.path, &nd);
+ if (data.follow) {
+ error = user_path(data.path, &path);
} else {
- error = user_path_walk_link(data.path, &nd);
+ error = user_lpath(data.path, &path);
}
if ( error ) {
return error;
} else {
- target_inode = nd.path.dentry->d_inode;
+ target_inode = path.dentry->d_inode;
}
/* return if it is not a Coda inode */
if ( target_inode->i_sb != inode->i_sb ) {
- path_put(&nd.path);
+ path_put(&path);
return -EINVAL;
}
@@ -89,7 +87,7 @@ static int coda_pioctl(struct inode * inode, struct file * filp,
error = venus_pioctl(inode->i_sb, &(cnp->c_fid), cmd, &data);
- path_put(&nd.path);
+ path_put(&path);
return error;
}
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index e3eb3556622..0d9b80ec689 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -362,8 +362,9 @@ static int init_coda_psdev(void)
goto out_chrdev;
}
for (i = 0; i < MAX_CODADEVS; i++)
- device_create(coda_psdev_class, NULL,
- MKDEV(CODA_PSDEV_MAJOR,i), "cfs%d", i);
+ device_create_drvdata(coda_psdev_class, NULL,
+ MKDEV(CODA_PSDEV_MAJOR, i),
+ NULL, "cfs%d", i);
coda_sysctl_init();
goto out;
@@ -377,11 +378,7 @@ MODULE_AUTHOR("Jan Harkes, Peter J. Braam");
MODULE_DESCRIPTION("Coda Distributed File System VFS interface");
MODULE_ALIAS_CHARDEV_MAJOR(CODA_PSDEV_MAJOR);
MODULE_LICENSE("GPL");
-#ifdef CONFIG_CODA_FS_OLD_API
-MODULE_VERSION("5.3.21");
-#else
MODULE_VERSION("6.6");
-#endif
static int __init init_coda(void)
{
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c
index 359e531094d..ce432bca95d 100644
--- a/fs/coda/upcall.c
+++ b/fs/coda/upcall.c
@@ -52,12 +52,8 @@ static void *alloc_upcall(int opcode, int size)
inp->ih.opcode = opcode;
inp->ih.pid = current->pid;
inp->ih.pgid = task_pgrp_nr(current);
-#ifdef CONFIG_CODA_FS_OLD_API
- memset(&inp->ih.cred, 0, sizeof(struct coda_cred));
- inp->ih.cred.cr_fsuid = current->fsuid;
-#else
inp->ih.uid = current->fsuid;
-#endif
+
return (void*)inp;
}
@@ -166,20 +162,11 @@ int venus_close(struct super_block *sb, struct CodaFid *fid, int flags,
union inputArgs *inp;
union outputArgs *outp;
int insize, outsize, error;
-#ifdef CONFIG_CODA_FS_OLD_API
- struct coda_cred cred = { 0, };
- cred.cr_fsuid = uid;
-#endif
insize = SIZE(release);
UPARG(CODA_CLOSE);
-#ifdef CONFIG_CODA_FS_OLD_API
- memcpy(&(inp->ih.cred), &cred, sizeof(cred));
-#else
inp->ih.uid = uid;
-#endif
-
inp->coda_close.VFid = *fid;
inp->coda_close.flags = flags;
diff --git a/fs/compat.c b/fs/compat.c
index ed43e17a5dc..c9d1472e65c 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -197,8 +197,8 @@ static int put_compat_statfs(struct compat_statfs __user *ubuf, struct kstatfs *
{
if (sizeof ubuf->f_blocks == 4) {
- if ((kbuf->f_blocks | kbuf->f_bfree | kbuf->f_bavail) &
- 0xffffffff00000000ULL)
+ if ((kbuf->f_blocks | kbuf->f_bfree | kbuf->f_bavail |
+ kbuf->f_bsize | kbuf->f_frsize) & 0xffffffff00000000ULL)
return -EOVERFLOW;
/* f_files and f_ffree may be -1; it's okay
* to stuff that into 32 bits */
@@ -234,18 +234,18 @@ static int put_compat_statfs(struct compat_statfs __user *ubuf, struct kstatfs *
* The following statfs calls are copies of code from fs/open.c and
* should be checked against those from time to time
*/
-asmlinkage long compat_sys_statfs(const char __user *path, struct compat_statfs __user *buf)
+asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_statfs __user *buf)
{
- struct nameidata nd;
+ struct path path;
int error;
- error = user_path_walk(path, &nd);
+ error = user_path(pathname, &path);
if (!error) {
struct kstatfs tmp;
- error = vfs_statfs(nd.path.dentry, &tmp);
+ error = vfs_statfs(path.dentry, &tmp);
if (!error)
error = put_compat_statfs(buf, &tmp);
- path_put(&nd.path);
+ path_put(&path);
}
return error;
}
@@ -271,8 +271,8 @@ out:
static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstatfs *kbuf)
{
if (sizeof ubuf->f_blocks == 4) {
- if ((kbuf->f_blocks | kbuf->f_bfree | kbuf->f_bavail) &
- 0xffffffff00000000ULL)
+ if ((kbuf->f_blocks | kbuf->f_bfree | kbuf->f_bavail |
+ kbuf->f_bsize | kbuf->f_frsize) & 0xffffffff00000000ULL)
return -EOVERFLOW;
/* f_files and f_ffree may be -1; it's okay
* to stuff that into 32 bits */
@@ -299,21 +299,21 @@ static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstat
return 0;
}
-asmlinkage long compat_sys_statfs64(const char __user *path, compat_size_t sz, struct compat_statfs64 __user *buf)
+asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t sz, struct compat_statfs64 __user *buf)
{
- struct nameidata nd;
+ struct path path;
int error;
if (sz != sizeof(*buf))
return -EINVAL;
- error = user_path_walk(path, &nd);
+ error = user_path(pathname, &path);
if (!error) {
struct kstatfs tmp;
- error = vfs_statfs(nd.path.dentry, &tmp);
+ error = vfs_statfs(path.dentry, &tmp);
if (!error)
error = put_compat_statfs64(buf, &tmp);
- path_put(&nd.path);
+ path_put(&path);
}
return error;
}
@@ -2131,9 +2131,9 @@ asmlinkage long compat_sys_epoll_pwait(int epfd,
#ifdef CONFIG_SIGNALFD
-asmlinkage long compat_sys_signalfd(int ufd,
- const compat_sigset_t __user *sigmask,
- compat_size_t sigsetsize)
+asmlinkage long compat_sys_signalfd4(int ufd,
+ const compat_sigset_t __user *sigmask,
+ compat_size_t sigsetsize, int flags)
{
compat_sigset_t ss32;
sigset_t tmp;
@@ -2148,9 +2148,15 @@ asmlinkage long compat_sys_signalfd(int ufd,
if (copy_to_user(ksigmask, &tmp, sizeof(sigset_t)))
return -EFAULT;
- return sys_signalfd(ufd, ksigmask, sizeof(sigset_t));
+ return sys_signalfd4(ufd, ksigmask, sizeof(sigset_t), flags);
}
+asmlinkage long compat_sys_signalfd(int ufd,
+ const compat_sigset_t __user *sigmask,
+ compat_size_t sigsetsize)
+{
+ return compat_sys_signalfd4(ufd, sigmask, sigsetsize, 0);
+}
#endif /* CONFIG_SIGNALFD */
#ifdef CONFIG_TIMERFD
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 7b3a03c7c6a..5235c67e759 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -25,7 +25,6 @@
#include <linux/slab.h>
#include <linux/raid/md.h>
#include <linux/kd.h>
-#include <linux/dirent.h>
#include <linux/route.h>
#include <linux/in6.h>
#include <linux/ipv6_route.h>
@@ -2297,8 +2296,6 @@ COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOVER)
COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE)
COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE_MULTI)
COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOSUBVER)
-COMPATIBLE_IOCTL(AUTOFS_IOC_ASKREGHOST)
-COMPATIBLE_IOCTL(AUTOFS_IOC_TOGGLEREGHOST)
COMPATIBLE_IOCTL(AUTOFS_IOC_ASKUMOUNT)
/* Raw devices */
COMPATIBLE_IOCTL(RAW_SETBIND)
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index da015c12e3e..762d287123c 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -49,8 +49,10 @@ struct configfs_dirent {
#define CONFIGFS_USET_DEFAULT 0x0080
#define CONFIGFS_USET_DROPPING 0x0100
#define CONFIGFS_USET_IN_MKDIR 0x0200
+#define CONFIGFS_USET_CREATING 0x0400
#define CONFIGFS_NOT_PINNED (CONFIGFS_ITEM_ATTR)
+extern struct mutex configfs_symlink_mutex;
extern spinlock_t configfs_dirent_lock;
extern struct vfsmount * configfs_mount;
@@ -66,6 +68,7 @@ extern void configfs_inode_exit(void);
extern int configfs_create_file(struct config_item *, const struct configfs_attribute *);
extern int configfs_make_dirent(struct configfs_dirent *,
struct dentry *, void *, umode_t, int);
+extern int configfs_dirent_is_ready(struct configfs_dirent *);
extern int configfs_add_file(struct dentry *, const struct configfs_attribute *, int);
extern void configfs_hash_and_remove(struct dentry * dir, const char * name);
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 179589be063..7a8db78a91d 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -185,7 +185,7 @@ static int create_dir(struct config_item * k, struct dentry * p,
error = configfs_dirent_exists(p->d_fsdata, d->d_name.name);
if (!error)
error = configfs_make_dirent(p->d_fsdata, d, k, mode,
- CONFIGFS_DIR);
+ CONFIGFS_DIR | CONFIGFS_USET_CREATING);
if (!error) {
error = configfs_create(d, mode, init_dir);
if (!error) {
@@ -209,6 +209,9 @@ static int create_dir(struct config_item * k, struct dentry * p,
* configfs_create_dir - create a directory for an config_item.
* @item: config_itemwe're creating directory for.
* @dentry: config_item's dentry.
+ *
+ * Note: user-created entries won't be allowed under this new directory
+ * until it is validated by configfs_dir_set_ready()
*/
static int configfs_create_dir(struct config_item * item, struct dentry *dentry)
@@ -231,6 +234,44 @@ static int configfs_create_dir(struct config_item * item, struct dentry *dentry)
return error;
}
+/*
+ * Allow userspace to create new entries under a new directory created with
+ * configfs_create_dir(), and under all of its children directories recursively.
+ * @sd configfs_dirent of the new directory to validate
+ *
+ * Caller must hold configfs_dirent_lock.
+ */
+static void configfs_dir_set_ready(struct configfs_dirent *sd)
+{
+ struct configfs_dirent *child_sd;
+
+ sd->s_type &= ~CONFIGFS_USET_CREATING;
+ list_for_each_entry(child_sd, &sd->s_children, s_sibling)
+ if (child_sd->s_type & CONFIGFS_USET_CREATING)
+ configfs_dir_set_ready(child_sd);
+}
+
+/*
+ * Check that a directory does not belong to a directory hierarchy being
+ * attached and not validated yet.
+ * @sd configfs_dirent of the directory to check
+ *
+ * @return non-zero iff the directory was validated
+ *
+ * Note: takes configfs_dirent_lock, so the result may change from false to true
+ * in two consecutive calls, but never from true to false.
+ */
+int configfs_dirent_is_ready(struct configfs_dirent *sd)
+{
+ int ret;
+
+ spin_lock(&configfs_dirent_lock);
+ ret = !(sd->s_type & CONFIGFS_USET_CREATING);
+ spin_unlock(&configfs_dirent_lock);
+
+ return ret;
+}
+
int configfs_create_link(struct configfs_symlink *sl,
struct dentry *parent,
struct dentry *dentry)
@@ -283,6 +324,8 @@ static void remove_dir(struct dentry * d)
* The only thing special about this is that we remove any files in
* the directory before we remove the directory, and we've inlined
* what used to be configfs_rmdir() below, instead of calling separately.
+ *
+ * Caller holds the mutex of the item's inode
*/
static void configfs_remove_dir(struct config_item * item)
@@ -330,7 +373,19 @@ static struct dentry * configfs_lookup(struct inode *dir,
struct configfs_dirent * parent_sd = dentry->d_parent->d_fsdata;
struct configfs_dirent * sd;
int found = 0;
- int err = 0;
+ int err;
+
+ /*
+ * Fake invisibility if dir belongs to a group/default groups hierarchy
+ * being attached
+ *
+ * This forbids userspace to read/write attributes of items which may
+ * not complete their initialization, since the dentries of the
+ * attributes won't be instantiated.
+ */
+ err = -ENOENT;
+ if (!configfs_dirent_is_ready(parent_sd))
+ goto out;
list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
if (sd->s_type & CONFIGFS_NOT_PINNED) {
@@ -353,6 +408,7 @@ static struct dentry * configfs_lookup(struct inode *dir,
return simple_lookup(dir, dentry, nd);
}
+out:
return ERR_PTR(err);
}
@@ -370,13 +426,17 @@ static int configfs_detach_prep(struct dentry *dentry, struct mutex **wait_mutex
struct configfs_dirent *sd;
int ret;
+ /* Mark that we're trying to drop the group */
+ parent_sd->s_type |= CONFIGFS_USET_DROPPING;
+
ret = -EBUSY;
if (!list_empty(&parent_sd->s_links))
goto out;
ret = 0;
list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
- if (sd->s_type & CONFIGFS_NOT_PINNED)
+ if (!sd->s_element ||
+ (sd->s_type & CONFIGFS_NOT_PINNED))
continue;
if (sd->s_type & CONFIGFS_USET_DEFAULT) {
/* Abort if racing with mkdir() */
@@ -385,8 +445,6 @@ static int configfs_detach_prep(struct dentry *dentry, struct mutex **wait_mutex
*wait_mutex = &sd->s_dentry->d_inode->i_mutex;
return -EAGAIN;
}
- /* Mark that we're trying to drop the group */
- sd->s_type |= CONFIGFS_USET_DROPPING;
/*
* Yup, recursive. If there's a problem, blame
@@ -414,12 +472,11 @@ static void configfs_detach_rollback(struct dentry *dentry)
struct configfs_dirent *parent_sd = dentry->d_fsdata;
struct configfs_dirent *sd;
- list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
- if (sd->s_type & CONFIGFS_USET_DEFAULT) {
+ parent_sd->s_type &= ~CONFIGFS_USET_DROPPING;
+
+ list_for_each_entry(sd, &parent_sd->s_children, s_sibling)
+ if (sd->s_type & CONFIGFS_USET_DEFAULT)
configfs_detach_rollback(sd->s_dentry);
- sd->s_type &= ~CONFIGFS_USET_DROPPING;
- }
- }
}
static void detach_attrs(struct config_item * item)
@@ -558,36 +615,21 @@ static int create_default_group(struct config_group *parent_group,
static int populate_groups(struct config_group *group)
{
struct config_group *new_group;
- struct dentry *dentry = group->cg_item.ci_dentry;
int ret = 0;
int i;
if (group->default_groups) {
- /*
- * FYI, we're faking mkdir here
- * I'm not sure we need this semaphore, as we're called
- * from our parent's mkdir. That holds our parent's
- * i_mutex, so afaik lookup cannot continue through our
- * parent to find us, let alone mess with our tree.
- * That said, taking our i_mutex is closer to mkdir
- * emulation, and shouldn't hurt.
- */
- mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
-
for (i = 0; group->default_groups[i]; i++) {
new_group = group->default_groups[i];
ret = create_default_group(group, new_group);
- if (ret)
+ if (ret) {
+ detach_groups(group);
break;
+ }
}
-
- mutex_unlock(&dentry->d_inode->i_mutex);
}
- if (ret)
- detach_groups(group);
-
return ret;
}
@@ -702,7 +744,15 @@ static int configfs_attach_item(struct config_item *parent_item,
if (!ret) {
ret = populate_attrs(item);
if (ret) {
+ /*
+ * We are going to remove an inode and its dentry but
+ * the VFS may already have hit and used them. Thus,
+ * we must lock them as rmdir() would.
+ */
+ mutex_lock(&dentry->d_inode->i_mutex);
configfs_remove_dir(item);
+ dentry->d_inode->i_flags |= S_DEAD;
+ mutex_unlock(&dentry->d_inode->i_mutex);
d_delete(dentry);
}
}
@@ -710,6 +760,7 @@ static int configfs_attach_item(struct config_item *parent_item,
return ret;
}
+/* Caller holds the mutex of the item's inode */
static void configfs_detach_item(struct config_item *item)
{
detach_attrs(item);
@@ -728,16 +779,30 @@ static int configfs_attach_group(struct config_item *parent_item,
sd = dentry->d_fsdata;
sd->s_type |= CONFIGFS_USET_DIR;
+ /*
+ * FYI, we're faking mkdir in populate_groups()
+ * We must lock the group's inode to avoid races with the VFS
+ * which can already hit the inode and try to add/remove entries
+ * under it.
+ *
+ * We must also lock the inode to remove it safely in case of
+ * error, as rmdir() would.
+ */
+ mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
ret = populate_groups(to_config_group(item));
if (ret) {
configfs_detach_item(item);
- d_delete(dentry);
+ dentry->d_inode->i_flags |= S_DEAD;
}
+ mutex_unlock(&dentry->d_inode->i_mutex);
+ if (ret)
+ d_delete(dentry);
}
return ret;
}
+/* Caller holds the mutex of the group's inode */
static void configfs_detach_group(struct config_item *item)
{
detach_groups(to_config_group(item));
@@ -1035,7 +1100,7 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
struct configfs_subsystem *subsys;
struct configfs_dirent *sd;
struct config_item_type *type;
- struct module *owner = NULL;
+ struct module *subsys_owner = NULL, *new_item_owner = NULL;
char *name;
if (dentry->d_parent == configfs_sb->s_root) {
@@ -1044,6 +1109,16 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
}
sd = dentry->d_parent->d_fsdata;
+
+ /*
+ * Fake invisibility if dir belongs to a group/default groups hierarchy
+ * being attached
+ */
+ if (!configfs_dirent_is_ready(sd)) {
+ ret = -ENOENT;
+ goto out;
+ }
+
if (!(sd->s_type & CONFIGFS_USET_DIR)) {
ret = -EPERM;
goto out;
@@ -1062,10 +1137,25 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
goto out_put;
}
+ /*
+ * The subsystem may belong to a different module than the item
+ * being created. We don't want to safely pin the new item but
+ * fail to pin the subsystem it sits under.
+ */
+ if (!subsys->su_group.cg_item.ci_type) {
+ ret = -EINVAL;
+ goto out_put;
+ }
+ subsys_owner = subsys->su_group.cg_item.ci_type->ct_owner;
+ if (!try_module_get(subsys_owner)) {
+ ret = -EINVAL;
+ goto out_put;
+ }
+
name = kmalloc(dentry->d_name.len + 1, GFP_KERNEL);
if (!name) {
ret = -ENOMEM;
- goto out_put;
+ goto out_subsys_put;
}
snprintf(name, dentry->d_name.len + 1, "%s", dentry->d_name.name);
@@ -1094,10 +1184,10 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
kfree(name);
if (ret) {
/*
- * If item == NULL, then link_obj() was never called.
+ * If ret != 0, then link_obj() was never called.
* There are no extra references to clean up.
*/
- goto out_put;
+ goto out_subsys_put;
}
/*
@@ -1111,8 +1201,8 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
goto out_unlink;
}
- owner = type->ct_owner;
- if (!try_module_get(owner)) {
+ new_item_owner = type->ct_owner;
+ if (!try_module_get(new_item_owner)) {
ret = -EINVAL;
goto out_unlink;
}
@@ -1142,6 +1232,8 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
spin_lock(&configfs_dirent_lock);
sd->s_type &= ~CONFIGFS_USET_IN_MKDIR;
+ if (!ret)
+ configfs_dir_set_ready(dentry->d_fsdata);
spin_unlock(&configfs_dirent_lock);
out_unlink:
@@ -1159,9 +1251,13 @@ out_unlink:
mutex_unlock(&subsys->su_mutex);
if (module_got)
- module_put(owner);
+ module_put(new_item_owner);
}
+out_subsys_put:
+ if (ret)
+ module_put(subsys_owner);
+
out_put:
/*
* link_obj()/link_group() took a reference from child->parent,
@@ -1180,7 +1276,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
struct config_item *item;
struct configfs_subsystem *subsys;
struct configfs_dirent *sd;
- struct module *owner = NULL;
+ struct module *subsys_owner = NULL, *dead_item_owner = NULL;
int ret;
if (dentry->d_parent == configfs_sb->s_root)
@@ -1207,6 +1303,15 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
return -EINVAL;
}
+ /* configfs_mkdir() shouldn't have allowed this */
+ BUG_ON(!subsys->su_group.cg_item.ci_type);
+ subsys_owner = subsys->su_group.cg_item.ci_type->ct_owner;
+
+ /*
+ * Ensure that no racing symlink() will make detach_prep() fail while
+ * the new link is temporarily attached
+ */
+ mutex_lock(&configfs_symlink_mutex);
spin_lock(&configfs_dirent_lock);
do {
struct mutex *wait_mutex;
@@ -1215,6 +1320,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
if (ret) {
configfs_detach_rollback(dentry);
spin_unlock(&configfs_dirent_lock);
+ mutex_unlock(&configfs_symlink_mutex);
if (ret != -EAGAIN) {
config_item_put(parent_item);
return ret;
@@ -1224,10 +1330,12 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
mutex_lock(wait_mutex);
mutex_unlock(wait_mutex);
+ mutex_lock(&configfs_symlink_mutex);
spin_lock(&configfs_dirent_lock);
}
} while (ret == -EAGAIN);
spin_unlock(&configfs_dirent_lock);
+ mutex_unlock(&configfs_symlink_mutex);
/* Get a working ref for the duration of this function */
item = configfs_get_config_item(dentry);
@@ -1236,7 +1344,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
config_item_put(parent_item);
if (item->ci_type)
- owner = item->ci_type->ct_owner;
+ dead_item_owner = item->ci_type->ct_owner;
if (sd->s_type & CONFIGFS_USET_DIR) {
configfs_detach_group(item);
@@ -1258,7 +1366,8 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
/* Drop our reference from above */
config_item_put(item);
- module_put(owner);
+ module_put(dead_item_owner);
+ module_put(subsys_owner);
return 0;
}
@@ -1314,13 +1423,24 @@ static int configfs_dir_open(struct inode *inode, struct file *file)
{
struct dentry * dentry = file->f_path.dentry;
struct configfs_dirent * parent_sd = dentry->d_fsdata;
+ int err;
mutex_lock(&dentry->d_inode->i_mutex);
- file->private_data = configfs_new_dirent(parent_sd, NULL);
+ /*
+ * Fake invisibility if dir belongs to a group/default groups hierarchy
+ * being attached
+ */
+ err = -ENOENT;
+ if (configfs_dirent_is_ready(parent_sd)) {
+ file->private_data = configfs_new_dirent(parent_sd, NULL);
+ if (IS_ERR(file->private_data))
+ err = PTR_ERR(file->private_data);
+ else
+ err = 0;
+ }
mutex_unlock(&dentry->d_inode->i_mutex);
- return IS_ERR(file->private_data) ? PTR_ERR(file->private_data) : 0;
-
+ return err;
}
static int configfs_dir_close(struct inode *inode, struct file *file)
@@ -1491,6 +1611,10 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
if (err) {
d_delete(dentry);
dput(dentry);
+ } else {
+ spin_lock(&configfs_dirent_lock);
+ configfs_dir_set_ready(dentry->d_fsdata);
+ spin_unlock(&configfs_dirent_lock);
}
}
@@ -1517,11 +1641,13 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
mutex_lock_nested(&configfs_sb->s_root->d_inode->i_mutex,
I_MUTEX_PARENT);
mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
+ mutex_lock(&configfs_symlink_mutex);
spin_lock(&configfs_dirent_lock);
if (configfs_detach_prep(dentry, NULL)) {
printk(KERN_ERR "configfs: Tried to unregister non-empty subsystem!\n");
}
spin_unlock(&configfs_dirent_lock);
+ mutex_unlock(&configfs_symlink_mutex);
configfs_detach_group(&group->cg_item);
dentry->d_inode->i_flags |= S_DEAD;
mutex_unlock(&dentry->d_inode->i_mutex);
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index 0004d18c40a..bf74973b049 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -31,6 +31,9 @@
#include <linux/configfs.h>
#include "configfs_internal.h"
+/* Protects attachments of new symlinks */
+DEFINE_MUTEX(configfs_symlink_mutex);
+
static int item_depth(struct config_item * item)
{
struct config_item * p = item;
@@ -73,11 +76,20 @@ static int create_link(struct config_item *parent_item,
struct configfs_symlink *sl;
int ret;
+ ret = -ENOENT;
+ if (!configfs_dirent_is_ready(target_sd))
+ goto out;
ret = -ENOMEM;
sl = kmalloc(sizeof(struct configfs_symlink), GFP_KERNEL);
if (sl) {
sl->sl_target = config_item_get(item);
spin_lock(&configfs_dirent_lock);
+ if (target_sd->s_type & CONFIGFS_USET_DROPPING) {
+ spin_unlock(&configfs_dirent_lock);
+ config_item_put(item);
+ kfree(sl);
+ return -ENOENT;
+ }
list_add(&sl->sl_list, &target_sd->s_links);
spin_unlock(&configfs_dirent_lock);
ret = configfs_create_link(sl, parent_item->ci_dentry,
@@ -91,6 +103,7 @@ static int create_link(struct config_item *parent_item,
}
}
+out:
return ret;
}
@@ -120,6 +133,7 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna
{
int ret;
struct nameidata nd;
+ struct configfs_dirent *sd;
struct config_item *parent_item;
struct config_item *target_item;
struct config_item_type *type;
@@ -128,9 +142,19 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna
if (dentry->d_parent == configfs_sb->s_root)
goto out;
+ sd = dentry->d_parent->d_fsdata;
+ /*
+ * Fake invisibility if dir belongs to a group/default groups hierarchy
+ * being attached
+ */
+ ret = -ENOENT;
+ if (!configfs_dirent_is_ready(sd))
+ goto out;
+
parent_item = configfs_get_config_item(dentry->d_parent);
type = parent_item->ci_type;
+ ret = -EPERM;
if (!type || !type->ct_item_ops ||
!type->ct_item_ops->allow_link)
goto out_put;
@@ -141,7 +165,9 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna
ret = type->ct_item_ops->allow_link(parent_item, target_item);
if (!ret) {
+ mutex_lock(&configfs_symlink_mutex);
ret = create_link(parent_item, target_item, dentry);
+ mutex_unlock(&configfs_symlink_mutex);
if (ret && type->ct_item_ops->drop_link)
type->ct_item_ops->drop_link(parent_item,
target_item);
diff --git a/fs/dcache.c b/fs/dcache.c
index 6068c25b393..101663d15e9 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -61,7 +61,6 @@ static struct kmem_cache *dentry_cache __read_mostly;
static unsigned int d_hash_mask __read_mostly;
static unsigned int d_hash_shift __read_mostly;
static struct hlist_head *dentry_hashtable __read_mostly;
-static LIST_HEAD(dentry_unused);
/* Statistics gathering. */
struct dentry_stat_t dentry_stat = {
@@ -96,14 +95,6 @@ static void d_free(struct dentry *dentry)
call_rcu(&dentry->d_u.d_rcu, d_callback);
}
-static void dentry_lru_remove(struct dentry *dentry)
-{
- if (!list_empty(&dentry->d_lru)) {
- list_del_init(&dentry->d_lru);
- dentry_stat.nr_unused--;
- }
-}
-
/*
* Release the dentry's inode, using the filesystem
* d_iput() operation if defined.
@@ -130,6 +121,41 @@ static void dentry_iput(struct dentry * dentry)
}
}
+/*
+ * dentry_lru_(add|add_tail|del|del_init) must be called with dcache_lock held.
+ */
+static void dentry_lru_add(struct dentry *dentry)
+{
+ list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
+ dentry->d_sb->s_nr_dentry_unused++;
+ dentry_stat.nr_unused++;
+}
+
+static void dentry_lru_add_tail(struct dentry *dentry)
+{
+ list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
+ dentry->d_sb->s_nr_dentry_unused++;
+ dentry_stat.nr_unused++;
+}
+
+static void dentry_lru_del(struct dentry *dentry)
+{
+ if (!list_empty(&dentry->d_lru)) {
+ list_del(&dentry->d_lru);
+ dentry->d_sb->s_nr_dentry_unused--;
+ dentry_stat.nr_unused--;
+ }
+}
+
+static void dentry_lru_del_init(struct dentry *dentry)
+{
+ if (likely(!list_empty(&dentry->d_lru))) {
+ list_del_init(&dentry->d_lru);
+ dentry->d_sb->s_nr_dentry_unused--;
+ dentry_stat.nr_unused--;
+ }
+}
+
/**
* d_kill - kill dentry and return parent
* @dentry: dentry to kill
@@ -212,8 +238,7 @@ repeat:
goto kill_it;
if (list_empty(&dentry->d_lru)) {
dentry->d_flags |= DCACHE_REFERENCED;
- list_add(&dentry->d_lru, &dentry_unused);
- dentry_stat.nr_unused++;
+ dentry_lru_add(dentry);
}
spin_unlock(&dentry->d_lock);
spin_unlock(&dcache_lock);
@@ -222,7 +247,8 @@ repeat:
unhash_it:
__d_drop(dentry);
kill_it:
- dentry_lru_remove(dentry);
+ /* if dentry was on the d_lru list delete it from there */
+ dentry_lru_del(dentry);
dentry = d_kill(dentry);
if (dentry)
goto repeat;
@@ -290,7 +316,7 @@ int d_invalidate(struct dentry * dentry)
static inline struct dentry * __dget_locked(struct dentry *dentry)
{
atomic_inc(&dentry->d_count);
- dentry_lru_remove(dentry);
+ dentry_lru_del_init(dentry);
return dentry;
}
@@ -406,133 +432,168 @@ static void prune_one_dentry(struct dentry * dentry)
if (dentry->d_op && dentry->d_op->d_delete)
dentry->d_op->d_delete(dentry);
- dentry_lru_remove(dentry);
+ dentry_lru_del_init(dentry);
__d_drop(dentry);
dentry = d_kill(dentry);
spin_lock(&dcache_lock);
}
}
-/**
- * prune_dcache - shrink the dcache
- * @count: number of entries to try and free
- * @sb: if given, ignore dentries for other superblocks
- * which are being unmounted.
- *
- * Shrink the dcache. This is done when we need
- * more memory, or simply when we need to unmount
- * something (at which point we need to unuse
- * all dentries).
- *
- * This function may fail to free any resources if
- * all the dentries are in use.
+/*
+ * Shrink the dentry LRU on a given superblock.
+ * @sb : superblock to shrink dentry LRU.
+ * @count: If count is NULL, we prune all dentries on superblock.
+ * @flags: If flags is non-zero, we need to do special processing based on
+ * which flags are set. This means we don't need to maintain multiple
+ * similar copies of this loop.
*/
-
-static void prune_dcache(int count, struct super_block *sb)
+static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
{
- spin_lock(&dcache_lock);
- for (; count ; count--) {
- struct dentry *dentry;
- struct list_head *tmp;
- struct rw_semaphore *s_umount;
-
- cond_resched_lock(&dcache_lock);
+ LIST_HEAD(referenced);
+ LIST_HEAD(tmp);
+ struct dentry *dentry;
+ int cnt = 0;
- tmp = dentry_unused.prev;
- if (sb) {
- /* Try to find a dentry for this sb, but don't try
- * too hard, if they aren't near the tail they will
- * be moved down again soon
+ BUG_ON(!sb);
+ BUG_ON((flags & DCACHE_REFERENCED) && count == NULL);
+ spin_lock(&dcache_lock);
+ if (count != NULL)
+ /* called from prune_dcache() and shrink_dcache_parent() */
+ cnt = *count;
+restart:
+ if (count == NULL)
+ list_splice_init(&sb->s_dentry_lru, &tmp);
+ else {
+ while (!list_empty(&sb->s_dentry_lru)) {
+ dentry = list_entry(sb->s_dentry_lru.prev,
+ struct dentry, d_lru);
+ BUG_ON(dentry->d_sb != sb);
+
+ spin_lock(&dentry->d_lock);
+ /*
+ * If we are honouring the DCACHE_REFERENCED flag and
+ * the dentry has this flag set, don't free it. Clear
+ * the flag and put it back on the LRU.
*/
- int skip = count;
- while (skip && tmp != &dentry_unused &&
- list_entry(tmp, struct dentry, d_lru)->d_sb != sb) {
- skip--;
- tmp = tmp->prev;
+ if ((flags & DCACHE_REFERENCED)
+ && (dentry->d_flags & DCACHE_REFERENCED)) {
+ dentry->d_flags &= ~DCACHE_REFERENCED;
+ list_move_tail(&dentry->d_lru, &referenced);
+ spin_unlock(&dentry->d_lock);
+ } else {
+ list_move_tail(&dentry->d_lru, &tmp);
+ spin_unlock(&dentry->d_lock);
+ cnt--;
+ if (!cnt)
+ break;
}
+ cond_resched_lock(&dcache_lock);
}
- if (tmp == &dentry_unused)
- break;
- list_del_init(tmp);
- prefetch(dentry_unused.prev);
- dentry_stat.nr_unused--;
- dentry = list_entry(tmp, struct dentry, d_lru);
-
- spin_lock(&dentry->d_lock);
+ }
+ while (!list_empty(&tmp)) {
+ dentry = list_entry(tmp.prev, struct dentry, d_lru);
+ dentry_lru_del_init(dentry);
+ spin_lock(&dentry->d_lock);
/*
* We found an inuse dentry which was not removed from
- * dentry_unused because of laziness during lookup. Do not free
- * it - just keep it off the dentry_unused list.
+ * the LRU because of laziness during lookup. Do not free
+ * it - just keep it off the LRU list.
*/
- if (atomic_read(&dentry->d_count)) {
- spin_unlock(&dentry->d_lock);
+ if (atomic_read(&dentry->d_count)) {
+ spin_unlock(&dentry->d_lock);
continue;
}
- /* If the dentry was recently referenced, don't free it. */
- if (dentry->d_flags & DCACHE_REFERENCED) {
- dentry->d_flags &= ~DCACHE_REFERENCED;
- list_add(&dentry->d_lru, &dentry_unused);
- dentry_stat.nr_unused++;
- spin_unlock(&dentry->d_lock);
+ prune_one_dentry(dentry);
+ /* dentry->d_lock was dropped in prune_one_dentry() */
+ cond_resched_lock(&dcache_lock);
+ }
+ if (count == NULL && !list_empty(&sb->s_dentry_lru))
+ goto restart;
+ if (count != NULL)
+ *count = cnt;
+ if (!list_empty(&referenced))
+ list_splice(&referenced, &sb->s_dentry_lru);
+ spin_unlock(&dcache_lock);
+}
+
+/**
+ * prune_dcache - shrink the dcache
+ * @count: number of entries to try to free
+ *
+ * Shrink the dcache. This is done when we need more memory, or simply when we
+ * need to unmount something (at which point we need to unuse all dentries).
+ *
+ * This function may fail to free any resources if all the dentries are in use.
+ */
+static void prune_dcache(int count)
+{
+ struct super_block *sb;
+ int w_count;
+ int unused = dentry_stat.nr_unused;
+ int prune_ratio;
+ int pruned;
+
+ if (unused == 0 || count == 0)
+ return;
+ spin_lock(&dcache_lock);
+restart:
+ if (count >= unused)
+ prune_ratio = 1;
+ else
+ prune_ratio = unused / count;
+ spin_lock(&sb_lock);
+ list_for_each_entry(sb, &super_blocks, s_list) {
+ if (sb->s_nr_dentry_unused == 0)
continue;
- }
- /*
- * If the dentry is not DCACHED_REFERENCED, it is time
- * to remove it from the dcache, provided the super block is
- * NULL (which means we are trying to reclaim memory)
- * or this dentry belongs to the same super block that
- * we want to shrink.
+ sb->s_count++;
+ /* Now, we reclaim unused dentries with fairness.
+ * We reclaim the same percentage from each superblock.
+ * We calculate number of dentries to scan on this sb
+ * as follows, but the implementation is arranged to avoid
+ * overflows:
+ * number of dentries to scan on this sb =
+ * count * (number of dentries on this sb /
+ * number of dentries in the machine)
*/
+ spin_unlock(&sb_lock);
+ if (prune_ratio != 1)
+ w_count = (sb->s_nr_dentry_unused / prune_ratio) + 1;
+ else
+ w_count = sb->s_nr_dentry_unused;
+ pruned = w_count;
/*
- * If this dentry is for "my" filesystem, then I can prune it
- * without taking the s_umount lock (I already hold it).
- */
- if (sb && dentry->d_sb == sb) {
- prune_one_dentry(dentry);
- continue;
- }
- /*
- * ...otherwise we need to be sure this filesystem isn't being
- * unmounted, otherwise we could race with
- * generic_shutdown_super(), and end up holding a reference to
- * an inode while the filesystem is unmounted.
- * So we try to get s_umount, and make sure s_root isn't NULL.
- * (Take a local copy of s_umount to avoid a use-after-free of
- * `dentry').
+ * We need to be sure this filesystem isn't being unmounted,
+ * otherwise we could race with generic_shutdown_super(), and
+ * end up holding a reference to an inode while the filesystem
+ * is unmounted. So we try to get s_umount, and make sure
+ * s_root isn't NULL.
*/
- s_umount = &dentry->d_sb->s_umount;
- if (down_read_trylock(s_umount)) {
- if (dentry->d_sb->s_root != NULL) {
- prune_one_dentry(dentry);
- up_read(s_umount);
- continue;
+ if (down_read_trylock(&sb->s_umount)) {
+ if ((sb->s_root != NULL) &&
+ (!list_empty(&sb->s_dentry_lru))) {
+ spin_unlock(&dcache_lock);
+ __shrink_dcache_sb(sb, &w_count,
+ DCACHE_REFERENCED);
+ pruned -= w_count;
+ spin_lock(&dcache_lock);
}
- up_read(s_umount);
+ up_read(&sb->s_umount);
}
- spin_unlock(&dentry->d_lock);
+ spin_lock(&sb_lock);
+ count -= pruned;
/*
- * Insert dentry at the head of the list as inserting at the
- * tail leads to a cycle.
+ * restart only when sb is no longer on the list and
+ * we have more work to do.
*/
- list_add(&dentry->d_lru, &dentry_unused);
- dentry_stat.nr_unused++;
+ if (__put_super_and_need_restart(sb) && count > 0) {
+ spin_unlock(&sb_lock);
+ goto restart;
+ }
}
+ spin_unlock(&sb_lock);
spin_unlock(&dcache_lock);
}
-/*
- * Shrink the dcache for the specified super block.
- * This allows us to unmount a device without disturbing
- * the dcache for the other devices.
- *
- * This implementation makes just two traversals of the
- * unused list. On the first pass we move the selected
- * dentries to the most recent end, and on the second
- * pass we free them. The second pass must restart after
- * each dput(), but since the target dentries are all at
- * the end, it's really just a single traversal.
- */
-
/**
* shrink_dcache_sb - shrink dcache for a superblock
* @sb: superblock
@@ -541,44 +602,9 @@ static void prune_dcache(int count, struct super_block *sb)
* is used to free the dcache before unmounting a file
* system
*/
-
void shrink_dcache_sb(struct super_block * sb)
{
- struct list_head *tmp, *next;
- struct dentry *dentry;
-
- /*
- * Pass one ... move the dentries for the specified
- * superblock to the most recent end of the unused list.
- */
- spin_lock(&dcache_lock);
- list_for_each_prev_safe(tmp, next, &dentry_unused) {
- dentry = list_entry(tmp, struct dentry, d_lru);
- if (dentry->d_sb != sb)
- continue;
- list_move_tail(tmp, &dentry_unused);
- }
-
- /*
- * Pass two ... free the dentries for this superblock.
- */
-repeat:
- list_for_each_prev_safe(tmp, next, &dentry_unused) {
- dentry = list_entry(tmp, struct dentry, d_lru);
- if (dentry->d_sb != sb)
- continue;
- dentry_stat.nr_unused--;
- list_del_init(tmp);
- spin_lock(&dentry->d_lock);
- if (atomic_read(&dentry->d_count)) {
- spin_unlock(&dentry->d_lock);
- continue;
- }
- prune_one_dentry(dentry);
- cond_resched_lock(&dcache_lock);
- goto repeat;
- }
- spin_unlock(&dcache_lock);
+ __shrink_dcache_sb(sb, NULL, 0);
}
/*
@@ -595,7 +621,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
/* detach this root from the system */
spin_lock(&dcache_lock);
- dentry_lru_remove(dentry);
+ dentry_lru_del_init(dentry);
__d_drop(dentry);
spin_unlock(&dcache_lock);
@@ -609,7 +635,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
spin_lock(&dcache_lock);
list_for_each_entry(loop, &dentry->d_subdirs,
d_u.d_child) {
- dentry_lru_remove(loop);
+ dentry_lru_del_init(loop);
__d_drop(loop);
cond_resched_lock(&dcache_lock);
}
@@ -791,14 +817,13 @@ resume:
struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
next = tmp->next;
- dentry_lru_remove(dentry);
+ dentry_lru_del_init(dentry);
/*
* move only zero ref count dentries to the end
* of the unused list for prune_dcache
*/
if (!atomic_read(&dentry->d_count)) {
- list_add_tail(&dentry->d_lru, &dentry_unused);
- dentry_stat.nr_unused++;
+ dentry_lru_add_tail(dentry);
found++;
}
@@ -840,10 +865,11 @@ out:
void shrink_dcache_parent(struct dentry * parent)
{
+ struct super_block *sb = parent->d_sb;
int found;
while ((found = select_parent(parent)) != 0)
- prune_dcache(found, parent->d_sb);
+ __shrink_dcache_sb(sb, &found, 0);
}
/*
@@ -863,7 +889,7 @@ static int shrink_dcache_memory(int nr, gfp_t gfp_mask)
if (nr) {
if (!(gfp_mask & __GFP_FS))
return -1;
- prune_dcache(nr, NULL);
+ prune_dcache(nr);
}
return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
}
@@ -1194,6 +1220,107 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
return new;
}
+/**
+ * d_add_ci - lookup or allocate new dentry with case-exact name
+ * @inode: the inode case-insensitive lookup has found
+ * @dentry: the negative dentry that was passed to the parent's lookup func
+ * @name: the case-exact name to be associated with the returned dentry
+ *
+ * This is to avoid filling the dcache with case-insensitive names for the
+ * same inode; only the actual correct case is stored in the dcache for
+ * case-insensitive filesystems.
+ *
+ * On a case-insensitive lookup match, if the case-exact dentry
+ * already exists in the dcache, use it and return it.
+ *
+ * If no entry exists with the exact case name, allocate new dentry with
+ * the exact case, and return the spliced entry.
+ */
+struct dentry *d_add_ci(struct inode *inode, struct dentry *dentry,
+ struct qstr *name)
+{
+ int error;
+ struct dentry *found;
+ struct dentry *new;
+
+ /* Does a dentry matching the name exist already? */
+ found = d_hash_and_lookup(dentry->d_parent, name);
+ /* If not, create it now and return */
+ if (!found) {
+ new = d_alloc(dentry->d_parent, name);
+ if (!new) {
+ error = -ENOMEM;
+ goto err_out;
+ }
+ found = d_splice_alias(inode, new);
+ if (found) {
+ dput(new);
+ return found;
+ }
+ return new;
+ }
+ /* Matching dentry exists, check if it is negative. */
+ if (found->d_inode) {
+ if (unlikely(found->d_inode != inode)) {
+ /* This can't happen because bad inodes are unhashed. */
+ BUG_ON(!is_bad_inode(inode));
+ BUG_ON(!is_bad_inode(found->d_inode));
+ }
+ /*
+ * Already have the inode and the dentry attached, decrement
+ * the reference count to balance the iget() done
+ * earlier on. We found the dentry using d_lookup() so it
+ * cannot be disconnected and thus we do not need to worry
+ * about any NFS/disconnectedness issues here.
+ */
+ iput(inode);
+ return found;
+ }
+ /*
+ * Negative dentry: instantiate it unless the inode is a directory and
+ * has a 'disconnected' dentry (i.e. IS_ROOT and DCACHE_DISCONNECTED),
+ * in which case d_move() that in place of the found dentry.
+ */
+ if (!S_ISDIR(inode->i_mode)) {
+ /* Not a directory; everything is easy. */
+ d_instantiate(found, inode);
+ return found;
+ }
+ spin_lock(&dcache_lock);
+ if (list_empty(&inode->i_dentry)) {
+ /*
+ * Directory without a 'disconnected' dentry; we need to do
+ * d_instantiate() by hand because it takes dcache_lock which
+ * we already hold.
+ */
+ list_add(&found->d_alias, &inode->i_dentry);
+ found->d_inode = inode;
+ spin_unlock(&dcache_lock);
+ security_d_instantiate(found, inode);
+ return found;
+ }
+ /*
+ * Directory with a 'disconnected' dentry; get a reference to the
+ * 'disconnected' dentry.
+ */
+ new = list_entry(inode->i_dentry.next, struct dentry, d_alias);
+ dget_locked(new);
+ spin_unlock(&dcache_lock);
+ /* Do security voodoo. */
+ security_d_instantiate(found, inode);
+ /* Move new in place of found. */
+ d_move(new, found);
+ /* Balance the iget() we did above. */
+ iput(inode);
+ /* Throw away found. */
+ dput(found);
+ /* Use new as the actual dentry. */
+ return new;
+
+err_out:
+ iput(inode);
+ return ERR_PTR(error);
+}
/**
* d_lookup - search for a dentry
@@ -1215,7 +1342,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
* rcu_read_lock() and rcu_read_unlock() are used to disable preemption while
* lookup is going on.
*
- * dentry_unused list is not updated even if lookup finds the required dentry
+ * The dentry unused LRU is not updated even if lookup finds the required dentry
* in there. It is updated in places such as prune_dcache, shrink_dcache_sb,
* select_parent and __dget_locked. This laziness saves lookup from dcache_lock
* acquisition.
@@ -2228,6 +2355,7 @@ EXPORT_SYMBOL(d_path);
EXPORT_SYMBOL(d_prune_aliases);
EXPORT_SYMBOL(d_rehash);
EXPORT_SYMBOL(d_splice_alias);
+EXPORT_SYMBOL(d_add_ci);
EXPORT_SYMBOL(d_validate);
EXPORT_SYMBOL(dget_locked);
EXPORT_SYMBOL(dput);
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index e9602d85c11..08e28c9bb41 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -309,6 +309,31 @@ struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent,
}
EXPORT_SYMBOL_GPL(debugfs_create_symlink);
+static void __debugfs_remove(struct dentry *dentry, struct dentry *parent)
+{
+ int ret = 0;
+
+ if (debugfs_positive(dentry)) {
+ if (dentry->d_inode) {
+ dget(dentry);
+ switch (dentry->d_inode->i_mode & S_IFMT) {
+ case S_IFDIR:
+ ret = simple_rmdir(parent->d_inode, dentry);
+ break;
+ case S_IFLNK:
+ kfree(dentry->d_inode->i_private);
+ /* fall through */
+ default:
+ simple_unlink(parent->d_inode, dentry);
+ break;
+ }
+ if (!ret)
+ d_delete(dentry);
+ dput(dentry);
+ }
+ }
+}
+
/**
* debugfs_remove - removes a file or directory from the debugfs filesystem
* @dentry: a pointer to a the dentry of the file or directory to be
@@ -325,7 +350,6 @@ EXPORT_SYMBOL_GPL(debugfs_create_symlink);
void debugfs_remove(struct dentry *dentry)
{
struct dentry *parent;
- int ret = 0;
if (!dentry)
return;
@@ -335,29 +359,83 @@ void debugfs_remove(struct dentry *dentry)
return;
mutex_lock(&parent->d_inode->i_mutex);
- if (debugfs_positive(dentry)) {
- if (dentry->d_inode) {
- dget(dentry);
- switch (dentry->d_inode->i_mode & S_IFMT) {
- case S_IFDIR:
- ret = simple_rmdir(parent->d_inode, dentry);
- break;
- case S_IFLNK:
- kfree(dentry->d_inode->i_private);
- /* fall through */
- default:
- simple_unlink(parent->d_inode, dentry);
+ __debugfs_remove(dentry, parent);
+ mutex_unlock(&parent->d_inode->i_mutex);
+ simple_release_fs(&debugfs_mount, &debugfs_mount_count);
+}
+EXPORT_SYMBOL_GPL(debugfs_remove);
+
+/**
+ * debugfs_remove_recursive - recursively removes a directory
+ * @dentry: a pointer to the dentry of the directory to be removed.
+ *
+ * This function recursively removes a directory tree in debugfs that
+ * was previously created with a call to another debugfs function
+ * (like debugfs_create_file() or variants thereof.)
+ *
+ * This function must be called in order for the directory tree to be
+ * removed; no automatic cleanup of files will happen when a module is
+ * removed, so the caller is responsible for it.
+ */
+void debugfs_remove_recursive(struct dentry *dentry)
+{
+ struct dentry *child;
+ struct dentry *parent;
+
+ if (!dentry)
+ return;
+
+ parent = dentry->d_parent;
+ if (!parent || !parent->d_inode)
+ return;
+
+ parent = dentry;
+ mutex_lock(&parent->d_inode->i_mutex);
+
+ while (1) {
+ /*
+ * When all dentries under "parent" have been removed,
+ * walk up the tree until we reach our starting point.
+ */
+ if (list_empty(&parent->d_subdirs)) {
+ mutex_unlock(&parent->d_inode->i_mutex);
+ if (parent == dentry)
break;
- }
- if (!ret)
- d_delete(dentry);
- dput(dentry);
+ parent = parent->d_parent;
+ mutex_lock(&parent->d_inode->i_mutex);
+ }
+ child = list_entry(parent->d_subdirs.next, struct dentry,
+ d_u.d_child);
+
+ /*
+ * If "child" isn't empty, walk down the tree and
+ * remove all its descendants first.
+ */
+ if (!list_empty(&child->d_subdirs)) {
+ mutex_unlock(&parent->d_inode->i_mutex);
+ parent = child;
+ mutex_lock(&parent->d_inode->i_mutex);
+ continue;
}
+ __debugfs_remove(child, parent);
+ if (parent->d_subdirs.next == &child->d_u.d_child) {
+ /*
+ * Avoid infinite loop if we fail to remove
+ * one dentry.
+ */
+ mutex_unlock(&parent->d_inode->i_mutex);
+ break;
+ }
+ simple_release_fs(&debugfs_mount, &debugfs_mount_count);
}
+
+ parent = dentry->d_parent;
+ mutex_lock(&parent->d_inode->i_mutex);
+ __debugfs_remove(dentry, parent);
mutex_unlock(&parent->d_inode->i_mutex);
simple_release_fs(&debugfs_mount, &debugfs_mount_count);
}
-EXPORT_SYMBOL_GPL(debugfs_remove);
+EXPORT_SYMBOL_GPL(debugfs_remove_recursive);
/**
* debugfs_rename - rename a file/directory in the debugfs filesystem
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 285b64a8b06..488eb424f66 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -29,7 +29,7 @@
#define DEVPTS_DEFAULT_MODE 0600
extern int pty_limit; /* Config limit on Unix98 ptys */
-static DEFINE_IDR(allocated_ptys);
+static DEFINE_IDA(allocated_ptys);
static DEFINE_MUTEX(allocated_ptys_lock);
static struct vfsmount *devpts_mnt;
@@ -180,24 +180,24 @@ static struct dentry *get_node(int num)
int devpts_new_index(void)
{
int index;
- int idr_ret;
+ int ida_ret;
retry:
- if (!idr_pre_get(&allocated_ptys, GFP_KERNEL)) {
+ if (!ida_pre_get(&allocated_ptys, GFP_KERNEL)) {
return -ENOMEM;
}
mutex_lock(&allocated_ptys_lock);
- idr_ret = idr_get_new(&allocated_ptys, NULL, &index);
- if (idr_ret < 0) {
+ ida_ret = ida_get_new(&allocated_ptys, &index);
+ if (ida_ret < 0) {
mutex_unlock(&allocated_ptys_lock);
- if (idr_ret == -EAGAIN)
+ if (ida_ret == -EAGAIN)
goto retry;
return -EIO;
}
if (index >= pty_limit) {
- idr_remove(&allocated_ptys, index);
+ ida_remove(&allocated_ptys, index);
mutex_unlock(&allocated_ptys_lock);
return -EIO;
}
@@ -208,7 +208,7 @@ retry:
void devpts_kill_index(int idx)
{
mutex_lock(&allocated_ptys_lock);
- idr_remove(&allocated_ptys, idx);
+ ida_remove(&allocated_ptys, idx);
mutex_unlock(&allocated_ptys_lock);
}
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 9e81addbd6e..9606ee848fd 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -150,17 +150,11 @@ static int dio_refill_pages(struct dio *dio)
int nr_pages;
nr_pages = min(dio->total_pages - dio->curr_page, DIO_PAGES);
- down_read(&current->mm->mmap_sem);
- ret = get_user_pages(
- current, /* Task for fault acounting */
- current->mm, /* whose pages? */
+ ret = get_user_pages_fast(
dio->curr_user_address, /* Where from? */
nr_pages, /* How many pages? */
dio->rw == READ, /* Write to memory? */
- 0, /* force (?) */
- &dio->pages[0],
- NULL); /* vmas */
- up_read(&current->mm->mmap_sem);
+ &dio->pages[0]); /* Put results here */
if (ret < 0 && dio->blocks_available && (dio->rw & WRITE)) {
struct page *page = ZERO_PAGE(0);
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 2d3d1027ce2..724ddac9153 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -363,6 +363,7 @@ static int search_rsb_list(struct list_head *head, char *name, int len,
if (len == r->res_length && !memcmp(name, r->res_name, len))
goto found;
}
+ *r_ret = NULL;
return -EBADR;
found:
@@ -1782,7 +1783,8 @@ static void grant_pending_locks(struct dlm_rsb *r)
list_for_each_entry_safe(lkb, s, &r->res_grantqueue, lkb_statequeue) {
if (lkb->lkb_bastfn && lock_requires_bast(lkb, high, cw)) {
- if (cw && high == DLM_LOCK_PR)
+ if (cw && high == DLM_LOCK_PR &&
+ lkb->lkb_grmode == DLM_LOCK_PR)
queue_bast(r, lkb, DLM_LOCK_CW);
else
queue_bast(r, lkb, high);
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 637018c891e..3962262f991 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -891,8 +891,10 @@ static void tcp_connect_to_sock(struct connection *con)
goto out_err;
memset(&saddr, 0, sizeof(saddr));
- if (dlm_nodeid_to_addr(con->nodeid, &saddr))
+ if (dlm_nodeid_to_addr(con->nodeid, &saddr)) {
+ sock_release(sock);
goto out_err;
+ }
sock->sk->sk_user_data = con;
con->rx_action = receive_from_sock;
diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c
index 78878c5781c..eba87ff3177 100644
--- a/fs/dlm/plock.c
+++ b/fs/dlm/plock.c
@@ -116,7 +116,7 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
if (xop->callback == NULL)
wait_event(recv_wq, (op->done != 0));
else {
- rv = -EINPROGRESS;
+ rv = FILE_LOCK_DEFERRED;
goto out;
}
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index f976f303c19..929e48ae759 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -539,7 +539,7 @@ static ssize_t device_write(struct file *file, const char __user *buf,
/* do we really need this? can a write happen after a close? */
if ((kbuf->cmd == DLM_USER_LOCK || kbuf->cmd == DLM_USER_UNLOCK) &&
- test_bit(DLM_PROC_FLAGS_CLOSING, &proc->flags))
+ (proc && test_bit(DLM_PROC_FLAGS_CLOSING, &proc->flags)))
return -EINVAL;
sigfillset(&allsigs);
diff --git a/fs/dquot.c b/fs/dquot.c
index 5ac77da1995..8ec4d6cc763 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -562,6 +562,8 @@ static struct shrinker dqcache_shrinker = {
*/
static void dqput(struct dquot *dquot)
{
+ int ret;
+
if (!dquot)
return;
#ifdef __DQUOT_PARANOIA
@@ -594,7 +596,19 @@ we_slept:
if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && dquot_dirty(dquot)) {
spin_unlock(&dq_list_lock);
/* Commit dquot before releasing */
- dquot->dq_sb->dq_op->write_dquot(dquot);
+ ret = dquot->dq_sb->dq_op->write_dquot(dquot);
+ if (ret < 0) {
+ printk(KERN_ERR "VFS: cannot write quota structure on "
+ "device %s (error %d). Quota may get out of "
+ "sync!\n", dquot->dq_sb->s_id, ret);
+ /*
+ * We clear dirty bit anyway, so that we avoid
+ * infinite loop here
+ */
+ spin_lock(&dq_list_lock);
+ clear_dquot_dirty(dquot);
+ spin_unlock(&dq_list_lock);
+ }
goto we_slept;
}
/* Clear flag in case dquot was inactive (something bad happened) */
@@ -875,7 +889,10 @@ static void print_warning(struct dquot *dquot, const int warntype)
char *msg = NULL;
struct tty_struct *tty;
- if (!need_print_warning(dquot))
+ if (warntype == QUOTA_NL_IHARDBELOW ||
+ warntype == QUOTA_NL_ISOFTBELOW ||
+ warntype == QUOTA_NL_BHARDBELOW ||
+ warntype == QUOTA_NL_BSOFTBELOW || !need_print_warning(dquot))
return;
mutex_lock(&tty_mutex);
@@ -1083,6 +1100,35 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war
return QUOTA_OK;
}
+static int info_idq_free(struct dquot *dquot, ulong inodes)
+{
+ if (test_bit(DQ_FAKE_B, &dquot->dq_flags) ||
+ dquot->dq_dqb.dqb_curinodes <= dquot->dq_dqb.dqb_isoftlimit)
+ return QUOTA_NL_NOWARN;
+
+ if (dquot->dq_dqb.dqb_curinodes - inodes <= dquot->dq_dqb.dqb_isoftlimit)
+ return QUOTA_NL_ISOFTBELOW;
+ if (dquot->dq_dqb.dqb_curinodes >= dquot->dq_dqb.dqb_ihardlimit &&
+ dquot->dq_dqb.dqb_curinodes - inodes < dquot->dq_dqb.dqb_ihardlimit)
+ return QUOTA_NL_IHARDBELOW;
+ return QUOTA_NL_NOWARN;
+}
+
+static int info_bdq_free(struct dquot *dquot, qsize_t space)
+{
+ if (test_bit(DQ_FAKE_B, &dquot->dq_flags) ||
+ toqb(dquot->dq_dqb.dqb_curspace) <= dquot->dq_dqb.dqb_bsoftlimit)
+ return QUOTA_NL_NOWARN;
+
+ if (toqb(dquot->dq_dqb.dqb_curspace - space) <=
+ dquot->dq_dqb.dqb_bsoftlimit)
+ return QUOTA_NL_BSOFTBELOW;
+ if (toqb(dquot->dq_dqb.dqb_curspace) >= dquot->dq_dqb.dqb_bhardlimit &&
+ toqb(dquot->dq_dqb.dqb_curspace - space) <
+ dquot->dq_dqb.dqb_bhardlimit)
+ return QUOTA_NL_BHARDBELOW;
+ return QUOTA_NL_NOWARN;
+}
/*
* Initialize quota pointers in inode
* Transaction must be started at entry
@@ -1139,6 +1185,28 @@ int dquot_drop(struct inode *inode)
return 0;
}
+/* Wrapper to remove references to quota structures from inode */
+void vfs_dq_drop(struct inode *inode)
+{
+ /* Here we can get arbitrary inode from clear_inode() so we have
+ * to be careful. OTOH we don't need locking as quota operations
+ * are allowed to change only at mount time */
+ if (!IS_NOQUOTA(inode) && inode->i_sb && inode->i_sb->dq_op
+ && inode->i_sb->dq_op->drop) {
+ int cnt;
+ /* Test before calling to rule out calls from proc and such
+ * where we are not allowed to block. Note that this is
+ * actually reliable test even without the lock - the caller
+ * must assure that nobody can come after the DQUOT_DROP and
+ * add quota pointers back anyway */
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+ if (inode->i_dquot[cnt] != NODQUOT)
+ break;
+ if (cnt < MAXQUOTAS)
+ inode->i_sb->dq_op->drop(inode);
+ }
+}
+
/*
* Following four functions update i_blocks+i_bytes fields and
* quota information (together with appropriate checks)
@@ -1248,6 +1316,7 @@ warn_put_all:
int dquot_free_space(struct inode *inode, qsize_t number)
{
unsigned int cnt;
+ char warntype[MAXQUOTAS];
/* First test before acquiring mutex - solves deadlocks when we
* re-enter the quota code and are already holding the mutex */
@@ -1256,6 +1325,7 @@ out_sub:
inode_sub_bytes(inode, number);
return QUOTA_OK;
}
+
down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
/* Now recheck reliably when holding dqptr_sem */
if (IS_NOQUOTA(inode)) {
@@ -1266,6 +1336,7 @@ out_sub:
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (inode->i_dquot[cnt] == NODQUOT)
continue;
+ warntype[cnt] = info_bdq_free(inode->i_dquot[cnt], number);
dquot_decr_space(inode->i_dquot[cnt], number);
}
inode_sub_bytes(inode, number);
@@ -1274,6 +1345,7 @@ out_sub:
for (cnt = 0; cnt < MAXQUOTAS; cnt++)
if (inode->i_dquot[cnt])
mark_dquot_dirty(inode->i_dquot[cnt]);
+ flush_warnings(inode->i_dquot, warntype);
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
return QUOTA_OK;
}
@@ -1284,11 +1356,13 @@ out_sub:
int dquot_free_inode(const struct inode *inode, unsigned long number)
{
unsigned int cnt;
+ char warntype[MAXQUOTAS];
/* First test before acquiring mutex - solves deadlocks when we
* re-enter the quota code and are already holding the mutex */
if (IS_NOQUOTA(inode))
return QUOTA_OK;
+
down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
/* Now recheck reliably when holding dqptr_sem */
if (IS_NOQUOTA(inode)) {
@@ -1299,6 +1373,7 @@ int dquot_free_inode(const struct inode *inode, unsigned long number)
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (inode->i_dquot[cnt] == NODQUOT)
continue;
+ warntype[cnt] = info_idq_free(inode->i_dquot[cnt], number);
dquot_decr_inodes(inode->i_dquot[cnt], number);
}
spin_unlock(&dq_data_lock);
@@ -1306,6 +1381,7 @@ int dquot_free_inode(const struct inode *inode, unsigned long number)
for (cnt = 0; cnt < MAXQUOTAS; cnt++)
if (inode->i_dquot[cnt])
mark_dquot_dirty(inode->i_dquot[cnt]);
+ flush_warnings(inode->i_dquot, warntype);
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
return QUOTA_OK;
}
@@ -1323,7 +1399,8 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
struct dquot *transfer_to[MAXQUOTAS];
int cnt, ret = NO_QUOTA, chuid = (iattr->ia_valid & ATTR_UID) && inode->i_uid != iattr->ia_uid,
chgid = (iattr->ia_valid & ATTR_GID) && inode->i_gid != iattr->ia_gid;
- char warntype[MAXQUOTAS];
+ char warntype_to[MAXQUOTAS];
+ char warntype_from_inodes[MAXQUOTAS], warntype_from_space[MAXQUOTAS];
/* First test before acquiring mutex - solves deadlocks when we
* re-enter the quota code and are already holding the mutex */
@@ -1332,7 +1409,7 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
/* Clear the arrays */
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
transfer_to[cnt] = transfer_from[cnt] = NODQUOT;
- warntype[cnt] = QUOTA_NL_NOWARN;
+ warntype_to[cnt] = QUOTA_NL_NOWARN;
}
down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
/* Now recheck reliably when holding dqptr_sem */
@@ -1364,8 +1441,9 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
if (transfer_to[cnt] == NODQUOT)
continue;
transfer_from[cnt] = inode->i_dquot[cnt];
- if (check_idq(transfer_to[cnt], 1, warntype+cnt) == NO_QUOTA ||
- check_bdq(transfer_to[cnt], space, 0, warntype+cnt) == NO_QUOTA)
+ if (check_idq(transfer_to[cnt], 1, warntype_to + cnt) ==
+ NO_QUOTA || check_bdq(transfer_to[cnt], space, 0,
+ warntype_to + cnt) == NO_QUOTA)
goto warn_put_all;
}
@@ -1381,6 +1459,10 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
/* Due to IO error we might not have transfer_from[] structure */
if (transfer_from[cnt]) {
+ warntype_from_inodes[cnt] =
+ info_idq_free(transfer_from[cnt], 1);
+ warntype_from_space[cnt] =
+ info_bdq_free(transfer_from[cnt], space);
dquot_decr_inodes(transfer_from[cnt], 1);
dquot_decr_space(transfer_from[cnt], space);
}
@@ -1400,7 +1482,9 @@ warn_put_all:
if (transfer_to[cnt])
mark_dquot_dirty(transfer_to[cnt]);
}
- flush_warnings(transfer_to, warntype);
+ flush_warnings(transfer_to, warntype_to);
+ flush_warnings(transfer_from, warntype_from_inodes);
+ flush_warnings(transfer_from, warntype_from_space);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (ret == QUOTA_OK && transfer_from[cnt] != NODQUOT)
@@ -1412,6 +1496,18 @@ warn_put_all:
return ret;
}
+/* Wrapper for transferring ownership of an inode */
+int vfs_dq_transfer(struct inode *inode, struct iattr *iattr)
+{
+ if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode)) {
+ vfs_dq_init(inode);
+ if (inode->i_sb->dq_op->transfer(inode, iattr) == NO_QUOTA)
+ return 1;
+ }
+ return 0;
+}
+
+
/*
* Write info of quota file to disk
*/
@@ -1697,6 +1793,21 @@ static int vfs_quota_on_remount(struct super_block *sb, int type)
return ret;
}
+int vfs_quota_on_path(struct super_block *sb, int type, int format_id,
+ struct path *path)
+{
+ int error = security_quota_on(path->dentry);
+ if (error)
+ return error;
+ /* Quota file not on the same filesystem? */
+ if (path->mnt->mnt_sb != sb)
+ error = -EXDEV;
+ else
+ error = vfs_quota_on_inode(path->dentry->d_inode, type,
+ format_id);
+ return error;
+}
+
/* Actual function called from quotactl() */
int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path,
int remount)
@@ -1708,19 +1819,10 @@ int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path,
return vfs_quota_on_remount(sb, type);
error = path_lookup(path, LOOKUP_FOLLOW, &nd);
- if (error < 0)
- return error;
- error = security_quota_on(nd.path.dentry);
- if (error)
- goto out_path;
- /* Quota file not on the same filesystem? */
- if (nd.path.mnt->mnt_sb != sb)
- error = -EXDEV;
- else
- error = vfs_quota_on_inode(nd.path.dentry->d_inode, type,
- format_id);
-out_path:
- path_put(&nd.path);
+ if (!error) {
+ error = vfs_quota_on_path(sb, type, format_id, &nd.path);
+ path_put(&nd.path);
+ }
return error;
}
@@ -1752,6 +1854,22 @@ out:
return error;
}
+/* Wrapper to turn on quotas when remounting rw */
+int vfs_dq_quota_on_remount(struct super_block *sb)
+{
+ int cnt;
+ int ret = 0, err;
+
+ if (!sb->s_qcop || !sb->s_qcop->quota_on)
+ return -ENOSYS;
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+ err = sb->s_qcop->quota_on(sb, cnt, 0, NULL, 1);
+ if (err < 0 && !ret)
+ ret = err;
+ }
+ return ret;
+}
+
/* Generic routine for getting common part of quota structure */
static void do_get_dqblk(struct dquot *dquot, struct if_dqblk *di)
{
@@ -2073,6 +2191,7 @@ EXPORT_SYMBOL(unregister_quota_format);
EXPORT_SYMBOL(dqstats);
EXPORT_SYMBOL(dq_data_lock);
EXPORT_SYMBOL(vfs_quota_on);
+EXPORT_SYMBOL(vfs_quota_on_path);
EXPORT_SYMBOL(vfs_quota_on_mount);
EXPORT_SYMBOL(vfs_quota_off);
EXPORT_SYMBOL(vfs_quota_sync);
@@ -2087,8 +2206,11 @@ EXPORT_SYMBOL(dquot_release);
EXPORT_SYMBOL(dquot_mark_dquot_dirty);
EXPORT_SYMBOL(dquot_initialize);
EXPORT_SYMBOL(dquot_drop);
+EXPORT_SYMBOL(vfs_dq_drop);
EXPORT_SYMBOL(dquot_alloc_space);
EXPORT_SYMBOL(dquot_alloc_inode);
EXPORT_SYMBOL(dquot_free_space);
EXPORT_SYMBOL(dquot_free_inode);
EXPORT_SYMBOL(dquot_transfer);
+EXPORT_SYMBOL(vfs_dq_transfer);
+EXPORT_SYMBOL(vfs_dq_quota_on_remount);
diff --git a/fs/ecryptfs/Makefile b/fs/ecryptfs/Makefile
index 1e34a7fd488..b4755a85996 100644
--- a/fs/ecryptfs/Makefile
+++ b/fs/ecryptfs/Makefile
@@ -4,4 +4,4 @@
obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o
-ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o read_write.o crypto.o keystore.o messaging.o netlink.o miscdev.o debug.o
+ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o read_write.o crypto.o keystore.o messaging.o netlink.o miscdev.o kthread.o debug.o
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index e2832bc7869..06db79d05c1 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -33,6 +33,7 @@
#include <linux/crypto.h>
#include <linux/file.h>
#include <linux/scatterlist.h>
+#include <asm/unaligned.h>
#include "ecryptfs_kernel.h"
static int
@@ -474,8 +475,8 @@ int ecryptfs_encrypt_page(struct page *page)
{
struct inode *ecryptfs_inode;
struct ecryptfs_crypt_stat *crypt_stat;
- char *enc_extent_virt = NULL;
- struct page *enc_extent_page;
+ char *enc_extent_virt;
+ struct page *enc_extent_page = NULL;
loff_t extent_offset;
int rc = 0;
@@ -491,14 +492,14 @@ int ecryptfs_encrypt_page(struct page *page)
page->index);
goto out;
}
- enc_extent_virt = kmalloc(PAGE_CACHE_SIZE, GFP_USER);
- if (!enc_extent_virt) {
+ enc_extent_page = alloc_page(GFP_USER);
+ if (!enc_extent_page) {
rc = -ENOMEM;
ecryptfs_printk(KERN_ERR, "Error allocating memory for "
"encrypted extent\n");
goto out;
}
- enc_extent_page = virt_to_page(enc_extent_virt);
+ enc_extent_virt = kmap(enc_extent_page);
for (extent_offset = 0;
extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size);
extent_offset++) {
@@ -526,7 +527,10 @@ int ecryptfs_encrypt_page(struct page *page)
}
}
out:
- kfree(enc_extent_virt);
+ if (enc_extent_page) {
+ kunmap(enc_extent_page);
+ __free_page(enc_extent_page);
+ }
return rc;
}
@@ -608,8 +612,8 @@ int ecryptfs_decrypt_page(struct page *page)
{
struct inode *ecryptfs_inode;
struct ecryptfs_crypt_stat *crypt_stat;
- char *enc_extent_virt = NULL;
- struct page *enc_extent_page;
+ char *enc_extent_virt;
+ struct page *enc_extent_page = NULL;
unsigned long extent_offset;
int rc = 0;
@@ -626,14 +630,14 @@ int ecryptfs_decrypt_page(struct page *page)
page->index);
goto out;
}
- enc_extent_virt = kmalloc(PAGE_CACHE_SIZE, GFP_USER);
- if (!enc_extent_virt) {
+ enc_extent_page = alloc_page(GFP_USER);
+ if (!enc_extent_page) {
rc = -ENOMEM;
ecryptfs_printk(KERN_ERR, "Error allocating memory for "
"encrypted extent\n");
goto out;
}
- enc_extent_page = virt_to_page(enc_extent_virt);
+ enc_extent_virt = kmap(enc_extent_page);
for (extent_offset = 0;
extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size);
extent_offset++) {
@@ -661,7 +665,10 @@ int ecryptfs_decrypt_page(struct page *page)
}
}
out:
- kfree(enc_extent_virt);
+ if (enc_extent_page) {
+ kunmap(enc_extent_page);
+ __free_page(enc_extent_page);
+ }
return rc;
}
@@ -1032,10 +1039,8 @@ static int contains_ecryptfs_marker(char *data)
{
u32 m_1, m_2;
- memcpy(&m_1, data, 4);
- m_1 = be32_to_cpu(m_1);
- memcpy(&m_2, (data + 4), 4);
- m_2 = be32_to_cpu(m_2);
+ m_1 = get_unaligned_be32(data);
+ m_2 = get_unaligned_be32(data + 4);
if ((m_1 ^ MAGIC_ECRYPTFS_MARKER) == m_2)
return 1;
ecryptfs_printk(KERN_DEBUG, "m_1 = [0x%.8x]; m_2 = [0x%.8x]; "
@@ -1073,8 +1078,7 @@ static int ecryptfs_process_flags(struct ecryptfs_crypt_stat *crypt_stat,
int i;
u32 flags;
- memcpy(&flags, page_virt, 4);
- flags = be32_to_cpu(flags);
+ flags = get_unaligned_be32(page_virt);
for (i = 0; i < ((sizeof(ecryptfs_flag_map)
/ sizeof(struct ecryptfs_flag_map_elem))); i++)
if (flags & ecryptfs_flag_map[i].file_flag) {
@@ -1100,11 +1104,9 @@ static void write_ecryptfs_marker(char *page_virt, size_t *written)
get_random_bytes(&m_1, (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2));
m_2 = (m_1 ^ MAGIC_ECRYPTFS_MARKER);
- m_1 = cpu_to_be32(m_1);
- memcpy(page_virt, &m_1, (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2));
- m_2 = cpu_to_be32(m_2);
- memcpy(page_virt + (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2), &m_2,
- (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2));
+ put_unaligned_be32(m_1, page_virt);
+ page_virt += (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2);
+ put_unaligned_be32(m_2, page_virt);
(*written) = MAGIC_ECRYPTFS_MARKER_SIZE_BYTES;
}
@@ -1121,8 +1123,7 @@ write_ecryptfs_flags(char *page_virt, struct ecryptfs_crypt_stat *crypt_stat,
flags |= ecryptfs_flag_map[i].file_flag;
/* Version is in top 8 bits of the 32-bit flag vector */
flags |= ((((u8)crypt_stat->file_version) << 24) & 0xFF000000);
- flags = cpu_to_be32(flags);
- memcpy(page_virt, &flags, 4);
+ put_unaligned_be32(flags, page_virt);
(*written) = 4;
}
@@ -1238,11 +1239,9 @@ ecryptfs_write_header_metadata(char *virt,
num_header_extents_at_front =
(u16)(crypt_stat->num_header_bytes_at_front
/ crypt_stat->extent_size);
- header_extent_size = cpu_to_be32(header_extent_size);
- memcpy(virt, &header_extent_size, 4);
+ put_unaligned_be32(header_extent_size, virt);
virt += 4;
- num_header_extents_at_front = cpu_to_be16(num_header_extents_at_front);
- memcpy(virt, &num_header_extents_at_front, 2);
+ put_unaligned_be16(num_header_extents_at_front, virt);
(*written) = 6;
}
@@ -1410,15 +1409,13 @@ static int parse_header_metadata(struct ecryptfs_crypt_stat *crypt_stat,
u32 header_extent_size;
u16 num_header_extents_at_front;
- memcpy(&header_extent_size, virt, sizeof(u32));
- header_extent_size = be32_to_cpu(header_extent_size);
- virt += sizeof(u32);
- memcpy(&num_header_extents_at_front, virt, sizeof(u16));
- num_header_extents_at_front = be16_to_cpu(num_header_extents_at_front);
+ header_extent_size = get_unaligned_be32(virt);
+ virt += sizeof(__be32);
+ num_header_extents_at_front = get_unaligned_be16(virt);
crypt_stat->num_header_bytes_at_front =
(((size_t)num_header_extents_at_front
* (size_t)header_extent_size));
- (*bytes_read) = (sizeof(u32) + sizeof(u16));
+ (*bytes_read) = (sizeof(__be32) + sizeof(__be16));
if ((validate_header_size == ECRYPTFS_VALIDATE_HEADER_SIZE)
&& (crypt_stat->num_header_bytes_at_front
< ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE)) {
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index c15c25745e0..b73fb752c5f 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -559,10 +559,25 @@ extern struct kmem_cache *ecryptfs_key_record_cache;
extern struct kmem_cache *ecryptfs_key_sig_cache;
extern struct kmem_cache *ecryptfs_global_auth_tok_cache;
extern struct kmem_cache *ecryptfs_key_tfm_cache;
+extern struct kmem_cache *ecryptfs_open_req_cache;
+struct ecryptfs_open_req {
+#define ECRYPTFS_REQ_PROCESSED 0x00000001
+#define ECRYPTFS_REQ_DROPPED 0x00000002
+#define ECRYPTFS_REQ_ZOMBIE 0x00000004
+ u32 flags;
+ struct file **lower_file;
+ struct dentry *lower_dentry;
+ struct vfsmount *lower_mnt;
+ wait_queue_head_t wait;
+ struct mutex mux;
+ struct list_head kthread_ctl_list;
+};
+
+#define ECRYPTFS_INTERPOSE_FLAG_D_ADD 0x00000001
int ecryptfs_interpose(struct dentry *hidden_dentry,
struct dentry *this_dentry, struct super_block *sb,
- int flag);
+ u32 flags);
int ecryptfs_fill_zeros(struct file *file, loff_t new_length);
int ecryptfs_decode_filename(struct ecryptfs_crypt_stat *crypt_stat,
const char *name, int length,
@@ -690,5 +705,11 @@ void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx);
int
ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, uid_t euid,
struct user_namespace *user_ns, struct pid *pid);
+int ecryptfs_init_kthread(void);
+void ecryptfs_destroy_kthread(void);
+int ecryptfs_privileged_open(struct file **lower_file,
+ struct dentry *lower_dentry,
+ struct vfsmount *lower_mnt);
+int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry);
#endif /* #ifndef ECRYPTFS_KERNEL_H */
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 24749bf0668..9244d653743 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -192,6 +192,23 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
| ECRYPTFS_ENCRYPTED);
}
mutex_unlock(&crypt_stat->cs_mutex);
+	/* The persistent lower file has to exist before its flags can
+	 * be inspected below. */
+	if (!ecryptfs_inode_to_private(inode)->lower_file) {
+		rc = ecryptfs_init_persistent_file(ecryptfs_dentry);
+		if (rc) {
+			printk(KERN_ERR "%s: Error attempting to initialize "
+			       "the persistent file for the dentry with name "
+			       "[%s]; rc = [%d]\n", __func__,
+			       ecryptfs_dentry->d_name.name, rc);
+			goto out;
+		}
+	}
+	/* O_RDONLY is 0, so it cannot be tested as a bit; compare the
+	 * access mode under O_ACCMODE instead. */
+	if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags
+	     & O_ACCMODE) == O_RDONLY
+	    && (file->f_flags & O_ACCMODE) != O_RDONLY) {
+		rc = -EPERM;
+		printk(KERN_WARNING "%s: Lower persistent file is RO; eCryptfs "
+		       "file must hence be opened RO\n", __func__);
+		goto out;
+	}
ecryptfs_set_file_lower(
file, ecryptfs_inode_to_private(inode)->lower_file);
if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) {
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index c92cc1c00aa..89209f00f9c 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -31,6 +31,7 @@
#include <linux/mount.h>
#include <linux/crypto.h>
#include <linux/fs_stack.h>
+#include <asm/unaligned.h>
#include "ecryptfs_kernel.h"
static struct dentry *lock_parent(struct dentry *dentry)
@@ -188,6 +189,16 @@ static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry)
"context; rc = [%d]\n", rc);
goto out;
}
+ if (!ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->lower_file) {
+ rc = ecryptfs_init_persistent_file(ecryptfs_dentry);
+ if (rc) {
+ printk(KERN_ERR "%s: Error attempting to initialize "
+ "the persistent file for the dentry with name "
+ "[%s]; rc = [%d]\n", __func__,
+ ecryptfs_dentry->d_name.name, rc);
+ goto out;
+ }
+ }
rc = ecryptfs_write_metadata(ecryptfs_dentry);
if (rc) {
printk(KERN_ERR "Error writing headers; rc = [%d]\n", rc);
@@ -307,10 +318,11 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
d_add(dentry, NULL);
goto out;
}
- rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 1);
+ rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb,
+ ECRYPTFS_INTERPOSE_FLAG_D_ADD);
if (rc) {
ecryptfs_printk(KERN_ERR, "Error interposing\n");
- goto out_dput;
+ goto out;
}
if (S_ISDIR(lower_inode->i_mode)) {
ecryptfs_printk(KERN_DEBUG, "Is a directory; returning\n");
@@ -336,11 +348,21 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
rc = -ENOMEM;
ecryptfs_printk(KERN_ERR,
"Cannot ecryptfs_kmalloc a page\n");
- goto out_dput;
+ goto out;
}
crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat;
if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED))
ecryptfs_set_default_sizes(crypt_stat);
+ if (!ecryptfs_inode_to_private(dentry->d_inode)->lower_file) {
+ rc = ecryptfs_init_persistent_file(dentry);
+ if (rc) {
+ printk(KERN_ERR "%s: Error attempting to initialize "
+ "the persistent file for the dentry with name "
+ "[%s]; rc = [%d]\n", __func__,
+ dentry->d_name.name, rc);
+ goto out;
+ }
+ }
rc = ecryptfs_read_and_validate_header_region(page_virt,
dentry->d_inode);
if (rc) {
@@ -364,8 +386,7 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
else
file_size = i_size_read(lower_dentry->d_inode);
} else {
- memcpy(&file_size, page_virt, sizeof(file_size));
- file_size = be64_to_cpu(file_size);
+ file_size = get_unaligned_be64(page_virt);
}
i_size_write(dentry->d_inode, (loff_t)file_size);
kmem_cache_free(ecryptfs_header_cache_2, page_virt);
@@ -444,7 +465,6 @@ static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry,
int rc;
struct dentry *lower_dentry;
struct dentry *lower_dir_dentry;
- umode_t mode;
char *encoded_symname;
int encoded_symlen;
struct ecryptfs_crypt_stat *crypt_stat = NULL;
@@ -452,7 +472,6 @@ static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry,
lower_dentry = ecryptfs_dentry_to_lower(dentry);
dget(lower_dentry);
lower_dir_dentry = lock_parent(lower_dentry);
- mode = S_IALLUGO;
encoded_symlen = ecryptfs_encode_filename(crypt_stat, symname,
strlen(symname),
&encoded_symname);
@@ -461,7 +480,7 @@ static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry,
goto out_lock;
}
rc = vfs_symlink(lower_dir_dentry->d_inode, lower_dentry,
- encoded_symname, mode);
+ encoded_symname);
kfree(encoded_symname);
if (rc || !lower_dentry->d_inode)
goto out_lock;
@@ -809,22 +828,9 @@ out:
}
static int
-ecryptfs_permission(struct inode *inode, int mask, struct nameidata *nd)
+ecryptfs_permission(struct inode *inode, int mask)
{
- int rc;
-
- if (nd) {
- struct vfsmount *vfsmnt_save = nd->path.mnt;
- struct dentry *dentry_save = nd->path.dentry;
-
- nd->path.mnt = ecryptfs_dentry_to_lower_mnt(nd->path.dentry);
- nd->path.dentry = ecryptfs_dentry_to_lower(nd->path.dentry);
- rc = permission(ecryptfs_inode_to_lower(inode), mask, nd);
- nd->path.mnt = vfsmnt_save;
- nd->path.dentry = dentry_save;
- } else
- rc = permission(ecryptfs_inode_to_lower(inode), mask, NULL);
- return rc;
+ return inode_permission(ecryptfs_inode_to_lower(inode), mask);
}
/**
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index e82b457180b..f5b76a331b9 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -44,15 +44,15 @@ static int process_request_key_err(long err_code)
int rc = 0;
switch (err_code) {
- case ENOKEY:
+ case -ENOKEY:
ecryptfs_printk(KERN_WARNING, "No key\n");
rc = -ENOENT;
break;
- case EKEYEXPIRED:
+ case -EKEYEXPIRED:
ecryptfs_printk(KERN_WARNING, "Key expired\n");
rc = -ETIME;
break;
- case EKEYREVOKED:
+ case -EKEYREVOKED:
ecryptfs_printk(KERN_WARNING, "Key revoked\n");
rc = -EINVAL;
break;
@@ -963,8 +963,7 @@ int ecryptfs_keyring_auth_tok_for_sig(struct key **auth_tok_key,
if (!(*auth_tok_key) || IS_ERR(*auth_tok_key)) {
printk(KERN_ERR "Could not find key with description: [%s]\n",
sig);
- process_request_key_err(PTR_ERR(*auth_tok_key));
- rc = -EINVAL;
+ rc = process_request_key_err(PTR_ERR(*auth_tok_key));
goto out;
}
(*auth_tok) = ecryptfs_get_key_payload_data(*auth_tok_key);
diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c
new file mode 100644
index 00000000000..c440c6b58b2
--- /dev/null
+++ b/fs/ecryptfs/kthread.c
@@ -0,0 +1,203 @@
+/**
+ * eCryptfs: Linux filesystem encryption layer
+ *
+ * Copyright (C) 2008 International Business Machines Corp.
+ * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#include <linux/kthread.h>
+#include <linux/freezer.h>
+#include <linux/wait.h>
+#include <linux/mount.h>
+#include "ecryptfs_kernel.h"
+
+struct kmem_cache *ecryptfs_open_req_cache;
+
+/* State shared between the eCryptfs kernel thread and its requesters */
+static struct ecryptfs_kthread_ctl {
+/* Set by ecryptfs_destroy_kthread(); new requests are refused with -EIO */
+#define ECRYPTFS_KTHREAD_ZOMBIE 0x00000001
+ u32 flags;
+ struct mutex mux; /* taken around updates to flags and req_list */
+ struct list_head req_list; /* queued struct ecryptfs_open_req entries */
+ wait_queue_head_t wait; /* the kernel thread sleeps here */
+} ecryptfs_kthread_ctl;
+
+static struct task_struct *ecryptfs_kthread;
+
+/**
+ * ecryptfs_threadfn
+ * @ignored: ignored
+ *
+ * The eCryptfs kernel thread that has the responsibility of getting
+ * the lower persistent file with RW permissions. It sleeps until a
+ * request is queued or a stop is requested, then opens the lower file
+ * for every queued request and wakes the task waiting on it.
+ *
+ * Always returns zero; the loop is only left once
+ * ECRYPTFS_KTHREAD_ZOMBIE has been set.
+ */
+static int ecryptfs_threadfn(void *ignored)
+{
+ set_freezable();
+ while (1) {
+ struct ecryptfs_open_req *req;
+
+ wait_event_freezable(
+ ecryptfs_kthread_ctl.wait,
+ (!list_empty(&ecryptfs_kthread_ctl.req_list)
+ || kthread_should_stop()));
+ mutex_lock(&ecryptfs_kthread_ctl.mux);
+ /* Shutdown in progress: leave without touching the queue */
+ if (ecryptfs_kthread_ctl.flags & ECRYPTFS_KTHREAD_ZOMBIE) {
+ mutex_unlock(&ecryptfs_kthread_ctl.mux);
+ goto out;
+ }
+ /* Drain the queue while holding the control mutex */
+ while (!list_empty(&ecryptfs_kthread_ctl.req_list)) {
+ req = list_first_entry(&ecryptfs_kthread_ctl.req_list,
+ struct ecryptfs_open_req,
+ kthread_ctl_list);
+ mutex_lock(&req->mux);
+ list_del(&req->kthread_ctl_list);
+ if (!(req->flags & ECRYPTFS_REQ_ZOMBIE)) {
+ dget(req->lower_dentry);
+ mntget(req->lower_mnt);
+ /* The result may be an error pointer; the
+ * requester checks it with IS_ERR() */
+ (*req->lower_file) = dentry_open(
+ req->lower_dentry, req->lower_mnt,
+ (O_RDWR | O_LARGEFILE));
+ req->flags |= ECRYPTFS_REQ_PROCESSED;
+ }
+ wake_up(&req->wait);
+ mutex_unlock(&req->mux);
+ }
+ mutex_unlock(&ecryptfs_kthread_ctl.mux);
+ }
+out:
+ return 0;
+}
+
+/**
+ * ecryptfs_init_kthread
+ *
+ * Initializes the shared control structure and starts the
+ * "ecryptfs-kthread" kernel thread running ecryptfs_threadfn().
+ *
+ * Returns zero on success; the error from kthread_run() otherwise
+ */
+int ecryptfs_init_kthread(void)
+{
+ int rc = 0;
+
+ /* The lock, wait queue and list are set up before the thread starts */
+ mutex_init(&ecryptfs_kthread_ctl.mux);
+ init_waitqueue_head(&ecryptfs_kthread_ctl.wait);
+ INIT_LIST_HEAD(&ecryptfs_kthread_ctl.req_list);
+ ecryptfs_kthread = kthread_run(&ecryptfs_threadfn, NULL,
+ "ecryptfs-kthread");
+ if (IS_ERR(ecryptfs_kthread)) {
+ rc = PTR_ERR(ecryptfs_kthread);
+ printk(KERN_ERR "%s: Failed to create kernel thread; rc = [%d]"
+ "\n", __func__, rc);
+ }
+ return rc;
+}
+
+/**
+ * ecryptfs_destroy_kthread
+ *
+ * Marks the control structure as shutting down, fails every request
+ * that is still queued, and stops the kernel thread.
+ */
+void ecryptfs_destroy_kthread(void)
+{
+	struct ecryptfs_open_req *req;
+	struct ecryptfs_open_req *tmp;
+
+	mutex_lock(&ecryptfs_kthread_ctl.mux);
+	ecryptfs_kthread_ctl.flags |= ECRYPTFS_KTHREAD_ZOMBIE;
+	/* A woken requester frees its request as soon as it can take
+	 * req->mux, so the next entry is fetched up front (_safe) and
+	 * the request is unlinked before req->mux is released. */
+	list_for_each_entry_safe(req, tmp, &ecryptfs_kthread_ctl.req_list,
+				 kthread_ctl_list) {
+		mutex_lock(&req->mux);
+		list_del(&req->kthread_ctl_list);
+		req->flags |= ECRYPTFS_REQ_ZOMBIE;
+		wake_up(&req->wait);
+		mutex_unlock(&req->mux);
+	}
+	mutex_unlock(&ecryptfs_kthread_ctl.mux);
+	kthread_stop(ecryptfs_kthread);
+	wake_up(&ecryptfs_kthread_ctl.wait);
+}
+
+/**
+ * ecryptfs_privileged_open
+ * @lower_file: Result of dentry_open by root on lower dentry
+ * @lower_dentry: Lower dentry for file to open
+ * @lower_mnt: Lower vfsmount for file to open
+ *
+ * This function gets a r/w file opened against the lower dentry. The
+ * open is first attempted by the calling task. If that fails, the
+ * request is queued to the eCryptfs kernel thread and the caller
+ * sleeps until the request has been handled. If the kernel thread
+ * could not open the file r/w either, a read-only open is tried as a
+ * last resort; a successful read-only open counts as success.
+ *
+ * When every open attempt fails, *@lower_file is set to NULL. On the
+ * earlier error exits (-ENOMEM, -EIO) *@lower_file still holds the
+ * error pointer from the caller's own open attempt.
+ *
+ * Returns zero on success; non-zero otherwise
+ */
+int ecryptfs_privileged_open(struct file **lower_file,
+			     struct dentry *lower_dentry,
+			     struct vfsmount *lower_mnt)
+{
+	struct ecryptfs_open_req *req;
+	int rc = 0;
+
+	/* Corresponding dput() and mntput() are done when the
+	 * persistent file is fput() when the eCryptfs inode is
+	 * destroyed. */
+	dget(lower_dentry);
+	mntget(lower_mnt);
+	(*lower_file) = dentry_open(lower_dentry, lower_mnt,
+				    (O_RDWR | O_LARGEFILE));
+	if (!IS_ERR(*lower_file))
+		goto out;
+	req = kmem_cache_alloc(ecryptfs_open_req_cache, GFP_KERNEL);
+	if (!req) {
+		rc = -ENOMEM;
+		goto out;
+	}
+	mutex_init(&req->mux);
+	req->lower_file = lower_file;
+	req->lower_dentry = lower_dentry;
+	req->lower_mnt = lower_mnt;
+	init_waitqueue_head(&req->wait);
+	req->flags = 0;
+	mutex_lock(&ecryptfs_kthread_ctl.mux);
+	if (ecryptfs_kthread_ctl.flags & ECRYPTFS_KTHREAD_ZOMBIE) {
+		rc = -EIO;
+		mutex_unlock(&ecryptfs_kthread_ctl.mux);
+		printk(KERN_ERR "%s: We are in the middle of shutting down; "
+		       "aborting privileged request to open lower file\n",
+			__func__);
+		goto out_free;
+	}
+	list_add_tail(&req->kthread_ctl_list, &ecryptfs_kthread_ctl.req_list);
+	mutex_unlock(&ecryptfs_kthread_ctl.mux);
+	wake_up(&ecryptfs_kthread_ctl.wait);
+	/* Any flag being set means the request has been handled or
+	 * abandoned. */
+	wait_event(req->wait, (req->flags != 0));
+	mutex_lock(&req->mux);
+	BUG_ON(req->flags == 0);
+	if (req->flags & ECRYPTFS_REQ_DROPPED
+	    || req->flags & ECRYPTFS_REQ_ZOMBIE) {
+		rc = -EIO;
+		printk(KERN_WARNING "%s: Privileged open request dropped\n",
+		       __func__);
+		goto out_unlock;
+	}
+	if (IS_ERR(*req->lower_file)) {
+		/* rc is only set if the read-only fallback fails as
+		 * well, so that a valid *lower_file is never returned
+		 * together with the stale error of the r/w attempt. */
+		dget(lower_dentry);
+		mntget(lower_mnt);
+		(*lower_file) = dentry_open(lower_dentry, lower_mnt,
+					    (O_RDONLY | O_LARGEFILE));
+		if (IS_ERR(*lower_file)) {
+			rc = PTR_ERR(*lower_file);
+			(*lower_file) = NULL;
+			printk(KERN_WARNING "%s: Error attempting privileged "
+			       "open of lower file with either RW or RO "
+			       "perms; rc = [%d]. Giving up.\n",
+			       __func__, rc);
+		}
+	}
+out_unlock:
+	mutex_unlock(&req->mux);
+out_free:
+	kmem_cache_free(ecryptfs_open_req_cache, req);
+out:
+	return rc;
+}
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index d603631601e..448dfd597b5 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -117,7 +117,7 @@ void __ecryptfs_printk(const char *fmt, ...)
*
* Returns zero on success; non-zero otherwise
*/
-static int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry)
+int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry)
{
struct ecryptfs_inode_info *inode_info =
ecryptfs_inode_to_private(ecryptfs_dentry->d_inode);
@@ -130,26 +130,12 @@ static int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry)
ecryptfs_dentry_to_lower_mnt(ecryptfs_dentry);
lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
- /* Corresponding dput() and mntput() are done when the
- * persistent file is fput() when the eCryptfs inode
- * is destroyed. */
- dget(lower_dentry);
- mntget(lower_mnt);
- inode_info->lower_file = dentry_open(lower_dentry,
- lower_mnt,
- (O_RDWR | O_LARGEFILE));
- if (IS_ERR(inode_info->lower_file)) {
- dget(lower_dentry);
- mntget(lower_mnt);
- inode_info->lower_file = dentry_open(lower_dentry,
- lower_mnt,
- (O_RDONLY
- | O_LARGEFILE));
- }
- if (IS_ERR(inode_info->lower_file)) {
+ rc = ecryptfs_privileged_open(&inode_info->lower_file,
+ lower_dentry, lower_mnt);
+ if (rc || IS_ERR(inode_info->lower_file)) {
printk(KERN_ERR "Error opening lower persistent file "
- "for lower_dentry [0x%p] and lower_mnt [0x%p]\n",
- lower_dentry, lower_mnt);
+ "for lower_dentry [0x%p] and lower_mnt [0x%p]; "
+ "rc = [%d]\n", lower_dentry, lower_mnt, rc);
rc = PTR_ERR(inode_info->lower_file);
inode_info->lower_file = NULL;
}
@@ -163,14 +149,14 @@ static int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry)
* @lower_dentry: Existing dentry in the lower filesystem
* @dentry: ecryptfs' dentry
* @sb: ecryptfs's super_block
- * @flag: If set to true, then d_add is called, else d_instantiate is called
+ * @flags: flags to govern behavior of interpose procedure
*
* Interposes upper and lower dentries.
*
* Returns zero on success; non-zero otherwise
*/
int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
- struct super_block *sb, int flag)
+ struct super_block *sb, u32 flags)
{
struct inode *lower_inode;
struct inode *inode;
@@ -207,7 +193,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
init_special_inode(inode, lower_inode->i_mode,
lower_inode->i_rdev);
dentry->d_op = &ecryptfs_dops;
- if (flag)
+ if (flags & ECRYPTFS_INTERPOSE_FLAG_D_ADD)
d_add(dentry, inode);
else
d_instantiate(dentry, inode);
@@ -215,13 +201,6 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
/* This size will be overwritten for real files w/ headers and
* other metadata */
fsstack_copy_inode_size(inode, lower_inode);
- rc = ecryptfs_init_persistent_file(dentry);
- if (rc) {
- printk(KERN_ERR "%s: Error attempting to initialize the "
- "persistent file for the dentry with name [%s]; "
- "rc = [%d]\n", __func__, dentry->d_name.name, rc);
- goto out;
- }
out:
return rc;
}
@@ -262,10 +241,11 @@ static int ecryptfs_init_global_auth_toks(
"session keyring for sig specified in mount "
"option: [%s]\n", global_auth_tok->sig);
global_auth_tok->flags |= ECRYPTFS_AUTH_TOK_INVALID;
- rc = 0;
+ goto out;
} else
global_auth_tok->flags &= ~ECRYPTFS_AUTH_TOK_INVALID;
}
+out:
return rc;
}
@@ -314,7 +294,6 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
char *cipher_name_dst;
char *cipher_name_src;
char *cipher_key_bytes_src;
- int cipher_name_len;
if (!options) {
rc = -EINVAL;
@@ -395,17 +374,12 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
goto out;
}
if (!cipher_name_set) {
- cipher_name_len = strlen(ECRYPTFS_DEFAULT_CIPHER);
- if (unlikely(cipher_name_len
- >= ECRYPTFS_MAX_CIPHER_NAME_SIZE)) {
- rc = -EINVAL;
- BUG();
- goto out;
- }
- memcpy(mount_crypt_stat->global_default_cipher_name,
- ECRYPTFS_DEFAULT_CIPHER, cipher_name_len);
- mount_crypt_stat->global_default_cipher_name[cipher_name_len]
- = '\0';
+ int cipher_name_len = strlen(ECRYPTFS_DEFAULT_CIPHER);
+
+ BUG_ON(cipher_name_len >= ECRYPTFS_MAX_CIPHER_NAME_SIZE);
+
+ strcpy(mount_crypt_stat->global_default_cipher_name,
+ ECRYPTFS_DEFAULT_CIPHER);
}
if (!cipher_key_bytes_set) {
mount_crypt_stat->global_default_cipher_key_size = 0;
@@ -430,7 +404,6 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
printk(KERN_WARNING "One or more global auth toks could not "
"properly register; rc = [%d]\n", rc);
}
- rc = 0;
out:
return rc;
}
@@ -605,7 +578,7 @@ static struct file_system_type ecryptfs_fs_type = {
* Initializes the ecryptfs_inode_info_cache when it is created
*/
static void
-inode_info_init_once(struct kmem_cache *cachep, void *vptr)
+inode_info_init_once(void *vptr)
{
struct ecryptfs_inode_info *ei = (struct ecryptfs_inode_info *)vptr;
@@ -616,7 +589,7 @@ static struct ecryptfs_cache_info {
struct kmem_cache **cache;
const char *name;
size_t size;
- void (*ctor)(struct kmem_cache *cache, void *obj);
+ void (*ctor)(void *obj);
} ecryptfs_cache_infos[] = {
{
.cache = &ecryptfs_auth_tok_list_item_cache,
@@ -679,6 +652,11 @@ static struct ecryptfs_cache_info {
.name = "ecryptfs_key_tfm_cache",
.size = sizeof(struct ecryptfs_key_tfm),
},
+ {
+ .cache = &ecryptfs_open_req_cache,
+ .name = "ecryptfs_open_req_cache",
+ .size = sizeof(struct ecryptfs_open_req),
+ },
};
static void ecryptfs_free_kmem_caches(void)
@@ -795,11 +773,17 @@ static int __init ecryptfs_init(void)
printk(KERN_ERR "sysfs registration failed\n");
goto out_unregister_filesystem;
}
+ rc = ecryptfs_init_kthread();
+ if (rc) {
+ printk(KERN_ERR "%s: kthread initialization failed; "
+ "rc = [%d]\n", __func__, rc);
+ goto out_do_sysfs_unregistration;
+ }
rc = ecryptfs_init_messaging(ecryptfs_transport);
if (rc) {
- ecryptfs_printk(KERN_ERR, "Failure occured while attempting to "
+ printk(KERN_ERR "Failure occured while attempting to "
"initialize the eCryptfs netlink socket\n");
- goto out_do_sysfs_unregistration;
+ goto out_destroy_kthread;
}
rc = ecryptfs_init_crypto();
if (rc) {
@@ -814,6 +798,8 @@ static int __init ecryptfs_init(void)
goto out;
out_release_messaging:
ecryptfs_release_messaging(ecryptfs_transport);
+out_destroy_kthread:
+ ecryptfs_destroy_kthread();
out_do_sysfs_unregistration:
do_sysfs_unregistration();
out_unregister_filesystem:
@@ -833,6 +819,7 @@ static void __exit ecryptfs_exit(void)
printk(KERN_ERR "Failure whilst attempting to destroy crypto; "
"rc = [%d]\n", rc);
ecryptfs_release_messaging(ecryptfs_transport);
+ ecryptfs_destroy_kthread();
do_sysfs_unregistration();
unregister_filesystem(&ecryptfs_fs_type);
ecryptfs_free_kmem_caches();
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c
index 09a4522f65e..b484792a099 100644
--- a/fs/ecryptfs/miscdev.c
+++ b/fs/ecryptfs/miscdev.c
@@ -358,46 +358,6 @@ out_unlock_daemon:
}
/**
- * ecryptfs_miscdev_helo
- * @euid: effective user id of miscdevess sending helo packet
- * @user_ns: The namespace in which @euid applies
- * @pid: miscdevess id of miscdevess sending helo packet
- *
- * Returns zero on success; non-zero otherwise
- */
-static int ecryptfs_miscdev_helo(uid_t euid, struct user_namespace *user_ns,
- struct pid *pid)
-{
- int rc;
-
- rc = ecryptfs_process_helo(ECRYPTFS_TRANSPORT_MISCDEV, euid, user_ns,
- pid);
- if (rc)
- printk(KERN_WARNING "Error processing HELO; rc = [%d]\n", rc);
- return rc;
-}
-
-/**
- * ecryptfs_miscdev_quit
- * @euid: effective user id of miscdevess sending quit packet
- * @user_ns: The namespace in which @euid applies
- * @pid: miscdevess id of miscdevess sending quit packet
- *
- * Returns zero on success; non-zero otherwise
- */
-static int ecryptfs_miscdev_quit(uid_t euid, struct user_namespace *user_ns,
- struct pid *pid)
-{
- int rc;
-
- rc = ecryptfs_process_quit(euid, user_ns, pid);
- if (rc)
- printk(KERN_WARNING
- "Error processing QUIT message; rc = [%d]\n", rc);
- return rc;
-}
-
-/**
* ecryptfs_miscdev_response - miscdevess response to message previously sent to daemon
* @data: Bytes comprising struct ecryptfs_message
* @data_size: sizeof(struct ecryptfs_message) + data len
@@ -512,26 +472,7 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf,
__func__, rc);
break;
case ECRYPTFS_MSG_HELO:
- rc = ecryptfs_miscdev_helo(current->euid,
- current->nsproxy->user_ns,
- task_pid(current));
- if (rc) {
- printk(KERN_ERR "%s: Error attempting to process "
- "helo from pid [0x%p]; rc = [%d]\n", __func__,
- task_pid(current), rc);
- goto out_free;
- }
- break;
case ECRYPTFS_MSG_QUIT:
- rc = ecryptfs_miscdev_quit(current->euid,
- current->nsproxy->user_ns,
- task_pid(current));
- if (rc) {
- printk(KERN_ERR "%s: Error attempting to process "
- "quit from pid [0x%p]; rc = [%d]\n", __func__,
- task_pid(current), rc);
- goto out_free;
- }
break;
default:
ecryptfs_printk(KERN_WARNING, "Dropping miscdev "
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 2b6fe1e6e8b..245c2dc02d5 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -32,6 +32,7 @@
#include <linux/file.h>
#include <linux/crypto.h>
#include <linux/scatterlist.h>
+#include <asm/unaligned.h>
#include "ecryptfs_kernel.h"
/**
@@ -372,7 +373,6 @@ out:
*/
static int ecryptfs_write_inode_size_to_header(struct inode *ecryptfs_inode)
{
- u64 file_size;
char *file_size_virt;
int rc;
@@ -381,9 +381,7 @@ static int ecryptfs_write_inode_size_to_header(struct inode *ecryptfs_inode)
rc = -ENOMEM;
goto out;
}
- file_size = (u64)i_size_read(ecryptfs_inode);
- file_size = cpu_to_be64(file_size);
- memcpy(file_size_virt, &file_size, sizeof(u64));
+ put_unaligned_be64(i_size_read(ecryptfs_inode), file_size_virt);
rc = ecryptfs_write_lower(ecryptfs_inode, file_size_virt, 0,
sizeof(u64));
kfree(file_size_virt);
@@ -403,7 +401,6 @@ static int ecryptfs_write_inode_size_to_xattr(struct inode *ecryptfs_inode)
struct dentry *lower_dentry =
ecryptfs_inode_to_private(ecryptfs_inode)->lower_file->f_dentry;
struct inode *lower_inode = lower_dentry->d_inode;
- u64 file_size;
int rc;
if (!lower_inode->i_op->getxattr || !lower_inode->i_op->setxattr) {
@@ -424,9 +421,7 @@ static int ecryptfs_write_inode_size_to_xattr(struct inode *ecryptfs_inode)
xattr_virt, PAGE_CACHE_SIZE);
if (size < 0)
size = 8;
- file_size = (u64)i_size_read(ecryptfs_inode);
- file_size = cpu_to_be64(file_size);
- memcpy(xattr_virt, &file_size, sizeof(u64));
+ put_unaligned_be64(i_size_read(ecryptfs_inode), xattr_virt);
rc = lower_inode->i_op->setxattr(lower_dentry, ECRYPTFS_XATTR_NAME,
xattr_virt, size, 0);
mutex_unlock(&lower_inode->i_mutex);
diff --git a/fs/efs/super.c b/fs/efs/super.c
index d733531b55e..567b134fa1f 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -70,7 +70,7 @@ static void efs_destroy_inode(struct inode *inode)
kmem_cache_free(efs_inode_cachep, INODE_INFO(inode));
}
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
{
struct efs_inode_info *ei = (struct efs_inode_info *) foo;
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 343942deeec..08bf558d040 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -198,11 +198,18 @@ struct file *eventfd_fget(int fd)
return file;
}
-asmlinkage long sys_eventfd(unsigned int count)
+asmlinkage long sys_eventfd2(unsigned int count, int flags)
{
int fd;
struct eventfd_ctx *ctx;
+ /* Check the EFD_* constants for consistency. */
+ BUILD_BUG_ON(EFD_CLOEXEC != O_CLOEXEC);
+ BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK);
+
+ if (flags & ~(EFD_CLOEXEC | EFD_NONBLOCK))
+ return -EINVAL;
+
ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx)
return -ENOMEM;
@@ -214,9 +221,15 @@ asmlinkage long sys_eventfd(unsigned int count)
* When we call this, the initialization must be complete, since
* anon_inode_getfd() will install the fd.
*/
- fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx);
+ fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx,
+ flags & (O_CLOEXEC | O_NONBLOCK));
if (fd < 0)
kfree(ctx);
return fd;
}
+/* Flag-less entry point: forwards to sys_eventfd2() with flags set to 0 */
+asmlinkage long sys_eventfd(unsigned int count)
+{
+ return sys_eventfd2(count, 0);
+}
+
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 990c01d2d66..7cc0eb756b5 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1041,25 +1041,27 @@ retry:
}
/*
- * It opens an eventpoll file descriptor. The "size" parameter is there
- * for historical reasons, when epoll was using an hash instead of an
- * RB tree. With the current implementation, the "size" parameter is ignored
- * (besides sanity checks).
+ * Open an eventpoll file descriptor.
*/
-asmlinkage long sys_epoll_create(int size)
+asmlinkage long sys_epoll_create1(int flags)
{
int error, fd = -1;
struct eventpoll *ep;
+ /* Check the EPOLL_* constant for consistency. */
+ BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC);
+
+ if (flags & ~EPOLL_CLOEXEC)
+ return -EINVAL;
+
DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n",
- current, size));
+ current, flags));
/*
- * Sanity check on the size parameter, and create the internal data
- * structure ( "struct eventpoll" ).
+ * Create the internal data structure ( "struct eventpoll" ).
*/
- error = -EINVAL;
- if (size <= 0 || (error = ep_alloc(&ep)) < 0) {
+ error = ep_alloc(&ep);
+ if (error < 0) {
fd = error;
goto error_return;
}
@@ -1068,17 +1070,26 @@ asmlinkage long sys_epoll_create(int size)
* Creates all the items needed to setup an eventpoll file. That is,
* a file structure and a free file descriptor.
*/
- fd = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep);
+ fd = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep,
+ flags & O_CLOEXEC);
if (fd < 0)
ep_free(ep);
error_return:
DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
- current, size, fd));
+ current, flags, fd));
return fd;
}
+/*
+ * Legacy entry point. "size" no longer sizes anything, but a
+ * non-positive value is still rejected with -EINVAL, exactly as it was
+ * before sys_epoll_create1() was split out of this function.
+ */
+asmlinkage long sys_epoll_create(int size)
+{
+	if (size <= 0)
+		return -EINVAL;
+
+	return sys_epoll_create1(0);
+}
+
/*
* The following function implements the controller interface for
* the eventpoll file that enables the insertion/removal/change of
diff --git a/fs/exec.c b/fs/exec.c
index fd9234379e8..32993beecbe 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -25,10 +25,11 @@
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/fdtable.h>
-#include <linux/mman.h>
+#include <linux/mm.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/smp_lock.h>
+#include <linux/swap.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/pagemap.h>
@@ -37,20 +38,18 @@
#include <linux/key.h>
#include <linux/personality.h>
#include <linux/binfmts.h>
-#include <linux/swap.h>
#include <linux/utsname.h>
#include <linux/pid_namespace.h>
#include <linux/module.h>
#include <linux/namei.h>
#include <linux/proc_fs.h>
-#include <linux/ptrace.h>
#include <linux/mount.h>
#include <linux/security.h>
#include <linux/syscalls.h>
-#include <linux/rmap.h>
#include <linux/tsacct_kern.h>
#include <linux/cn_proc.h>
#include <linux/audit.h>
+#include <linux/tracehook.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
@@ -108,11 +107,17 @@ static inline void put_binfmt(struct linux_binfmt * fmt)
*/
asmlinkage long sys_uselib(const char __user * library)
{
- struct file * file;
+ struct file *file;
struct nameidata nd;
- int error;
-
- error = __user_path_lookup_open(library, LOOKUP_FOLLOW, &nd, FMODE_READ|FMODE_EXEC);
+ char *tmp = getname(library);
+ int error = PTR_ERR(tmp);
+
+ if (!IS_ERR(tmp)) {
+ error = path_lookup_open(AT_FDCWD, tmp,
+ LOOKUP_FOLLOW, &nd,
+ FMODE_READ|FMODE_EXEC);
+ putname(tmp);
+ }
if (error)
goto out;
@@ -120,7 +125,11 @@ asmlinkage long sys_uselib(const char __user * library)
if (!S_ISREG(nd.path.dentry->d_inode->i_mode))
goto exit;
- error = vfs_permission(&nd, MAY_READ | MAY_EXEC);
+ error = -EACCES;
+ if (nd.path.mnt->mnt_flags & MNT_NOEXEC)
+ goto exit;
+
+ error = vfs_permission(&nd, MAY_READ | MAY_EXEC | MAY_OPEN);
if (error)
goto exit;
@@ -541,7 +550,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
/*
* when the old and new regions overlap clear from new_end.
*/
- free_pgd_range(&tlb, new_end, old_end, new_end,
+ free_pgd_range(tlb, new_end, old_end, new_end,
vma->vm_next ? vma->vm_next->vm_start : 0);
} else {
/*
@@ -550,7 +559,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
* have constraints on va-space that make this illegal (IA64) -
* for the others its just a little faster.
*/
- free_pgd_range(&tlb, old_start, old_end, new_end,
+ free_pgd_range(tlb, old_start, old_end, new_end,
vma->vm_next ? vma->vm_next->vm_start : 0);
}
tlb_finish_mmu(tlb, new_end, old_end);
@@ -658,38 +667,43 @@ EXPORT_SYMBOL(setup_arg_pages);
struct file *open_exec(const char *name)
{
struct nameidata nd;
- int err;
struct file *file;
+ int err;
- err = path_lookup_open(AT_FDCWD, name, LOOKUP_FOLLOW, &nd, FMODE_READ|FMODE_EXEC);
- file = ERR_PTR(err);
-
- if (!err) {
- struct inode *inode = nd.path.dentry->d_inode;
- file = ERR_PTR(-EACCES);
- if (S_ISREG(inode->i_mode)) {
- int err = vfs_permission(&nd, MAY_EXEC);
- file = ERR_PTR(err);
- if (!err) {
- file = nameidata_to_filp(&nd,
- O_RDONLY|O_LARGEFILE);
- if (!IS_ERR(file)) {
- err = deny_write_access(file);
- if (err) {
- fput(file);
- file = ERR_PTR(err);
- }
- }
-out:
- return file;
- }
- }
- release_open_intent(&nd);
- path_put(&nd.path);
+ err = path_lookup_open(AT_FDCWD, name, LOOKUP_FOLLOW, &nd,
+ FMODE_READ|FMODE_EXEC);
+ if (err)
+ goto out;
+
+ err = -EACCES;
+ if (!S_ISREG(nd.path.dentry->d_inode->i_mode))
+ goto out_path_put;
+
+ if (nd.path.mnt->mnt_flags & MNT_NOEXEC)
+ goto out_path_put;
+
+ err = vfs_permission(&nd, MAY_EXEC | MAY_OPEN);
+ if (err)
+ goto out_path_put;
+
+ file = nameidata_to_filp(&nd, O_RDONLY|O_LARGEFILE);
+ if (IS_ERR(file))
+ return file;
+
+ err = deny_write_access(file);
+ if (err) {
+ fput(file);
+ goto out;
}
- goto out;
-}
+ return file;
+
+ out_path_put:
+ release_open_intent(&nd);
+ path_put(&nd.path);
+ out:
+ return ERR_PTR(err);
+}
EXPORT_SYMBOL(open_exec);
int kernel_read(struct file *file, unsigned long offset,
@@ -724,12 +738,10 @@ static int exec_mmap(struct mm_struct *mm)
* Make sure that if there is a core dump in progress
* for the old mm, we get out and die instead of going
* through with the exec. We must hold mmap_sem around
- * checking core_waiters and changing tsk->mm. The
- * core-inducing thread will increment core_waiters for
- * each thread whose ->mm == old_mm.
+ * checking core_state and changing tsk->mm.
*/
down_read(&old_mm->mmap_sem);
- if (unlikely(old_mm->core_waiters)) {
+ if (unlikely(old_mm->core_state)) {
up_read(&old_mm->mmap_sem);
return -EINTR;
}
@@ -1075,13 +1087,8 @@ EXPORT_SYMBOL(prepare_binprm);
static int unsafe_exec(struct task_struct *p)
{
- int unsafe = 0;
- if (p->ptrace & PT_PTRACED) {
- if (p->ptrace & PT_PTRACE_CAP)
- unsafe |= LSM_UNSAFE_PTRACE_CAP;
- else
- unsafe |= LSM_UNSAFE_PTRACE;
- }
+ int unsafe = tracehook_unsafe_exec(p);
+
if (atomic_read(&p->fs->count) > 1 ||
atomic_read(&p->files->count) > 1 ||
atomic_read(&p->sighand->count) > 1)
@@ -1218,6 +1225,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
read_unlock(&binfmt_lock);
retval = fn(bprm, regs);
if (retval >= 0) {
+ tracehook_report_exec(fmt, bprm, regs);
put_binfmt(fmt);
allow_write_access(bprm->file);
if (bprm->file)
@@ -1328,6 +1336,7 @@ int do_execve(char * filename,
if (retval < 0)
goto out;
+ current->flags &= ~PF_KTHREAD;
retval = search_binary_handler(bprm,regs);
if (retval >= 0) {
/* execve success */
@@ -1382,17 +1391,14 @@ EXPORT_SYMBOL(set_binfmt);
* name into corename, which must have space for at least
* CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
*/
-static int format_corename(char *corename, const char *pattern, long signr)
+static int format_corename(char *corename, int nr_threads, long signr)
{
- const char *pat_ptr = pattern;
+ const char *pat_ptr = core_pattern;
+ int ispipe = (*pat_ptr == '|');
char *out_ptr = corename;
char *const out_end = corename + CORENAME_MAX_SIZE;
int rc;
int pid_in_pattern = 0;
- int ispipe = 0;
-
- if (*pattern == '|')
- ispipe = 1;
/* Repeat as long as we have more pattern to process and more output
space */
@@ -1493,7 +1499,7 @@ static int format_corename(char *corename, const char *pattern, long signr)
* and core_uses_pid is set, then .%pid will be appended to
* the filename. Do not do this for piped commands. */
if (!ispipe && !pid_in_pattern
- && (core_uses_pid || atomic_read(&current->mm->mm_users) != 1)) {
+ && (core_uses_pid || nr_threads)) {
rc = snprintf(out_ptr, out_end - out_ptr,
".%d", task_tgid_vnr(current));
if (rc > out_end - out_ptr)
@@ -1505,9 +1511,10 @@ out:
return ispipe;
}
-static void zap_process(struct task_struct *start)
+static int zap_process(struct task_struct *start)
{
struct task_struct *t;
+ int nr = 0;
start->signal->flags = SIGNAL_GROUP_EXIT;
start->signal->group_stop_count = 0;
@@ -1515,72 +1522,99 @@ static void zap_process(struct task_struct *start)
t = start;
do {
if (t != current && t->mm) {
- t->mm->core_waiters++;
sigaddset(&t->pending.signal, SIGKILL);
signal_wake_up(t, 1);
+ nr++;
}
- } while ((t = next_thread(t)) != start);
+ } while_each_thread(start, t);
+
+ return nr;
}
static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
- int exit_code)
+ struct core_state *core_state, int exit_code)
{
struct task_struct *g, *p;
unsigned long flags;
- int err = -EAGAIN;
+ int nr = -EAGAIN;
spin_lock_irq(&tsk->sighand->siglock);
if (!signal_group_exit(tsk->signal)) {
+ mm->core_state = core_state;
tsk->signal->group_exit_code = exit_code;
- zap_process(tsk);
- err = 0;
+ nr = zap_process(tsk);
}
spin_unlock_irq(&tsk->sighand->siglock);
- if (err)
- return err;
+ if (unlikely(nr < 0))
+ return nr;
- if (atomic_read(&mm->mm_users) == mm->core_waiters + 1)
+ if (atomic_read(&mm->mm_users) == nr + 1)
goto done;
-
+ /*
+ * We should find and kill all tasks which use this mm, and we should
+ * count them correctly into ->nr_threads. We don't take tasklist
+ * lock, but this is safe wrt:
+ *
+ * fork:
+ * None of sub-threads can fork after zap_process(leader). All
+ * processes which were created before this point should be
+ * visible to zap_threads() because copy_process() adds the new
+ * process to the tail of init_task.tasks list, and lock/unlock
+ * of ->siglock provides a memory barrier.
+ *
+ * do_exit:
+ * The caller holds mm->mmap_sem. This means that the task which
+ * uses this mm can't pass exit_mm(), so it can't exit or clear
+ * its ->mm.
+ *
+ * de_thread:
+ * It does list_replace_rcu(&leader->tasks, &current->tasks),
+ * we must see either old or new leader, this does not matter.
+ * However, it can change p->sighand, so lock_task_sighand(p)
+ * must be used. Since p->mm != NULL and we hold ->mmap_sem
+ * it can't fail.
+ *
+ * Note also that "g" can be the old leader with ->mm == NULL
+ * and already unhashed and thus removed from ->thread_group.
+ * This is OK, __unhash_process()->list_del_rcu() does not
+ * clear the ->next pointer, we will find the new leader via
+ * next_thread().
+ */
rcu_read_lock();
for_each_process(g) {
if (g == tsk->group_leader)
continue;
-
+ if (g->flags & PF_KTHREAD)
+ continue;
p = g;
do {
if (p->mm) {
- if (p->mm == mm) {
- /*
- * p->sighand can't disappear, but
- * may be changed by de_thread()
- */
+ if (unlikely(p->mm == mm)) {
lock_task_sighand(p, &flags);
- zap_process(p);
+ nr += zap_process(p);
unlock_task_sighand(p, &flags);
}
break;
}
- } while ((p = next_thread(p)) != g);
+ } while_each_thread(g, p);
}
rcu_read_unlock();
done:
- return mm->core_waiters;
+ atomic_set(&core_state->nr_threads, nr);
+ return nr;
}
-static int coredump_wait(int exit_code)
+static int coredump_wait(int exit_code, struct core_state *core_state)
{
struct task_struct *tsk = current;
struct mm_struct *mm = tsk->mm;
- struct completion startup_done;
struct completion *vfork_done;
int core_waiters;
- init_completion(&mm->core_done);
- init_completion(&startup_done);
- mm->core_startup_done = &startup_done;
-
- core_waiters = zap_threads(tsk, mm, exit_code);
+ init_completion(&core_state->startup);
+ core_state->dumper.task = tsk;
+ core_state->dumper.next = NULL;
+ core_waiters = zap_threads(tsk, mm, core_state, exit_code);
up_write(&mm->mmap_sem);
if (unlikely(core_waiters < 0))
@@ -1597,12 +1631,32 @@ static int coredump_wait(int exit_code)
}
if (core_waiters)
- wait_for_completion(&startup_done);
+ wait_for_completion(&core_state->startup);
fail:
- BUG_ON(mm->core_waiters);
return core_waiters;
}
+static void coredump_finish(struct mm_struct *mm)
+{
+ struct core_thread *curr, *next;
+ struct task_struct *task;
+
+ next = mm->core_state->dumper.next;
+ while ((curr = next) != NULL) {
+ next = curr->next;
+ task = curr->task;
+ /*
+ * see exit_mm(), curr->task must not see
+ * ->task == NULL before we read ->next.
+ */
+ smp_mb();
+ curr->task = NULL;
+ wake_up_process(task);
+ }
+
+ mm->core_state = NULL;
+}
+
/*
* set_dumpable converts traditional three-value dumpable to two flags and
* stores them into mm->flags. It modifies lower two bits of mm->flags, but
@@ -1654,6 +1708,7 @@ int get_dumpable(struct mm_struct *mm)
int do_coredump(long signr, int exit_code, struct pt_regs * regs)
{
+ struct core_state core_state;
char corename[CORENAME_MAX_SIZE + 1];
struct mm_struct *mm = current->mm;
struct linux_binfmt * binfmt;
@@ -1677,7 +1732,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
/*
* If another thread got here first, or we are not dumpable, bail out.
*/
- if (mm->core_waiters || !get_dumpable(mm)) {
+ if (mm->core_state || !get_dumpable(mm)) {
up_write(&mm->mmap_sem);
goto fail;
}
@@ -1692,7 +1747,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
current->fsuid = 0; /* Dump root private */
}
- retval = coredump_wait(exit_code);
+ retval = coredump_wait(exit_code, &core_state);
if (retval < 0)
goto fail;
@@ -1707,7 +1762,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
* uses lock_kernel()
*/
lock_kernel();
- ispipe = format_corename(corename, core_pattern, signr);
+ ispipe = format_corename(corename, retval, signr);
unlock_kernel();
/*
* Don't bother to check the RLIMIT_CORE value if core_pattern points
@@ -1786,7 +1841,7 @@ fail_unlock:
argv_free(helper_argv);
current->fsuid = fsuid;
- complete_all(&mm->core_done);
+ coredump_finish(mm);
fail:
return retval;
}
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index e58669e1b87..ae8c4f850b2 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -294,7 +294,7 @@ ext2_check_acl(struct inode *inode, int mask)
}
int
-ext2_permission(struct inode *inode, int mask, struct nameidata *nd)
+ext2_permission(struct inode *inode, int mask)
{
return generic_permission(inode, mask, ext2_check_acl);
}
diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h
index 0bde85bafe3..b42cf578554 100644
--- a/fs/ext2/acl.h
+++ b/fs/ext2/acl.h
@@ -58,7 +58,7 @@ static inline int ext2_acl_count(size_t size)
#define EXT2_ACL_NOT_CACHED ((void *)-1)
/* acl.c */
-extern int ext2_permission (struct inode *, int, struct nameidata *);
+extern int ext2_permission (struct inode *, int);
extern int ext2_acl_chmod (struct inode *);
extern int ext2_init_acl (struct inode *, struct inode *);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 384fc0d1dd7..991d6dfeb51 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -791,6 +791,7 @@ const struct address_space_operations ext2_aops = {
.direct_IO = ext2_direct_IO,
.writepages = ext2_writepages,
.migratepage = buffer_migrate_page,
+ .is_partially_uptodate = block_is_partially_uptodate,
};
const struct address_space_operations ext2_aops_xip = {
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index ef50cbc792d..fd88c7b43e6 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -31,6 +31,7 @@
#include <linux/seq_file.h>
#include <linux/mount.h>
#include <linux/log2.h>
+#include <linux/quotaops.h>
#include <asm/uaccess.h>
#include "ext2.h"
#include "xattr.h"
@@ -158,7 +159,7 @@ static void ext2_destroy_inode(struct inode *inode)
kmem_cache_free(ext2_inode_cachep, EXT2_I(inode));
}
-static void init_once(struct kmem_cache * cachep, void *foo)
+static void init_once(void *foo)
{
struct ext2_inode_info *ei = (struct ext2_inode_info *) foo;
diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c
index eaa23d2d521..70c0dbdcdcb 100644
--- a/fs/ext2/xattr_security.c
+++ b/fs/ext2/xattr_security.c
@@ -14,7 +14,7 @@ static size_t
ext2_xattr_security_list(struct inode *inode, char *list, size_t list_size,
const char *name, size_t name_len)
{
- const int prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1;
+ const int prefix_len = XATTR_SECURITY_PREFIX_LEN;
const size_t total_len = prefix_len + name_len + 1;
if (list && total_len <= list_size) {
diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c
index 83ee149f353..e8219f8eae9 100644
--- a/fs/ext2/xattr_trusted.c
+++ b/fs/ext2/xattr_trusted.c
@@ -12,13 +12,11 @@
#include <linux/ext2_fs.h>
#include "xattr.h"
-#define XATTR_TRUSTED_PREFIX "trusted."
-
static size_t
ext2_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
const char *name, size_t name_len)
{
- const int prefix_len = sizeof(XATTR_TRUSTED_PREFIX)-1;
+ const int prefix_len = XATTR_TRUSTED_PREFIX_LEN;
const size_t total_len = prefix_len + name_len + 1;
if (!capable(CAP_SYS_ADMIN))
diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c
index f383e7c3a7b..92495d28c62 100644
--- a/fs/ext2/xattr_user.c
+++ b/fs/ext2/xattr_user.c
@@ -11,13 +11,11 @@
#include "ext2.h"
#include "xattr.h"
-#define XATTR_USER_PREFIX "user."
-
static size_t
ext2_xattr_user_list(struct inode *inode, char *list, size_t list_size,
const char *name, size_t name_len)
{
- const size_t prefix_len = sizeof(XATTR_USER_PREFIX)-1;
+ const size_t prefix_len = XATTR_USER_PREFIX_LEN;
const size_t total_len = prefix_len + name_len + 1;
if (!test_opt(inode->i_sb, XATTR_USER))
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index a754d184817..b60bb241880 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -299,7 +299,7 @@ ext3_check_acl(struct inode *inode, int mask)
}
int
-ext3_permission(struct inode *inode, int mask, struct nameidata *nd)
+ext3_permission(struct inode *inode, int mask)
{
return generic_permission(inode, mask, ext3_check_acl);
}
diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h
index 0d1e6279cbf..42da16b8cac 100644
--- a/fs/ext3/acl.h
+++ b/fs/ext3/acl.h
@@ -58,7 +58,7 @@ static inline int ext3_acl_count(size_t size)
#define EXT3_ACL_NOT_CACHED ((void *)-1)
/* acl.c */
-extern int ext3_permission (struct inode *, int, struct nameidata *);
+extern int ext3_permission (struct inode *, int);
extern int ext3_acl_chmod (struct inode *);
extern int ext3_init_acl (handle_t *, struct inode *, struct inode *);
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index 8ca3bfd7242..2eea96ec78e 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -272,7 +272,7 @@ static void free_rb_tree_fname(struct rb_root *root)
while (n) {
/* Do the node's children first */
- if ((n)->rb_left) {
+ if (n->rb_left) {
n = n->rb_left;
continue;
}
@@ -301,24 +301,18 @@ static void free_rb_tree_fname(struct rb_root *root)
parent->rb_right = NULL;
n = parent;
}
- root->rb_node = NULL;
}
-static struct dir_private_info *create_dir_info(loff_t pos)
+static struct dir_private_info *ext3_htree_create_dir_info(loff_t pos)
{
struct dir_private_info *p;
- p = kmalloc(sizeof(struct dir_private_info), GFP_KERNEL);
+ p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL);
if (!p)
return NULL;
- p->root.rb_node = NULL;
- p->curr_node = NULL;
- p->extra_fname = NULL;
- p->last_pos = 0;
p->curr_hash = pos2maj_hash(pos);
p->curr_minor_hash = pos2min_hash(pos);
- p->next_hash = 0;
return p;
}
@@ -433,7 +427,7 @@ static int ext3_dx_readdir(struct file * filp,
int ret;
if (!info) {
- info = create_dir_info(filp->f_pos);
+ info = ext3_htree_create_dir_info(filp->f_pos);
if (!info)
return -ENOMEM;
filp->private_data = info;
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 77126821b2e..47b678d73e7 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -669,6 +669,14 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino)
if (IS_ERR(inode))
goto iget_failed;
+ /*
+ * If the orphans has i_nlinks > 0 then it should be able to be
+ * truncated, otherwise it won't be removed from the orphan list
+ * during processing and an infinite loop will result.
+ */
+ if (inode->i_nlink && !ext3_can_truncate(inode))
+ goto bad_orphan;
+
if (NEXT_ORPHAN(inode) > max_ino)
goto bad_orphan;
brelse(bitmap_bh);
@@ -690,6 +698,7 @@ bad_orphan:
printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n",
NEXT_ORPHAN(inode));
printk(KERN_NOTICE "max_ino=%lu\n", max_ino);
+ printk(KERN_NOTICE "i_nlink=%u\n", inode->i_nlink);
/* Avoid freeing blocks if we got a bad deleted inode */
if (inode->i_nlink == 0)
inode->i_blocks = 0;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 6ae4ecf3ce4..507d8689b11 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1767,44 +1767,47 @@ static int ext3_journalled_set_page_dirty(struct page *page)
}
static const struct address_space_operations ext3_ordered_aops = {
- .readpage = ext3_readpage,
- .readpages = ext3_readpages,
- .writepage = ext3_ordered_writepage,
- .sync_page = block_sync_page,
- .write_begin = ext3_write_begin,
- .write_end = ext3_ordered_write_end,
- .bmap = ext3_bmap,
- .invalidatepage = ext3_invalidatepage,
- .releasepage = ext3_releasepage,
- .direct_IO = ext3_direct_IO,
- .migratepage = buffer_migrate_page,
+ .readpage = ext3_readpage,
+ .readpages = ext3_readpages,
+ .writepage = ext3_ordered_writepage,
+ .sync_page = block_sync_page,
+ .write_begin = ext3_write_begin,
+ .write_end = ext3_ordered_write_end,
+ .bmap = ext3_bmap,
+ .invalidatepage = ext3_invalidatepage,
+ .releasepage = ext3_releasepage,
+ .direct_IO = ext3_direct_IO,
+ .migratepage = buffer_migrate_page,
+ .is_partially_uptodate = block_is_partially_uptodate,
};
static const struct address_space_operations ext3_writeback_aops = {
- .readpage = ext3_readpage,
- .readpages = ext3_readpages,
- .writepage = ext3_writeback_writepage,
- .sync_page = block_sync_page,
- .write_begin = ext3_write_begin,
- .write_end = ext3_writeback_write_end,
- .bmap = ext3_bmap,
- .invalidatepage = ext3_invalidatepage,
- .releasepage = ext3_releasepage,
- .direct_IO = ext3_direct_IO,
- .migratepage = buffer_migrate_page,
+ .readpage = ext3_readpage,
+ .readpages = ext3_readpages,
+ .writepage = ext3_writeback_writepage,
+ .sync_page = block_sync_page,
+ .write_begin = ext3_write_begin,
+ .write_end = ext3_writeback_write_end,
+ .bmap = ext3_bmap,
+ .invalidatepage = ext3_invalidatepage,
+ .releasepage = ext3_releasepage,
+ .direct_IO = ext3_direct_IO,
+ .migratepage = buffer_migrate_page,
+ .is_partially_uptodate = block_is_partially_uptodate,
};
static const struct address_space_operations ext3_journalled_aops = {
- .readpage = ext3_readpage,
- .readpages = ext3_readpages,
- .writepage = ext3_journalled_writepage,
- .sync_page = block_sync_page,
- .write_begin = ext3_write_begin,
- .write_end = ext3_journalled_write_end,
- .set_page_dirty = ext3_journalled_set_page_dirty,
- .bmap = ext3_bmap,
- .invalidatepage = ext3_invalidatepage,
- .releasepage = ext3_releasepage,
+ .readpage = ext3_readpage,
+ .readpages = ext3_readpages,
+ .writepage = ext3_journalled_writepage,
+ .sync_page = block_sync_page,
+ .write_begin = ext3_write_begin,
+ .write_end = ext3_journalled_write_end,
+ .set_page_dirty = ext3_journalled_set_page_dirty,
+ .bmap = ext3_bmap,
+ .invalidatepage = ext3_invalidatepage,
+ .releasepage = ext3_releasepage,
+ .is_partially_uptodate = block_is_partially_uptodate,
};
void ext3_set_aops(struct inode *inode)
@@ -2127,7 +2130,21 @@ static void ext3_free_data(handle_t *handle, struct inode *inode,
if (this_bh) {
BUFFER_TRACE(this_bh, "call ext3_journal_dirty_metadata");
- ext3_journal_dirty_metadata(handle, this_bh);
+
+ /*
+ * The buffer head should have an attached journal head at this
+ * point. However, if the data is corrupted and an indirect
+ * block pointed to itself, it would have been detached when
+ * the block was cleared. Check for this instead of OOPSing.
+ */
+ if (bh2jh(this_bh))
+ ext3_journal_dirty_metadata(handle, this_bh);
+ else
+ ext3_error(inode->i_sb, "ext3_free_data",
+ "circular indirect block detected, "
+ "inode=%lu, block=%llu",
+ inode->i_ino,
+ (unsigned long long)this_bh->b_blocknr);
}
}
@@ -2253,6 +2270,19 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
}
}
+int ext3_can_truncate(struct inode *inode)
+{
+ if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
+ return 0;
+ if (S_ISREG(inode->i_mode))
+ return 1;
+ if (S_ISDIR(inode->i_mode))
+ return 1;
+ if (S_ISLNK(inode->i_mode))
+ return !ext3_inode_is_fast_symlink(inode);
+ return 0;
+}
+
/*
* ext3_truncate()
*
@@ -2297,12 +2327,7 @@ void ext3_truncate(struct inode *inode)
unsigned blocksize = inode->i_sb->s_blocksize;
struct page *page;
- if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
- S_ISLNK(inode->i_mode)))
- return;
- if (ext3_inode_is_fast_symlink(inode))
- return;
- if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
+ if (!ext3_can_truncate(inode))
return;
/*
@@ -2513,6 +2538,16 @@ static int __ext3_get_inode_loc(struct inode *inode,
}
if (!buffer_uptodate(bh)) {
lock_buffer(bh);
+
+ /*
+ * If the buffer has the write error flag, we have failed
+ * to write out another inode in the same block. In this
+ * case, we don't have to read the block because we may
+ * read the old inode data successfully.
+ */
+ if (buffer_write_io_error(bh) && !buffer_uptodate(bh))
+ set_buffer_uptodate(bh);
+
if (buffer_uptodate(bh)) {
/* someone brought it uptodate while we waited */
unlock_buffer(bh);
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 0b8cf80154f..de13e919cd8 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -240,13 +240,13 @@ static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize)
{
unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(1) -
EXT3_DIR_REC_LEN(2) - infosize;
- return 0? 20: entry_space / sizeof(struct dx_entry);
+ return entry_space / sizeof(struct dx_entry);
}
static inline unsigned dx_node_limit (struct inode *dir)
{
unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0);
- return 0? 22: entry_space / sizeof(struct dx_entry);
+ return entry_space / sizeof(struct dx_entry);
}
/*
@@ -991,19 +991,21 @@ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
de = (struct ext3_dir_entry_2 *) bh->b_data;
top = (struct ext3_dir_entry_2 *) ((char *) de + sb->s_blocksize -
EXT3_DIR_REC_LEN(0));
- for (; de < top; de = ext3_next_entry(de))
- if (ext3_match (namelen, name, de)) {
- if (!ext3_check_dir_entry("ext3_find_entry",
- dir, de, bh,
- (block<<EXT3_BLOCK_SIZE_BITS(sb))
- +((char *)de - bh->b_data))) {
- brelse (bh);
+ for (; de < top; de = ext3_next_entry(de)) {
+ int off = (block << EXT3_BLOCK_SIZE_BITS(sb))
+ + ((char *) de - bh->b_data);
+
+ if (!ext3_check_dir_entry(__func__, dir, de, bh, off)) {
+ brelse(bh);
*err = ERR_BAD_DX_DIR;
goto errout;
}
- *res_dir = de;
- dx_release (frames);
- return bh;
+
+ if (ext3_match(namelen, name, de)) {
+ *res_dir = de;
+ dx_release(frames);
+ return bh;
+ }
}
brelse (bh);
/* Check to see if we should continue to search */
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 2845425077e..f38a5afc39a 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -472,7 +472,7 @@ static void ext3_destroy_inode(struct inode *inode)
kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
}
-static void init_once(struct kmem_cache * cachep, void *foo)
+static void init_once(void *foo)
{
struct ext3_inode_info *ei = (struct ext3_inode_info *) foo;
@@ -842,7 +842,7 @@ static int parse_options (char *options, struct super_block *sb,
int data_opt = 0;
int option;
#ifdef CONFIG_QUOTA
- int qtype;
+ int qtype, qfmt;
char *qname;
#endif
@@ -1018,9 +1018,11 @@ static int parse_options (char *options, struct super_block *sb,
case Opt_grpjquota:
qtype = GRPQUOTA;
set_qf_name:
- if (sb_any_quota_enabled(sb)) {
+ if ((sb_any_quota_enabled(sb) ||
+ sb_any_quota_suspended(sb)) &&
+ !sbi->s_qf_names[qtype]) {
printk(KERN_ERR
- "EXT3-fs: Cannot change journalled "
+ "EXT3-fs: Cannot change journaled "
"quota options when quota turned on.\n");
return 0;
}
@@ -1056,9 +1058,11 @@ set_qf_name:
case Opt_offgrpjquota:
qtype = GRPQUOTA;
clear_qf_name:
- if (sb_any_quota_enabled(sb)) {
+ if ((sb_any_quota_enabled(sb) ||
+ sb_any_quota_suspended(sb)) &&
+ sbi->s_qf_names[qtype]) {
printk(KERN_ERR "EXT3-fs: Cannot change "
- "journalled quota options when "
+ "journaled quota options when "
"quota turned on.\n");
return 0;
}
@@ -1069,10 +1073,20 @@ clear_qf_name:
sbi->s_qf_names[qtype] = NULL;
break;
case Opt_jqfmt_vfsold:
- sbi->s_jquota_fmt = QFMT_VFS_OLD;
- break;
+ qfmt = QFMT_VFS_OLD;
+ goto set_qf_format;
case Opt_jqfmt_vfsv0:
- sbi->s_jquota_fmt = QFMT_VFS_V0;
+ qfmt = QFMT_VFS_V0;
+set_qf_format:
+ if ((sb_any_quota_enabled(sb) ||
+ sb_any_quota_suspended(sb)) &&
+ sbi->s_jquota_fmt != qfmt) {
+ printk(KERN_ERR "EXT3-fs: Cannot change "
+ "journaled quota options when "
+ "quota turned on.\n");
+ return 0;
+ }
+ sbi->s_jquota_fmt = qfmt;
break;
case Opt_quota:
case Opt_usrquota:
@@ -1084,7 +1098,8 @@ clear_qf_name:
set_opt(sbi->s_mount_opt, GRPQUOTA);
break;
case Opt_noquota:
- if (sb_any_quota_enabled(sb)) {
+ if (sb_any_quota_enabled(sb) ||
+ sb_any_quota_suspended(sb)) {
printk(KERN_ERR "EXT3-fs: Cannot change quota "
"options when quota turned on.\n");
return 0;
@@ -1169,14 +1184,14 @@ clear_qf_name:
}
if (!sbi->s_jquota_fmt) {
- printk(KERN_ERR "EXT3-fs: journalled quota format "
+ printk(KERN_ERR "EXT3-fs: journaled quota format "
"not specified.\n");
return 0;
}
} else {
if (sbi->s_jquota_fmt) {
- printk(KERN_ERR "EXT3-fs: journalled quota format "
- "specified with no journalling "
+ printk(KERN_ERR "EXT3-fs: journaled quota format "
+ "specified with no journaling "
"enabled.\n");
return 0;
}
@@ -1370,7 +1385,7 @@ static void ext3_orphan_cleanup (struct super_block * sb,
int ret = ext3_quota_on_mount(sb, i);
if (ret < 0)
printk(KERN_ERR
- "EXT3-fs: Cannot turn on journalled "
+ "EXT3-fs: Cannot turn on journaled "
"quota: error %d\n", ret);
}
}
@@ -2712,7 +2727,7 @@ static int ext3_release_dquot(struct dquot *dquot)
static int ext3_mark_dquot_dirty(struct dquot *dquot)
{
- /* Are we journalling quotas? */
+ /* Are we journaling quotas? */
if (EXT3_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
EXT3_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
dquot_mark_dquot_dirty(dquot);
@@ -2759,25 +2774,45 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id,
if (!test_opt(sb, QUOTA))
return -EINVAL;
- /* Not journalling quota or remount? */
- if ((!EXT3_SB(sb)->s_qf_names[USRQUOTA] &&
- !EXT3_SB(sb)->s_qf_names[GRPQUOTA]) || remount)
+ /* When remounting, no checks are needed and in fact, path is NULL */
+ if (remount)
return vfs_quota_on(sb, type, format_id, path, remount);
+
err = path_lookup(path, LOOKUP_FOLLOW, &nd);
if (err)
return err;
+
/* Quotafile not on the same filesystem? */
if (nd.path.mnt->mnt_sb != sb) {
path_put(&nd.path);
return -EXDEV;
}
- /* Quotafile not in fs root? */
- if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
- printk(KERN_WARNING
- "EXT3-fs: Quota file not on filesystem root. "
- "Journalled quota will not work.\n");
+ /* Journaling quota? */
+ if (EXT3_SB(sb)->s_qf_names[type]) {
+ /* Quotafile not of fs root? */
+ if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
+ printk(KERN_WARNING
+ "EXT3-fs: Quota file not on filesystem root. "
+ "Journaled quota will not work.\n");
+ }
+
+ /*
+ * When we journal data on quota file, we have to flush journal to see
+ * all updates to the file when we bypass pagecache...
+ */
+ if (ext3_should_journal_data(nd.path.dentry->d_inode)) {
+ /*
+ * We don't need to lock updates but journal_flush() could
+ * otherwise be livelocked...
+ */
+ journal_lock_updates(EXT3_SB(sb)->s_journal);
+ journal_flush(EXT3_SB(sb)->s_journal);
+ journal_unlock_updates(EXT3_SB(sb)->s_journal);
+ }
+
+ err = vfs_quota_on_path(sb, type, format_id, &nd.path);
path_put(&nd.path);
- return vfs_quota_on(sb, type, format_id, path, remount);
+ return err;
}
/* Read data from quotafile - avoid pagecache and such because we cannot afford
diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c
index 821efaf2b94..37b81097bdf 100644
--- a/fs/ext3/xattr_security.c
+++ b/fs/ext3/xattr_security.c
@@ -15,7 +15,7 @@ static size_t
ext3_xattr_security_list(struct inode *inode, char *list, size_t list_size,
const char *name, size_t name_len)
{
- const size_t prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1;
+ const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
const size_t total_len = prefix_len + name_len + 1;
diff --git a/fs/ext3/xattr_trusted.c b/fs/ext3/xattr_trusted.c
index 0327497a55c..c7c41a410c4 100644
--- a/fs/ext3/xattr_trusted.c
+++ b/fs/ext3/xattr_trusted.c
@@ -13,13 +13,11 @@
#include <linux/ext3_fs.h>
#include "xattr.h"
-#define XATTR_TRUSTED_PREFIX "trusted."
-
static size_t
ext3_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
const char *name, size_t name_len)
{
- const size_t prefix_len = sizeof(XATTR_TRUSTED_PREFIX)-1;
+ const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
const size_t total_len = prefix_len + name_len + 1;
if (!capable(CAP_SYS_ADMIN))
diff --git a/fs/ext3/xattr_user.c b/fs/ext3/xattr_user.c
index 1abd8f92c44..430fe63b31b 100644
--- a/fs/ext3/xattr_user.c
+++ b/fs/ext3/xattr_user.c
@@ -12,13 +12,11 @@
#include <linux/ext3_fs.h>
#include "xattr.h"
-#define XATTR_USER_PREFIX "user."
-
static size_t
ext3_xattr_user_list(struct inode *inode, char *list, size_t list_size,
const char *name, size_t name_len)
{
- const size_t prefix_len = sizeof(XATTR_USER_PREFIX)-1;
+ const size_t prefix_len = XATTR_USER_PREFIX_LEN;
const size_t total_len = prefix_len + name_len + 1;
if (!test_opt(inode->i_sb, XATTR_USER))
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 3c8dab880d9..694ed6fadcc 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -40,34 +40,35 @@ ext4_acl_from_disk(const void *value, size_t size)
acl = posix_acl_alloc(count, GFP_NOFS);
if (!acl)
return ERR_PTR(-ENOMEM);
- for (n=0; n < count; n++) {
+ for (n = 0; n < count; n++) {
ext4_acl_entry *entry =
(ext4_acl_entry *)value;
if ((char *)value + sizeof(ext4_acl_entry_short) > end)
goto fail;
acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag);
acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm);
- switch(acl->a_entries[n].e_tag) {
- case ACL_USER_OBJ:
- case ACL_GROUP_OBJ:
- case ACL_MASK:
- case ACL_OTHER:
- value = (char *)value +
- sizeof(ext4_acl_entry_short);
- acl->a_entries[n].e_id = ACL_UNDEFINED_ID;
- break;
-
- case ACL_USER:
- case ACL_GROUP:
- value = (char *)value + sizeof(ext4_acl_entry);
- if ((char *)value > end)
- goto fail;
- acl->a_entries[n].e_id =
- le32_to_cpu(entry->e_id);
- break;
-
- default:
+
+ switch (acl->a_entries[n].e_tag) {
+ case ACL_USER_OBJ:
+ case ACL_GROUP_OBJ:
+ case ACL_MASK:
+ case ACL_OTHER:
+ value = (char *)value +
+ sizeof(ext4_acl_entry_short);
+ acl->a_entries[n].e_id = ACL_UNDEFINED_ID;
+ break;
+
+ case ACL_USER:
+ case ACL_GROUP:
+ value = (char *)value + sizeof(ext4_acl_entry);
+ if ((char *)value > end)
goto fail;
+ acl->a_entries[n].e_id =
+ le32_to_cpu(entry->e_id);
+ break;
+
+ default:
+ goto fail;
}
}
if (value != end)
@@ -96,27 +97,26 @@ ext4_acl_to_disk(const struct posix_acl *acl, size_t *size)
return ERR_PTR(-ENOMEM);
ext_acl->a_version = cpu_to_le32(EXT4_ACL_VERSION);
e = (char *)ext_acl + sizeof(ext4_acl_header);
- for (n=0; n < acl->a_count; n++) {
+ for (n = 0; n < acl->a_count; n++) {
ext4_acl_entry *entry = (ext4_acl_entry *)e;
entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag);
entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm);
- switch(acl->a_entries[n].e_tag) {
- case ACL_USER:
- case ACL_GROUP:
- entry->e_id =
- cpu_to_le32(acl->a_entries[n].e_id);
- e += sizeof(ext4_acl_entry);
- break;
-
- case ACL_USER_OBJ:
- case ACL_GROUP_OBJ:
- case ACL_MASK:
- case ACL_OTHER:
- e += sizeof(ext4_acl_entry_short);
- break;
-
- default:
- goto fail;
+ switch (acl->a_entries[n].e_tag) {
+ case ACL_USER:
+ case ACL_GROUP:
+ entry->e_id = cpu_to_le32(acl->a_entries[n].e_id);
+ e += sizeof(ext4_acl_entry);
+ break;
+
+ case ACL_USER_OBJ:
+ case ACL_GROUP_OBJ:
+ case ACL_MASK:
+ case ACL_OTHER:
+ e += sizeof(ext4_acl_entry_short);
+ break;
+
+ default:
+ goto fail;
}
}
return (char *)ext_acl;
@@ -167,23 +167,23 @@ ext4_get_acl(struct inode *inode, int type)
if (!test_opt(inode->i_sb, POSIX_ACL))
return NULL;
- switch(type) {
- case ACL_TYPE_ACCESS:
- acl = ext4_iget_acl(inode, &ei->i_acl);
- if (acl != EXT4_ACL_NOT_CACHED)
- return acl;
- name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
- break;
-
- case ACL_TYPE_DEFAULT:
- acl = ext4_iget_acl(inode, &ei->i_default_acl);
- if (acl != EXT4_ACL_NOT_CACHED)
- return acl;
- name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT;
- break;
-
- default:
- return ERR_PTR(-EINVAL);
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ acl = ext4_iget_acl(inode, &ei->i_acl);
+ if (acl != EXT4_ACL_NOT_CACHED)
+ return acl;
+ name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
+ break;
+
+ case ACL_TYPE_DEFAULT:
+ acl = ext4_iget_acl(inode, &ei->i_default_acl);
+ if (acl != EXT4_ACL_NOT_CACHED)
+ return acl;
+ name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT;
+ break;
+
+ default:
+ return ERR_PTR(-EINVAL);
}
retval = ext4_xattr_get(inode, name_index, "", NULL, 0);
if (retval > 0) {
@@ -201,14 +201,14 @@ ext4_get_acl(struct inode *inode, int type)
kfree(value);
if (!IS_ERR(acl)) {
- switch(type) {
- case ACL_TYPE_ACCESS:
- ext4_iset_acl(inode, &ei->i_acl, acl);
- break;
-
- case ACL_TYPE_DEFAULT:
- ext4_iset_acl(inode, &ei->i_default_acl, acl);
- break;
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ ext4_iset_acl(inode, &ei->i_acl, acl);
+ break;
+
+ case ACL_TYPE_DEFAULT:
+ ext4_iset_acl(inode, &ei->i_default_acl, acl);
+ break;
}
}
return acl;
@@ -232,31 +232,31 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
if (S_ISLNK(inode->i_mode))
return -EOPNOTSUPP;
- switch(type) {
- case ACL_TYPE_ACCESS:
- name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
- if (acl) {
- mode_t mode = inode->i_mode;
- error = posix_acl_equiv_mode(acl, &mode);
- if (error < 0)
- return error;
- else {
- inode->i_mode = mode;
- ext4_mark_inode_dirty(handle, inode);
- if (error == 0)
- acl = NULL;
- }
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
+ if (acl) {
+ mode_t mode = inode->i_mode;
+ error = posix_acl_equiv_mode(acl, &mode);
+ if (error < 0)
+ return error;
+ else {
+ inode->i_mode = mode;
+ ext4_mark_inode_dirty(handle, inode);
+ if (error == 0)
+ acl = NULL;
}
- break;
+ }
+ break;
- case ACL_TYPE_DEFAULT:
- name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT;
- if (!S_ISDIR(inode->i_mode))
- return acl ? -EACCES : 0;
- break;
+ case ACL_TYPE_DEFAULT:
+ name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT;
+ if (!S_ISDIR(inode->i_mode))
+ return acl ? -EACCES : 0;
+ break;
- default:
- return -EINVAL;
+ default:
+ return -EINVAL;
}
if (acl) {
value = ext4_acl_to_disk(acl, &size);
@@ -269,14 +269,14 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
kfree(value);
if (!error) {
- switch(type) {
- case ACL_TYPE_ACCESS:
- ext4_iset_acl(inode, &ei->i_acl, acl);
- break;
-
- case ACL_TYPE_DEFAULT:
- ext4_iset_acl(inode, &ei->i_default_acl, acl);
- break;
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ ext4_iset_acl(inode, &ei->i_acl, acl);
+ break;
+
+ case ACL_TYPE_DEFAULT:
+ ext4_iset_acl(inode, &ei->i_default_acl, acl);
+ break;
}
}
return error;
@@ -299,7 +299,7 @@ ext4_check_acl(struct inode *inode, int mask)
}
int
-ext4_permission(struct inode *inode, int mask, struct nameidata *nd)
+ext4_permission(struct inode *inode, int mask)
{
return generic_permission(inode, mask, ext4_check_acl);
}
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h
index 26a5c1abf14..cd2b855a07d 100644
--- a/fs/ext4/acl.h
+++ b/fs/ext4/acl.h
@@ -58,7 +58,7 @@ static inline int ext4_acl_count(size_t size)
#define EXT4_ACL_NOT_CACHED ((void *)-1)
/* acl.c */
-extern int ext4_permission (struct inode *, int, struct nameidata *);
+extern int ext4_permission (struct inode *, int);
extern int ext4_acl_chmod (struct inode *);
extern int ext4_init_acl (handle_t *, struct inode *, struct inode *);
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 495ab21b983..1ae5004e93f 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -314,25 +314,28 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
if (unlikely(!bh)) {
ext4_error(sb, __func__,
"Cannot read block bitmap - "
- "block_group = %d, block_bitmap = %llu",
- (int)block_group, (unsigned long long)bitmap_blk);
+ "block_group = %lu, block_bitmap = %llu",
+ block_group, bitmap_blk);
return NULL;
}
if (bh_uptodate_or_lock(bh))
return bh;
+ spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
ext4_init_block_bitmap(sb, bh, block_group, desc);
set_buffer_uptodate(bh);
unlock_buffer(bh);
+ spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
return bh;
}
+ spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
if (bh_submit_read(bh) < 0) {
put_bh(bh);
ext4_error(sb, __func__,
"Cannot read block bitmap - "
- "block_group = %d, block_bitmap = %llu",
- (int)block_group, (unsigned long long)bitmap_blk);
+ "block_group = %lu, block_bitmap = %llu",
+ block_group, bitmap_blk);
return NULL;
}
ext4_valid_block_bitmap(sb, desc, block_group, bh);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 303e41cf7b1..6c7924d9e35 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1044,7 +1044,6 @@ extern void ext4_mb_update_group_info(struct ext4_group_info *grp,
/* inode.c */
-void ext4_da_release_space(struct inode *inode, int used, int to_free);
int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
struct buffer_head *bh, ext4_fsblk_t blocknr);
struct buffer_head *ext4_getblk(handle_t *, struct inode *,
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 42c4c0c892e..612c3d2c382 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -99,7 +99,7 @@ static int ext4_ext_journal_restart(handle_t *handle, int needed)
if (handle->h_buffer_credits > needed)
return 0;
err = ext4_journal_extend(handle, needed);
- if (err)
+ if (err <= 0)
return err;
return ext4_journal_restart(handle, needed);
}
@@ -1441,7 +1441,7 @@ unsigned int ext4_ext_check_overlap(struct inode *inode,
/*
* get the next allocated block if the extent in the path
- * is before the requested block(s)
+ * is before the requested block(s)
*/
if (b2 < b1) {
b2 = ext4_ext_next_allocated_block(path);
@@ -1910,9 +1910,13 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
BUG_ON(b != ex_ee_block + ex_ee_len - 1);
}
- /* at present, extent can't cross block group: */
- /* leaf + bitmap + group desc + sb + inode */
- credits = 5;
+ /*
+ * 3 for leaf, sb, and inode plus 2 (bmap and group
+ * descriptor) for each block group; assume two block
+ * groups plus ex_ee_len/blocks_per_block_group for
+ * the worst case
+ */
+ credits = 7 + 2*(ex_ee_len/EXT4_BLOCKS_PER_GROUP(inode->i_sb));
if (ex == EXT_FIRST_EXTENT(eh)) {
correct_index = 1;
credits += (ext_depth(inode)) + 1;
@@ -2323,7 +2327,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
unsigned int newdepth;
/* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */
if (allocated <= EXT4_EXT_ZERO_LEN) {
- /* Mark first half uninitialized.
+ /*
+ * iblock == ee_block is handled by the zeroout
+ * at the beginning.
+ * Mark first half uninitialized.
* Mark second half initialized and zero out the
* initialized extent
*/
@@ -2346,7 +2353,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
ex->ee_len = orig_ex.ee_len;
ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
ext4_ext_dirty(handle, inode, path + depth);
- /* zeroed the full extent */
+ /* blocks available from iblock */
return allocated;
} else if (err)
@@ -2374,6 +2381,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
err = PTR_ERR(path);
return err;
}
+ /* get the second half extent details */
ex = path[depth].p_ext;
err = ext4_ext_get_access(handle, inode,
path + depth);
@@ -2403,6 +2411,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
ext4_ext_dirty(handle, inode, path + depth);
/* zeroed the full extent */
+ /* blocks available from iblock */
return allocated;
} else if (err)
@@ -2418,23 +2427,22 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
*/
orig_ex.ee_len = cpu_to_le16(ee_len -
ext4_ext_get_actual_len(ex3));
- if (newdepth != depth) {
- depth = newdepth;
- ext4_ext_drop_refs(path);
- path = ext4_ext_find_extent(inode, iblock, path);
- if (IS_ERR(path)) {
- err = PTR_ERR(path);
- goto out;
- }
- eh = path[depth].p_hdr;
- ex = path[depth].p_ext;
- if (ex2 != &newex)
- ex2 = ex;
-
- err = ext4_ext_get_access(handle, inode, path + depth);
- if (err)
- goto out;
+ depth = newdepth;
+ ext4_ext_drop_refs(path);
+ path = ext4_ext_find_extent(inode, iblock, path);
+ if (IS_ERR(path)) {
+ err = PTR_ERR(path);
+ goto out;
}
+ eh = path[depth].p_hdr;
+ ex = path[depth].p_ext;
+ if (ex2 != &newex)
+ ex2 = ex;
+
+ err = ext4_ext_get_access(handle, inode, path + depth);
+ if (err)
+ goto out;
+
allocated = max_blocks;
/* If extent has less than EXT4_EXT_ZERO_LEN and we are trying
@@ -2452,6 +2460,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
ext4_ext_dirty(handle, inode, path + depth);
/* zero out the first half */
+ /* blocks available from iblock */
return allocated;
}
}
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index a92eb305344..655e760212b 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -97,34 +97,44 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
* Return buffer_head of bitmap on success or NULL.
*/
static struct buffer_head *
-read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
+ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
{
struct ext4_group_desc *desc;
struct buffer_head *bh = NULL;
+ ext4_fsblk_t bitmap_blk;
desc = ext4_get_group_desc(sb, block_group, NULL);
if (!desc)
- goto error_out;
- if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
- bh = sb_getblk(sb, ext4_inode_bitmap(sb, desc));
- if (!buffer_uptodate(bh)) {
- lock_buffer(bh);
- if (!buffer_uptodate(bh)) {
- ext4_init_inode_bitmap(sb, bh, block_group,
- desc);
- set_buffer_uptodate(bh);
- }
- unlock_buffer(bh);
- }
- } else {
- bh = sb_bread(sb, ext4_inode_bitmap(sb, desc));
+ return NULL;
+ bitmap_blk = ext4_inode_bitmap(sb, desc);
+ bh = sb_getblk(sb, bitmap_blk);
+ if (unlikely(!bh)) {
+ ext4_error(sb, __func__,
+ "Cannot read inode bitmap - "
+ "block_group = %lu, inode_bitmap = %llu",
+ block_group, bitmap_blk);
+ return NULL;
}
- if (!bh)
- ext4_error(sb, "read_inode_bitmap",
+ if (bh_uptodate_or_lock(bh))
+ return bh;
+
+ spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
+ if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
+ ext4_init_inode_bitmap(sb, bh, block_group, desc);
+ set_buffer_uptodate(bh);
+ unlock_buffer(bh);
+ spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
+ return bh;
+ }
+ spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
+ if (bh_submit_read(bh) < 0) {
+ put_bh(bh);
+ ext4_error(sb, __func__,
"Cannot read inode bitmap - "
"block_group = %lu, inode_bitmap = %llu",
- block_group, ext4_inode_bitmap(sb, desc));
-error_out:
+ block_group, bitmap_blk);
+ return NULL;
+ }
return bh;
}
@@ -200,7 +210,7 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
}
block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
- bitmap_bh = read_inode_bitmap(sb, block_group);
+ bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
if (!bitmap_bh)
goto error_return;
@@ -623,7 +633,7 @@ got_group:
goto fail;
brelse(bitmap_bh);
- bitmap_bh = read_inode_bitmap(sb, group);
+ bitmap_bh = ext4_read_inode_bitmap(sb, group);
if (!bitmap_bh)
goto fail;
@@ -728,7 +738,7 @@ got:
/* When marking the block group with
* ~EXT4_BG_INODE_UNINIT we don't want to depend
- * on the value of bg_itable_unsed even though
+ * on the value of bg_itable_unused even though
* mke2fs could have initialized the same for us.
* Instead we calculated the value below
*/
@@ -891,7 +901,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
- bitmap_bh = read_inode_bitmap(sb, block_group);
+ bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
if (!bitmap_bh) {
ext4_warning(sb, __func__,
"inode bitmap error for orphan %lu", ino);
@@ -969,7 +979,7 @@ unsigned long ext4_count_free_inodes (struct super_block * sb)
continue;
desc_count += le16_to_cpu(gdp->bg_free_inodes_count);
brelse(bitmap_bh);
- bitmap_bh = read_inode_bitmap(sb, i);
+ bitmap_bh = ext4_read_inode_bitmap(sb, i);
if (!bitmap_bh)
continue;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 8ca2763df09..59fbbe899ac 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -191,6 +191,7 @@ static int ext4_journal_test_restart(handle_t *handle, struct inode *inode)
void ext4_delete_inode (struct inode * inode)
{
handle_t *handle;
+ int err;
if (ext4_should_order_data(inode))
ext4_begin_ordered_truncate(inode, 0);
@@ -199,8 +200,9 @@ void ext4_delete_inode (struct inode * inode)
if (is_bad_inode(inode))
goto no_delete;
- handle = start_transaction(inode);
+ handle = ext4_journal_start(inode, blocks_for_truncate(inode)+3);
if (IS_ERR(handle)) {
+ ext4_std_error(inode->i_sb, PTR_ERR(handle));
/*
* If we're going to skip the normal cleanup, we still need to
* make sure that the in-core orphan linked list is properly
@@ -213,8 +215,34 @@ void ext4_delete_inode (struct inode * inode)
if (IS_SYNC(inode))
handle->h_sync = 1;
inode->i_size = 0;
+ err = ext4_mark_inode_dirty(handle, inode);
+ if (err) {
+ ext4_warning(inode->i_sb, __func__,
+ "couldn't mark inode dirty (err %d)", err);
+ goto stop_handle;
+ }
if (inode->i_blocks)
ext4_truncate(inode);
+
+ /*
+ * ext4_ext_truncate() doesn't reserve any slop when it
+ * restarts journal transactions; therefore there may not be
+ * enough credits left in the handle to remove the inode from
+ * the orphan list and set the dtime field.
+ */
+ if (handle->h_buffer_credits < 3) {
+ err = ext4_journal_extend(handle, 3);
+ if (err > 0)
+ err = ext4_journal_restart(handle, 3);
+ if (err != 0) {
+ ext4_warning(inode->i_sb, __func__,
+ "couldn't extend journal (err %d)", err);
+ stop_handle:
+ ext4_journal_stop(handle);
+ goto no_delete;
+ }
+ }
+
/*
* Kill off the orphan record which ext4_truncate created.
* AKPM: I think this can be inside the above `if'.
@@ -952,6 +980,67 @@ out:
return err;
}
+/*
+ * Calculate the number of metadata blocks that need to be reserved
+ * in order to allocate @blocks blocks for a non-extent based file.
+ *
+ * @inode: file being written; only inode->i_sb is consulted here
+ * @blocks: number of blocks about to be allocated
+ *
+ * Returns ind_blks + dind_blks + tind_blks as computed below, where
+ * icap is EXT4_ADDR_PER_BLOCK() of the inode's super block.
+ */
+static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks)
+{
+ int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb);
+ int ind_blks, dind_blks, tind_blks;
+
+ /* number of new indirect blocks needed: @blocks / icap, rounded up */
+ ind_blks = (blocks + icap - 1) / icap;
+
+ dind_blks = (ind_blks + icap - 1) / icap; /* ind_blks / icap, rounded up */
+
+ tind_blks = 1; /* fixed allowance of one, independent of @blocks */
+
+ return ind_blks + dind_blks + tind_blks;
+}
+
+/*
+ * Calculate the number of metadata blocks that need to be reserved
+ * in order to allocate the given number of blocks.
+ *
+ * Inodes with EXT4_EXTENTS_FL set in i_flags are sized by
+ * ext4_ext_calc_metadata_amount(); every other inode is sized by
+ * ext4_indirect_calc_metadata_amount().
+ */
+static int ext4_calc_metadata_amount(struct inode *inode, int blocks)
+{
+ if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
+ return ext4_ext_calc_metadata_amount(inode, blocks);
+
+ return ext4_indirect_calc_metadata_amount(inode, blocks);
+}
+/*
+ * Shrink the per-inode block reservation of @inode by @used data
+ * blocks and re-size the metadata reservation to match what is left.
+ *
+ * @inode: inode whose reservation counters are updated
+ * @used: number of reserved data blocks to take off the reservation
+ *
+ * Everything runs under EXT4_I(inode)->i_block_reservation_lock.
+ * The metadata reservation is recomputed for the remaining data
+ * blocks; the difference to the old reservation, less the metadata
+ * blocks recorded in i_allocated_meta_blocks, is added to the super
+ * block's s_freeblocks_counter, and i_allocated_meta_blocks is reset.
+ */
+static void ext4_da_update_reserve_space(struct inode *inode, int used)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ int total, mdb, mdb_free;
+
+ spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
+ /*
+ * recalculate the number of metadata blocks that still need to be
+ * reserved.
+ * NOTE(review): total is computed before the BUG_ON() further down
+ * has checked that @used does not exceed i_reserved_data_blocks.
+ */
+ total = EXT4_I(inode)->i_reserved_data_blocks - used;
+ mdb = ext4_calc_metadata_amount(inode, total);
+
+ /* figure out how many metadata blocks to release */
+ BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
+ mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
+
+ /* Account for allocated meta_blocks */
+ mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
+
+ /* adjust the fs free blocks counter by the surplus reservation */
+ percpu_counter_add(&sbi->s_freeblocks_counter, mdb_free);
+
+ /* update per-inode reservations */
+ BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks);
+ EXT4_I(inode)->i_reserved_data_blocks -= used;
+
+ BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); /* same check as above; mdb is unchanged */
+ EXT4_I(inode)->i_reserved_meta_blocks = mdb;
+ EXT4_I(inode)->i_allocated_meta_blocks = 0;
+ spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+}
+
/* Maximum number of blocks we map for direct IO at once. */
#define DIO_MAX_BLOCKS 4096
/*
@@ -965,10 +1054,9 @@ out:
/*
+ * The ext4_get_blocks_wrap() function tries to look up the requested blocks,
+ * and returns if the blocks are already mapped.
*
- *
- * ext4_ext4 get_block() wrapper function
- * It will do a look up first, and returns if the blocks already mapped.
* Otherwise it takes the write lock of the i_data_sem and allocate blocks
* and store the allocated blocks in the result buffer head and mark it
* mapped.
@@ -1069,7 +1157,7 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
* which were deferred till now
*/
if ((retval > 0) && buffer_delay(bh))
- ext4_da_release_space(inode, retval, 0);
+ ext4_da_update_reserve_space(inode, retval);
}
up_write((&EXT4_I(inode)->i_data_sem));
@@ -1336,12 +1424,8 @@ static int ext4_ordered_write_end(struct file *file,
{
handle_t *handle = ext4_journal_current_handle();
struct inode *inode = mapping->host;
- unsigned from, to;
int ret = 0, ret2;
- from = pos & (PAGE_CACHE_SIZE - 1);
- to = from + len;
-
ret = ext4_jbd2_file_inode(handle, inode);
if (ret == 0) {
@@ -1437,36 +1521,6 @@ static int ext4_journalled_write_end(struct file *file,
return ret ? ret : copied;
}
-/*
- * Calculate the number of metadata blocks need to reserve
- * to allocate @blocks for non extent file based file
- */
-static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks)
-{
- int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb);
- int ind_blks, dind_blks, tind_blks;
-
- /* number of new indirect blocks needed */
- ind_blks = (blocks + icap - 1) / icap;
-
- dind_blks = (ind_blks + icap - 1) / icap;
-
- tind_blks = 1;
-
- return ind_blks + dind_blks + tind_blks;
-}
-
-/*
- * Calculate the number of metadata blocks need to reserve
- * to allocate given number of blocks
- */
-static int ext4_calc_metadata_amount(struct inode *inode, int blocks)
-{
- if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
- return ext4_ext_calc_metadata_amount(inode, blocks);
-
- return ext4_indirect_calc_metadata_amount(inode, blocks);
-}
static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
{
@@ -1490,7 +1544,6 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
return -ENOSPC;
}
-
/* reduce fs free blocks counter */
percpu_counter_sub(&sbi->s_freeblocks_counter, total);
@@ -1501,35 +1554,31 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
return 0; /* success */
}
-void ext4_da_release_space(struct inode *inode, int used, int to_free)
+static void ext4_da_release_space(struct inode *inode, int to_free)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
int total, mdb, mdb_free, release;
spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
/* recalculate the number of metablocks still need to be reserved */
- total = EXT4_I(inode)->i_reserved_data_blocks - used - to_free;
+ total = EXT4_I(inode)->i_reserved_data_blocks - to_free;
mdb = ext4_calc_metadata_amount(inode, total);
/* figure out how many metablocks to release */
BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
- /* Account for allocated meta_blocks */
- mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
-
release = to_free + mdb_free;
/* update fs free blocks counter for truncate case */
percpu_counter_add(&sbi->s_freeblocks_counter, release);
/* update per-inode reservations */
- BUG_ON(used + to_free > EXT4_I(inode)->i_reserved_data_blocks);
- EXT4_I(inode)->i_reserved_data_blocks -= (used + to_free);
+ BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks);
+ EXT4_I(inode)->i_reserved_data_blocks -= to_free;
BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
EXT4_I(inode)->i_reserved_meta_blocks = mdb;
- EXT4_I(inode)->i_allocated_meta_blocks = 0;
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
}
@@ -1551,7 +1600,7 @@ static void ext4_da_page_release_reservation(struct page *page,
}
curr_off = next_off;
} while ((bh = bh->b_this_page) != head);
- ext4_da_release_space(page->mapping->host, 0, to_release);
+ ext4_da_release_space(page->mapping->host, to_release);
}
/*
@@ -2280,8 +2329,11 @@ retry:
}
page = __grab_cache_page(mapping, index);
- if (!page)
- return -ENOMEM;
+ if (!page) {
+ ext4_journal_stop(handle);
+ ret = -ENOMEM;
+ goto out;
+ }
*pagep = page;
ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
@@ -2806,59 +2858,63 @@ static int ext4_journalled_set_page_dirty(struct page *page)
}
static const struct address_space_operations ext4_ordered_aops = {
- .readpage = ext4_readpage,
- .readpages = ext4_readpages,
- .writepage = ext4_normal_writepage,
- .sync_page = block_sync_page,
- .write_begin = ext4_write_begin,
- .write_end = ext4_ordered_write_end,
- .bmap = ext4_bmap,
- .invalidatepage = ext4_invalidatepage,
- .releasepage = ext4_releasepage,
- .direct_IO = ext4_direct_IO,
- .migratepage = buffer_migrate_page,
+ .readpage = ext4_readpage,
+ .readpages = ext4_readpages,
+ .writepage = ext4_normal_writepage,
+ .sync_page = block_sync_page,
+ .write_begin = ext4_write_begin,
+ .write_end = ext4_ordered_write_end,
+ .bmap = ext4_bmap,
+ .invalidatepage = ext4_invalidatepage,
+ .releasepage = ext4_releasepage,
+ .direct_IO = ext4_direct_IO,
+ .migratepage = buffer_migrate_page,
+ .is_partially_uptodate = block_is_partially_uptodate,
};
static const struct address_space_operations ext4_writeback_aops = {
- .readpage = ext4_readpage,
- .readpages = ext4_readpages,
- .writepage = ext4_normal_writepage,
- .sync_page = block_sync_page,
- .write_begin = ext4_write_begin,
- .write_end = ext4_writeback_write_end,
- .bmap = ext4_bmap,
- .invalidatepage = ext4_invalidatepage,
- .releasepage = ext4_releasepage,
- .direct_IO = ext4_direct_IO,
- .migratepage = buffer_migrate_page,
+ .readpage = ext4_readpage,
+ .readpages = ext4_readpages,
+ .writepage = ext4_normal_writepage,
+ .sync_page = block_sync_page,
+ .write_begin = ext4_write_begin,
+ .write_end = ext4_writeback_write_end,
+ .bmap = ext4_bmap,
+ .invalidatepage = ext4_invalidatepage,
+ .releasepage = ext4_releasepage,
+ .direct_IO = ext4_direct_IO,
+ .migratepage = buffer_migrate_page,
+ .is_partially_uptodate = block_is_partially_uptodate,
};
static const struct address_space_operations ext4_journalled_aops = {
- .readpage = ext4_readpage,
- .readpages = ext4_readpages,
- .writepage = ext4_journalled_writepage,
- .sync_page = block_sync_page,
- .write_begin = ext4_write_begin,
- .write_end = ext4_journalled_write_end,
- .set_page_dirty = ext4_journalled_set_page_dirty,
- .bmap = ext4_bmap,
- .invalidatepage = ext4_invalidatepage,
- .releasepage = ext4_releasepage,
+ .readpage = ext4_readpage,
+ .readpages = ext4_readpages,
+ .writepage = ext4_journalled_writepage,
+ .sync_page = block_sync_page,
+ .write_begin = ext4_write_begin,
+ .write_end = ext4_journalled_write_end,
+ .set_page_dirty = ext4_journalled_set_page_dirty,
+ .bmap = ext4_bmap,
+ .invalidatepage = ext4_invalidatepage,
+ .releasepage = ext4_releasepage,
+ .is_partially_uptodate = block_is_partially_uptodate,
};
static const struct address_space_operations ext4_da_aops = {
- .readpage = ext4_readpage,
- .readpages = ext4_readpages,
- .writepage = ext4_da_writepage,
- .writepages = ext4_da_writepages,
- .sync_page = block_sync_page,
- .write_begin = ext4_da_write_begin,
- .write_end = ext4_da_write_end,
- .bmap = ext4_bmap,
- .invalidatepage = ext4_da_invalidatepage,
- .releasepage = ext4_releasepage,
- .direct_IO = ext4_direct_IO,
- .migratepage = buffer_migrate_page,
+ .readpage = ext4_readpage,
+ .readpages = ext4_readpages,
+ .writepage = ext4_da_writepage,
+ .writepages = ext4_da_writepages,
+ .sync_page = block_sync_page,
+ .write_begin = ext4_da_write_begin,
+ .write_end = ext4_da_write_end,
+ .bmap = ext4_bmap,
+ .invalidatepage = ext4_da_invalidatepage,
+ .releasepage = ext4_releasepage,
+ .direct_IO = ext4_direct_IO,
+ .migratepage = buffer_migrate_page,
+ .is_partially_uptodate = block_is_partially_uptodate,
};
void ext4_set_aops(struct inode *inode)
@@ -3586,6 +3642,16 @@ static int __ext4_get_inode_loc(struct inode *inode,
}
if (!buffer_uptodate(bh)) {
lock_buffer(bh);
+
+ /*
+ * If the buffer has the write error flag, we have failed
+ * to write out another inode in the same block. In this
+ * case, we don't have to read the block because we may
+ * read the old inode data successfully.
+ */
+ if (buffer_write_io_error(bh) && !buffer_uptodate(bh))
+ set_buffer_uptodate(bh);
+
if (buffer_uptodate(bh)) {
/* someone brought it uptodate while we waited */
unlock_buffer(bh);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 8d141a25bbe..865e9ddb44d 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -787,13 +787,16 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
if (bh_uptodate_or_lock(bh[i]))
continue;
+ spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
ext4_init_block_bitmap(sb, bh[i],
first_group + i, desc);
set_buffer_uptodate(bh[i]);
unlock_buffer(bh[i]);
+ spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
continue;
}
+ spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
get_bh(bh[i]);
bh[i]->b_end_io = end_buffer_read_sync;
submit_bh(READ, bh[i]);
@@ -2477,7 +2480,7 @@ err_freesgi:
int ext4_mb_init(struct super_block *sb, int needs_recovery)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
- unsigned i;
+ unsigned i, j;
unsigned offset;
unsigned max;
int ret;
@@ -2537,7 +2540,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT;
sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC;
- i = sizeof(struct ext4_locality_group) * NR_CPUS;
+ i = sizeof(struct ext4_locality_group) * nr_cpu_ids;
sbi->s_locality_groups = kmalloc(i, GFP_KERNEL);
if (sbi->s_locality_groups == NULL) {
clear_opt(sbi->s_mount_opt, MBALLOC);
@@ -2545,11 +2548,12 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
kfree(sbi->s_mb_maxs);
return -ENOMEM;
}
- for (i = 0; i < NR_CPUS; i++) {
+ for (i = 0; i < nr_cpu_ids; i++) {
struct ext4_locality_group *lg;
lg = &sbi->s_locality_groups[i];
mutex_init(&lg->lg_mutex);
- INIT_LIST_HEAD(&lg->lg_prealloc_list);
+ for (j = 0; j < PREALLOC_TB_SIZE; j++)
+ INIT_LIST_HEAD(&lg->lg_prealloc_list[j]);
spin_lock_init(&lg->lg_prealloc_lock);
}
@@ -3260,6 +3264,7 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
struct ext4_prealloc_space *pa)
{
unsigned int len = ac->ac_o_ex.fe_len;
+
ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart,
&ac->ac_b_ex.fe_group,
&ac->ac_b_ex.fe_start);
@@ -3282,6 +3287,7 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
static noinline_for_stack int
ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
{
+ int order, i;
struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
struct ext4_locality_group *lg;
struct ext4_prealloc_space *pa;
@@ -3322,22 +3328,29 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
lg = ac->ac_lg;
if (lg == NULL)
return 0;
-
- rcu_read_lock();
- list_for_each_entry_rcu(pa, &lg->lg_prealloc_list, pa_inode_list) {
- spin_lock(&pa->pa_lock);
- if (pa->pa_deleted == 0 && pa->pa_free >= ac->ac_o_ex.fe_len) {
- atomic_inc(&pa->pa_count);
- ext4_mb_use_group_pa(ac, pa);
+ order = fls(ac->ac_o_ex.fe_len) - 1;
+ if (order > PREALLOC_TB_SIZE - 1)
+ /* The max size of hash table is PREALLOC_TB_SIZE */
+ order = PREALLOC_TB_SIZE - 1;
+
+ for (i = order; i < PREALLOC_TB_SIZE; i++) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i],
+ pa_inode_list) {
+ spin_lock(&pa->pa_lock);
+ if (pa->pa_deleted == 0 &&
+ pa->pa_free >= ac->ac_o_ex.fe_len) {
+ atomic_inc(&pa->pa_count);
+ ext4_mb_use_group_pa(ac, pa);
+ spin_unlock(&pa->pa_lock);
+ ac->ac_criteria = 20;
+ rcu_read_unlock();
+ return 1;
+ }
spin_unlock(&pa->pa_lock);
- ac->ac_criteria = 20;
- rcu_read_unlock();
- return 1;
}
- spin_unlock(&pa->pa_lock);
+ rcu_read_unlock();
}
- rcu_read_unlock();
-
return 0;
}
@@ -3560,6 +3573,7 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
pa->pa_free = pa->pa_len;
atomic_set(&pa->pa_count, 1);
spin_lock_init(&pa->pa_lock);
+ INIT_LIST_HEAD(&pa->pa_inode_list);
pa->pa_deleted = 0;
pa->pa_linear = 1;
@@ -3580,10 +3594,10 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
- spin_lock(pa->pa_obj_lock);
- list_add_tail_rcu(&pa->pa_inode_list, &lg->lg_prealloc_list);
- spin_unlock(pa->pa_obj_lock);
-
+ /*
+ * We will later add the new pa to the right bucket
+ * after updating the pa_free in ext4_mb_release_context
+ */
return 0;
}
@@ -3733,20 +3747,23 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
bitmap_bh = ext4_read_block_bitmap(sb, group);
if (bitmap_bh == NULL) {
- /* error handling here */
- ext4_mb_release_desc(&e4b);
- BUG_ON(bitmap_bh == NULL);
+ ext4_error(sb, __func__, "Error in reading block "
+ "bitmap for %lu\n", group);
+ return 0;
}
err = ext4_mb_load_buddy(sb, group, &e4b);
- BUG_ON(err != 0); /* error handling here */
+ if (err) {
+ ext4_error(sb, __func__, "Error in loading buddy "
+ "information for %lu\n", group);
+ put_bh(bitmap_bh);
+ return 0;
+ }
if (needed == 0)
needed = EXT4_BLOCKS_PER_GROUP(sb) + 1;
- grp = ext4_get_group_info(sb, group);
INIT_LIST_HEAD(&list);
-
ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
repeat:
ext4_lock_group(sb, group);
@@ -3903,13 +3920,18 @@ repeat:
ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
err = ext4_mb_load_buddy(sb, group, &e4b);
- BUG_ON(err != 0); /* error handling here */
+ if (err) {
+ ext4_error(sb, __func__, "Error in loading buddy "
+ "information for %lu\n", group);
+ continue;
+ }
bitmap_bh = ext4_read_block_bitmap(sb, group);
if (bitmap_bh == NULL) {
- /* error handling here */
+ ext4_error(sb, __func__, "Error in reading block "
+ "bitmap for %lu\n", group);
ext4_mb_release_desc(&e4b);
- BUG_ON(bitmap_bh == NULL);
+ continue;
}
ext4_lock_group(sb, group);
@@ -4112,22 +4134,168 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
}
+static noinline_for_stack void
+ext4_mb_discard_lg_preallocations(struct super_block *sb,
+ struct ext4_locality_group *lg,
+ int order, int total_entries)
+{ /*
+ * Trim one bucket of a locality group's preallocation table.
+ *
+ * @sb: super block the preallocations belong to
+ * @lg: locality group whose bucket is trimmed
+ * @order: index into lg->lg_prealloc_list[] selecting the bucket
+ * @total_entries: entry count of that bucket as seen by the caller
+ *
+ * Pass 1 (under lg->lg_prealloc_lock): entries that are neither in
+ * use (pa_count != 0) nor already deleted are marked deleted,
+ * unlinked from the bucket and parked on a private discard_list,
+ * until @total_entries has been counted down to 5.
+ * Pass 2 (bucket lock dropped): each parked entry is taken off its
+ * pa_group_list under the group lock, released with
+ * ext4_mb_release_group_pa() and handed to call_rcu() for freeing.
+ */
+ ext4_group_t group = 0;
+ struct ext4_buddy e4b;
+ struct list_head discard_list;
+ struct ext4_prealloc_space *pa, *tmp;
+ struct ext4_allocation_context *ac;
+
+ mb_debug("discard locality group preallocation\n");
+
+ INIT_LIST_HEAD(&discard_list);
+ /*
+ * The allocation result is not checked here.
+ * NOTE(review): ac is passed to ext4_mb_release_group_pa() below
+ * even if it is NULL - confirm the callee tolerates that.
+ */
+ ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
+
+ spin_lock(&lg->lg_prealloc_lock);
+ list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
+ pa_inode_list) {
+ spin_lock(&pa->pa_lock);
+ if (atomic_read(&pa->pa_count)) {
+ /*
+ * This is the pa that we just used
+ * for block allocation. So don't
+ * free that
+ */
+ spin_unlock(&pa->pa_lock);
+ continue;
+ }
+ if (pa->pa_deleted) {
+ spin_unlock(&pa->pa_lock);
+ continue;
+ }
+ /* only lg prealloc space */
+ BUG_ON(!pa->pa_linear);
+
+ /* seems this one can be freed ... */
+ pa->pa_deleted = 1;
+ spin_unlock(&pa->pa_lock);
+
+ list_del_rcu(&pa->pa_inode_list);
+ list_add(&pa->u.pa_tmp_list, &discard_list);
+
+ total_entries--;
+ if (total_entries <= 5) {
+ /*
+ * we want to keep only 5 entries
+ * allowing it to grow to 8. This
+ * makes sure we don't call discard
+ * soon for this list.
+ */
+ break;
+ }
+ }
+ spin_unlock(&lg->lg_prealloc_lock);
+
+ list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) {
+
+ ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
+ if (ext4_mb_load_buddy(sb, group, &e4b)) {
+ ext4_error(sb, __func__, "Error in loading buddy "
+ "information for %lu\n", group);
+ /*
+ * NOTE(review): on this path the pa is neither released
+ * nor passed to call_rcu(); it stays on discard_list.
+ */
+ continue;
+ }
+ ext4_lock_group(sb, group);
+ list_del(&pa->pa_group_list);
+ ext4_mb_release_group_pa(&e4b, pa, ac);
+ ext4_unlock_group(sb, group);
+
+ ext4_mb_release_desc(&e4b);
+ list_del(&pa->u.pa_tmp_list);
+ call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
+ }
+ if (ac)
+ kmem_cache_free(ext4_ac_cachep, ac);
+}
+
+/*
+ * Re-insert ac->ac_pa into the locality group bucket that matches its
+ * remaining free space, keeping the bucket sorted by pa_free, and trim
+ * the bucket if it has grown beyond 8 entries.
+ *
+ * We have incremented pa_count, so the pa cannot be freed at this
+ * point. We also hold lg_mutex, so no parallel allocation is possible
+ * from this lg; that means pa_free cannot be updated under us.
+ *
+ * A parallel ext4_mb_discard_group_preallocations() is possible and
+ * can cause lg_prealloc_list to be updated. The bucket is therefore
+ * walked and modified under lg_prealloc_lock, the same lock
+ * ext4_mb_discard_lg_preallocations() holds while unlinking entries;
+ * rcu_read_lock() alone does not exclude other list writers.
+ */
+
+static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
+{
+	int order, added = 0, lg_prealloc_count = 1;
+	struct super_block *sb = ac->ac_sb;
+	struct ext4_locality_group *lg = ac->ac_lg;
+	struct ext4_prealloc_space *tmp_pa, *pa = ac->ac_pa;
+
+	/* bucket index is fls(pa_free) - 1, clamped to the last bucket */
+	order = fls(pa->pa_free) - 1;
+	if (order > PREALLOC_TB_SIZE - 1)
+		/* The max size of hash table is PREALLOC_TB_SIZE */
+		order = PREALLOC_TB_SIZE - 1;
+	/* Add the prealloc space to lg */
+	spin_lock(&lg->lg_prealloc_lock);
+	list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order],
+						pa_inode_list) {
+		spin_lock(&tmp_pa->pa_lock);
+		if (tmp_pa->pa_deleted) {
+			/* release the lock taken just above, not pa's own */
+			spin_unlock(&tmp_pa->pa_lock);
+			continue;
+		}
+		if (!added && pa->pa_free < tmp_pa->pa_free) {
+			/* Insert right before the first entry with more free blocks */
+			list_add_tail_rcu(&pa->pa_inode_list,
+						&tmp_pa->pa_inode_list);
+			added = 1;
+			/*
+			 * no break here: we want to count the total
+			 * number of entries in the list
+			 */
+		}
+		spin_unlock(&tmp_pa->pa_lock);
+		lg_prealloc_count++;
+	}
+	if (!added)
+		list_add_tail_rcu(&pa->pa_inode_list,
+					&lg->lg_prealloc_list[order]);
+	spin_unlock(&lg->lg_prealloc_lock);
+
+	/* Now trim the list to be not more than 8 elements */
+	if (lg_prealloc_count > 8)
+		ext4_mb_discard_lg_preallocations(sb, lg,
+						order, lg_prealloc_count);
+}
+
/*
* release all resource we used in allocation
*/
static int ext4_mb_release_context(struct ext4_allocation_context *ac)
{
- if (ac->ac_pa) {
- if (ac->ac_pa->pa_linear) {
+ struct ext4_prealloc_space *pa = ac->ac_pa;
+ if (pa) {
+ if (pa->pa_linear) {
/* see comment in ext4_mb_use_group_pa() */
- spin_lock(&ac->ac_pa->pa_lock);
- ac->ac_pa->pa_pstart += ac->ac_b_ex.fe_len;
- ac->ac_pa->pa_lstart += ac->ac_b_ex.fe_len;
- ac->ac_pa->pa_free -= ac->ac_b_ex.fe_len;
- ac->ac_pa->pa_len -= ac->ac_b_ex.fe_len;
- spin_unlock(&ac->ac_pa->pa_lock);
+ spin_lock(&pa->pa_lock);
+ pa->pa_pstart += ac->ac_b_ex.fe_len;
+ pa->pa_lstart += ac->ac_b_ex.fe_len;
+ pa->pa_free -= ac->ac_b_ex.fe_len;
+ pa->pa_len -= ac->ac_b_ex.fe_len;
+ spin_unlock(&pa->pa_lock);
+ /*
+ * We want to add the pa to the right bucket.
+ * Remove it from the list and while adding
+ * make sure the list to which we are adding
+ * doesn't grow big.
+ */
+ if (likely(pa->pa_free)) {
+ spin_lock(pa->pa_obj_lock);
+ list_del_rcu(&pa->pa_inode_list);
+ spin_unlock(pa->pa_obj_lock);
+ ext4_mb_add_n_trim(ac);
+ }
}
- ext4_mb_put_pa(ac, ac->ac_sb, ac->ac_pa);
+ ext4_mb_put_pa(ac, ac->ac_sb, pa);
}
if (ac->ac_bitmap_page)
page_cache_release(ac->ac_bitmap_page);
@@ -4420,11 +4588,15 @@ do_more:
count -= overflow;
}
bitmap_bh = ext4_read_block_bitmap(sb, block_group);
- if (!bitmap_bh)
+ if (!bitmap_bh) {
+ err = -EIO;
goto error_return;
+ }
gdp = ext4_get_group_desc(sb, block_group, &gd_bh);
- if (!gdp)
+ if (!gdp) {
+ err = -EIO;
goto error_return;
+ }
if (in_range(ext4_block_bitmap(sb, gdp), block, count) ||
in_range(ext4_inode_bitmap(sb, gdp), block, count) ||
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index bfe6add46bc..c7c9906c2a7 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -164,11 +164,17 @@ struct ext4_free_extent {
* Locality group:
* we try to group all related changes together
* so that writeback can flush/allocate them together as well
+ * Size of lg_prealloc_list hash is determined by MB_DEFAULT_GROUP_PREALLOC
+ * (512). We store prealloc space into the hash based on the pa_free blocks
+ * order value, i.e. fls(pa_free) - 1.
*/
+#define PREALLOC_TB_SIZE 10
struct ext4_locality_group {
/* for allocator */
- struct mutex lg_mutex; /* to serialize allocates */
- struct list_head lg_prealloc_list;/* list of preallocations */
+ /* to serialize allocates */
+ struct mutex lg_mutex;
+ /* lists of preallocations, one bucket per fls(pa_free) - 1 order (capped at PREALLOC_TB_SIZE - 1) */
+ struct list_head lg_prealloc_list[PREALLOC_TB_SIZE];
spinlock_t lg_prealloc_lock;
};
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index f000fbe2cd9..0a926516426 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -73,7 +73,7 @@ static int verify_group_input(struct super_block *sb,
"Inode bitmap not in group (block %llu)",
(unsigned long long)input->inode_bitmap);
else if (outside(input->inode_table, start, end) ||
- outside(itend - 1, start, end))
+ outside(itend - 1, start, end))
ext4_warning(sb, __func__,
"Inode table not in group (blocks %llu-%llu)",
(unsigned long long)input->inode_table, itend - 1);
@@ -104,7 +104,7 @@ static int verify_group_input(struct super_block *sb,
(unsigned long long)input->inode_bitmap,
start, metaend - 1);
else if (inside(input->inode_table, start, metaend) ||
- inside(itend - 1, start, metaend))
+ inside(itend - 1, start, metaend))
ext4_warning(sb, __func__,
"Inode table (%llu-%llu) overlaps"
"GDT table (%llu-%llu)",
@@ -158,9 +158,9 @@ static int extend_or_restart_transaction(handle_t *handle, int thresh,
if (err) {
if ((err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA)))
return err;
- if ((err = ext4_journal_get_write_access(handle, bh)))
+ if ((err = ext4_journal_get_write_access(handle, bh)))
return err;
- }
+ }
return 0;
}
@@ -416,11 +416,11 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
"EXT4-fs: ext4_add_new_gdb: adding group block %lu\n",
gdb_num);
- /*
- * If we are not using the primary superblock/GDT copy don't resize,
- * because the user tools have no way of handling this. Probably a
- * bad time to do it anyways.
- */
+ /*
+ * If we are not using the primary superblock/GDT copy don't resize,
+ * because the user tools have no way of handling this. Probably a
+ * bad time to do it anyways.
+ */
if (EXT4_SB(sb)->s_sbh->b_blocknr !=
le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) {
ext4_warning(sb, __func__,
@@ -507,14 +507,14 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
return 0;
exit_inode:
- //ext4_journal_release_buffer(handle, iloc.bh);
+ /* ext4_journal_release_buffer(handle, iloc.bh); */
brelse(iloc.bh);
exit_dindj:
- //ext4_journal_release_buffer(handle, dind);
+ /* ext4_journal_release_buffer(handle, dind); */
exit_primary:
- //ext4_journal_release_buffer(handle, *primary);
+ /* ext4_journal_release_buffer(handle, *primary); */
exit_sbh:
- //ext4_journal_release_buffer(handle, *primary);
+ /* ext4_journal_release_buffer(handle, *primary); */
exit_dind:
brelse(dind);
exit_bh:
@@ -818,12 +818,12 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
if ((err = ext4_journal_get_write_access(handle, sbi->s_sbh)))
goto exit_journal;
- /*
- * We will only either add reserved group blocks to a backup group
- * or remove reserved blocks for the first group in a new group block.
- * Doing both would be mean more complex code, and sane people don't
- * use non-sparse filesystems anymore. This is already checked above.
- */
+ /*
+ * We will only either add reserved group blocks to a backup group
+ * or remove reserved blocks for the first group in a new group block.
+ * Doing both would mean more complex code, and sane people don't
+ * use non-sparse filesystems anymore. This is already checked above.
+ */
if (gdb_off) {
primary = sbi->s_group_desc[gdb_num];
if ((err = ext4_journal_get_write_access(handle, primary)))
@@ -835,24 +835,24 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
} else if ((err = add_new_gdb(handle, inode, input, &primary)))
goto exit_journal;
- /*
- * OK, now we've set up the new group. Time to make it active.
- *
- * Current kernels don't lock all allocations via lock_super(),
- * so we have to be safe wrt. concurrent accesses the group
- * data. So we need to be careful to set all of the relevant
- * group descriptor data etc. *before* we enable the group.
- *
- * The key field here is sbi->s_groups_count: as long as
- * that retains its old value, nobody is going to access the new
- * group.
- *
- * So first we update all the descriptor metadata for the new
- * group; then we update the total disk blocks count; then we
- * update the groups count to enable the group; then finally we
- * update the free space counts so that the system can start
- * using the new disk blocks.
- */
+ /*
+ * OK, now we've set up the new group. Time to make it active.
+ *
+ * Current kernels don't lock all allocations via lock_super(),
+ * so we have to be safe wrt. concurrent accesses to the group
+ * data. So we need to be careful to set all of the relevant
+ * group descriptor data etc. *before* we enable the group.
+ *
+ * The key field here is sbi->s_groups_count: as long as
+ * that retains its old value, nobody is going to access the new
+ * group.
+ *
+ * So first we update all the descriptor metadata for the new
+ * group; then we update the total disk blocks count; then we
+ * update the groups count to enable the group; then finally we
+ * update the free space counts so that the system can start
+ * using the new disk blocks.
+ */
/* Update group descriptor block for new group */
gdp = (struct ext4_group_desc *)((char *)primary->b_data +
@@ -946,7 +946,8 @@ exit_put:
return err;
} /* ext4_group_add */
-/* Extend the filesystem to the new number of blocks specified. This entry
+/*
+ * Extend the filesystem to the new number of blocks specified. This entry
* point is only used to extend the current filesystem to the end of the last
* existing group. It can be accessed via ioctl, or by "remount,resize=<size>"
* for emergencies (because it has no dependencies on reserved blocks).
@@ -1024,7 +1025,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
o_blocks_count + add, add);
/* See if the device is actually as big as what was requested */
- bh = sb_bread(sb, o_blocks_count + add -1);
+ bh = sb_bread(sb, o_blocks_count + add - 1);
if (!bh) {
ext4_warning(sb, __func__,
"can't read last block, resize aborted");
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 1cb371dcd60..d5d77958b86 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -49,20 +49,19 @@ static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
unsigned long journal_devnum);
static int ext4_create_journal(struct super_block *, struct ext4_super_block *,
unsigned int);
-static void ext4_commit_super (struct super_block * sb,
- struct ext4_super_block * es,
- int sync);
-static void ext4_mark_recovery_complete(struct super_block * sb,
- struct ext4_super_block * es);
-static void ext4_clear_journal_err(struct super_block * sb,
- struct ext4_super_block * es);
+static void ext4_commit_super(struct super_block *sb,
+ struct ext4_super_block *es, int sync);
+static void ext4_mark_recovery_complete(struct super_block *sb,
+ struct ext4_super_block *es);
+static void ext4_clear_journal_err(struct super_block *sb,
+ struct ext4_super_block *es);
static int ext4_sync_fs(struct super_block *sb, int wait);
-static const char *ext4_decode_error(struct super_block * sb, int errno,
+static const char *ext4_decode_error(struct super_block *sb, int errno,
char nbuf[16]);
-static int ext4_remount (struct super_block * sb, int * flags, char * data);
-static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf);
+static int ext4_remount(struct super_block *sb, int *flags, char *data);
+static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
static void ext4_unlockfs(struct super_block *sb);
-static void ext4_write_super (struct super_block * sb);
+static void ext4_write_super(struct super_block *sb);
static void ext4_write_super_lockfs(struct super_block *sb);
@@ -211,15 +210,15 @@ static void ext4_handle_error(struct super_block *sb)
if (sb->s_flags & MS_RDONLY)
return;
- if (!test_opt (sb, ERRORS_CONT)) {
+ if (!test_opt(sb, ERRORS_CONT)) {
journal_t *journal = EXT4_SB(sb)->s_journal;
EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT;
if (journal)
jbd2_journal_abort(journal, -EIO);
}
- if (test_opt (sb, ERRORS_RO)) {
- printk (KERN_CRIT "Remounting filesystem read-only\n");
+ if (test_opt(sb, ERRORS_RO)) {
+ printk(KERN_CRIT "Remounting filesystem read-only\n");
sb->s_flags |= MS_RDONLY;
}
ext4_commit_super(sb, es, 1);
@@ -228,13 +227,13 @@ static void ext4_handle_error(struct super_block *sb)
sb->s_id);
}
-void ext4_error (struct super_block * sb, const char * function,
- const char * fmt, ...)
+void ext4_error(struct super_block *sb, const char *function,
+ const char *fmt, ...)
{
va_list args;
va_start(args, fmt);
- printk(KERN_CRIT "EXT4-fs error (device %s): %s: ",sb->s_id, function);
+ printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
vprintk(fmt, args);
printk("\n");
va_end(args);
@@ -242,7 +241,7 @@ void ext4_error (struct super_block * sb, const char * function,
ext4_handle_error(sb);
}
-static const char *ext4_decode_error(struct super_block * sb, int errno,
+static const char *ext4_decode_error(struct super_block *sb, int errno,
char nbuf[16])
{
char *errstr = NULL;
@@ -278,8 +277,7 @@ static const char *ext4_decode_error(struct super_block * sb, int errno,
/* __ext4_std_error decodes expected errors from journaling functions
* automatically and invokes the appropriate error response. */
-void __ext4_std_error (struct super_block * sb, const char * function,
- int errno)
+void __ext4_std_error(struct super_block *sb, const char *function, int errno)
{
char nbuf[16];
const char *errstr;
@@ -292,8 +290,8 @@ void __ext4_std_error (struct super_block * sb, const char * function,
return;
errstr = ext4_decode_error(sb, errno, nbuf);
- printk (KERN_CRIT "EXT4-fs error (device %s) in %s: %s\n",
- sb->s_id, function, errstr);
+ printk(KERN_CRIT "EXT4-fs error (device %s) in %s: %s\n",
+ sb->s_id, function, errstr);
ext4_handle_error(sb);
}
@@ -308,15 +306,15 @@ void __ext4_std_error (struct super_block * sb, const char * function,
* case we take the easy way out and panic immediately.
*/
-void ext4_abort (struct super_block * sb, const char * function,
- const char * fmt, ...)
+void ext4_abort(struct super_block *sb, const char *function,
+ const char *fmt, ...)
{
va_list args;
- printk (KERN_CRIT "ext4_abort called.\n");
+ printk(KERN_CRIT "ext4_abort called.\n");
va_start(args, fmt);
- printk(KERN_CRIT "EXT4-fs error (device %s): %s: ",sb->s_id, function);
+ printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
vprintk(fmt, args);
printk("\n");
va_end(args);
@@ -334,8 +332,8 @@ void ext4_abort (struct super_block * sb, const char * function,
jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
}
-void ext4_warning (struct super_block * sb, const char * function,
- const char * fmt, ...)
+void ext4_warning(struct super_block *sb, const char *function,
+ const char *fmt, ...)
{
va_list args;
@@ -496,7 +494,7 @@ static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
}
}
-static void ext4_put_super (struct super_block * sb)
+static void ext4_put_super(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
@@ -595,7 +593,7 @@ static void ext4_destroy_inode(struct inode *inode)
kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
}
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
{
struct ext4_inode_info *ei = (struct ext4_inode_info *) foo;
@@ -647,7 +645,8 @@ static void ext4_clear_inode(struct inode *inode)
&EXT4_I(inode)->jinode);
}
-static inline void ext4_show_quota_options(struct seq_file *seq, struct super_block *sb)
+static inline void ext4_show_quota_options(struct seq_file *seq,
+ struct super_block *sb)
{
#if defined(CONFIG_QUOTA)
struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -822,8 +821,8 @@ static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
}
#ifdef CONFIG_QUOTA
-#define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group")
-#define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
+#define QTYPE2NAME(t) ((t) == USRQUOTA?"user":"group")
+#define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
static int ext4_dquot_initialize(struct inode *inode, int type);
static int ext4_dquot_drop(struct inode *inode);
@@ -991,12 +990,12 @@ static ext4_fsblk_t get_sb_block(void **data)
return sb_block;
}
-static int parse_options (char *options, struct super_block *sb,
- unsigned int *inum, unsigned long *journal_devnum,
- ext4_fsblk_t *n_blocks_count, int is_remount)
+static int parse_options(char *options, struct super_block *sb,
+ unsigned int *inum, unsigned long *journal_devnum,
+ ext4_fsblk_t *n_blocks_count, int is_remount)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
- char * p;
+ char *p;
substring_t args[MAX_OPT_ARGS];
int data_opt = 0;
int option;
@@ -1009,7 +1008,7 @@ static int parse_options (char *options, struct super_block *sb,
if (!options)
return 1;
- while ((p = strsep (&options, ",")) != NULL) {
+ while ((p = strsep(&options, ",")) != NULL) {
int token;
if (!*p)
continue;
@@ -1017,16 +1016,16 @@ static int parse_options (char *options, struct super_block *sb,
token = match_token(p, tokens, args);
switch (token) {
case Opt_bsd_df:
- clear_opt (sbi->s_mount_opt, MINIX_DF);
+ clear_opt(sbi->s_mount_opt, MINIX_DF);
break;
case Opt_minix_df:
- set_opt (sbi->s_mount_opt, MINIX_DF);
+ set_opt(sbi->s_mount_opt, MINIX_DF);
break;
case Opt_grpid:
- set_opt (sbi->s_mount_opt, GRPID);
+ set_opt(sbi->s_mount_opt, GRPID);
break;
case Opt_nogrpid:
- clear_opt (sbi->s_mount_opt, GRPID);
+ clear_opt(sbi->s_mount_opt, GRPID);
break;
case Opt_resuid:
if (match_int(&args[0], &option))
@@ -1043,41 +1042,41 @@ static int parse_options (char *options, struct super_block *sb,
/* *sb_block = match_int(&args[0]); */
break;
case Opt_err_panic:
- clear_opt (sbi->s_mount_opt, ERRORS_CONT);
- clear_opt (sbi->s_mount_opt, ERRORS_RO);
- set_opt (sbi->s_mount_opt, ERRORS_PANIC);
+ clear_opt(sbi->s_mount_opt, ERRORS_CONT);
+ clear_opt(sbi->s_mount_opt, ERRORS_RO);
+ set_opt(sbi->s_mount_opt, ERRORS_PANIC);
break;
case Opt_err_ro:
- clear_opt (sbi->s_mount_opt, ERRORS_CONT);
- clear_opt (sbi->s_mount_opt, ERRORS_PANIC);
- set_opt (sbi->s_mount_opt, ERRORS_RO);
+ clear_opt(sbi->s_mount_opt, ERRORS_CONT);
+ clear_opt(sbi->s_mount_opt, ERRORS_PANIC);
+ set_opt(sbi->s_mount_opt, ERRORS_RO);
break;
case Opt_err_cont:
- clear_opt (sbi->s_mount_opt, ERRORS_RO);
- clear_opt (sbi->s_mount_opt, ERRORS_PANIC);
- set_opt (sbi->s_mount_opt, ERRORS_CONT);
+ clear_opt(sbi->s_mount_opt, ERRORS_RO);
+ clear_opt(sbi->s_mount_opt, ERRORS_PANIC);
+ set_opt(sbi->s_mount_opt, ERRORS_CONT);
break;
case Opt_nouid32:
- set_opt (sbi->s_mount_opt, NO_UID32);
+ set_opt(sbi->s_mount_opt, NO_UID32);
break;
case Opt_nocheck:
- clear_opt (sbi->s_mount_opt, CHECK);
+ clear_opt(sbi->s_mount_opt, CHECK);
break;
case Opt_debug:
- set_opt (sbi->s_mount_opt, DEBUG);
+ set_opt(sbi->s_mount_opt, DEBUG);
break;
case Opt_oldalloc:
- set_opt (sbi->s_mount_opt, OLDALLOC);
+ set_opt(sbi->s_mount_opt, OLDALLOC);
break;
case Opt_orlov:
- clear_opt (sbi->s_mount_opt, OLDALLOC);
+ clear_opt(sbi->s_mount_opt, OLDALLOC);
break;
#ifdef CONFIG_EXT4DEV_FS_XATTR
case Opt_user_xattr:
- set_opt (sbi->s_mount_opt, XATTR_USER);
+ set_opt(sbi->s_mount_opt, XATTR_USER);
break;
case Opt_nouser_xattr:
- clear_opt (sbi->s_mount_opt, XATTR_USER);
+ clear_opt(sbi->s_mount_opt, XATTR_USER);
break;
#else
case Opt_user_xattr:
@@ -1115,7 +1114,7 @@ static int parse_options (char *options, struct super_block *sb,
"journal on remount\n");
return 0;
}
- set_opt (sbi->s_mount_opt, UPDATE_JOURNAL);
+ set_opt(sbi->s_mount_opt, UPDATE_JOURNAL);
break;
case Opt_journal_inum:
if (is_remount) {
@@ -1145,7 +1144,7 @@ static int parse_options (char *options, struct super_block *sb,
set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM);
break;
case Opt_noload:
- set_opt (sbi->s_mount_opt, NOLOAD);
+ set_opt(sbi->s_mount_opt, NOLOAD);
break;
case Opt_commit:
if (match_int(&args[0], &option))
@@ -1331,7 +1330,7 @@ set_qf_format:
"on this filesystem, use tune2fs\n");
return 0;
}
- set_opt (sbi->s_mount_opt, EXTENTS);
+ set_opt(sbi->s_mount_opt, EXTENTS);
break;
case Opt_noextents:
/*
@@ -1348,7 +1347,7 @@ set_qf_format:
"-o noextents options\n");
return 0;
}
- clear_opt (sbi->s_mount_opt, EXTENTS);
+ clear_opt(sbi->s_mount_opt, EXTENTS);
break;
case Opt_i_version:
set_opt(sbi->s_mount_opt, I_VERSION);
@@ -1374,9 +1373,9 @@ set_qf_format:
set_opt(sbi->s_mount_opt, DELALLOC);
break;
default:
- printk (KERN_ERR
- "EXT4-fs: Unrecognized mount option \"%s\" "
- "or missing value\n", p);
+ printk(KERN_ERR
+ "EXT4-fs: Unrecognized mount option \"%s\" "
+ "or missing value\n", p);
return 0;
}
}
@@ -1423,31 +1422,31 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
int res = 0;
if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
- printk (KERN_ERR "EXT4-fs warning: revision level too high, "
- "forcing read-only mode\n");
+ printk(KERN_ERR "EXT4-fs warning: revision level too high, "
+ "forcing read-only mode\n");
res = MS_RDONLY;
}
if (read_only)
return res;
if (!(sbi->s_mount_state & EXT4_VALID_FS))
- printk (KERN_WARNING "EXT4-fs warning: mounting unchecked fs, "
- "running e2fsck is recommended\n");
+ printk(KERN_WARNING "EXT4-fs warning: mounting unchecked fs, "
+ "running e2fsck is recommended\n");
else if ((sbi->s_mount_state & EXT4_ERROR_FS))
- printk (KERN_WARNING
- "EXT4-fs warning: mounting fs with errors, "
- "running e2fsck is recommended\n");
+ printk(KERN_WARNING
+ "EXT4-fs warning: mounting fs with errors, "
+ "running e2fsck is recommended\n");
else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 &&
le16_to_cpu(es->s_mnt_count) >=
(unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
- printk (KERN_WARNING
- "EXT4-fs warning: maximal mount count reached, "
- "running e2fsck is recommended\n");
+ printk(KERN_WARNING
+ "EXT4-fs warning: maximal mount count reached, "
+ "running e2fsck is recommended\n");
else if (le32_to_cpu(es->s_checkinterval) &&
(le32_to_cpu(es->s_lastcheck) +
le32_to_cpu(es->s_checkinterval) <= get_seconds()))
- printk (KERN_WARNING
- "EXT4-fs warning: checktime reached, "
- "running e2fsck is recommended\n");
+ printk(KERN_WARNING
+ "EXT4-fs warning: checktime reached, "
+ "running e2fsck is recommended\n");
#if 0
/* @@@ We _will_ want to clear the valid bit if we find
* inconsistencies, to force a fsck at reboot. But for
@@ -1506,14 +1505,13 @@ static int ext4_fill_flex_info(struct super_block *sb)
flex_group_count = (sbi->s_groups_count + groups_per_flex - 1) /
groups_per_flex;
- sbi->s_flex_groups = kmalloc(flex_group_count *
+ sbi->s_flex_groups = kzalloc(flex_group_count *
sizeof(struct flex_groups), GFP_KERNEL);
if (sbi->s_flex_groups == NULL) {
- printk(KERN_ERR "EXT4-fs: not enough memory\n");
+ printk(KERN_ERR "EXT4-fs: not enough memory for "
+ "%lu flex groups\n", flex_group_count);
goto failed;
}
- memset(sbi->s_flex_groups, 0, flex_group_count *
- sizeof(struct flex_groups));
gdp = ext4_get_group_desc(sb, 1, &bh);
block_bitmap = ext4_block_bitmap(sb, gdp) - 1;
@@ -1597,16 +1595,14 @@ static int ext4_check_descriptors(struct super_block *sb)
(EXT4_BLOCKS_PER_GROUP(sb) - 1);
block_bitmap = ext4_block_bitmap(sb, gdp);
- if (block_bitmap < first_block || block_bitmap > last_block)
- {
+ if (block_bitmap < first_block || block_bitmap > last_block) {
printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
"Block bitmap for group %lu not in group "
"(block %llu)!", i, block_bitmap);
return 0;
}
inode_bitmap = ext4_inode_bitmap(sb, gdp);
- if (inode_bitmap < first_block || inode_bitmap > last_block)
- {
+ if (inode_bitmap < first_block || inode_bitmap > last_block) {
printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
"Inode bitmap for group %lu not in group "
"(block %llu)!", i, inode_bitmap);
@@ -1614,26 +1610,28 @@ static int ext4_check_descriptors(struct super_block *sb)
}
inode_table = ext4_inode_table(sb, gdp);
if (inode_table < first_block ||
- inode_table + sbi->s_itb_per_group - 1 > last_block)
- {
+ inode_table + sbi->s_itb_per_group - 1 > last_block) {
printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
"Inode table for group %lu not in group "
"(block %llu)!", i, inode_table);
return 0;
}
+ spin_lock(sb_bgl_lock(sbi, i));
if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
"Checksum for group %lu failed (%u!=%u)\n",
i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
gdp)), le16_to_cpu(gdp->bg_checksum));
- return 0;
+ if (!(sb->s_flags & MS_RDONLY))
+ return 0;
}
+ spin_unlock(sb_bgl_lock(sbi, i));
if (!flexbg_flag)
first_block += EXT4_BLOCKS_PER_GROUP(sb);
}
ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb));
- sbi->s_es->s_free_inodes_count=cpu_to_le32(ext4_count_free_inodes(sb));
+ sbi->s_es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb));
return 1;
}
@@ -1654,8 +1652,8 @@ static int ext4_check_descriptors(struct super_block *sb)
* e2fsck was run on this filesystem, and it must have already done the orphan
* inode cleanup for us, so we can safely abort without any further action.
*/
-static void ext4_orphan_cleanup (struct super_block * sb,
- struct ext4_super_block * es)
+static void ext4_orphan_cleanup(struct super_block *sb,
+ struct ext4_super_block *es)
{
unsigned int s_flags = sb->s_flags;
int nr_orphans = 0, nr_truncates = 0;
@@ -1732,7 +1730,7 @@ static void ext4_orphan_cleanup (struct super_block * sb,
iput(inode); /* The delete magic happens here! */
}
-#define PLURAL(x) (x), ((x)==1) ? "" : "s"
+#define PLURAL(x) (x), ((x) == 1) ? "" : "s"
if (nr_orphans)
printk(KERN_INFO "EXT4-fs: %s: %d orphan inode%s deleted\n",
@@ -1899,12 +1897,12 @@ static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
return 0;
}
-static int ext4_fill_super (struct super_block *sb, void *data, int silent)
+static int ext4_fill_super(struct super_block *sb, void *data, int silent)
__releases(kernel_lock)
__acquires(kernel_lock)
{
- struct buffer_head * bh;
+ struct buffer_head *bh;
struct ext4_super_block *es = NULL;
struct ext4_sb_info *sbi;
ext4_fsblk_t block;
@@ -1953,7 +1951,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
}
if (!(bh = sb_bread(sb, logical_sb_block))) {
- printk (KERN_ERR "EXT4-fs: unable to read superblock\n");
+ printk(KERN_ERR "EXT4-fs: unable to read superblock\n");
goto out_fail;
}
/*
@@ -2026,8 +2024,8 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
set_opt(sbi->s_mount_opt, DELALLOC);
- if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum,
- NULL, 0))
+ if (!parse_options((char *) data, sb, &journal_inum, &journal_devnum,
+ NULL, 0))
goto failed_mount;
sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
@@ -2102,7 +2100,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
goto failed_mount;
}
- brelse (bh);
+ brelse(bh);
logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
offset = do_div(logical_sb_block, blocksize);
bh = sb_bread(sb, logical_sb_block);
@@ -2114,8 +2112,8 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
es = (struct ext4_super_block *)(((char *)bh->b_data) + offset);
sbi->s_es = es;
if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
- printk (KERN_ERR
- "EXT4-fs: Magic mismatch, very weird !\n");
+ printk(KERN_ERR
+ "EXT4-fs: Magic mismatch, very weird !\n");
goto failed_mount;
}
}
@@ -2132,9 +2130,9 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
(!is_power_of_2(sbi->s_inode_size)) ||
(sbi->s_inode_size > blocksize)) {
- printk (KERN_ERR
- "EXT4-fs: unsupported inode size: %d\n",
- sbi->s_inode_size);
+ printk(KERN_ERR
+ "EXT4-fs: unsupported inode size: %d\n",
+ sbi->s_inode_size);
goto failed_mount;
}
if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE)
@@ -2166,20 +2164,20 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
sbi->s_mount_state = le16_to_cpu(es->s_state);
sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
- for (i=0; i < 4; i++)
+ for (i = 0; i < 4; i++)
sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
sbi->s_def_hash_version = es->s_def_hash_version;
if (sbi->s_blocks_per_group > blocksize * 8) {
- printk (KERN_ERR
- "EXT4-fs: #blocks per group too big: %lu\n",
- sbi->s_blocks_per_group);
+ printk(KERN_ERR
+ "EXT4-fs: #blocks per group too big: %lu\n",
+ sbi->s_blocks_per_group);
goto failed_mount;
}
if (sbi->s_inodes_per_group > blocksize * 8) {
- printk (KERN_ERR
- "EXT4-fs: #inodes per group too big: %lu\n",
- sbi->s_inodes_per_group);
+ printk(KERN_ERR
+ "EXT4-fs: #inodes per group too big: %lu\n",
+ sbi->s_inodes_per_group);
goto failed_mount;
}
@@ -2213,10 +2211,10 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
sbi->s_groups_count = blocks_count;
db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
EXT4_DESC_PER_BLOCK(sb);
- sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *),
+ sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *),
GFP_KERNEL);
if (sbi->s_group_desc == NULL) {
- printk (KERN_ERR "EXT4-fs: not enough memory\n");
+ printk(KERN_ERR "EXT4-fs: not enough memory\n");
goto failed_mount;
}
@@ -2226,13 +2224,13 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
block = descriptor_loc(sb, logical_sb_block, i);
sbi->s_group_desc[i] = sb_bread(sb, block);
if (!sbi->s_group_desc[i]) {
- printk (KERN_ERR "EXT4-fs: "
- "can't read group descriptor %d\n", i);
+ printk(KERN_ERR "EXT4-fs: "
+ "can't read group descriptor %d\n", i);
db_count = i;
goto failed_mount2;
}
}
- if (!ext4_check_descriptors (sb)) {
+ if (!ext4_check_descriptors(sb)) {
printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n");
goto failed_mount2;
}
@@ -2308,11 +2306,11 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
EXT4_SB(sb)->s_journal->j_failed_commit) {
printk(KERN_CRIT "EXT4-fs error (device %s): "
"ext4_fill_super: Journal transaction "
- "%u is corrupt\n", sb->s_id,
+ "%u is corrupt\n", sb->s_id,
EXT4_SB(sb)->s_journal->j_failed_commit);
- if (test_opt (sb, ERRORS_RO)) {
- printk (KERN_CRIT
- "Mounting filesystem read-only\n");
+ if (test_opt(sb, ERRORS_RO)) {
+ printk(KERN_CRIT
+ "Mounting filesystem read-only\n");
sb->s_flags |= MS_RDONLY;
EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
@@ -2332,9 +2330,9 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
goto failed_mount3;
} else {
if (!silent)
- printk (KERN_ERR
- "ext4: No journal on filesystem on %s\n",
- sb->s_id);
+ printk(KERN_ERR
+ "ext4: No journal on filesystem on %s\n",
+ sb->s_id);
goto failed_mount3;
}
@@ -2418,7 +2416,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
goto failed_mount4;
}
- ext4_setup_super (sb, es, sb->s_flags & MS_RDONLY);
+ ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY);
/* determine the minimum size of new large inodes, if present */
if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
@@ -2457,12 +2455,12 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
ext4_orphan_cleanup(sb, es);
EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
if (needs_recovery)
- printk (KERN_INFO "EXT4-fs: recovery complete.\n");
+ printk(KERN_INFO "EXT4-fs: recovery complete.\n");
ext4_mark_recovery_complete(sb, es);
- printk (KERN_INFO "EXT4-fs: mounted filesystem with %s data mode.\n",
- test_opt(sb,DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ? "journal":
- test_opt(sb,DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered":
- "writeback");
+ printk(KERN_INFO "EXT4-fs: mounted filesystem with %s data mode.\n",
+ test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ? "journal":
+ test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered":
+ "writeback");
if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - "
@@ -2575,14 +2573,14 @@ static journal_t *ext4_get_journal(struct super_block *sb,
static journal_t *ext4_get_dev_journal(struct super_block *sb,
dev_t j_dev)
{
- struct buffer_head * bh;
+ struct buffer_head *bh;
journal_t *journal;
ext4_fsblk_t start;
ext4_fsblk_t len;
int hblock, blocksize;
ext4_fsblk_t sb_block;
unsigned long offset;
- struct ext4_super_block * es;
+ struct ext4_super_block *es;
struct block_device *bdev;
bdev = ext4_blkdev_get(j_dev);
@@ -2697,8 +2695,8 @@ static int ext4_load_journal(struct super_block *sb,
"unavailable, cannot proceed.\n");
return -EROFS;
}
- printk (KERN_INFO "EXT4-fs: write access will "
- "be enabled during recovery.\n");
+ printk(KERN_INFO "EXT4-fs: write access will "
+ "be enabled during recovery.\n");
}
}
@@ -2751,8 +2749,8 @@ static int ext4_load_journal(struct super_block *sb,
return 0;
}
-static int ext4_create_journal(struct super_block * sb,
- struct ext4_super_block * es,
+static int ext4_create_journal(struct super_block *sb,
+ struct ext4_super_block *es,
unsigned int journal_inum)
{
journal_t *journal;
@@ -2793,9 +2791,8 @@ static int ext4_create_journal(struct super_block * sb,
return 0;
}
-static void ext4_commit_super (struct super_block * sb,
- struct ext4_super_block * es,
- int sync)
+static void ext4_commit_super(struct super_block *sb,
+ struct ext4_super_block *es, int sync)
{
struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
@@ -2816,8 +2813,8 @@ static void ext4_commit_super (struct super_block * sb,
* remounting) the filesystem readonly, then we will end up with a
* consistent fs on disk. Record that fact.
*/
-static void ext4_mark_recovery_complete(struct super_block * sb,
- struct ext4_super_block * es)
+static void ext4_mark_recovery_complete(struct super_block *sb,
+ struct ext4_super_block *es)
{
journal_t *journal = EXT4_SB(sb)->s_journal;
@@ -2839,8 +2836,8 @@ static void ext4_mark_recovery_complete(struct super_block * sb,
* has recorded an error from a previous lifetime, move that error to the
* main filesystem now.
*/
-static void ext4_clear_journal_err(struct super_block * sb,
- struct ext4_super_block * es)
+static void ext4_clear_journal_err(struct super_block *sb,
+ struct ext4_super_block *es)
{
journal_t *journal;
int j_errno;
@@ -2865,7 +2862,7 @@ static void ext4_clear_journal_err(struct super_block * sb,
EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
- ext4_commit_super (sb, es, 1);
+ ext4_commit_super(sb, es, 1);
jbd2_journal_clear_err(journal);
}
@@ -2898,7 +2895,7 @@ int ext4_force_commit(struct super_block *sb)
* This implicitly triggers the writebehind on sync().
*/
-static void ext4_write_super (struct super_block * sb)
+static void ext4_write_super(struct super_block *sb)
{
if (mutex_trylock(&sb->s_lock) != 0)
BUG();
@@ -2954,13 +2951,14 @@ static void ext4_unlockfs(struct super_block *sb)
}
}
-static int ext4_remount (struct super_block * sb, int * flags, char * data)
+static int ext4_remount(struct super_block *sb, int *flags, char *data)
{
- struct ext4_super_block * es;
+ struct ext4_super_block *es;
struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_fsblk_t n_blocks_count = 0;
unsigned long old_sb_flags;
struct ext4_mount_options old_opts;
+ ext4_group_t g;
int err;
#ifdef CONFIG_QUOTA
int i;
@@ -3039,6 +3037,26 @@ static int ext4_remount (struct super_block * sb, int * flags, char * data)
}
/*
+ * Make sure the group descriptor checksums
+ * are sane. If they aren't, refuse to
+ * remount r/w.
+ */
+ for (g = 0; g < sbi->s_groups_count; g++) {
+ struct ext4_group_desc *gdp =
+ ext4_get_group_desc(sb, g, NULL);
+
+ if (!ext4_group_desc_csum_verify(sbi, g, gdp)) {
+ printk(KERN_ERR
+ "EXT4-fs: ext4_remount: "
+ "Checksum for group %lu failed (%u!=%u)\n",
+ g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)),
+ le16_to_cpu(gdp->bg_checksum));
+ err = -EINVAL;
+ goto restore_opts;
+ }
+ }
+
+ /*
* If we have an unprocessed orphan list hanging
* around from a previously readonly bdev mount,
* require a full umount/remount for now.
@@ -3063,7 +3081,7 @@ static int ext4_remount (struct super_block * sb, int * flags, char * data)
sbi->s_mount_state = le16_to_cpu(es->s_state);
if ((err = ext4_group_extend(sb, es, n_blocks_count)))
goto restore_opts;
- if (!ext4_setup_super (sb, es, 0))
+ if (!ext4_setup_super(sb, es, 0))
sb->s_flags &= ~MS_RDONLY;
}
}
@@ -3093,7 +3111,7 @@ restore_opts:
return err;
}
-static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf)
+static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct super_block *sb = dentry->d_sb;
struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -3331,12 +3349,12 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
}
/* Journaling quota? */
if (EXT4_SB(sb)->s_qf_names[type]) {
- /* Quotafile not of fs root? */
+ /* Quotafile not in fs root? */
if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
printk(KERN_WARNING
"EXT4-fs: Quota file not on filesystem root. "
"Journaled quota will not work.\n");
- }
+ }
/*
* When we journal data on quota file, we have to flush journal to see
@@ -3352,8 +3370,9 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
}
+ err = vfs_quota_on_path(sb, type, format_id, &nd.path);
path_put(&nd.path);
- return vfs_quota_on(sb, type, format_id, path, remount);
+ return err;
}
/* Read data from quotafile - avoid pagecache and such because we cannot afford
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 93c5fdcdad2..8954208b489 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1512,7 +1512,7 @@ static inline void ext4_xattr_hash_entry(struct ext4_xattr_header *header,
char *name = entry->e_name;
int n;
- for (n=0; n < entry->e_name_len; n++) {
+ for (n = 0; n < entry->e_name_len; n++) {
hash = (hash << NAME_HASH_SHIFT) ^
(hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^
*name++;
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index 3a9ecac8d61..3222f51c41c 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -36,7 +36,7 @@ static inline int fat_max_cache(struct inode *inode)
static struct kmem_cache *fat_cache_cachep;
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
{
struct fat_cache *cache = (struct fat_cache *)foo;
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 34541d06e62..cd4a0162e10 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -17,7 +17,6 @@
#include <linux/slab.h>
#include <linux/time.h>
#include <linux/msdos_fs.h>
-#include <linux/dirent.h>
#include <linux/smp_lock.h>
#include <linux/buffer_head.h>
#include <linux/compat.h>
@@ -124,10 +123,11 @@ static inline int fat_get_entry(struct inode *dir, loff_t *pos,
* but ignore that right now.
* Ahem... Stack smashing in ring 0 isn't fun. Fixed.
*/
-static int uni16_to_x8(unsigned char *ascii, wchar_t *uni, int len,
+static int uni16_to_x8(unsigned char *ascii, const wchar_t *uni, int len,
int uni_xlate, struct nls_table *nls)
{
- wchar_t *ip, ec;
+ const wchar_t *ip;
+ wchar_t ec;
unsigned char *op, nc;
int charlen;
int k;
@@ -167,6 +167,16 @@ static int uni16_to_x8(unsigned char *ascii, wchar_t *uni, int len,
return (op - ascii);
}
+static inline int fat_uni_to_x8(struct msdos_sb_info *sbi, const wchar_t *uni,
+ unsigned char *buf, int size)
+{
+ if (sbi->options.utf8)
+ return utf8_wcstombs(buf, uni, size);
+ else
+ return uni16_to_x8(buf, uni, size, sbi->options.unicode_xlate,
+ sbi->nls_io);
+}
+
static inline int
fat_short2uni(struct nls_table *t, unsigned char *c, int clen, wchar_t *uni)
{
@@ -227,6 +237,19 @@ fat_shortname2uni(struct nls_table *nls, unsigned char *buf, int buf_size,
return len;
}
+static inline int fat_name_match(struct msdos_sb_info *sbi,
+ const unsigned char *a, int a_len,
+ const unsigned char *b, int b_len)
+{
+ if (a_len != b_len)
+ return 0;
+
+ if (sbi->options.name_check != 's')
+ return !nls_strnicmp(sbi->nls_io, a, b, a_len);
+ else
+ return !memcmp(a, b, a_len);
+}
+
enum { PARSE_INVALID = 1, PARSE_NOT_LONGNAME, PARSE_EOF, };
/**
@@ -302,6 +325,19 @@ parse_long:
}
/*
+ * Maximum buffer size of short name.
+ * [(MSDOS_NAME + '.') * max one char + nul]
+ * For msdos style, ['.' (hidden) + MSDOS_NAME + '.' + nul]
+ */
+#define FAT_MAX_SHORT_SIZE ((MSDOS_NAME + 1) * NLS_MAX_CHARSET_SIZE + 1)
+/*
+ * Maximum buffer size of unicode chars from slots.
+ * [(max longname slots * 13 (size in a slot) + nul) * sizeof(wchar_t)]
+ */
+#define FAT_MAX_UNI_CHARS ((MSDOS_SLOTS - 1) * 13 + 1)
+#define FAT_MAX_UNI_SIZE (FAT_MAX_UNI_CHARS * sizeof(wchar_t))
+
+/*
* Return values: negative -> error, 0 -> not found, positive -> found,
* value is the total amount of slots, including the shortname entry.
*/
@@ -312,29 +348,20 @@ int fat_search_long(struct inode *inode, const unsigned char *name,
struct msdos_sb_info *sbi = MSDOS_SB(sb);
struct buffer_head *bh = NULL;
struct msdos_dir_entry *de;
- struct nls_table *nls_io = sbi->nls_io;
struct nls_table *nls_disk = sbi->nls_disk;
- wchar_t bufuname[14];
unsigned char nr_slots;
- int xlate_len;
+ wchar_t bufuname[14];
wchar_t *unicode = NULL;
unsigned char work[MSDOS_NAME];
- unsigned char *bufname = NULL;
- int uni_xlate = sbi->options.unicode_xlate;
- int utf8 = sbi->options.utf8;
- int anycase = (sbi->options.name_check != 's');
+ unsigned char bufname[FAT_MAX_SHORT_SIZE];
unsigned short opt_shortname = sbi->options.shortname;
loff_t cpos = 0;
- int chl, i, j, last_u, err;
-
- bufname = __getname();
- if (!bufname)
- return -ENOMEM;
+ int chl, i, j, last_u, err, len;
err = -ENOENT;
- while(1) {
+ while (1) {
if (fat_get_entry(inode, &cpos, &bh, &de) == -1)
- goto EODir;
+ goto end_of_dir;
parse_record:
nr_slots = 0;
if (de->name[0] == DELETED_FLAG)
@@ -353,7 +380,7 @@ parse_record:
else if (status == PARSE_NOT_LONGNAME)
goto parse_record;
else if (status == PARSE_EOF)
- goto EODir;
+ goto end_of_dir;
}
memcpy(work, de->name, sizeof(de->name));
@@ -394,30 +421,24 @@ parse_record:
if (!last_u)
continue;
+ /* Compare shortname */
bufuname[last_u] = 0x0000;
- xlate_len = utf8
- ?utf8_wcstombs(bufname, bufuname, PATH_MAX)
- :uni16_to_x8(bufname, bufuname, PATH_MAX, uni_xlate, nls_io);
- if (xlate_len == name_len)
- if ((!anycase && !memcmp(name, bufname, xlate_len)) ||
- (anycase && !nls_strnicmp(nls_io, name, bufname,
- xlate_len)))
- goto Found;
+ len = fat_uni_to_x8(sbi, bufuname, bufname, sizeof(bufname));
+ if (fat_name_match(sbi, name, name_len, bufname, len))
+ goto found;
if (nr_slots) {
- xlate_len = utf8
- ?utf8_wcstombs(bufname, unicode, PATH_MAX)
- :uni16_to_x8(bufname, unicode, PATH_MAX, uni_xlate, nls_io);
- if (xlate_len != name_len)
- continue;
- if ((!anycase && !memcmp(name, bufname, xlate_len)) ||
- (anycase && !nls_strnicmp(nls_io, name, bufname,
- xlate_len)))
- goto Found;
+ void *longname = unicode + FAT_MAX_UNI_CHARS;
+ int size = PATH_MAX - FAT_MAX_UNI_SIZE;
+
+ /* Compare longname */
+ len = fat_uni_to_x8(sbi, unicode, longname, size);
+ if (fat_name_match(sbi, name, name_len, longname, len))
+ goto found;
}
}
-Found:
+found:
nr_slots++; /* include the de */
sinfo->slot_off = cpos - nr_slots * sizeof(*de);
sinfo->nr_slots = nr_slots;
@@ -425,9 +446,7 @@ Found:
sinfo->bh = bh;
sinfo->i_pos = fat_make_i_pos(sb, sinfo->bh, sinfo->de);
err = 0;
-EODir:
- if (bufname)
- __putname(bufname);
+end_of_dir:
if (unicode)
__putname(unicode);
@@ -453,23 +472,20 @@ static int __fat_readdir(struct inode *inode, struct file *filp, void *dirent,
struct msdos_sb_info *sbi = MSDOS_SB(sb);
struct buffer_head *bh;
struct msdos_dir_entry *de;
- struct nls_table *nls_io = sbi->nls_io;
struct nls_table *nls_disk = sbi->nls_disk;
- unsigned char long_slots;
- const char *fill_name;
- int fill_len;
+ unsigned char nr_slots;
wchar_t bufuname[14];
wchar_t *unicode = NULL;
- unsigned char c, work[MSDOS_NAME], bufname[56], *ptname = bufname;
- unsigned long lpos, dummy, *furrfu = &lpos;
- int uni_xlate = sbi->options.unicode_xlate;
+ unsigned char c, work[MSDOS_NAME];
+ unsigned char bufname[FAT_MAX_SHORT_SIZE], *ptname = bufname;
+ unsigned short opt_shortname = sbi->options.shortname;
int isvfat = sbi->options.isvfat;
- int utf8 = sbi->options.utf8;
int nocase = sbi->options.nocase;
- unsigned short opt_shortname = sbi->options.shortname;
+ const char *fill_name = NULL;
unsigned long inum;
- int chi, chl, i, i2, j, last, last_u, dotoffset = 0;
+ unsigned long lpos, dummy, *furrfu = &lpos;
loff_t cpos;
+ int chi, chl, i, i2, j, last, last_u, dotoffset = 0, fill_len = 0;
int ret = 0;
lock_super(sb);
@@ -489,43 +505,58 @@ static int __fat_readdir(struct inode *inode, struct file *filp, void *dirent,
cpos = 0;
}
}
- if (cpos & (sizeof(struct msdos_dir_entry)-1)) {
+ if (cpos & (sizeof(struct msdos_dir_entry) - 1)) {
ret = -ENOENT;
goto out;
}
bh = NULL;
-GetNew:
+get_new:
if (fat_get_entry(inode, &cpos, &bh, &de) == -1)
- goto EODir;
+ goto end_of_dir;
parse_record:
- long_slots = 0;
- /* Check for long filename entry */
- if (isvfat) {
+ nr_slots = 0;
+ /*
+ * Check for long filename entry, but if short_only, we don't
+ * need to parse long filename.
+ */
+ if (isvfat && !short_only) {
if (de->name[0] == DELETED_FLAG)
- goto RecEnd;
+ goto record_end;
if (de->attr != ATTR_EXT && (de->attr & ATTR_VOLUME))
- goto RecEnd;
+ goto record_end;
if (de->attr != ATTR_EXT && IS_FREE(de->name))
- goto RecEnd;
+ goto record_end;
} else {
if ((de->attr & ATTR_VOLUME) || IS_FREE(de->name))
- goto RecEnd;
+ goto record_end;
}
if (isvfat && de->attr == ATTR_EXT) {
int status = fat_parse_long(inode, &cpos, &bh, &de,
- &unicode, &long_slots);
+ &unicode, &nr_slots);
if (status < 0) {
filp->f_pos = cpos;
ret = status;
goto out;
} else if (status == PARSE_INVALID)
- goto RecEnd;
+ goto record_end;
else if (status == PARSE_NOT_LONGNAME)
goto parse_record;
else if (status == PARSE_EOF)
- goto EODir;
+ goto end_of_dir;
+
+ if (nr_slots) {
+ void *longname = unicode + FAT_MAX_UNI_CHARS;
+ int size = PATH_MAX - FAT_MAX_UNI_SIZE;
+ int len = fat_uni_to_x8(sbi, unicode, longname, size);
+
+ fill_name = longname;
+ fill_len = len;
+ /* !both && !short_only, so we don't need shortname. */
+ if (!both)
+ goto start_filldir;
+ }
}
if (sbi->options.dotsOK) {
@@ -587,12 +618,32 @@ parse_record:
}
}
if (!last)
- goto RecEnd;
+ goto record_end;
i = last + dotoffset;
j = last_u;
- lpos = cpos - (long_slots+1)*sizeof(struct msdos_dir_entry);
+ if (isvfat) {
+ bufuname[j] = 0x0000;
+ i = fat_uni_to_x8(sbi, bufuname, bufname, sizeof(bufname));
+ }
+ if (nr_slots) {
+ /* hack for fat_ioctl_filldir() */
+ struct fat_ioctl_filldir_callback *p = dirent;
+
+ p->longname = fill_name;
+ p->long_len = fill_len;
+ p->shortname = bufname;
+ p->short_len = i;
+ fill_name = NULL;
+ fill_len = 0;
+ } else {
+ fill_name = bufname;
+ fill_len = i;
+ }
+
+start_filldir:
+ lpos = cpos - (nr_slots + 1) * sizeof(struct msdos_dir_entry);
if (!memcmp(de->name, MSDOS_DOT, MSDOS_NAME))
inum = inode->i_ino;
else if (!memcmp(de->name, MSDOS_DOTDOT, MSDOS_NAME)) {
@@ -607,49 +658,17 @@ parse_record:
inum = iunique(sb, MSDOS_ROOT_INO);
}
- if (isvfat) {
- bufuname[j] = 0x0000;
- i = utf8 ? utf8_wcstombs(bufname, bufuname, sizeof(bufname))
- : uni16_to_x8(bufname, bufuname, sizeof(bufname), uni_xlate, nls_io);
- }
-
- fill_name = bufname;
- fill_len = i;
- if (!short_only && long_slots) {
- /* convert the unicode long name. 261 is maximum size
- * of unicode buffer. (13 * slots + nul) */
- void *longname = unicode + 261;
- int buf_size = PATH_MAX - (261 * sizeof(unicode[0]));
- int long_len = utf8
- ? utf8_wcstombs(longname, unicode, buf_size)
- : uni16_to_x8(longname, unicode, buf_size, uni_xlate, nls_io);
-
- if (!both) {
- fill_name = longname;
- fill_len = long_len;
- } else {
- /* hack for fat_ioctl_filldir() */
- struct fat_ioctl_filldir_callback *p = dirent;
-
- p->longname = longname;
- p->long_len = long_len;
- p->shortname = bufname;
- p->short_len = i;
- fill_name = NULL;
- fill_len = 0;
- }
- }
if (filldir(dirent, fill_name, fill_len, *furrfu, inum,
(de->attr & ATTR_DIR) ? DT_DIR : DT_REG) < 0)
- goto FillFailed;
+ goto fill_failed;
-RecEnd:
+record_end:
furrfu = &lpos;
filp->f_pos = cpos;
- goto GetNew;
-EODir:
+ goto get_new;
+end_of_dir:
filp->f_pos = cpos;
-FillFailed:
+fill_failed:
brelse(bh);
if (unicode)
__putname(unicode);
@@ -715,7 +734,7 @@ efault: \
return -EFAULT; \
}
-FAT_IOCTL_FILLDIR_FUNC(fat_ioctl_filldir, dirent)
+FAT_IOCTL_FILLDIR_FUNC(fat_ioctl_filldir, __fat_dirent)
static int fat_ioctl_readdir(struct inode *inode, struct file *filp,
void __user *dirent, filldir_t filldir,
@@ -741,7 +760,7 @@ static int fat_ioctl_readdir(struct inode *inode, struct file *filp,
static int fat_dir_ioctl(struct inode *inode, struct file *filp,
unsigned int cmd, unsigned long arg)
{
- struct dirent __user *d1 = (struct dirent __user *)arg;
+ struct __fat_dirent __user *d1 = (struct __fat_dirent __user *)arg;
int short_only, both;
switch (cmd) {
@@ -757,7 +776,7 @@ static int fat_dir_ioctl(struct inode *inode, struct file *filp,
return fat_generic_ioctl(inode, filp, cmd, arg);
}
- if (!access_ok(VERIFY_WRITE, d1, sizeof(struct dirent[2])))
+ if (!access_ok(VERIFY_WRITE, d1, sizeof(struct __fat_dirent[2])))
return -EFAULT;
/*
* Yes, we don't need this put_user() absolutely. However old
@@ -1082,7 +1101,7 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec *ts)
goto error_free;
}
- fat_date_unix2dos(ts->tv_sec, &time, &date);
+ fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc);
de = (struct msdos_dir_entry *)bhs[0]->b_data;
/* filling the new directory slots ("." and ".." entries) */
diff --git a/fs/fat/file.c b/fs/fat/file.c
index c672df4036e..ddde37025ca 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -15,6 +15,8 @@
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/blkdev.h>
+#include <linux/fsnotify.h>
+#include <linux/security.h>
int fat_generic_ioctl(struct inode *inode, struct file *filp,
unsigned int cmd, unsigned long arg)
@@ -64,6 +66,7 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp,
/* Equivalent to a chmod() */
ia.ia_valid = ATTR_MODE | ATTR_CTIME;
+ ia.ia_ctime = current_fs_time(inode->i_sb);
if (is_dir) {
ia.ia_mode = MSDOS_MKMODE(attr,
S_IRWXUGO & ~sbi->options.fs_dmask)
@@ -90,11 +93,21 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp,
}
}
+ /*
+ * The security check is questionable... We single
+ * out the RO attribute for checking by the security
+ * module, just because it maps to a file mode.
+ */
+ err = security_inode_setattr(filp->f_path.dentry, &ia);
+ if (err)
+ goto up;
+
/* This MUST be done before doing anything irreversible... */
- err = notify_change(filp->f_path.dentry, &ia);
+ err = fat_setattr(filp->f_path.dentry, &ia);
if (err)
goto up;
+ fsnotify_change(filp->f_path.dentry, ia.ia_valid);
if (sbi->options.sys_immutable) {
if (attr & ATTR_SYS)
inode->i_flags |= S_IMMUTABLE;
@@ -300,6 +313,8 @@ static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode)
return 0;
}
+#define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)
+
int fat_setattr(struct dentry *dentry, struct iattr *attr)
{
struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb);
@@ -323,9 +338,9 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
/* Check for setting the inode time. */
ia_valid = attr->ia_valid;
- if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET)) {
+ if (ia_valid & TIMES_SET_FLAGS) {
if (fat_allow_set_time(sbi, inode))
- attr->ia_valid &= ~(ATTR_MTIME_SET | ATTR_ATIME_SET);
+ attr->ia_valid &= ~TIMES_SET_FLAGS;
}
error = inode_change_ok(inode, attr);
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 46a4508ffd2..6d266d793e2 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -382,17 +382,20 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1))
& ~((loff_t)sbi->cluster_size - 1)) >> 9;
inode->i_mtime.tv_sec =
- date_dos2unix(le16_to_cpu(de->time), le16_to_cpu(de->date));
+ date_dos2unix(le16_to_cpu(de->time), le16_to_cpu(de->date),
+ sbi->options.tz_utc);
inode->i_mtime.tv_nsec = 0;
if (sbi->options.isvfat) {
int secs = de->ctime_cs / 100;
int csecs = de->ctime_cs % 100;
inode->i_ctime.tv_sec =
date_dos2unix(le16_to_cpu(de->ctime),
- le16_to_cpu(de->cdate)) + secs;
+ le16_to_cpu(de->cdate),
+ sbi->options.tz_utc) + secs;
inode->i_ctime.tv_nsec = csecs * 10000000;
inode->i_atime.tv_sec =
- date_dos2unix(0, le16_to_cpu(de->adate));
+ date_dos2unix(0, le16_to_cpu(de->adate),
+ sbi->options.tz_utc);
inode->i_atime.tv_nsec = 0;
} else
inode->i_ctime = inode->i_atime = inode->i_mtime;
@@ -495,7 +498,7 @@ static void fat_destroy_inode(struct inode *inode)
kmem_cache_free(fat_inode_cachep, MSDOS_I(inode));
}
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
{
struct msdos_inode_info *ei = (struct msdos_inode_info *)foo;
@@ -591,11 +594,14 @@ retry:
raw_entry->attr = fat_attr(inode);
raw_entry->start = cpu_to_le16(MSDOS_I(inode)->i_logstart);
raw_entry->starthi = cpu_to_le16(MSDOS_I(inode)->i_logstart >> 16);
- fat_date_unix2dos(inode->i_mtime.tv_sec, &raw_entry->time, &raw_entry->date);
+ fat_date_unix2dos(inode->i_mtime.tv_sec, &raw_entry->time,
+ &raw_entry->date, sbi->options.tz_utc);
if (sbi->options.isvfat) {
__le16 atime;
- fat_date_unix2dos(inode->i_ctime.tv_sec,&raw_entry->ctime,&raw_entry->cdate);
- fat_date_unix2dos(inode->i_atime.tv_sec,&atime,&raw_entry->adate);
+ fat_date_unix2dos(inode->i_ctime.tv_sec, &raw_entry->ctime,
+ &raw_entry->cdate, sbi->options.tz_utc);
+ fat_date_unix2dos(inode->i_atime.tv_sec, &atime,
+ &raw_entry->adate, sbi->options.tz_utc);
raw_entry->ctime_cs = (inode->i_ctime.tv_sec & 1) * 100 +
inode->i_ctime.tv_nsec / 10000000;
}
@@ -836,6 +842,8 @@ static int fat_show_options(struct seq_file *m, struct vfsmount *mnt)
}
if (sbi->options.flush)
seq_puts(m, ",flush");
+ if (opts->tz_utc)
+ seq_puts(m, ",tz=UTC");
return 0;
}
@@ -848,7 +856,7 @@ enum {
Opt_charset, Opt_shortname_lower, Opt_shortname_win95,
Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes,
Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes,
- Opt_obsolate, Opt_flush, Opt_err,
+ Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_err,
};
static match_table_t fat_tokens = {
@@ -883,6 +891,7 @@ static match_table_t fat_tokens = {
{Opt_obsolate, "cvf_options=%100s"},
{Opt_obsolate, "posix"},
{Opt_flush, "flush"},
+ {Opt_tz_utc, "tz=UTC"},
{Opt_err, NULL},
};
static match_table_t msdos_tokens = {
@@ -947,10 +956,11 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
opts->utf8 = opts->unicode_xlate = 0;
opts->numtail = 1;
opts->usefree = opts->nocase = 0;
+ opts->tz_utc = 0;
*debug = 0;
if (!options)
- return 0;
+ goto out;
while ((p = strsep(&options, ",")) != NULL) {
int token;
@@ -1036,6 +1046,9 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
case Opt_flush:
opts->flush = 1;
break;
+ case Opt_tz_utc:
+ opts->tz_utc = 1;
+ break;
/* msdos specific */
case Opt_dots:
@@ -1104,10 +1117,13 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
return -EINVAL;
}
}
+
+out:
/* UTF-8 doesn't provide FAT semantics */
if (!strcmp(opts->iocharset, "utf8")) {
printk(KERN_ERR "FAT: utf8 is not a recommended IO charset"
- " for FAT filesystems, filesystem will be case sensitive!\n");
+ " for FAT filesystems, filesystem will be "
+ "case sensitive!\n");
}
/* If user doesn't specify allow_utime, it's initialized from dmask. */
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index 61f23511eac..79fb98ad36d 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -142,7 +142,7 @@ static int day_n[] = {
};
/* Convert a MS-DOS time/date pair to a UNIX date (seconds since 1 1 70). */
-int date_dos2unix(unsigned short time, unsigned short date)
+int date_dos2unix(unsigned short time, unsigned short date, int tz_utc)
{
int month, year, secs;
@@ -156,16 +156,18 @@ int date_dos2unix(unsigned short time, unsigned short date)
((date & 31)-1+day_n[month]+(year/4)+year*365-((year & 3) == 0 &&
month < 2 ? 1 : 0)+3653);
/* days since 1.1.70 plus 80's leap day */
- secs += sys_tz.tz_minuteswest*60;
+ if (!tz_utc)
+ secs += sys_tz.tz_minuteswest*60;
return secs;
}
/* Convert linear UNIX date to a MS-DOS time/date pair. */
-void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date)
+void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date, int tz_utc)
{
int day, year, nl_day, month;
- unix_date -= sys_tz.tz_minuteswest*60;
+ if (!tz_utc)
+ unix_date -= sys_tz.tz_minuteswest*60;
/* Jan 1 GMT 00:00:00 1980. But what about another time zone? */
if (unix_date < 315532800)
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 330a7d78259..ac4f7db9f13 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -49,145 +49,94 @@ static int get_close_on_exec(unsigned int fd)
return res;
}
-/*
- * locate_fd finds a free file descriptor in the open_fds fdset,
- * expanding the fd arrays if necessary. Must be called with the
- * file_lock held for write.
- */
-
-static int locate_fd(unsigned int orig_start, int cloexec)
-{
- struct files_struct *files = current->files;
- unsigned int newfd;
- unsigned int start;
- int error;
- struct fdtable *fdt;
-
- spin_lock(&files->file_lock);
-
- error = -EINVAL;
- if (orig_start >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
- goto out;
-
-repeat:
- fdt = files_fdtable(files);
- /*
- * Someone might have closed fd's in the range
- * orig_start..fdt->next_fd
- */
- start = orig_start;
- if (start < files->next_fd)
- start = files->next_fd;
-
- newfd = start;
- if (start < fdt->max_fds)
- newfd = find_next_zero_bit(fdt->open_fds->fds_bits,
- fdt->max_fds, start);
-
- error = -EMFILE;
- if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
- goto out;
-
- error = expand_files(files, newfd);
- if (error < 0)
- goto out;
-
- /*
- * If we needed to expand the fs array we
- * might have blocked - try again.
- */
- if (error)
- goto repeat;
-
- if (start <= files->next_fd)
- files->next_fd = newfd + 1;
-
- FD_SET(newfd, fdt->open_fds);
- if (cloexec)
- FD_SET(newfd, fdt->close_on_exec);
- else
- FD_CLR(newfd, fdt->close_on_exec);
- error = newfd;
-
-out:
- spin_unlock(&files->file_lock);
- return error;
-}
-
-static int dupfd(struct file *file, unsigned int start, int cloexec)
-{
- int fd = locate_fd(start, cloexec);
- if (fd >= 0)
- fd_install(fd, file);
- else
- fput(file);
-
- return fd;
-}
-
-asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
+asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
{
int err = -EBADF;
struct file * file, *tofree;
struct files_struct * files = current->files;
struct fdtable *fdt;
- spin_lock(&files->file_lock);
- if (!(file = fcheck(oldfd)))
- goto out_unlock;
- err = newfd;
- if (newfd == oldfd)
- goto out_unlock;
- err = -EBADF;
- if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
- goto out_unlock;
- get_file(file); /* We are now finished with oldfd */
-
- err = expand_files(files, newfd);
- if (err < 0)
- goto out_fput;
+ if ((flags & ~O_CLOEXEC) != 0)
+ return -EINVAL;
- /* To avoid races with open() and dup(), we will mark the fd as
- * in-use in the open-file bitmap throughout the entire dup2()
- * process. This is quite safe: do_close() uses the fd array
- * entry, not the bitmap, to decide what work needs to be
- * done. --sct */
- /* Doesn't work. open() might be there first. --AV */
+ if (unlikely(oldfd == newfd))
+ return -EINVAL;
- /* Yes. It's a race. In user space. Nothing sane to do */
+ spin_lock(&files->file_lock);
+ err = expand_files(files, newfd);
+ file = fcheck(oldfd);
+ if (unlikely(!file))
+ goto Ebadf;
+ if (unlikely(err < 0)) {
+ if (err == -EMFILE)
+ goto Ebadf;
+ goto out_unlock;
+ }
+ /*
+ * We need to detect attempts to do dup2() over allocated but still
+ * not finished descriptor. NB: OpenBSD avoids that at the price of
+ * extra work in their equivalent of fget() - they insert struct
+ * file immediately after grabbing descriptor, mark it larval if
+ * more work (e.g. actual opening) is needed and make sure that
+ * fget() treats larval files as absent. Potentially interesting,
+ * but while extra work in fget() is trivial, locking implications
+ * and amount of surgery on open()-related paths in VFS are not.
+ * FreeBSD fails with -EBADF in the same situation, NetBSD "solution"
+ * deadlocks in rather amusing ways, AFAICS. All of that is out of
+ * scope of POSIX or SUS, since neither considers shared descriptor
+ * tables and this condition does not arise without those.
+ */
err = -EBUSY;
fdt = files_fdtable(files);
tofree = fdt->fd[newfd];
if (!tofree && FD_ISSET(newfd, fdt->open_fds))
- goto out_fput;
-
+ goto out_unlock;
+ get_file(file);
rcu_assign_pointer(fdt->fd[newfd], file);
FD_SET(newfd, fdt->open_fds);
- FD_CLR(newfd, fdt->close_on_exec);
+ if (flags & O_CLOEXEC)
+ FD_SET(newfd, fdt->close_on_exec);
+ else
+ FD_CLR(newfd, fdt->close_on_exec);
spin_unlock(&files->file_lock);
if (tofree)
filp_close(tofree, files);
- err = newfd;
-out:
- return err;
+
+ return newfd;
+
+Ebadf:
+ err = -EBADF;
out_unlock:
spin_unlock(&files->file_lock);
- goto out;
+ return err;
+}
-out_fput:
- spin_unlock(&files->file_lock);
- fput(file);
- goto out;
+asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
+{
+ if (unlikely(newfd == oldfd)) { /* corner case */
+ struct files_struct *files = current->files;
+ rcu_read_lock();
+ if (!fcheck_files(files, oldfd))
+ oldfd = -EBADF;
+ rcu_read_unlock();
+ return oldfd;
+ }
+ return sys_dup3(oldfd, newfd, 0);
}
asmlinkage long sys_dup(unsigned int fildes)
{
int ret = -EBADF;
- struct file * file = fget(fildes);
-
- if (file)
- ret = dupfd(file, 0, 0);
+ struct file *file = fget(fildes);
+
+ if (file) {
+ ret = get_unused_fd();
+ if (ret >= 0)
+ fd_install(ret, file);
+ else
+ fput(file);
+ }
return ret;
}
@@ -310,8 +259,13 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
switch (cmd) {
case F_DUPFD:
case F_DUPFD_CLOEXEC:
- get_file(filp);
- err = dupfd(filp, arg, cmd == F_DUPFD_CLOEXEC);
+ if (arg >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
+ break;
+ err = alloc_fd(arg, cmd == F_DUPFD_CLOEXEC ? O_CLOEXEC : 0);
+ if (err >= 0) {
+ get_file(filp);
+ fd_install(err, filp);
+ }
break;
case F_GETFD:
err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
diff --git a/fs/fifo.c b/fs/fifo.c
index 9785e36f81e..987bf941149 100644
--- a/fs/fifo.c
+++ b/fs/fifo.c
@@ -57,7 +57,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
* POSIX.1 says that O_NONBLOCK means return with the FIFO
* opened, even when there is no process writing the FIFO.
*/
- filp->f_op = &read_fifo_fops;
+ filp->f_op = &read_pipefifo_fops;
pipe->r_counter++;
if (pipe->readers++ == 0)
wake_up_partner(inode);
@@ -86,7 +86,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
if ((filp->f_flags & O_NONBLOCK) && !pipe->readers)
goto err;
- filp->f_op = &write_fifo_fops;
+ filp->f_op = &write_pipefifo_fops;
pipe->w_counter++;
if (!pipe->writers++)
wake_up_partner(inode);
@@ -105,7 +105,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
* This implementation will NEVER block on a O_RDWR open, since
* the process can at least talk to itself.
*/
- filp->f_op = &rdwr_fifo_fops;
+ filp->f_op = &rdwr_pipefifo_fops;
pipe->readers++;
pipe->writers++;
@@ -151,5 +151,5 @@ err_nocleanup:
* depending on the access mode of the file...
*/
const struct file_operations def_fifo_fops = {
- .open = fifo_open, /* will set read or write pipe_fops */
+ .open = fifo_open, /* will set read_ or write_pipefifo_fops */
};
diff --git a/fs/file.c b/fs/file.c
index 7b3887e054d..f313314f996 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -6,6 +6,7 @@
* Manage the dynamic fd arrays in the process files_struct.
*/
+#include <linux/module.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/time.h>
@@ -250,9 +251,18 @@ int expand_files(struct files_struct *files, int nr)
struct fdtable *fdt;
fdt = files_fdtable(files);
+
+ /*
+ * N.B. For clone tasks sharing a files structure, this test
+ * will limit the total number of files that can be opened.
+ */
+ if (nr >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
+ return -EMFILE;
+
/* Do we need to expand? */
if (nr < fdt->max_fds)
return 0;
+
/* Can we expand? */
if (nr >= sysctl_nr_open)
return -EMFILE;
@@ -423,3 +433,63 @@ struct files_struct init_files = {
},
.file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock),
};
+
+/*
+ * allocate a file descriptor, mark it busy.
+ */
+int alloc_fd(unsigned start, unsigned flags)
+{
+ struct files_struct *files = current->files;
+ unsigned int fd;
+ int error;
+ struct fdtable *fdt;
+
+ spin_lock(&files->file_lock);
+repeat:
+ fdt = files_fdtable(files);
+ fd = start;
+ if (fd < files->next_fd)
+ fd = files->next_fd;
+
+ if (fd < fdt->max_fds)
+ fd = find_next_zero_bit(fdt->open_fds->fds_bits,
+ fdt->max_fds, fd);
+
+ error = expand_files(files, fd);
+ if (error < 0)
+ goto out;
+
+ /*
+ * If we needed to expand the fs array we
+ * might have blocked - try again.
+ */
+ if (error)
+ goto repeat;
+
+ if (start <= files->next_fd)
+ files->next_fd = fd + 1;
+
+ FD_SET(fd, fdt->open_fds);
+ if (flags & O_CLOEXEC)
+ FD_SET(fd, fdt->close_on_exec);
+ else
+ FD_CLR(fd, fdt->close_on_exec);
+ error = fd;
+#if 1
+ /* Sanity check */
+ if (rcu_dereference(fdt->fd[fd]) != NULL) {
+ printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd);
+ rcu_assign_pointer(fdt->fd[fd], NULL);
+ }
+#endif
+
+out:
+ spin_unlock(&files->file_lock);
+ return error;
+}
+
+int get_unused_fd(void)
+{
+ return alloc_fd(0, 0);
+}
+EXPORT_SYMBOL(get_unused_fd);
diff --git a/fs/file_table.c b/fs/file_table.c
index 83084225b4c..f45a4493f9e 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -120,7 +120,7 @@ struct file *get_empty_filp(void)
tsk = current;
INIT_LIST_HEAD(&f->f_u.fu_list);
- atomic_set(&f->f_count, 1);
+ atomic_long_set(&f->f_count, 1);
rwlock_init(&f->f_owner.lock);
f->f_uid = tsk->fsuid;
f->f_gid = tsk->fsgid;
@@ -219,7 +219,7 @@ EXPORT_SYMBOL(init_file);
void fput(struct file *file)
{
- if (atomic_dec_and_test(&file->f_count))
+ if (atomic_long_dec_and_test(&file->f_count))
__fput(file);
}
@@ -294,7 +294,7 @@ struct file *fget(unsigned int fd)
rcu_read_lock();
file = fcheck_files(files, fd);
if (file) {
- if (!atomic_inc_not_zero(&file->f_count)) {
+ if (!atomic_long_inc_not_zero(&file->f_count)) {
/* File object ref couldn't be taken */
rcu_read_unlock();
return NULL;
@@ -326,7 +326,7 @@ struct file *fget_light(unsigned int fd, int *fput_needed)
rcu_read_lock();
file = fcheck_files(files, fd);
if (file) {
- if (atomic_inc_not_zero(&file->f_count))
+ if (atomic_long_inc_not_zero(&file->f_count))
*fput_needed = 1;
else
/* Didn't get the reference, someone's freed */
@@ -341,7 +341,7 @@ struct file *fget_light(unsigned int fd, int *fput_needed)
void put_filp(struct file *file)
{
- if (atomic_dec_and_test(&file->f_count)) {
+ if (atomic_long_dec_and_test(&file->f_count)) {
security_file_free(file);
file_kill(file);
file_free(file);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 2060bf06b90..fd03330cade 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -97,7 +97,7 @@ void fuse_invalidate_attr(struct inode *inode)
* timeout is unknown (unlink, rmdir, rename and in some cases
* lookup)
*/
-static void fuse_invalidate_entry_cache(struct dentry *entry)
+void fuse_invalidate_entry_cache(struct dentry *entry)
{
fuse_dentry_settime(entry, 0);
}
@@ -112,18 +112,16 @@ static void fuse_invalidate_entry(struct dentry *entry)
fuse_invalidate_entry_cache(entry);
}
-static void fuse_lookup_init(struct fuse_req *req, struct inode *dir,
- struct dentry *entry,
+static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_req *req,
+ u64 nodeid, struct qstr *name,
struct fuse_entry_out *outarg)
{
- struct fuse_conn *fc = get_fuse_conn(dir);
-
memset(outarg, 0, sizeof(struct fuse_entry_out));
req->in.h.opcode = FUSE_LOOKUP;
- req->in.h.nodeid = get_node_id(dir);
+ req->in.h.nodeid = nodeid;
req->in.numargs = 1;
- req->in.args[0].size = entry->d_name.len + 1;
- req->in.args[0].value = entry->d_name.name;
+ req->in.args[0].size = name->len + 1;
+ req->in.args[0].value = name->name;
req->out.numargs = 1;
if (fc->minor < 9)
req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
@@ -189,7 +187,8 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
attr_version = fuse_get_attr_version(fc);
parent = dget_parent(entry);
- fuse_lookup_init(req, parent->d_inode, entry, &outarg);
+ fuse_lookup_init(fc, req, get_node_id(parent->d_inode),
+ &entry->d_name, &outarg);
request_send(fc, req);
dput(parent);
err = req->out.h.error;
@@ -225,7 +224,7 @@ static int invalid_nodeid(u64 nodeid)
return !nodeid || nodeid == FUSE_ROOT_ID;
}
-static struct dentry_operations fuse_dentry_operations = {
+struct dentry_operations fuse_dentry_operations = {
.d_revalidate = fuse_dentry_revalidate,
};
@@ -239,85 +238,127 @@ int fuse_valid_type(int m)
* Add a directory inode to a dentry, ensuring that no other dentry
* refers to this inode. Called with fc->inst_mutex.
*/
-static int fuse_d_add_directory(struct dentry *entry, struct inode *inode)
+static struct dentry *fuse_d_add_directory(struct dentry *entry,
+ struct inode *inode)
{
struct dentry *alias = d_find_alias(inode);
- if (alias) {
+ if (alias && !(alias->d_flags & DCACHE_DISCONNECTED)) {
/* This tries to shrink the subtree below alias */
fuse_invalidate_entry(alias);
dput(alias);
if (!list_empty(&inode->i_dentry))
- return -EBUSY;
+ return ERR_PTR(-EBUSY);
+ } else {
+ dput(alias);
}
- d_add(entry, inode);
- return 0;
+ return d_splice_alias(inode, entry);
}
-static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
- struct nameidata *nd)
+int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
+ struct fuse_entry_out *outarg, struct inode **inode)
{
- int err;
- struct fuse_entry_out outarg;
- struct inode *inode = NULL;
- struct fuse_conn *fc = get_fuse_conn(dir);
+ struct fuse_conn *fc = get_fuse_conn_super(sb);
struct fuse_req *req;
struct fuse_req *forget_req;
u64 attr_version;
+ int err;
- if (entry->d_name.len > FUSE_NAME_MAX)
- return ERR_PTR(-ENAMETOOLONG);
+ *inode = NULL;
+ err = -ENAMETOOLONG;
+ if (name->len > FUSE_NAME_MAX)
+ goto out;
req = fuse_get_req(fc);
+ err = PTR_ERR(req);
if (IS_ERR(req))
- return ERR_CAST(req);
+ goto out;
forget_req = fuse_get_req(fc);
+ err = PTR_ERR(forget_req);
if (IS_ERR(forget_req)) {
fuse_put_request(fc, req);
- return ERR_CAST(forget_req);
+ goto out;
}
attr_version = fuse_get_attr_version(fc);
- fuse_lookup_init(req, dir, entry, &outarg);
+ fuse_lookup_init(fc, req, nodeid, name, outarg);
request_send(fc, req);
err = req->out.h.error;
fuse_put_request(fc, req);
/* Zero nodeid is same as -ENOENT, but with valid timeout */
- if (!err && outarg.nodeid &&
- (invalid_nodeid(outarg.nodeid) ||
- !fuse_valid_type(outarg.attr.mode)))
- err = -EIO;
- if (!err && outarg.nodeid) {
- inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
- &outarg.attr, entry_attr_timeout(&outarg),
- attr_version);
- if (!inode) {
- fuse_send_forget(fc, forget_req, outarg.nodeid, 1);
- return ERR_PTR(-ENOMEM);
- }
+ if (err || !outarg->nodeid)
+ goto out_put_forget;
+
+ err = -EIO;
+ if (!outarg->nodeid)
+ goto out_put_forget;
+ if (!fuse_valid_type(outarg->attr.mode))
+ goto out_put_forget;
+
+ *inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
+ &outarg->attr, entry_attr_timeout(outarg),
+ attr_version);
+ err = -ENOMEM;
+ if (!*inode) {
+ fuse_send_forget(fc, forget_req, outarg->nodeid, 1);
+ goto out;
}
+ err = 0;
+
+ out_put_forget:
fuse_put_request(fc, forget_req);
- if (err && err != -ENOENT)
- return ERR_PTR(err);
+ out:
+ return err;
+}
+
+static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
+ struct nameidata *nd)
+{
+ int err;
+ struct fuse_entry_out outarg;
+ struct inode *inode;
+ struct dentry *newent;
+ struct fuse_conn *fc = get_fuse_conn(dir);
+ bool outarg_valid = true;
+
+ err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
+ &outarg, &inode);
+ if (err == -ENOENT) {
+ outarg_valid = false;
+ err = 0;
+ }
+ if (err)
+ goto out_err;
+
+ err = -EIO;
+ if (inode && get_node_id(inode) == FUSE_ROOT_ID)
+ goto out_iput;
if (inode && S_ISDIR(inode->i_mode)) {
mutex_lock(&fc->inst_mutex);
- err = fuse_d_add_directory(entry, inode);
+ newent = fuse_d_add_directory(entry, inode);
mutex_unlock(&fc->inst_mutex);
- if (err) {
- iput(inode);
- return ERR_PTR(err);
- }
- } else
- d_add(entry, inode);
+ err = PTR_ERR(newent);
+ if (IS_ERR(newent))
+ goto out_iput;
+ } else {
+ newent = d_splice_alias(inode, entry);
+ }
+ entry = newent ? newent : entry;
entry->d_op = &fuse_dentry_operations;
- if (!err)
+ if (outarg_valid)
fuse_change_entry_timeout(entry, &outarg);
else
fuse_invalidate_entry_cache(entry);
- return NULL;
+
+ return newent;
+
+ out_iput:
+ iput(inode);
+ out_err:
+ return ERR_PTR(err);
}
/*
@@ -857,7 +898,7 @@ static int fuse_access(struct inode *inode, int mask)
return PTR_ERR(req);
memset(&inarg, 0, sizeof(inarg));
- inarg.mask = mask;
+ inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC);
req->in.h.opcode = FUSE_ACCESS;
req->in.h.nodeid = get_node_id(inode);
req->in.numargs = 1;
@@ -886,7 +927,7 @@ static int fuse_access(struct inode *inode, int mask)
* access request is sent. Execute permission is still checked
* locally based on file mode.
*/
-static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd)
+static int fuse_permission(struct inode *inode, int mask)
{
struct fuse_conn *fc = get_fuse_conn(inode);
bool refreshed = false;
@@ -921,7 +962,7 @@ static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd)
exist. So if permissions are revoked this won't be
noticed immediately, only after the attribute
timeout has expired */
- } else if (nd && (nd->flags & (LOOKUP_ACCESS | LOOKUP_CHDIR))) {
+ } else if (mask & MAY_ACCESS) {
err = fuse_access(inode, mask);
} else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
if (!(inode->i_mode & S_IXUGO)) {
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 8092f0d9fd1..2bada6bbc31 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -893,7 +893,7 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
if (count == 0)
goto out;
- err = remove_suid(file->f_path.dentry);
+ err = file_remove_suid(file);
if (err)
goto out;
@@ -1341,6 +1341,11 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
pid_t pid = fl->fl_type != F_UNLCK ? current->tgid : 0;
int err;
+ if (fl->fl_lmops && fl->fl_lmops->fl_grant) {
+ /* NLM needs asynchronous locks, which we don't support yet */
+ return -ENOLCK;
+ }
+
/* Unlock on close is handled by the flush method */
if (fl->fl_flags & FL_CLOSE)
return 0;
@@ -1365,7 +1370,9 @@ static int fuse_file_lock(struct file *file, int cmd, struct file_lock *fl)
struct fuse_conn *fc = get_fuse_conn(inode);
int err;
- if (cmd == F_GETLK) {
+ if (cmd == F_CANCELLK) {
+ err = 0;
+ } else if (cmd == F_GETLK) {
if (fc->no_lock) {
posix_test_lock(file, fl);
err = 0;
@@ -1373,7 +1380,7 @@ static int fuse_file_lock(struct file *file, int cmd, struct file_lock *fl)
err = fuse_getlk(file, fl);
} else {
if (fc->no_lock)
- err = posix_lock_file_wait(file, fl);
+ err = posix_lock_file(file, fl, NULL);
else
err = fuse_setlk(file, fl, 0);
}
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index bae948657c4..3a876076bdd 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -363,6 +363,9 @@ struct fuse_conn {
/** Do not send separate SETATTR request before open(O_TRUNC) */
unsigned atomic_o_trunc : 1;
+ /** Filesystem supports NFS exporting. Only set in INIT */
+ unsigned export_support : 1;
+
/*
* The following bitfields are only for optimization purposes
* and hence races in setting them will not cause malfunction
@@ -464,6 +467,8 @@ static inline u64 get_node_id(struct inode *inode)
/** Device operations */
extern const struct file_operations fuse_dev_operations;
+extern struct dentry_operations fuse_dentry_operations;
+
/**
* Get a filled in inode
*/
@@ -471,6 +476,9 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
int generation, struct fuse_attr *attr,
u64 attr_valid, u64 attr_version);
+int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
+ struct fuse_entry_out *outarg, struct inode **inode);
+
/**
* Send FORGET command
*/
@@ -604,6 +612,8 @@ void fuse_abort_conn(struct fuse_conn *fc);
*/
void fuse_invalidate_attr(struct inode *inode);
+void fuse_invalidate_entry_cache(struct dentry *entry);
+
/**
* Acquire reference to fuse_conn
*/
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 3141690558c..d2249f174e2 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -18,6 +18,7 @@
#include <linux/statfs.h>
#include <linux/random.h>
#include <linux/sched.h>
+#include <linux/exportfs.h>
MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
MODULE_DESCRIPTION("Filesystem in Userspace");
@@ -552,6 +553,174 @@ static struct inode *get_root_inode(struct super_block *sb, unsigned mode)
return fuse_iget(sb, 1, 0, &attr, 0, 0);
}
+struct fuse_inode_handle
+{
+ u64 nodeid;
+ u32 generation;
+};
+
+static struct dentry *fuse_get_dentry(struct super_block *sb,
+ struct fuse_inode_handle *handle)
+{
+ struct fuse_conn *fc = get_fuse_conn_super(sb);
+ struct inode *inode;
+ struct dentry *entry;
+ int err = -ESTALE;
+
+ if (handle->nodeid == 0)
+ goto out_err;
+
+ inode = ilookup5(sb, handle->nodeid, fuse_inode_eq, &handle->nodeid);
+ if (!inode) {
+ struct fuse_entry_out outarg;
+ struct qstr name;
+
+ if (!fc->export_support)
+ goto out_err;
+
+ name.len = 1;
+ name.name = ".";
+ err = fuse_lookup_name(sb, handle->nodeid, &name, &outarg,
+ &inode);
+ if (err && err != -ENOENT)
+ goto out_err;
+ if (err || !inode) {
+ err = -ESTALE;
+ goto out_err;
+ }
+ err = -EIO;
+ if (get_node_id(inode) != handle->nodeid)
+ goto out_iput;
+ }
+ err = -ESTALE;
+ if (inode->i_generation != handle->generation)
+ goto out_iput;
+
+ entry = d_alloc_anon(inode);
+ err = -ENOMEM;
+ if (!entry)
+ goto out_iput;
+
+ if (get_node_id(inode) != FUSE_ROOT_ID) {
+ entry->d_op = &fuse_dentry_operations;
+ fuse_invalidate_entry_cache(entry);
+ }
+
+ return entry;
+
+ out_iput:
+ iput(inode);
+ out_err:
+ return ERR_PTR(err);
+}
+
+static int fuse_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
+ int connectable)
+{
+ struct inode *inode = dentry->d_inode;
+ bool encode_parent = connectable && !S_ISDIR(inode->i_mode);
+ int len = encode_parent ? 6 : 3;
+ u64 nodeid;
+ u32 generation;
+
+ if (*max_len < len)
+ return 255;
+
+ nodeid = get_fuse_inode(inode)->nodeid;
+ generation = inode->i_generation;
+
+ fh[0] = (u32)(nodeid >> 32);
+ fh[1] = (u32)(nodeid & 0xffffffff);
+ fh[2] = generation;
+
+ if (encode_parent) {
+ struct inode *parent;
+
+ spin_lock(&dentry->d_lock);
+ parent = dentry->d_parent->d_inode;
+ nodeid = get_fuse_inode(parent)->nodeid;
+ generation = parent->i_generation;
+ spin_unlock(&dentry->d_lock);
+
+ fh[3] = (u32)(nodeid >> 32);
+ fh[4] = (u32)(nodeid & 0xffffffff);
+ fh[5] = generation;
+ }
+
+ *max_len = len;
+ return encode_parent ? 0x82 : 0x81;
+}
+
+static struct dentry *fuse_fh_to_dentry(struct super_block *sb,
+ struct fid *fid, int fh_len, int fh_type)
+{
+ struct fuse_inode_handle handle;
+
+ if ((fh_type != 0x81 && fh_type != 0x82) || fh_len < 3)
+ return NULL;
+
+ handle.nodeid = (u64) fid->raw[0] << 32;
+ handle.nodeid |= (u64) fid->raw[1];
+ handle.generation = fid->raw[2];
+ return fuse_get_dentry(sb, &handle);
+}
+
+static struct dentry *fuse_fh_to_parent(struct super_block *sb,
+ struct fid *fid, int fh_len, int fh_type)
+{
+ struct fuse_inode_handle parent;
+
+ if (fh_type != 0x82 || fh_len < 6)
+ return NULL;
+
+ parent.nodeid = (u64) fid->raw[3] << 32;
+ parent.nodeid |= (u64) fid->raw[4];
+ parent.generation = fid->raw[5];
+ return fuse_get_dentry(sb, &parent);
+}
+
+static struct dentry *fuse_get_parent(struct dentry *child)
+{
+ struct inode *child_inode = child->d_inode;
+ struct fuse_conn *fc = get_fuse_conn(child_inode);
+ struct inode *inode;
+ struct dentry *parent;
+ struct fuse_entry_out outarg;
+ struct qstr name;
+ int err;
+
+ if (!fc->export_support)
+ return ERR_PTR(-ESTALE);
+
+ name.len = 2;
+ name.name = "..";
+ err = fuse_lookup_name(child_inode->i_sb, get_node_id(child_inode),
+ &name, &outarg, &inode);
+ if (err && err != -ENOENT)
+ return ERR_PTR(err);
+ if (err || !inode)
+ return ERR_PTR(-ESTALE);
+
+ parent = d_alloc_anon(inode);
+ if (!parent) {
+ iput(inode);
+ return ERR_PTR(-ENOMEM);
+ }
+ if (get_node_id(inode) != FUSE_ROOT_ID) {
+ parent->d_op = &fuse_dentry_operations;
+ fuse_invalidate_entry_cache(parent);
+ }
+
+ return parent;
+}
+
+static const struct export_operations fuse_export_operations = {
+ .fh_to_dentry = fuse_fh_to_dentry,
+ .fh_to_parent = fuse_fh_to_parent,
+ .encode_fh = fuse_encode_fh,
+ .get_parent = fuse_get_parent,
+};
+
static const struct super_operations fuse_super_operations = {
.alloc_inode = fuse_alloc_inode,
.destroy_inode = fuse_destroy_inode,
@@ -581,6 +750,11 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
fc->no_lock = 1;
if (arg->flags & FUSE_ATOMIC_O_TRUNC)
fc->atomic_o_trunc = 1;
+ if (arg->minor >= 9) {
+ /* LOOKUP has dependency on proto version */
+ if (arg->flags & FUSE_EXPORT_SUPPORT)
+ fc->export_support = 1;
+ }
if (arg->flags & FUSE_BIG_WRITES)
fc->big_writes = 1;
} else {
@@ -607,7 +781,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
arg->minor = FUSE_KERNEL_MINOR_VERSION;
arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE;
arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
- FUSE_BIG_WRITES;
+ FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES;
req->in.h.opcode = FUSE_INIT;
req->in.numargs = 1;
req->in.args[0].size = sizeof(*arg);
@@ -652,6 +826,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
sb->s_magic = FUSE_SUPER_MAGIC;
sb->s_op = &fuse_super_operations;
sb->s_maxbytes = MAX_LFS_FILESIZE;
+ sb->s_export_op = &fuse_export_operations;
file = fget(d.fd);
if (!file)
@@ -781,7 +956,7 @@ static inline void unregister_fuseblk(void)
}
#endif
-static void fuse_inode_init_once(struct kmem_cache *cachep, void *foo)
+static void fuse_inode_init_once(void *foo)
{
struct inode * inode = foo;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 6da0ab355b8..8b0806a3294 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -448,7 +448,7 @@ struct inode *gfs2_lookup_simple(struct inode *dip, const char *name)
struct qstr qstr;
struct inode *inode;
gfs2_str2qstr(&qstr, name);
- inode = gfs2_lookupi(dip, &qstr, 1, NULL);
+ inode = gfs2_lookupi(dip, &qstr, 1);
/* gfs2_lookupi has inconsistent callers: vfs
* related routines expect NULL for no entry found,
* gfs2_lookup_simple callers expect ENOENT
@@ -477,7 +477,7 @@ struct inode *gfs2_lookup_simple(struct inode *dip, const char *name)
*/
struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
- int is_root, struct nameidata *nd)
+ int is_root)
{
struct super_block *sb = dir->i_sb;
struct gfs2_inode *dip = GFS2_I(dir);
@@ -1173,7 +1173,7 @@ int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to)
break;
}
- tmp = gfs2_lookupi(dir, &dotdot, 1, NULL);
+ tmp = gfs2_lookupi(dir, &dotdot, 1);
if (IS_ERR(tmp)) {
error = PTR_ERR(tmp);
break;
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 6074c2506f7..58f9607d6a8 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -83,7 +83,7 @@ int gfs2_inode_refresh(struct gfs2_inode *ip);
int gfs2_dinode_dealloc(struct gfs2_inode *inode);
int gfs2_change_nlink(struct gfs2_inode *ip, int diff);
struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
- int is_root, struct nameidata *nd);
+ int is_root);
struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
unsigned int mode, dev_t dev);
int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index bcc668d0fad..bb2cc303ac2 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -24,7 +24,7 @@
#include "util.h"
#include "glock.h"
-static void gfs2_init_inode_once(struct kmem_cache *cachep, void *foo)
+static void gfs2_init_inode_once(void *foo)
{
struct gfs2_inode *ip = foo;
@@ -33,7 +33,7 @@ static void gfs2_init_inode_once(struct kmem_cache *cachep, void *foo)
ip->i_alloc = NULL;
}
-static void gfs2_init_glock_once(struct kmem_cache *cachep, void *foo)
+static void gfs2_init_glock_once(void *foo)
{
struct gfs2_glock *gl = foo;
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c
index 990d9f4bc46..9cda8536530 100644
--- a/fs/gfs2/ops_export.c
+++ b/fs/gfs2/ops_export.c
@@ -134,7 +134,7 @@ static struct dentry *gfs2_get_parent(struct dentry *child)
struct dentry *dentry;
gfs2_str2qstr(&dotdot, "..");
- inode = gfs2_lookupi(child->d_inode, &dotdot, 1, NULL);
+ inode = gfs2_lookupi(child->d_inode, &dotdot, 1);
if (!inode)
return ERR_PTR(-ENOENT);
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 1e252dfc529..e2c62f73a77 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -74,7 +74,7 @@ static int gfs2_create(struct inode *dir, struct dentry *dentry,
return PTR_ERR(inode);
}
- inode = gfs2_lookupi(dir, &dentry->d_name, 0, nd);
+ inode = gfs2_lookupi(dir, &dentry->d_name, 0);
if (inode) {
if (!IS_ERR(inode)) {
gfs2_holder_uninit(ghs);
@@ -109,7 +109,7 @@ static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
dentry->d_op = &gfs2_dops;
- inode = gfs2_lookupi(dir, &dentry->d_name, 0, nd);
+ inode = gfs2_lookupi(dir, &dentry->d_name, 0);
if (inode && IS_ERR(inode))
return ERR_CAST(inode);
@@ -915,12 +915,6 @@ int gfs2_permission(struct inode *inode, int mask)
return error;
}
-static int gfs2_iop_permission(struct inode *inode, int mask,
- struct nameidata *nd)
-{
- return gfs2_permission(inode, mask);
-}
-
static int setattr_size(struct inode *inode, struct iattr *attr)
{
struct gfs2_inode *ip = GFS2_I(inode);
@@ -1150,7 +1144,7 @@ static int gfs2_removexattr(struct dentry *dentry, const char *name)
}
const struct inode_operations gfs2_file_iops = {
- .permission = gfs2_iop_permission,
+ .permission = gfs2_permission,
.setattr = gfs2_setattr,
.getattr = gfs2_getattr,
.setxattr = gfs2_setxattr,
@@ -1169,7 +1163,7 @@ const struct inode_operations gfs2_dir_iops = {
.rmdir = gfs2_rmdir,
.mknod = gfs2_mknod,
.rename = gfs2_rename,
- .permission = gfs2_iop_permission,
+ .permission = gfs2_permission,
.setattr = gfs2_setattr,
.getattr = gfs2_getattr,
.setxattr = gfs2_setxattr,
@@ -1181,7 +1175,7 @@ const struct inode_operations gfs2_dir_iops = {
const struct inode_operations gfs2_symlink_iops = {
.readlink = gfs2_readlink,
.follow_link = gfs2_follow_link,
- .permission = gfs2_iop_permission,
+ .permission = gfs2_permission,
.setattr = gfs2_setattr,
.getattr = gfs2_getattr,
.setxattr = gfs2_setxattr,
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 63a8a902d9d..ca831991cbc 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -389,7 +389,7 @@ int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
break;
INIT_LIST_HEAD(&jd->extent_list);
- jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1, NULL);
+ jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1);
if (!jd->jd_inode || IS_ERR(jd->jd_inode)) {
if (!jd->jd_inode)
error = -ENOENT;
diff --git a/fs/hfs/bitmap.c b/fs/hfs/bitmap.c
index 24e75798ddf..c6e97366e8a 100644
--- a/fs/hfs/bitmap.c
+++ b/fs/hfs/bitmap.c
@@ -145,7 +145,7 @@ u32 hfs_vbm_search_free(struct super_block *sb, u32 goal, u32 *num_bits)
if (!*num_bits)
return 0;
- down(&HFS_SB(sb)->bitmap_lock);
+ mutex_lock(&HFS_SB(sb)->bitmap_lock);
bitmap = HFS_SB(sb)->bitmap;
pos = hfs_find_set_zero_bits(bitmap, HFS_SB(sb)->fs_ablocks, goal, num_bits);
@@ -162,7 +162,7 @@ u32 hfs_vbm_search_free(struct super_block *sb, u32 goal, u32 *num_bits)
HFS_SB(sb)->free_ablocks -= *num_bits;
hfs_bitmap_dirty(sb);
out:
- up(&HFS_SB(sb)->bitmap_lock);
+ mutex_unlock(&HFS_SB(sb)->bitmap_lock);
return pos;
}
@@ -205,7 +205,7 @@ int hfs_clear_vbm_bits(struct super_block *sb, u16 start, u16 count)
if ((start + count) > HFS_SB(sb)->fs_ablocks)
return -2;
- down(&HFS_SB(sb)->bitmap_lock);
+ mutex_lock(&HFS_SB(sb)->bitmap_lock);
/* bitmap is always on a 32-bit boundary */
curr = HFS_SB(sb)->bitmap + (start / 32);
len = count;
@@ -236,7 +236,7 @@ int hfs_clear_vbm_bits(struct super_block *sb, u16 start, u16 count)
}
out:
HFS_SB(sb)->free_ablocks += len;
- up(&HFS_SB(sb)->bitmap_lock);
+ mutex_unlock(&HFS_SB(sb)->bitmap_lock);
hfs_bitmap_dirty(sb);
return 0;
diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c
index f6621a78520..9b9d6395bad 100644
--- a/fs/hfs/btree.c
+++ b/fs/hfs/btree.c
@@ -40,7 +40,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke
{
struct hfs_mdb *mdb = HFS_SB(sb)->mdb;
HFS_I(tree->inode)->flags = 0;
- init_MUTEX(&HFS_I(tree->inode)->extents_lock);
+ mutex_init(&HFS_I(tree->inode)->extents_lock);
switch (id) {
case HFS_EXT_CNID:
hfs_inode_read_fork(tree->inode, mdb->drXTExtRec, mdb->drXTFlSize,
diff --git a/fs/hfs/extent.c b/fs/hfs/extent.c
index c176f67ba0a..2c16316d291 100644
--- a/fs/hfs/extent.c
+++ b/fs/hfs/extent.c
@@ -343,16 +343,16 @@ int hfs_get_block(struct inode *inode, sector_t block,
goto done;
}
- down(&HFS_I(inode)->extents_lock);
+ mutex_lock(&HFS_I(inode)->extents_lock);
res = hfs_ext_read_extent(inode, ablock);
if (!res)
dblock = hfs_ext_find_block(HFS_I(inode)->cached_extents,
ablock - HFS_I(inode)->cached_start);
else {
- up(&HFS_I(inode)->extents_lock);
+ mutex_unlock(&HFS_I(inode)->extents_lock);
return -EIO;
}
- up(&HFS_I(inode)->extents_lock);
+ mutex_unlock(&HFS_I(inode)->extents_lock);
done:
map_bh(bh_result, sb, HFS_SB(sb)->fs_start +
@@ -375,7 +375,7 @@ int hfs_extend_file(struct inode *inode)
u32 start, len, goal;
int res;
- down(&HFS_I(inode)->extents_lock);
+ mutex_lock(&HFS_I(inode)->extents_lock);
if (HFS_I(inode)->alloc_blocks == HFS_I(inode)->first_blocks)
goal = hfs_ext_lastblock(HFS_I(inode)->first_extents);
else {
@@ -425,7 +425,7 @@ int hfs_extend_file(struct inode *inode)
goto insert_extent;
}
out:
- up(&HFS_I(inode)->extents_lock);
+ mutex_unlock(&HFS_I(inode)->extents_lock);
if (!res) {
HFS_I(inode)->alloc_blocks += len;
mark_inode_dirty(inode);
@@ -487,7 +487,7 @@ void hfs_file_truncate(struct inode *inode)
if (blk_cnt == alloc_cnt)
goto out;
- down(&HFS_I(inode)->extents_lock);
+ mutex_lock(&HFS_I(inode)->extents_lock);
hfs_find_init(HFS_SB(sb)->ext_tree, &fd);
while (1) {
if (alloc_cnt == HFS_I(inode)->first_blocks) {
@@ -514,7 +514,7 @@ void hfs_file_truncate(struct inode *inode)
hfs_brec_remove(&fd);
}
hfs_find_exit(&fd);
- up(&HFS_I(inode)->extents_lock);
+ mutex_unlock(&HFS_I(inode)->extents_lock);
HFS_I(inode)->alloc_blocks = blk_cnt;
out:
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index 147374b6f67..9955232fdf8 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -11,6 +11,7 @@
#include <linux/slab.h>
#include <linux/types.h>
+#include <linux/mutex.h>
#include <linux/buffer_head.h>
#include <linux/fs.h>
@@ -53,7 +54,7 @@ struct hfs_inode_info {
struct list_head open_dir_list;
struct inode *rsrc_inode;
- struct semaphore extents_lock;
+ struct mutex extents_lock;
u16 alloc_blocks, clump_blocks;
sector_t fs_blocks;
@@ -139,7 +140,7 @@ struct hfs_sb_info {
struct nls_table *nls_io, *nls_disk;
- struct semaphore bitmap_lock;
+ struct mutex bitmap_lock;
unsigned long flags;
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 97f8446c4ff..7e19835efa2 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -150,7 +150,7 @@ struct inode *hfs_new_inode(struct inode *dir, struct qstr *name, int mode)
if (!inode)
return NULL;
- init_MUTEX(&HFS_I(inode)->extents_lock);
+ mutex_init(&HFS_I(inode)->extents_lock);
INIT_LIST_HEAD(&HFS_I(inode)->open_dir_list);
hfs_cat_build_key(sb, (btree_key *)&HFS_I(inode)->cat_key, dir->i_ino, name);
inode->i_ino = HFS_SB(sb)->next_id++;
@@ -281,7 +281,7 @@ static int hfs_read_inode(struct inode *inode, void *data)
HFS_I(inode)->flags = 0;
HFS_I(inode)->rsrc_inode = NULL;
- init_MUTEX(&HFS_I(inode)->extents_lock);
+ mutex_init(&HFS_I(inode)->extents_lock);
INIT_LIST_HEAD(&HFS_I(inode)->open_dir_list);
/* Initialize the inode */
@@ -511,8 +511,7 @@ void hfs_clear_inode(struct inode *inode)
}
}
-static int hfs_permission(struct inode *inode, int mask,
- struct nameidata *nd)
+static int hfs_permission(struct inode *inode, int mask)
{
if (S_ISREG(inode->i_mode) && mask & MAY_EXEC)
return 0;
@@ -523,8 +522,6 @@ static int hfs_file_open(struct inode *inode, struct file *file)
{
if (HFS_IS_RSRC(inode))
inode = HFS_I(inode)->rsrc_inode;
- if (atomic_read(&file->f_count) != 1)
- return 0;
atomic_inc(&HFS_I(inode)->opencnt);
return 0;
}
@@ -535,8 +532,6 @@ static int hfs_file_release(struct inode *inode, struct file *file)
if (HFS_IS_RSRC(inode))
inode = HFS_I(inode)->rsrc_inode;
- if (atomic_read(&file->f_count) != 0)
- return 0;
if (atomic_dec_and_test(&HFS_I(inode)->opencnt)) {
mutex_lock(&inode->i_mutex);
hfs_file_truncate(inode);
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 8cf67974adf..4abb1047c68 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -372,7 +372,7 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
sb->s_op = &hfs_super_operations;
sb->s_flags |= MS_NODIRATIME;
- init_MUTEX(&sbi->bitmap_lock);
+ mutex_init(&sbi->bitmap_lock);
res = hfs_mdb_get(sb);
if (res) {
@@ -432,7 +432,7 @@ static struct file_system_type hfs_fs_type = {
.fs_flags = FS_REQUIRES_DEV,
};
-static void hfs_init_once(struct kmem_cache *cachep, void *p)
+static void hfs_init_once(void *p)
{
struct hfs_inode_info *i = p;
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c
index 12e899cd788..fec8f61227f 100644
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -199,16 +199,16 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock,
goto done;
}
- down(&HFSPLUS_I(inode).extents_lock);
+ mutex_lock(&HFSPLUS_I(inode).extents_lock);
res = hfsplus_ext_read_extent(inode, ablock);
if (!res) {
dblock = hfsplus_ext_find_block(HFSPLUS_I(inode).cached_extents, ablock -
HFSPLUS_I(inode).cached_start);
} else {
- up(&HFSPLUS_I(inode).extents_lock);
+ mutex_unlock(&HFSPLUS_I(inode).extents_lock);
return -EIO;
}
- up(&HFSPLUS_I(inode).extents_lock);
+ mutex_unlock(&HFSPLUS_I(inode).extents_lock);
done:
dprint(DBG_EXTENT, "get_block(%lu): %llu - %u\n", inode->i_ino, (long long)iblock, dblock);
@@ -355,7 +355,7 @@ int hfsplus_file_extend(struct inode *inode)
return -ENOSPC;
}
- down(&HFSPLUS_I(inode).extents_lock);
+ mutex_lock(&HFSPLUS_I(inode).extents_lock);
if (HFSPLUS_I(inode).alloc_blocks == HFSPLUS_I(inode).first_blocks)
goal = hfsplus_ext_lastblock(HFSPLUS_I(inode).first_extents);
else {
@@ -408,7 +408,7 @@ int hfsplus_file_extend(struct inode *inode)
goto insert_extent;
}
out:
- up(&HFSPLUS_I(inode).extents_lock);
+ mutex_unlock(&HFSPLUS_I(inode).extents_lock);
if (!res) {
HFSPLUS_I(inode).alloc_blocks += len;
mark_inode_dirty(inode);
@@ -465,7 +465,7 @@ void hfsplus_file_truncate(struct inode *inode)
if (blk_cnt == alloc_cnt)
goto out;
- down(&HFSPLUS_I(inode).extents_lock);
+ mutex_lock(&HFSPLUS_I(inode).extents_lock);
hfs_find_init(HFSPLUS_SB(sb).ext_tree, &fd);
while (1) {
if (alloc_cnt == HFSPLUS_I(inode).first_blocks) {
@@ -492,7 +492,7 @@ void hfsplus_file_truncate(struct inode *inode)
hfs_brec_remove(&fd);
}
hfs_find_exit(&fd);
- up(&HFSPLUS_I(inode).extents_lock);
+ mutex_unlock(&HFSPLUS_I(inode).extents_lock);
HFSPLUS_I(inode).alloc_blocks = blk_cnt;
out:
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 9e59537b43d..f027a905225 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -11,6 +11,7 @@
#define _LINUX_HFSPLUS_FS_H
#include <linux/fs.h>
+#include <linux/mutex.h>
#include <linux/buffer_head.h>
#include "hfsplus_raw.h"
@@ -154,7 +155,7 @@ struct hfsplus_sb_info {
struct hfsplus_inode_info {
- struct semaphore extents_lock;
+ struct mutex extents_lock;
u32 clump_blocks, alloc_blocks;
sector_t fs_blocks;
/* Allocation extents from catalog record or volume header */
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 67e1c8b467c..b085d64a2b6 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -163,7 +163,7 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dent
inode->i_ino = dir->i_ino;
INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list);
- init_MUTEX(&HFSPLUS_I(inode).extents_lock);
+ mutex_init(&HFSPLUS_I(inode).extents_lock);
HFSPLUS_I(inode).flags = HFSPLUS_FLG_RSRC;
hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd);
@@ -238,7 +238,7 @@ static void hfsplus_set_perms(struct inode *inode, struct hfsplus_perm *perms)
perms->dev = cpu_to_be32(HFSPLUS_I(inode).dev);
}
-static int hfsplus_permission(struct inode *inode, int mask, struct nameidata *nd)
+static int hfsplus_permission(struct inode *inode, int mask)
{
/* MAY_EXEC is also used for lookup, if no x bit is set allow lookup,
* open_exec has the same test, so it's still not executable, if a x bit
@@ -254,8 +254,6 @@ static int hfsplus_file_open(struct inode *inode, struct file *file)
{
if (HFSPLUS_IS_RSRC(inode))
inode = HFSPLUS_I(inode).rsrc_inode;
- if (atomic_read(&file->f_count) != 1)
- return 0;
atomic_inc(&HFSPLUS_I(inode).opencnt);
return 0;
}
@@ -266,8 +264,6 @@ static int hfsplus_file_release(struct inode *inode, struct file *file)
if (HFSPLUS_IS_RSRC(inode))
inode = HFSPLUS_I(inode).rsrc_inode;
- if (atomic_read(&file->f_count) != 0)
- return 0;
if (atomic_dec_and_test(&HFSPLUS_I(inode).opencnt)) {
mutex_lock(&inode->i_mutex);
hfsplus_file_truncate(inode);
@@ -316,7 +312,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, int mode)
inode->i_nlink = 1;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list);
- init_MUTEX(&HFSPLUS_I(inode).extents_lock);
+ mutex_init(&HFSPLUS_I(inode).extents_lock);
atomic_set(&HFSPLUS_I(inode).opencnt, 0);
HFSPLUS_I(inode).flags = 0;
memset(HFSPLUS_I(inode).first_extents, 0, sizeof(hfsplus_extent_rec));
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index ce97a54518d..e834e578c93 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -34,7 +34,7 @@ struct inode *hfsplus_iget(struct super_block *sb, unsigned long ino)
return inode;
INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list);
- init_MUTEX(&HFSPLUS_I(inode).extents_lock);
+ mutex_init(&HFSPLUS_I(inode).extents_lock);
HFSPLUS_I(inode).flags = 0;
HFSPLUS_I(inode).rsrc_inode = NULL;
atomic_set(&HFSPLUS_I(inode).opencnt, 0);
@@ -485,7 +485,7 @@ static struct file_system_type hfsplus_fs_type = {
.fs_flags = FS_REQUIRES_DEV,
};
-static void hfsplus_init_once(struct kmem_cache *cachep, void *p)
+static void hfsplus_init_once(void *p)
{
struct hfsplus_inode_info *i = p;
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 5222345ddcc..d6ecabf4d23 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -822,7 +822,7 @@ int hostfs_rename(struct inode *from_ino, struct dentry *from,
return err;
}
-int hostfs_permission(struct inode *ino, int desired, struct nameidata *nd)
+int hostfs_permission(struct inode *ino, int desired)
{
char *name;
int r = 0, w = 0, x = 0, err;
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index d256559b410..d9c59a77544 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -415,7 +415,7 @@ again:
d_drop(dentry);
spin_lock(&dentry->d_lock);
if (atomic_read(&dentry->d_count) > 1 ||
- permission(inode, MAY_WRITE, NULL) ||
+ generic_permission(inode, MAY_WRITE, NULL) ||
!S_ISREG(inode->i_mode) ||
get_write_access(inode)) {
spin_unlock(&dentry->d_lock);
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index f63a699ec65..b8ae9c90ada 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -173,7 +173,7 @@ static void hpfs_destroy_inode(struct inode *inode)
kmem_cache_free(hpfs_inode_cachep, hpfs_i(inode));
}
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
{
struct hpfs_inode_info *ei = (struct hpfs_inode_info *) foo;
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 65077aa90f0..2b3d1828db9 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -655,20 +655,13 @@ static void *hppfs_follow_link(struct dentry *dentry, struct nameidata *nd)
return proc_dentry->d_inode->i_op->follow_link(proc_dentry, nd);
}
-int hppfs_permission(struct inode *inode, int mask, struct nameidata *nd)
-{
- return generic_permission(inode, mask, NULL);
-}
-
static const struct inode_operations hppfs_dir_iops = {
.lookup = hppfs_lookup,
- .permission = hppfs_permission,
};
static const struct inode_operations hppfs_link_iops = {
.readlink = hppfs_readlink,
.follow_link = hppfs_follow_link,
- .permission = hppfs_permission,
};
static struct inode *get_inode(struct super_block *sb, struct dentry *dentry)
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index aeabf80f81a..3f58923fb39 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -53,6 +53,7 @@ int sysctl_hugetlb_shm_group;
enum {
Opt_size, Opt_nr_inodes,
Opt_mode, Opt_uid, Opt_gid,
+ Opt_pagesize,
Opt_err,
};
@@ -62,6 +63,7 @@ static match_table_t tokens = {
{Opt_mode, "mode=%o"},
{Opt_uid, "uid=%u"},
{Opt_gid, "gid=%u"},
+ {Opt_pagesize, "pagesize=%s"},
{Opt_err, NULL},
};
@@ -80,6 +82,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
struct inode *inode = file->f_path.dentry->d_inode;
loff_t len, vma_len;
int ret;
+ struct hstate *h = hstate_file(file);
/*
* vma address alignment (but not the pgoff alignment) has
@@ -92,7 +95,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
vma->vm_flags |= VM_HUGETLB | VM_RESERVED;
vma->vm_ops = &hugetlb_vm_ops;
- if (vma->vm_pgoff & ~(HPAGE_MASK >> PAGE_SHIFT))
+ if (vma->vm_pgoff & ~(huge_page_mask(h) >> PAGE_SHIFT))
return -EINVAL;
vma_len = (loff_t)(vma->vm_end - vma->vm_start);
@@ -103,9 +106,9 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
ret = -ENOMEM;
len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
- if (vma->vm_flags & VM_MAYSHARE &&
- hugetlb_reserve_pages(inode, vma->vm_pgoff >> (HPAGE_SHIFT-PAGE_SHIFT),
- len >> HPAGE_SHIFT))
+ if (hugetlb_reserve_pages(inode,
+ vma->vm_pgoff >> huge_page_order(h),
+ len >> huge_page_shift(h), vma))
goto out;
ret = 0;
@@ -130,20 +133,21 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
unsigned long start_addr;
+ struct hstate *h = hstate_file(file);
- if (len & ~HPAGE_MASK)
+ if (len & ~huge_page_mask(h))
return -EINVAL;
if (len > TASK_SIZE)
return -ENOMEM;
if (flags & MAP_FIXED) {
- if (prepare_hugepage_range(addr, len))
+ if (prepare_hugepage_range(file, addr, len))
return -EINVAL;
return addr;
}
if (addr) {
- addr = ALIGN(addr, HPAGE_SIZE);
+ addr = ALIGN(addr, huge_page_size(h));
vma = find_vma(mm, addr);
if (TASK_SIZE - len >= addr &&
(!vma || addr + len <= vma->vm_start))
@@ -156,7 +160,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
start_addr = TASK_UNMAPPED_BASE;
full_search:
- addr = ALIGN(start_addr, HPAGE_SIZE);
+ addr = ALIGN(start_addr, huge_page_size(h));
for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
/* At this point: (!vma || addr < vma->vm_end). */
@@ -174,7 +178,7 @@ full_search:
if (!vma || addr + len <= vma->vm_start)
return addr;
- addr = ALIGN(vma->vm_end, HPAGE_SIZE);
+ addr = ALIGN(vma->vm_end, huge_page_size(h));
}
}
#endif
@@ -225,10 +229,11 @@ hugetlbfs_read_actor(struct page *page, unsigned long offset,
static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
size_t len, loff_t *ppos)
{
+ struct hstate *h = hstate_file(filp);
struct address_space *mapping = filp->f_mapping;
struct inode *inode = mapping->host;
- unsigned long index = *ppos >> HPAGE_SHIFT;
- unsigned long offset = *ppos & ~HPAGE_MASK;
+ unsigned long index = *ppos >> huge_page_shift(h);
+ unsigned long offset = *ppos & ~huge_page_mask(h);
unsigned long end_index;
loff_t isize;
ssize_t retval = 0;
@@ -243,17 +248,17 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
if (!isize)
goto out;
- end_index = (isize - 1) >> HPAGE_SHIFT;
+ end_index = (isize - 1) >> huge_page_shift(h);
for (;;) {
struct page *page;
- int nr, ret;
+ unsigned long nr, ret;
/* nr is the maximum number of bytes to copy from this page */
- nr = HPAGE_SIZE;
+ nr = huge_page_size(h);
if (index >= end_index) {
if (index > end_index)
goto out;
- nr = ((isize - 1) & ~HPAGE_MASK) + 1;
+ nr = ((isize - 1) & ~huge_page_mask(h)) + 1;
if (nr <= offset) {
goto out;
}
@@ -287,8 +292,8 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
offset += ret;
retval += ret;
len -= ret;
- index += offset >> HPAGE_SHIFT;
- offset &= ~HPAGE_MASK;
+ index += offset >> huge_page_shift(h);
+ offset &= ~huge_page_mask(h);
if (page)
page_cache_release(page);
@@ -298,7 +303,7 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
break;
}
out:
- *ppos = ((loff_t)index << HPAGE_SHIFT) + offset;
+ *ppos = ((loff_t)index << huge_page_shift(h)) + offset;
mutex_unlock(&inode->i_mutex);
return retval;
}
@@ -339,8 +344,9 @@ static void truncate_huge_page(struct page *page)
static void truncate_hugepages(struct inode *inode, loff_t lstart)
{
+ struct hstate *h = hstate_inode(inode);
struct address_space *mapping = &inode->i_data;
- const pgoff_t start = lstart >> HPAGE_SHIFT;
+ const pgoff_t start = lstart >> huge_page_shift(h);
struct pagevec pvec;
pgoff_t next;
int i, freed = 0;
@@ -441,7 +447,7 @@ hugetlb_vmtruncate_list(struct prio_tree_root *root, pgoff_t pgoff)
v_offset = 0;
__unmap_hugepage_range(vma,
- vma->vm_start + v_offset, vma->vm_end);
+ vma->vm_start + v_offset, vma->vm_end, NULL);
}
}
@@ -449,8 +455,9 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
{
pgoff_t pgoff;
struct address_space *mapping = inode->i_mapping;
+ struct hstate *h = hstate_inode(inode);
- BUG_ON(offset & ~HPAGE_MASK);
+ BUG_ON(offset & ~huge_page_mask(h));
pgoff = offset >> PAGE_SHIFT;
i_size_write(inode, offset);
@@ -465,6 +472,7 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = dentry->d_inode;
+ struct hstate *h = hstate_inode(inode);
int error;
unsigned int ia_valid = attr->ia_valid;
@@ -476,7 +484,7 @@ static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
if (ia_valid & ATTR_SIZE) {
error = -EINVAL;
- if (!(attr->ia_size & ~HPAGE_MASK))
+ if (!(attr->ia_size & ~huge_page_mask(h)))
error = hugetlb_vmtruncate(inode, attr->ia_size);
if (error)
goto out;
@@ -610,9 +618,10 @@ static int hugetlbfs_set_page_dirty(struct page *page)
static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb);
+ struct hstate *h = hstate_inode(dentry->d_inode);
buf->f_type = HUGETLBFS_MAGIC;
- buf->f_bsize = HPAGE_SIZE;
+ buf->f_bsize = huge_page_size(h);
if (sbinfo) {
spin_lock(&sbinfo->stat_lock);
/* If no limits set, just report 0 for max/free/used
@@ -696,7 +705,7 @@ static const struct address_space_operations hugetlbfs_aops = {
};
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
{
struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo;
@@ -743,6 +752,8 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
char *p, *rest;
substring_t args[MAX_OPT_ARGS];
int option;
+ unsigned long long size = 0;
+ enum { NO_SIZE, SIZE_STD, SIZE_PERCENT } setsize = NO_SIZE;
if (!options)
return 0;
@@ -773,17 +784,13 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
break;
case Opt_size: {
- unsigned long long size;
/* memparse() will accept a K/M/G without a digit */
if (!isdigit(*args[0].from))
goto bad_val;
size = memparse(args[0].from, &rest);
- if (*rest == '%') {
- size <<= HPAGE_SHIFT;
- size *= max_huge_pages;
- do_div(size, 100);
- }
- pconfig->nr_blocks = (size >> HPAGE_SHIFT);
+ setsize = SIZE_STD;
+ if (*rest == '%')
+ setsize = SIZE_PERCENT;
break;
}
@@ -794,6 +801,19 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
pconfig->nr_inodes = memparse(args[0].from, &rest);
break;
+ case Opt_pagesize: {
+ unsigned long ps;
+ ps = memparse(args[0].from, &rest);
+ pconfig->hstate = size_to_hstate(ps);
+ if (!pconfig->hstate) {
+ printk(KERN_ERR
+ "hugetlbfs: Unsupported page size %lu MB\n",
+ ps >> 20);
+ return -EINVAL;
+ }
+ break;
+ }
+
default:
printk(KERN_ERR "hugetlbfs: Bad mount option: \"%s\"\n",
p);
@@ -801,6 +821,18 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
break;
}
}
+
+ /* Do size after hstate is set up */
+ if (setsize > NO_SIZE) {
+ struct hstate *h = pconfig->hstate;
+ if (setsize == SIZE_PERCENT) {
+ size <<= huge_page_shift(h);
+ size *= h->max_huge_pages;
+ do_div(size, 100);
+ }
+ pconfig->nr_blocks = (size >> huge_page_shift(h));
+ }
+
return 0;
bad_val:
@@ -825,6 +857,7 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
config.uid = current->fsuid;
config.gid = current->fsgid;
config.mode = 0755;
+ config.hstate = &default_hstate;
ret = hugetlbfs_parse_options(data, &config);
if (ret)
return ret;
@@ -833,14 +866,15 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
if (!sbinfo)
return -ENOMEM;
sb->s_fs_info = sbinfo;
+ sbinfo->hstate = config.hstate;
spin_lock_init(&sbinfo->stat_lock);
sbinfo->max_blocks = config.nr_blocks;
sbinfo->free_blocks = config.nr_blocks;
sbinfo->max_inodes = config.nr_inodes;
sbinfo->free_inodes = config.nr_inodes;
sb->s_maxbytes = MAX_LFS_FILESIZE;
- sb->s_blocksize = HPAGE_SIZE;
- sb->s_blocksize_bits = HPAGE_SHIFT;
+ sb->s_blocksize = huge_page_size(config.hstate);
+ sb->s_blocksize_bits = huge_page_shift(config.hstate);
sb->s_magic = HUGETLBFS_MAGIC;
sb->s_op = &hugetlbfs_ops;
sb->s_time_gran = 1;
@@ -942,7 +976,8 @@ struct file *hugetlb_file_setup(const char *name, size_t size)
goto out_dentry;
error = -ENOMEM;
- if (hugetlb_reserve_pages(inode, 0, size >> HPAGE_SHIFT))
+ if (hugetlb_reserve_pages(inode, 0,
+ size >> huge_page_shift(hstate_inode(inode)), NULL))
goto out_inode;
d_instantiate(dentry, inode);
diff --git a/fs/inode.c b/fs/inode.c
index c36d9480335..b6726f64453 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -209,7 +209,7 @@ void inode_init_once(struct inode *inode)
INIT_LIST_HEAD(&inode->i_dentry);
INIT_LIST_HEAD(&inode->i_devices);
INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
- rwlock_init(&inode->i_data.tree_lock);
+ spin_lock_init(&inode->i_data.tree_lock);
spin_lock_init(&inode->i_data.i_mmap_lock);
INIT_LIST_HEAD(&inode->i_data.private_list);
spin_lock_init(&inode->i_data.private_lock);
@@ -224,7 +224,7 @@ void inode_init_once(struct inode *inode)
EXPORT_SYMBOL(inode_init_once);
-static void init_once(struct kmem_cache * cachep, void *foo)
+static void init_once(void *foo)
{
struct inode * inode = (struct inode *) foo;
diff --git a/fs/inotify_user.c b/fs/inotify_user.c
index 6676c06bb7c..60249429a25 100644
--- a/fs/inotify_user.c
+++ b/fs/inotify_user.c
@@ -354,20 +354,20 @@ static void inotify_dev_event_dequeue(struct inotify_device *dev)
}
/*
- * find_inode - resolve a user-given path to a specific inode and return a nd
+ * find_inode - resolve a user-given path to a specific inode
*/
-static int find_inode(const char __user *dirname, struct nameidata *nd,
+static int find_inode(const char __user *dirname, struct path *path,
unsigned flags)
{
int error;
- error = __user_walk(dirname, flags, nd);
+ error = user_path_at(AT_FDCWD, dirname, flags, path);
if (error)
return error;
/* you can only watch an inode if you have read permissions on it */
- error = vfs_permission(nd, MAY_READ);
+ error = inode_permission(path->dentry->d_inode, MAY_READ);
if (error)
- path_put(&nd->path);
+ path_put(path);
return error;
}
@@ -566,7 +566,7 @@ static const struct inotify_operations inotify_user_ops = {
.destroy_watch = free_inotify_user_watch,
};
-asmlinkage long sys_inotify_init(void)
+asmlinkage long sys_inotify_init1(int flags)
{
struct inotify_device *dev;
struct inotify_handle *ih;
@@ -574,7 +574,14 @@ asmlinkage long sys_inotify_init(void)
struct file *filp;
int fd, ret;
- fd = get_unused_fd();
+ /* Check the IN_* constants for consistency. */
+ BUILD_BUG_ON(IN_CLOEXEC != O_CLOEXEC);
+ BUILD_BUG_ON(IN_NONBLOCK != O_NONBLOCK);
+
+ if (flags & ~(IN_CLOEXEC | IN_NONBLOCK))
+ return -EINVAL;
+
+ fd = get_unused_fd_flags(flags & O_CLOEXEC);
if (fd < 0)
return fd;
@@ -610,7 +617,7 @@ asmlinkage long sys_inotify_init(void)
filp->f_path.dentry = dget(inotify_mnt->mnt_root);
filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping;
filp->f_mode = FMODE_READ;
- filp->f_flags = O_RDONLY;
+ filp->f_flags = O_RDONLY | (flags & O_NONBLOCK);
filp->private_data = dev;
INIT_LIST_HEAD(&dev->events);
@@ -638,11 +645,16 @@ out_put_fd:
return ret;
}
-asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
+asmlinkage long sys_inotify_init(void)
+{
+ return sys_inotify_init1(0);
+}
+
+asmlinkage long sys_inotify_add_watch(int fd, const char __user *pathname, u32 mask)
{
struct inode *inode;
struct inotify_device *dev;
- struct nameidata nd;
+ struct path path;
struct file *filp;
int ret, fput_needed;
unsigned flags = 0;
@@ -662,12 +674,12 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
if (mask & IN_ONLYDIR)
flags |= LOOKUP_DIRECTORY;
- ret = find_inode(path, &nd, flags);
+ ret = find_inode(pathname, &path, flags);
if (unlikely(ret))
goto fput_and_out;
- /* inode held in place by reference to nd; dev by fget on fd */
- inode = nd.path.dentry->d_inode;
+ /* inode held in place by reference to path; dev by fget on fd */
+ inode = path.dentry->d_inode;
dev = filp->private_data;
mutex_lock(&dev->up_mutex);
@@ -676,7 +688,7 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
ret = create_watch(dev, inode, mask);
mutex_unlock(&dev->up_mutex);
- path_put(&nd.path);
+ path_put(&path);
fput_and_out:
fput_light(filp, fput_needed);
return ret;
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 044a254d526..26948a6033b 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -73,7 +73,7 @@ static void isofs_destroy_inode(struct inode *inode)
kmem_cache_free(isofs_inode_cachep, ISOFS_I(inode));
}
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
{
struct iso_inode_info *ei = foo;
diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index 6bd48f0a704..c2fb2dd0131 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -209,6 +209,11 @@ repeat:
while (rs.len > 2) { /* There may be one byte for padding somewhere */
rr = (struct rock_ridge *)rs.chr;
+ /*
+ * Ignore rock ridge info if rr->len is out of range, but
+ * don't return -EIO because that would make the file
+ * invisible.
+ */
if (rr->len < 3)
goto out; /* Something got screwed up here */
sig = isonum_721(rs.chr);
@@ -216,8 +221,12 @@ repeat:
goto eio;
rs.chr += rr->len;
rs.len -= rr->len;
+ /*
+ * As above, just ignore the rock ridge info if rr->len
+ * is bogus.
+ */
if (rs.len < 0)
- goto eio; /* corrupted isofs */
+ goto out; /* Something got screwed up here */
switch (sig) {
case SIG('R', 'R'):
@@ -307,6 +316,11 @@ parse_rock_ridge_inode_internal(struct iso_directory_record *de,
repeat:
while (rs.len > 2) { /* There may be one byte for padding somewhere */
rr = (struct rock_ridge *)rs.chr;
+ /*
+ * Ignore rock ridge info if rr->len is out of range, but
+ * don't return -EIO because that would make the file
+ * invisible.
+ */
if (rr->len < 3)
goto out; /* Something got screwed up here */
sig = isonum_721(rs.chr);
@@ -314,8 +328,12 @@ repeat:
goto eio;
rs.chr += rr->len;
rs.len -= rr->len;
+ /*
+ * As above, just ignore the rock ridge info if rr->len
+ * is bogus.
+ */
if (rs.len < 0)
- goto eio; /* corrupted isofs */
+ goto out; /* Something got screwed up here */
switch (sig) {
#ifndef CONFIG_ZISOFS /* No flag for SF or ZF */
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 5a8ca61498c..ae08c057e75 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -36,7 +36,7 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
/*
* When an ext3-ordered file is truncated, it is possible that many pages are
- * not sucessfully freed, because they are attached to a committing transaction.
+ * not successfully freed, because they are attached to a committing transaction.
* After the transaction commits, these pages are left on the LRU, with no
* ->mapping, and with attached buffers. These pages are trivially reclaimable
* by the VM, but their apparent absence upsets the VM accounting, and it makes
@@ -45,8 +45,8 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
* So here, we have a buffer which has just come off the forget list. Look to
* see if we can strip all buffers from the backing page.
*
- * Called under lock_journal(), and possibly under journal_datalist_lock. The
- * caller provided us with a ref against the buffer, and we drop that here.
+ * Called under journal->j_list_lock. The caller provided us with a ref
+ * against the buffer, and we drop that here.
*/
static void release_buffer_page(struct buffer_head *bh)
{
@@ -63,7 +63,7 @@ static void release_buffer_page(struct buffer_head *bh)
goto nope;
/* OK, it's a truncated page */
- if (TestSetPageLocked(page))
+ if (!trylock_page(page))
goto nope;
page_cache_get(page);
@@ -78,6 +78,19 @@ nope:
}
/*
+ * Decrement reference counter for data buffer. If it has been marked
+ * 'BH_Freed', release it and the page to which it belongs if possible.
+ */
+static void release_data_buffer(struct buffer_head *bh)
+{
+ if (buffer_freed(bh)) {
+ clear_buffer_freed(bh);
+ release_buffer_page(bh);
+ } else
+ put_bh(bh);
+}
+
+/*
* Try to acquire jbd_lock_bh_state() against the buffer, when j_list_lock is
* held. For ranking reasons we must trylock. If we lose, schedule away and
* return 0. j_list_lock is dropped in this case.
@@ -172,7 +185,7 @@ static void journal_do_submit_data(struct buffer_head **wbuf, int bufs)
/*
* Submit all the data buffers to disk
*/
-static void journal_submit_data_buffers(journal_t *journal,
+static int journal_submit_data_buffers(journal_t *journal,
transaction_t *commit_transaction)
{
struct journal_head *jh;
@@ -180,6 +193,7 @@ static void journal_submit_data_buffers(journal_t *journal,
int locked;
int bufs = 0;
struct buffer_head **wbuf = journal->j_wbuf;
+ int err = 0;
/*
* Whenever we unlock the journal and sleep, things can get added
@@ -207,7 +221,7 @@ write_out_data:
* blocking lock_buffer().
*/
if (buffer_dirty(bh)) {
- if (test_set_buffer_locked(bh)) {
+ if (!trylock_buffer(bh)) {
BUFFER_TRACE(bh, "needs blocking lock");
spin_unlock(&journal->j_list_lock);
/* Write out all data to prevent deadlocks */
@@ -231,7 +245,7 @@ write_out_data:
if (locked)
unlock_buffer(bh);
BUFFER_TRACE(bh, "already cleaned up");
- put_bh(bh);
+ release_data_buffer(bh);
continue;
}
if (locked && test_clear_buffer_dirty(bh)) {
@@ -253,15 +267,17 @@ write_out_data:
put_bh(bh);
} else {
BUFFER_TRACE(bh, "writeout complete: unfile");
+ if (unlikely(!buffer_uptodate(bh)))
+ err = -EIO;
__journal_unfile_buffer(jh);
jbd_unlock_bh_state(bh);
if (locked)
unlock_buffer(bh);
journal_remove_journal_head(bh);
- /* Once for our safety reference, once for
+ /* One for our safety reference, other for
* journal_remove_journal_head() */
put_bh(bh);
- put_bh(bh);
+ release_data_buffer(bh);
}
if (need_resched() || spin_needbreak(&journal->j_list_lock)) {
@@ -271,6 +287,8 @@ write_out_data:
}
spin_unlock(&journal->j_list_lock);
journal_do_submit_data(wbuf, bufs);
+
+ return err;
}
/*
@@ -410,8 +428,7 @@ void journal_commit_transaction(journal_t *journal)
* Now start flushing things to disk, in the order they appear
* on the transaction lists. Data blocks go first.
*/
- err = 0;
- journal_submit_data_buffers(journal, commit_transaction);
+ err = journal_submit_data_buffers(journal, commit_transaction);
/*
* Wait for all previously submitted IO to complete.
@@ -426,10 +443,21 @@ void journal_commit_transaction(journal_t *journal)
if (buffer_locked(bh)) {
spin_unlock(&journal->j_list_lock);
wait_on_buffer(bh);
- if (unlikely(!buffer_uptodate(bh)))
- err = -EIO;
spin_lock(&journal->j_list_lock);
}
+ if (unlikely(!buffer_uptodate(bh))) {
+ if (!trylock_page(bh->b_page)) {
+ spin_unlock(&journal->j_list_lock);
+ lock_page(bh->b_page);
+ spin_lock(&journal->j_list_lock);
+ }
+ if (bh->b_page->mapping)
+ set_bit(AS_EIO, &bh->b_page->mapping->flags);
+
+ unlock_page(bh->b_page);
+ SetPageError(bh->b_page);
+ err = -EIO;
+ }
if (!inverted_lock(journal, bh)) {
put_bh(bh);
spin_lock(&journal->j_list_lock);
@@ -443,17 +471,21 @@ void journal_commit_transaction(journal_t *journal)
} else {
jbd_unlock_bh_state(bh);
}
- put_bh(bh);
+ release_data_buffer(bh);
cond_resched_lock(&journal->j_list_lock);
}
spin_unlock(&journal->j_list_lock);
- if (err)
- journal_abort(journal, err);
+ if (err) {
+ char b[BDEVNAME_SIZE];
- journal_write_revoke_records(journal, commit_transaction);
+ printk(KERN_WARNING
+ "JBD: Detected IO errors while flushing file data "
+ "on %s\n", bdevname(journal->j_fs_dev, b));
+ err = 0;
+ }
- jbd_debug(3, "JBD: commit phase 2\n");
+ journal_write_revoke_records(journal, commit_transaction);
/*
* If we found any dirty or locked buffers, then we should have
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index b99c3b3654c..aa7143a8349 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -68,7 +68,6 @@ EXPORT_SYMBOL(journal_set_features);
EXPORT_SYMBOL(journal_create);
EXPORT_SYMBOL(journal_load);
EXPORT_SYMBOL(journal_destroy);
-EXPORT_SYMBOL(journal_update_superblock);
EXPORT_SYMBOL(journal_abort);
EXPORT_SYMBOL(journal_errno);
EXPORT_SYMBOL(journal_ack_err);
@@ -1636,9 +1635,10 @@ static int journal_init_journal_head_cache(void)
static void journal_destroy_journal_head_cache(void)
{
- J_ASSERT(journal_head_cache != NULL);
- kmem_cache_destroy(journal_head_cache);
- journal_head_cache = NULL;
+ if (journal_head_cache) {
+ kmem_cache_destroy(journal_head_cache);
+ journal_head_cache = NULL;
+ }
}
/*
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index 1bb43e987f4..c7bd649bbbd 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -166,138 +166,123 @@ static struct jbd_revoke_record_s *find_revoke_record(journal_t *journal,
return NULL;
}
+void journal_destroy_revoke_caches(void)
+{
+ if (revoke_record_cache) {
+ kmem_cache_destroy(revoke_record_cache);
+ revoke_record_cache = NULL;
+ }
+ if (revoke_table_cache) {
+ kmem_cache_destroy(revoke_table_cache);
+ revoke_table_cache = NULL;
+ }
+}
+
int __init journal_init_revoke_caches(void)
{
+ J_ASSERT(!revoke_record_cache);
+ J_ASSERT(!revoke_table_cache);
+
revoke_record_cache = kmem_cache_create("revoke_record",
sizeof(struct jbd_revoke_record_s),
0,
SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
NULL);
if (!revoke_record_cache)
- return -ENOMEM;
+ goto record_cache_failure;
revoke_table_cache = kmem_cache_create("revoke_table",
sizeof(struct jbd_revoke_table_s),
0, SLAB_TEMPORARY, NULL);
- if (!revoke_table_cache) {
- kmem_cache_destroy(revoke_record_cache);
- revoke_record_cache = NULL;
- return -ENOMEM;
- }
+ if (!revoke_table_cache)
+ goto table_cache_failure;
+
return 0;
-}
-void journal_destroy_revoke_caches(void)
-{
- kmem_cache_destroy(revoke_record_cache);
- revoke_record_cache = NULL;
- kmem_cache_destroy(revoke_table_cache);
- revoke_table_cache = NULL;
+table_cache_failure:
+ journal_destroy_revoke_caches();
+record_cache_failure:
+ return -ENOMEM;
}
-/* Initialise the revoke table for a given journal to a given size. */
-
-int journal_init_revoke(journal_t *journal, int hash_size)
+static struct jbd_revoke_table_s *journal_init_revoke_table(int hash_size)
{
- int shift, tmp;
+ int shift = 0;
+ int tmp = hash_size;
+ struct jbd_revoke_table_s *table;
- J_ASSERT (journal->j_revoke_table[0] == NULL);
+ table = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL);
+ if (!table)
+ goto out;
- shift = 0;
- tmp = hash_size;
while((tmp >>= 1UL) != 0UL)
shift++;
- journal->j_revoke_table[0] = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL);
- if (!journal->j_revoke_table[0])
- return -ENOMEM;
- journal->j_revoke = journal->j_revoke_table[0];
-
- /* Check that the hash_size is a power of two */
- J_ASSERT(is_power_of_2(hash_size));
-
- journal->j_revoke->hash_size = hash_size;
-
- journal->j_revoke->hash_shift = shift;
-
- journal->j_revoke->hash_table =
+ table->hash_size = hash_size;
+ table->hash_shift = shift;
+ table->hash_table =
kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
- if (!journal->j_revoke->hash_table) {
- kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]);
- journal->j_revoke = NULL;
- return -ENOMEM;
+ if (!table->hash_table) {
+ kmem_cache_free(revoke_table_cache, table);
+ table = NULL;
+ goto out;
}
for (tmp = 0; tmp < hash_size; tmp++)
- INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]);
+ INIT_LIST_HEAD(&table->hash_table[tmp]);
- journal->j_revoke_table[1] = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL);
- if (!journal->j_revoke_table[1]) {
- kfree(journal->j_revoke_table[0]->hash_table);
- kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]);
- return -ENOMEM;
+out:
+ return table;
+}
+
+static void journal_destroy_revoke_table(struct jbd_revoke_table_s *table)
+{
+ int i;
+ struct list_head *hash_list;
+
+ for (i = 0; i < table->hash_size; i++) {
+ hash_list = &table->hash_table[i];
+ J_ASSERT(list_empty(hash_list));
}
- journal->j_revoke = journal->j_revoke_table[1];
+ kfree(table->hash_table);
+ kmem_cache_free(revoke_table_cache, table);
+}
- /* Check that the hash_size is a power of two */
+/* Initialise the revoke table for a given journal to a given size. */
+int journal_init_revoke(journal_t *journal, int hash_size)
+{
+ J_ASSERT(journal->j_revoke_table[0] == NULL);
J_ASSERT(is_power_of_2(hash_size));
- journal->j_revoke->hash_size = hash_size;
+ journal->j_revoke_table[0] = journal_init_revoke_table(hash_size);
+ if (!journal->j_revoke_table[0])
+ goto fail0;
- journal->j_revoke->hash_shift = shift;
+ journal->j_revoke_table[1] = journal_init_revoke_table(hash_size);
+ if (!journal->j_revoke_table[1])
+ goto fail1;
- journal->j_revoke->hash_table =
- kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
- if (!journal->j_revoke->hash_table) {
- kfree(journal->j_revoke_table[0]->hash_table);
- kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]);
- kmem_cache_free(revoke_table_cache, journal->j_revoke_table[1]);
- journal->j_revoke = NULL;
- return -ENOMEM;
- }
-
- for (tmp = 0; tmp < hash_size; tmp++)
- INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]);
+ journal->j_revoke = journal->j_revoke_table[1];
spin_lock_init(&journal->j_revoke_lock);
return 0;
-}
-/* Destoy a journal's revoke table. The table must already be empty! */
+fail1:
+ journal_destroy_revoke_table(journal->j_revoke_table[0]);
+fail0:
+ return -ENOMEM;
+}
+/* Destroy a journal's revoke table. The table must already be empty! */
void journal_destroy_revoke(journal_t *journal)
{
- struct jbd_revoke_table_s *table;
- struct list_head *hash_list;
- int i;
-
- table = journal->j_revoke_table[0];
- if (!table)
- return;
-
- for (i=0; i<table->hash_size; i++) {
- hash_list = &table->hash_table[i];
- J_ASSERT (list_empty(hash_list));
- }
-
- kfree(table->hash_table);
- kmem_cache_free(revoke_table_cache, table);
- journal->j_revoke = NULL;
-
- table = journal->j_revoke_table[1];
- if (!table)
- return;
-
- for (i=0; i<table->hash_size; i++) {
- hash_list = &table->hash_table[i];
- J_ASSERT (list_empty(hash_list));
- }
-
- kfree(table->hash_table);
- kmem_cache_free(revoke_table_cache, table);
journal->j_revoke = NULL;
+ if (journal->j_revoke_table[0])
+ journal_destroy_revoke_table(journal->j_revoke_table[0]);
+ if (journal->j_revoke_table[1])
+ journal_destroy_revoke_table(journal->j_revoke_table[1]);
}
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 67ff2024c23..0540ca27a44 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -291,7 +291,7 @@ handle_t *journal_start(journal_t *journal, int nblocks)
goto out;
}
- lock_acquire(&handle->h_lockdep_map, 0, 0, 0, 2, _THIS_IP_);
+ lock_map_acquire(&handle->h_lockdep_map);
out:
return handle;
@@ -1448,7 +1448,7 @@ int journal_stop(handle_t *handle)
spin_unlock(&journal->j_state_lock);
}
- lock_release(&handle->h_lockdep_map, 1, _THIS_IP_);
+ lock_map_release(&handle->h_lockdep_map);
jbd_free_handle(handle);
return err;
@@ -1648,12 +1648,42 @@ out:
return;
}
+/*
+ * journal_try_to_free_buffers() could race with journal_commit_transaction()
+ * The latter might still hold a reference count on buffers when inspecting
+ * them on t_syncdata_list or t_locked_list.
+ *
+ * journal_try_to_free_buffers() will call this function to
+ * wait for the current transaction to finish syncing data buffers, before
+ * trying to free that buffer.
+ *
+ * Takes journal->j_state_lock itself, so the caller must not hold it.
+ */
+static void journal_wait_for_transaction_sync_data(journal_t *journal)
+{
+ transaction_t *transaction = NULL;
+ tid_t tid;
+
+ spin_lock(&journal->j_state_lock);
+ transaction = journal->j_committing_transaction;
+
+ if (!transaction) {
+ spin_unlock(&journal->j_state_lock);
+ return;
+ }
+
+ tid = transaction->t_tid;
+ spin_unlock(&journal->j_state_lock);
+ log_wait_commit(journal, tid);
+}
/**
* int journal_try_to_free_buffers() - try to free page buffers.
* @journal: journal for operation
* @page: to try and free
- * @unused_gfp_mask: unused
+ * @gfp_mask: we use the mask to detect how hard we should try to release
+ * buffers. If __GFP_WAIT and __GFP_FS are both set, we wait for commit code to
+ * release the buffers.
*
*
* For all the buffers on this page,
@@ -1682,9 +1712,11 @@ out:
* journal_try_to_free_buffer() is changing its state. But that
* cannot happen because we never reallocate freed data as metadata
* while the data is part of a transaction. Yes?
+ *
+ * Return 0 on failure, 1 on success
*/
int journal_try_to_free_buffers(journal_t *journal,
- struct page *page, gfp_t unused_gfp_mask)
+ struct page *page, gfp_t gfp_mask)
{
struct buffer_head *head;
struct buffer_head *bh;
@@ -1713,7 +1745,28 @@ int journal_try_to_free_buffers(journal_t *journal,
if (buffer_jbd(bh))
goto busy;
} while ((bh = bh->b_this_page) != head);
+
ret = try_to_free_buffers(page);
+
+ /*
+ * There are a number of places where journal_try_to_free_buffers()
+ * could race with journal_commit_transaction(); the latter still
+ * holds a reference to the buffers to be freed while processing them,
+ * so try_to_free_buffers() fails to free those buffers. Some callers
+ * of releasepage() require the page buffers to be dropped and otherwise
+ * treat the failure to free as an error (such as generic_file_direct_IO()).
+ *
+ * So, if the caller of try_to_release_page() wants the synchronous
+ * behaviour (i.e. make sure buffers are dropped upon return),
+ * let's wait for the current transaction to finish flushing
+ * dirty data buffers, then try to free those buffers again,
+ * with the journal locked.
+ */
+ if (ret == 0 && (gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)) {
+ journal_wait_for_transaction_sync_data(journal);
+ ret = try_to_free_buffers(page);
+ }
+
busy:
return ret;
}
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index f8b3be87322..f2ad061e95e 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -67,7 +67,7 @@ static void release_buffer_page(struct buffer_head *bh)
goto nope;
/* OK, it's a truncated page */
- if (TestSetPageLocked(page))
+ if (!trylock_page(page))
goto nope;
page_cache_get(page);
@@ -262,8 +262,18 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
jinode->i_flags |= JI_COMMIT_RUNNING;
spin_unlock(&journal->j_list_lock);
err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping);
- if (!ret)
- ret = err;
+ if (err) {
+ /*
+ * Because AS_EIO is cleared by
+ * wait_on_page_writeback_range(), set it again so
+ * that user process can get -EIO from fsync().
+ */
+ set_bit(AS_EIO,
+ &jinode->i_vfs_inode->i_mapping->flags);
+
+ if (!ret)
+ ret = err;
+ }
spin_lock(&journal->j_list_lock);
jinode->i_flags &= ~JI_COMMIT_RUNNING;
wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
@@ -670,8 +680,14 @@ start_journal_io:
* commit block, which happens below in such setting.
*/
err = journal_finish_inode_data_buffers(journal, commit_transaction);
- if (err)
- jbd2_journal_abort(journal, err);
+ if (err) {
+ char b[BDEVNAME_SIZE];
+
+ printk(KERN_WARNING
+ "JBD2: Detected IO errors while flushing file data "
+ "on %s\n", bdevname(journal->j_fs_dev, b));
+ err = 0;
+ }
/* Lo and behold: we have just managed to send a transaction to
the log. Before we can commit it, wait for the IO so far to
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index b26c6d9fe6a..8207a01c4ed 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -68,7 +68,6 @@ EXPORT_SYMBOL(jbd2_journal_set_features);
EXPORT_SYMBOL(jbd2_journal_create);
EXPORT_SYMBOL(jbd2_journal_load);
EXPORT_SYMBOL(jbd2_journal_destroy);
-EXPORT_SYMBOL(jbd2_journal_update_superblock);
EXPORT_SYMBOL(jbd2_journal_abort);
EXPORT_SYMBOL(jbd2_journal_errno);
EXPORT_SYMBOL(jbd2_journal_ack_err);
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 4f7cadbb19f..e5d540588fa 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -301,7 +301,7 @@ handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
goto out;
}
- lock_acquire(&handle->h_lockdep_map, 0, 0, 0, 2, _THIS_IP_);
+ lock_map_acquire(&handle->h_lockdep_map);
out:
return handle;
}
@@ -1279,7 +1279,7 @@ int jbd2_journal_stop(handle_t *handle)
spin_unlock(&journal->j_state_lock);
}
- lock_release(&handle->h_lockdep_map, 1, _THIS_IP_);
+ lock_map_release(&handle->h_lockdep_map);
jbd2_free_handle(handle);
return err;
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 4c80404a9ab..d98713777a1 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -314,7 +314,7 @@ static int jffs2_check_acl(struct inode *inode, int mask)
return -EAGAIN;
}
-int jffs2_permission(struct inode *inode, int mask, struct nameidata *nd)
+int jffs2_permission(struct inode *inode, int mask)
{
return generic_permission(inode, mask, jffs2_check_acl);
}
diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h
index 0bb7f003fd8..8ca058aed38 100644
--- a/fs/jffs2/acl.h
+++ b/fs/jffs2/acl.h
@@ -28,7 +28,7 @@ struct jffs2_acl_header {
#define JFFS2_ACL_NOT_CACHED ((void *)-1)
-extern int jffs2_permission(struct inode *, int, struct nameidata *);
+extern int jffs2_permission(struct inode *, int);
extern int jffs2_acl_chmod(struct inode *);
extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *);
extern int jffs2_init_acl_post(struct inode *);
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index c0c141f6fde..cd219ef5525 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -38,7 +38,7 @@ const struct file_operations jffs2_dir_operations =
{
.read = generic_read_dir,
.readdir = jffs2_readdir,
- .ioctl = jffs2_ioctl,
+ .unlocked_ioctl=jffs2_ioctl,
.fsync = jffs2_fsync
};
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 5e920343b2c..5a98aa87c85 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -46,7 +46,7 @@ const struct file_operations jffs2_file_operations =
.aio_read = generic_file_aio_read,
.write = do_sync_write,
.aio_write = generic_file_aio_write,
- .ioctl = jffs2_ioctl,
+ .unlocked_ioctl=jffs2_ioctl,
.mmap = generic_file_readonly_mmap,
.fsync = jffs2_fsync,
.splice_read = generic_file_splice_read,
diff --git a/fs/jffs2/ioctl.c b/fs/jffs2/ioctl.c
index e2177210f62..9d41f43e47b 100644
--- a/fs/jffs2/ioctl.c
+++ b/fs/jffs2/ioctl.c
@@ -12,8 +12,7 @@
#include <linux/fs.h>
#include "nodelist.h"
-int jffs2_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
- unsigned long arg)
+long jffs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
/* Later, this will provide for lsattr.jffs2 and chattr.jffs2, which
will include compression support etc. */
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index 2cc866cf134..5e194a5c8e2 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -167,7 +167,7 @@ int jffs2_fsync(struct file *, struct dentry *, int);
int jffs2_do_readpage_unlock (struct inode *inode, struct page *pg);
/* ioctl.c */
-int jffs2_ioctl(struct inode *, struct file *, unsigned int, unsigned long);
+long jffs2_ioctl(struct file *, unsigned int, unsigned long);
/* symlink.c */
extern const struct inode_operations jffs2_symlink_inode_operations;
diff --git a/fs/jffs2/summary.c b/fs/jffs2/summary.c
index 629af01e5ad..6caf1e1ee26 100644
--- a/fs/jffs2/summary.c
+++ b/fs/jffs2/summary.c
@@ -23,6 +23,8 @@
int jffs2_sum_init(struct jffs2_sb_info *c)
{
+ uint32_t sum_size = max_t(uint32_t, c->sector_size, MAX_SUMMARY_SIZE);
+
c->summary = kzalloc(sizeof(struct jffs2_summary), GFP_KERNEL);
if (!c->summary) {
@@ -30,7 +32,7 @@ int jffs2_sum_init(struct jffs2_sb_info *c)
return -ENOMEM;
}
- c->summary->sum_buf = vmalloc(c->sector_size);
+ c->summary->sum_buf = kmalloc(sum_size, GFP_KERNEL);
if (!c->summary->sum_buf) {
JFFS2_WARNING("Can't allocate buffer for writing out summary information!\n");
@@ -49,7 +51,7 @@ void jffs2_sum_exit(struct jffs2_sb_info *c)
jffs2_sum_disable_collecting(c->summary);
- vfree(c->summary->sum_buf);
+ kfree(c->summary->sum_buf);
c->summary->sum_buf = NULL;
kfree(c->summary);
@@ -665,7 +667,7 @@ crc_err:
/* Write summary data to flash - helper function for jffs2_sum_write_sumnode() */
static int jffs2_sum_write_data(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
- uint32_t infosize, uint32_t datasize, int padsize)
+ uint32_t infosize, uint32_t datasize, int padsize)
{
struct jffs2_raw_summary isum;
union jffs2_sum_mem *temp;
@@ -676,6 +678,26 @@ static int jffs2_sum_write_data(struct jffs2_sb_info *c, struct jffs2_eraseblock
int ret;
size_t retlen;
+ if (padsize + datasize > MAX_SUMMARY_SIZE) {
+ /* It won't fit in the buffer. Abort summary for this jeb */
+ jffs2_sum_disable_collecting(c->summary);
+
+ JFFS2_WARNING("Summary too big (%d data, %d pad) in eraseblock at %08x\n",
+ datasize, padsize, jeb->offset);
+ /* Non-fatal */
+ return 0;
+ }
+ /* Is there enough space for summary? */
+ if (padsize < 0) {
+ /* don't try to write out summary for this jeb */
+ jffs2_sum_disable_collecting(c->summary);
+
+ JFFS2_WARNING("Not enough space for summary, padsize = %d\n",
+ padsize);
+ /* Non-fatal */
+ return 0;
+ }
+
memset(c->summary->sum_buf, 0xff, datasize);
memset(&isum, 0, sizeof(isum));
@@ -821,7 +843,7 @@ int jffs2_sum_write_sumnode(struct jffs2_sb_info *c)
{
int datasize, infosize, padsize;
struct jffs2_eraseblock *jeb;
- int ret;
+ int ret = 0;
dbg_summary("called\n");
@@ -841,16 +863,6 @@ int jffs2_sum_write_sumnode(struct jffs2_sb_info *c)
infosize += padsize;
datasize += padsize;
- /* Is there enough space for summary? */
- if (padsize < 0) {
- /* don't try to write out summary for this jeb */
- jffs2_sum_disable_collecting(c->summary);
-
- JFFS2_WARNING("Not enough space for summary, padsize = %d\n", padsize);
- spin_lock(&c->erase_completion_lock);
- return 0;
- }
-
ret = jffs2_sum_write_data(c, jeb, infosize, datasize, padsize);
spin_lock(&c->erase_completion_lock);
return ret;
diff --git a/fs/jffs2/summary.h b/fs/jffs2/summary.h
index 8bf34f2fa5c..60207a2ae95 100644
--- a/fs/jffs2/summary.h
+++ b/fs/jffs2/summary.h
@@ -13,6 +13,12 @@
#ifndef JFFS2_SUMMARY_H
#define JFFS2_SUMMARY_H
+/* Limit summary size to 64KiB so that we can kmalloc it. If the summary
+ is larger than that, we have to just ditch it and avoid using summary
+ for the eraseblock in question... and it probably doesn't hurt us much
+ anyway. */
+#define MAX_SUMMARY_SIZE 65536
+
#include <linux/uio.h>
#include <linux/jffs2.h>
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 7da69eae49e..efd401257ed 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -44,7 +44,7 @@ static void jffs2_destroy_inode(struct inode *inode)
kmem_cache_free(jffs2_inode_cachep, JFFS2_INODE_INFO(inode));
}
-static void jffs2_i_init_once(struct kmem_cache *cachep, void *foo)
+static void jffs2_i_init_once(void *foo)
{
struct jffs2_inode_info *f = foo;
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index 4d84bdc8829..d3e5c33665d 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -140,7 +140,7 @@ static int jfs_check_acl(struct inode *inode, int mask)
return -EAGAIN;
}
-int jfs_permission(struct inode *inode, int mask, struct nameidata *nd)
+int jfs_permission(struct inode *inode, int mask)
{
return generic_permission(inode, mask, jfs_check_acl);
}
diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h
index 455fa429204..88475f10a38 100644
--- a/fs/jfs/jfs_acl.h
+++ b/fs/jfs/jfs_acl.h
@@ -20,7 +20,7 @@
#ifdef CONFIG_JFS_POSIX_ACL
-int jfs_permission(struct inode *, int, struct nameidata *);
+int jfs_permission(struct inode *, int);
int jfs_init_acl(tid_t, struct inode *, struct inode *);
int jfs_setattr(struct dentry *, struct iattr *);
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 854ff0ec574..c350057087d 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -182,7 +182,7 @@ static inline void remove_metapage(struct page *page, struct metapage *mp)
#endif
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
{
struct metapage *mp = (struct metapage *)foo;
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 0288e6d7936..3630718be39 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -22,6 +22,7 @@
#include <linux/parser.h>
#include <linux/completion.h>
#include <linux/vfs.h>
+#include <linux/quotaops.h>
#include <linux/mount.h>
#include <linux/moduleparam.h>
#include <linux/kthread.h>
@@ -759,7 +760,7 @@ static struct file_system_type jfs_fs_type = {
.fs_flags = FS_REQUIRES_DEV,
};
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
{
struct jfs_inode_info *jfs_ip = (struct jfs_inode_info *) foo;
diff --git a/fs/libfs.c b/fs/libfs.c
index baeb71ee1cd..1add676a19d 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -216,8 +216,8 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name,
s->s_flags = MS_NOUSER;
s->s_maxbytes = ~0ULL;
- s->s_blocksize = 1024;
- s->s_blocksize_bits = 10;
+ s->s_blocksize = PAGE_SIZE;
+ s->s_blocksize_bits = PAGE_SHIFT;
s->s_magic = magic;
s->s_op = ops ? ops : &simple_super_operations;
s->s_time_gran = 1;
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 1f6dc518505..31668b690e0 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -582,7 +582,15 @@ again:
}
if (status < 0)
goto out_unlock;
- status = nlm_stat_to_errno(resp->status);
+ /*
+ * EAGAIN doesn't make sense for sleeping locks, and in some
+ * cases NLM_LCK_DENIED is returned for a permanent error. So
+ * turn it into an ENOLCK.
+ */
+ if (resp->status == nlm_lck_denied && (fl_flags & FL_SLEEP))
+ status = -ENOLCK;
+ else
+ status = nlm_stat_to_errno(resp->status);
out_unblock:
nlmclnt_finish_block(block);
out:
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 39944463933..4a714f64515 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -83,7 +83,7 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
{
struct nlm_host *host;
struct nlm_file *file;
- int rc = rpc_success;
+ __be32 rc = rpc_success;
dprintk("lockd: TEST4 called\n");
resp->cookie = argp->cookie;
@@ -116,7 +116,7 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
{
struct nlm_host *host;
struct nlm_file *file;
- int rc = rpc_success;
+ __be32 rc = rpc_success;
dprintk("lockd: LOCK called\n");
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 821b9acdfb6..cf0d5c2c318 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -418,8 +418,8 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
goto out;
case -EAGAIN:
ret = nlm_lck_denied;
- break;
- case -EINPROGRESS:
+ goto out;
+ case FILE_LOCK_DEFERRED:
if (wait)
break;
/* Filesystem lock operation is in progress
@@ -434,10 +434,6 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
goto out;
}
- ret = nlm_lck_denied;
- if (!wait)
- goto out;
-
ret = nlm_lck_blocked;
/* Append to list of blocked */
@@ -507,7 +503,7 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
}
error = vfs_test_lock(file->f_file, &lock->fl);
- if (error == -EINPROGRESS) {
+ if (error == FILE_LOCK_DEFERRED) {
ret = nlmsvc_defer_lock_rqst(rqstp, block);
goto out;
}
@@ -731,8 +727,7 @@ nlmsvc_grant_blocked(struct nlm_block *block)
switch (error) {
case 0:
break;
- case -EAGAIN:
- case -EINPROGRESS:
+ case FILE_LOCK_DEFERRED:
dprintk("lockd: lock still blocked error %d\n", error);
nlmsvc_insert_block(block, NLM_NEVER);
nlmsvc_release_block(block);
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 76019d2ff72..76262c1986f 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -112,7 +112,7 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
{
struct nlm_host *host;
struct nlm_file *file;
- int rc = rpc_success;
+ __be32 rc = rpc_success;
dprintk("lockd: TEST called\n");
resp->cookie = argp->cookie;
@@ -146,7 +146,7 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
{
struct nlm_host *host;
struct nlm_file *file;
- int rc = rpc_success;
+ __be32 rc = rpc_success;
dprintk("lockd: LOCK called\n");
diff --git a/fs/locks.c b/fs/locks.c
index dce8c747371..5eb259e3cd3 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -201,7 +201,7 @@ EXPORT_SYMBOL(locks_init_lock);
* Initialises the fields of the file lock which are invariant for
* free file_locks.
*/
-static void init_once(struct kmem_cache *cache, void *foo)
+static void init_once(void *foo)
{
struct file_lock *lock = (struct file_lock *) foo;
@@ -779,8 +779,10 @@ find_conflict:
if (!flock_locks_conflict(request, fl))
continue;
error = -EAGAIN;
- if (request->fl_flags & FL_SLEEP)
- locks_insert_block(fl, request);
+ if (!(request->fl_flags & FL_SLEEP))
+ goto out;
+ error = FILE_LOCK_DEFERRED;
+ locks_insert_block(fl, request);
goto out;
}
if (request->fl_flags & FL_ACCESS)
@@ -836,7 +838,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
error = -EDEADLK;
if (posix_locks_deadlock(request, fl))
goto out;
- error = -EAGAIN;
+ error = FILE_LOCK_DEFERRED;
locks_insert_block(fl, request);
goto out;
}
@@ -1035,7 +1037,7 @@ int posix_lock_file_wait(struct file *filp, struct file_lock *fl)
might_sleep ();
for (;;) {
error = posix_lock_file(filp, fl, NULL);
- if ((error != -EAGAIN) || !(fl->fl_flags & FL_SLEEP))
+ if (error != FILE_LOCK_DEFERRED)
break;
error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
if (!error)
@@ -1107,9 +1109,7 @@ int locks_mandatory_area(int read_write, struct inode *inode,
for (;;) {
error = __posix_lock_file(inode, &fl, NULL);
- if (error != -EAGAIN)
- break;
- if (!(fl.fl_flags & FL_SLEEP))
+ if (error != FILE_LOCK_DEFERRED)
break;
error = wait_event_interruptible(fl.fl_wait, !fl.fl_next);
if (!error) {
@@ -1531,7 +1531,7 @@ int flock_lock_file_wait(struct file *filp, struct file_lock *fl)
might_sleep();
for (;;) {
error = flock_lock_file(filp, fl);
- if ((error != -EAGAIN) || !(fl->fl_flags & FL_SLEEP))
+ if (error != FILE_LOCK_DEFERRED)
break;
error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
if (!error)
@@ -1716,17 +1716,17 @@ out:
* fl_grant is set. Callers expecting ->lock() to return asynchronously
* will only use F_SETLK, not F_SETLKW; they will set FL_SLEEP if (and only if)
* the request is for a blocking lock. When ->lock() does return asynchronously,
- * it must return -EINPROGRESS, and call ->fl_grant() when the lock
+ * it must return FILE_LOCK_DEFERRED, and call ->fl_grant() when the lock
* request completes.
* If the request is for non-blocking lock the file system should return
- * -EINPROGRESS then try to get the lock and call the callback routine with
- * the result. If the request timed out the callback routine will return a
+ * FILE_LOCK_DEFERRED then try to get the lock and call the callback routine
+ * with the result. If the request timed out the callback routine will return a
* nonzero return code and the file system should release the lock. The file
* system is also responsible to keep a corresponding posix lock when it
* grants a lock so the VFS can find out which locks are locally held and do
* the correct lock cleanup when required.
* The underlying filesystem must not drop the kernel lock or call
- * ->fl_grant() before returning to the caller with a -EINPROGRESS
+ * ->fl_grant() before returning to the caller with a FILE_LOCK_DEFERRED
* return code.
*/
int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf)
@@ -1738,6 +1738,30 @@ int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, str
}
EXPORT_SYMBOL_GPL(vfs_lock_file);
+static int do_lock_file_wait(struct file *filp, unsigned int cmd,
+ struct file_lock *fl)
+{
+ int error;
+
+ error = security_file_lock(filp, fl->fl_type);
+ if (error)
+ return error;
+
+ for (;;) {
+ error = vfs_lock_file(filp, cmd, fl, NULL);
+ if (error != FILE_LOCK_DEFERRED)
+ break;
+ error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
+ if (!error)
+ continue;
+
+ locks_delete_block(fl);
+ break;
+ }
+
+ return error;
+}
+
/* Apply the lock described by l to an open file descriptor.
* This implements both the F_SETLK and F_SETLKW commands of fcntl().
*/
@@ -1795,26 +1819,7 @@ again:
goto out;
}
- error = security_file_lock(filp, file_lock->fl_type);
- if (error)
- goto out;
-
- if (filp->f_op && filp->f_op->lock != NULL)
- error = filp->f_op->lock(filp, cmd, file_lock);
- else {
- for (;;) {
- error = posix_lock_file(filp, file_lock, NULL);
- if (error != -EAGAIN || cmd == F_SETLK)
- break;
- error = wait_event_interruptible(file_lock->fl_wait,
- !file_lock->fl_next);
- if (!error)
- continue;
-
- locks_delete_block(file_lock);
- break;
- }
- }
+ error = do_lock_file_wait(filp, cmd, file_lock);
/*
* Attempt to detect a close/fcntl race and recover by
@@ -1932,26 +1937,7 @@ again:
goto out;
}
- error = security_file_lock(filp, file_lock->fl_type);
- if (error)
- goto out;
-
- if (filp->f_op && filp->f_op->lock != NULL)
- error = filp->f_op->lock(filp, cmd, file_lock);
- else {
- for (;;) {
- error = posix_lock_file(filp, file_lock, NULL);
- if (error != -EAGAIN || cmd == F_SETLK64)
- break;
- error = wait_event_interruptible(file_lock->fl_wait,
- !file_lock->fl_next);
- if (!error)
- continue;
-
- locks_delete_block(file_lock);
- break;
- }
- }
+ error = do_lock_file_wait(filp, cmd, file_lock);
/*
* Attempt to detect a close/fcntl race and recover by
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 84f6242ba6f..d1d1eb84679 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -68,7 +68,7 @@ static void minix_destroy_inode(struct inode *inode)
kmem_cache_free(minix_inode_cachep, minix_i(inode));
}
-static void init_once(struct kmem_cache * cachep, void *foo)
+static void init_once(void *foo)
{
struct minix_inode_info *ei = (struct minix_inode_info *) foo;
@@ -256,9 +256,6 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
if (!s->s_root)
goto out_iput;
- if (!NO_TRUNCATE)
- s->s_root->d_op = &minix_dentry_operations;
-
if (!(s->s_flags & MS_RDONLY)) {
if (sbi->s_version != MINIX_V3) /* s_state is now out from V3 sb */
ms->s_state &= ~MINIX_VALID_FS;
diff --git a/fs/minix/minix.h b/fs/minix/minix.h
index 326edfe9610..e6a0b193bea 100644
--- a/fs/minix/minix.h
+++ b/fs/minix/minix.h
@@ -2,11 +2,6 @@
#include <linux/pagemap.h>
#include <linux/minix_fs.h>
-/*
- * change the define below to 0 if you want names > info->s_namelen chars to be
- * truncated. Else they will be disallowed (ENAMETOOLONG).
- */
-#define NO_TRUNCATE 1
#define INODE_VERSION(inode) minix_sb(inode->i_sb)->s_version
#define MINIX_V1 0x0001 /* original minix fs */
#define MINIX_V2 0x0002 /* minix V2 fs */
@@ -83,7 +78,6 @@ extern const struct inode_operations minix_file_inode_operations;
extern const struct inode_operations minix_dir_inode_operations;
extern const struct file_operations minix_file_operations;
extern const struct file_operations minix_dir_operations;
-extern struct dentry_operations minix_dentry_operations;
static inline struct minix_sb_info *minix_sb(struct super_block *sb)
{
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index 102241bc9c7..32b131cd612 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -18,30 +18,6 @@ static int add_nondir(struct dentry *dentry, struct inode *inode)
return err;
}
-static int minix_hash(struct dentry *dentry, struct qstr *qstr)
-{
- unsigned long hash;
- int i;
- const unsigned char *name;
-
- i = minix_sb(dentry->d_inode->i_sb)->s_namelen;
- if (i >= qstr->len)
- return 0;
- /* Truncate the name in place, avoids having to define a compare
- function. */
- qstr->len = i;
- name = qstr->name;
- hash = init_name_hash();
- while (i--)
- hash = partial_name_hash(*name++, hash);
- qstr->hash = end_name_hash(hash);
- return 0;
-}
-
-struct dentry_operations minix_dentry_operations = {
- .d_hash = minix_hash,
-};
-
static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
{
struct inode * inode = NULL;
diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c
index 1f7f2956412..e844b9809d2 100644
--- a/fs/msdos/namei.c
+++ b/fs/msdos/namei.c
@@ -14,12 +14,7 @@
/* Characters that are undesirable in an MS-DOS file name */
static unsigned char bad_chars[] = "*?<>|\"";
-static unsigned char bad_if_strict_pc[] = "+=,; ";
-/* GEMDOS is less restrictive */
-static unsigned char bad_if_strict_atari[] = " ";
-
-#define bad_if_strict(opts) \
- ((opts)->atari ? bad_if_strict_atari : bad_if_strict_pc)
+static unsigned char bad_if_strict[] = "+=,; ";
/***** Formats an MS-DOS file name. Rejects invalid names. */
static int msdos_format_name(const unsigned char *name, int len,
@@ -40,21 +35,20 @@ static int msdos_format_name(const unsigned char *name, int len,
/* Get rid of dot - test for it elsewhere */
name++;
len--;
- } else if (!opts->atari)
+ } else
return -EINVAL;
}
/*
- * disallow names that _really_ start with a dot for MS-DOS,
- * GEMDOS does not care
+ * disallow names that _really_ start with a dot
*/
- space = !opts->atari;
+ space = 1;
c = 0;
for (walk = res; len && walk - res < 8; walk++) {
c = *name++;
len--;
if (opts->name_check != 'r' && strchr(bad_chars, c))
return -EINVAL;
- if (opts->name_check == 's' && strchr(bad_if_strict(opts), c))
+ if (opts->name_check == 's' && strchr(bad_if_strict, c))
return -EINVAL;
if (c >= 'A' && c <= 'Z' && opts->name_check == 's')
return -EINVAL;
@@ -94,7 +88,7 @@ static int msdos_format_name(const unsigned char *name, int len,
if (opts->name_check != 'r' && strchr(bad_chars, c))
return -EINVAL;
if (opts->name_check == 's' &&
- strchr(bad_if_strict(opts), c))
+ strchr(bad_if_strict, c))
return -EINVAL;
if (c < ' ' || c == ':' || c == '\\')
return -EINVAL;
@@ -243,6 +237,7 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name,
int is_dir, int is_hid, int cluster,
struct timespec *ts, struct fat_slot_info *sinfo)
{
+ struct msdos_sb_info *sbi = MSDOS_SB(dir->i_sb);
struct msdos_dir_entry de;
__le16 time, date;
int err;
@@ -252,7 +247,7 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name,
if (is_hid)
de.attr |= ATTR_HIDDEN;
de.lcase = 0;
- fat_date_unix2dos(ts->tv_sec, &time, &date);
+ fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc);
de.cdate = de.adate = 0;
de.ctime = 0;
de.ctime_cs = 0;
diff --git a/fs/namei.c b/fs/namei.c
index 01e67dddcc3..4ea63ed5e79 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -31,7 +31,6 @@
#include <linux/file.h>
#include <linux/fcntl.h>
#include <linux/device_cgroup.h>
-#include <asm/namei.h>
#include <asm/uaccess.h>
#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])
@@ -185,6 +184,8 @@ int generic_permission(struct inode *inode, int mask,
{
umode_t mode = inode->i_mode;
+ mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
+
if (current->fsuid == inode->i_uid)
mode >>= 6;
else {
@@ -203,7 +204,7 @@ int generic_permission(struct inode *inode, int mask,
/*
* If the DACs are ok we don't need any capability check.
*/
- if (((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask))
+ if ((mask & ~mode) == 0)
return 0;
check_capabilities:
@@ -226,13 +227,9 @@ int generic_permission(struct inode *inode, int mask,
return -EACCES;
}
-int permission(struct inode *inode, int mask, struct nameidata *nd)
+int inode_permission(struct inode *inode, int mask)
{
- int retval, submask;
- struct vfsmount *mnt = NULL;
-
- if (nd)
- mnt = nd->path.mnt;
+ int retval;
if (mask & MAY_WRITE) {
umode_t mode = inode->i_mode;
@@ -251,19 +248,9 @@ int permission(struct inode *inode, int mask, struct nameidata *nd)
return -EACCES;
}
- if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
- /*
- * MAY_EXEC on regular files is denied if the fs is mounted
- * with the "noexec" flag.
- */
- if (mnt && (mnt->mnt_flags & MNT_NOEXEC))
- return -EACCES;
- }
-
/* Ordinary permission routines do not understand MAY_APPEND. */
- submask = mask & ~MAY_APPEND;
if (inode->i_op && inode->i_op->permission) {
- retval = inode->i_op->permission(inode, submask, nd);
+ retval = inode->i_op->permission(inode, mask);
if (!retval) {
/*
* Exec permission on a regular file is denied if none
@@ -277,7 +264,7 @@ int permission(struct inode *inode, int mask, struct nameidata *nd)
return -EACCES;
}
} else {
- retval = generic_permission(inode, submask, NULL);
+ retval = generic_permission(inode, mask, NULL);
}
if (retval)
return retval;
@@ -286,7 +273,8 @@ int permission(struct inode *inode, int mask, struct nameidata *nd)
if (retval)
return retval;
- return security_inode_permission(inode, mask, nd);
+ return security_inode_permission(inode,
+ mask & (MAY_READ|MAY_WRITE|MAY_EXEC|MAY_APPEND));
}
/**
@@ -301,7 +289,7 @@ int permission(struct inode *inode, int mask, struct nameidata *nd)
*/
int vfs_permission(struct nameidata *nd, int mask)
{
- return permission(nd->path.dentry->d_inode, mask, nd);
+ return inode_permission(nd->path.dentry->d_inode, mask);
}
/**
@@ -318,7 +306,7 @@ int vfs_permission(struct nameidata *nd, int mask)
*/
int file_permission(struct file *file, int mask)
{
- return permission(file->f_path.dentry->d_inode, mask, NULL);
+ return inode_permission(file->f_path.dentry->d_inode, mask);
}
/*
@@ -459,8 +447,7 @@ static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name,
* short-cut DAC fails, then call permission() to do more
* complete permission check.
*/
-static int exec_permission_lite(struct inode *inode,
- struct nameidata *nd)
+static int exec_permission_lite(struct inode *inode)
{
umode_t mode = inode->i_mode;
@@ -486,7 +473,7 @@ static int exec_permission_lite(struct inode *inode,
return -EACCES;
ok:
- return security_inode_permission(inode, MAY_EXEC, nd);
+ return security_inode_permission(inode, MAY_EXEC);
}
/*
@@ -519,7 +506,14 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s
*/
result = d_lookup(parent, name);
if (!result) {
- struct dentry * dentry = d_alloc(parent, name);
+ struct dentry *dentry;
+
+ /* Don't create child dentry for a dead directory. */
+ result = ERR_PTR(-ENOENT);
+ if (IS_DEADDIR(dir))
+ goto out_unlock;
+
+ dentry = d_alloc(parent, name);
result = ERR_PTR(-ENOMEM);
if (dentry) {
result = dir->i_op->lookup(dir, dentry, nd);
@@ -528,6 +522,7 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s
else
result = dentry;
}
+out_unlock:
mutex_unlock(&dir->i_mutex);
return result;
}
@@ -545,27 +540,16 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s
return result;
}
-static int __emul_lookup_dentry(const char *, struct nameidata *);
-
/* SMP-safe */
-static __always_inline int
+static __always_inline void
walk_init_root(const char *name, struct nameidata *nd)
{
struct fs_struct *fs = current->fs;
read_lock(&fs->lock);
- if (fs->altroot.dentry && !(nd->flags & LOOKUP_NOALT)) {
- nd->path = fs->altroot;
- path_get(&fs->altroot);
- read_unlock(&fs->lock);
- if (__emul_lookup_dentry(name,nd))
- return 0;
- read_lock(&fs->lock);
- }
nd->path = fs->root;
path_get(&fs->root);
read_unlock(&fs->lock);
- return 1;
}
/*
@@ -606,12 +590,9 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l
if (*link == '/') {
path_put(&nd->path);
- if (!walk_init_root(link, nd))
- /* weird __emul_prefix() stuff did it */
- goto out;
+ walk_init_root(link, nd);
}
res = link_path_walk(link, nd);
-out:
if (nd->depth || res || nd->last_type!=LAST_NORM)
return res;
/*
@@ -889,7 +870,7 @@ static int __link_path_walk(const char *name, struct nameidata *nd)
unsigned int c;
nd->flags |= LOOKUP_CONTINUE;
- err = exec_permission_lite(inode, nd);
+ err = exec_permission_lite(inode);
if (err == -EAGAIN)
err = vfs_permission(nd, MAY_EXEC);
if (err)
@@ -1060,67 +1041,6 @@ static int path_walk(const char *name, struct nameidata *nd)
return link_path_walk(name, nd);
}
-/*
- * SMP-safe: Returns 1 and nd will have valid dentry and mnt, if
- * everything is done. Returns 0 and drops input nd, if lookup failed;
- */
-static int __emul_lookup_dentry(const char *name, struct nameidata *nd)
-{
- if (path_walk(name, nd))
- return 0; /* something went wrong... */
-
- if (!nd->path.dentry->d_inode ||
- S_ISDIR(nd->path.dentry->d_inode->i_mode)) {
- struct path old_path = nd->path;
- struct qstr last = nd->last;
- int last_type = nd->last_type;
- struct fs_struct *fs = current->fs;
-
- /*
- * NAME was not found in alternate root or it's a directory.
- * Try to find it in the normal root:
- */
- nd->last_type = LAST_ROOT;
- read_lock(&fs->lock);
- nd->path = fs->root;
- path_get(&fs->root);
- read_unlock(&fs->lock);
- if (path_walk(name, nd) == 0) {
- if (nd->path.dentry->d_inode) {
- path_put(&old_path);
- return 1;
- }
- path_put(&nd->path);
- }
- nd->path = old_path;
- nd->last = last;
- nd->last_type = last_type;
- }
- return 1;
-}
-
-void set_fs_altroot(void)
-{
- char *emul = __emul_prefix();
- struct nameidata nd;
- struct path path = {}, old_path;
- int err;
- struct fs_struct *fs = current->fs;
-
- if (!emul)
- goto set_it;
- err = path_lookup(emul, LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_NOALT, &nd);
- if (!err)
- path = nd.path;
-set_it:
- write_lock(&fs->lock);
- old_path = fs->altroot;
- fs->altroot = path;
- write_unlock(&fs->lock);
- if (old_path.dentry)
- path_put(&old_path);
-}
-
/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
static int do_path_lookup(int dfd, const char *name,
unsigned int flags, struct nameidata *nd)
@@ -1136,14 +1056,6 @@ static int do_path_lookup(int dfd, const char *name,
if (*name=='/') {
read_lock(&fs->lock);
- if (fs->altroot.dentry && !(nd->flags & LOOKUP_NOALT)) {
- nd->path = fs->altroot;
- path_get(&fs->altroot);
- read_unlock(&fs->lock);
- if (__emul_lookup_dentry(name,nd))
- goto out; /* found in altroot */
- read_lock(&fs->lock);
- }
nd->path = fs->root;
path_get(&fs->root);
read_unlock(&fs->lock);
@@ -1177,7 +1089,6 @@ static int do_path_lookup(int dfd, const char *name,
}
retval = path_walk(name, nd);
-out:
if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&
nd->path.dentry->d_inode))
audit_inode(name, nd->path.dentry);
@@ -1282,19 +1193,6 @@ static int path_lookup_create(int dfd, const char *name,
nd, open_flags, create_mode);
}
-int __user_path_lookup_open(const char __user *name, unsigned int lookup_flags,
- struct nameidata *nd, int open_flags)
-{
- char *tmp = getname(name);
- int err = PTR_ERR(tmp);
-
- if (!IS_ERR(tmp)) {
- err = __path_lookup_intent_open(AT_FDCWD, tmp, lookup_flags, nd, open_flags, 0);
- putname(tmp);
- }
- return err;
-}
-
static struct dentry *__lookup_hash(struct qstr *name,
struct dentry *base, struct nameidata *nd)
{
@@ -1317,7 +1215,14 @@ static struct dentry *__lookup_hash(struct qstr *name,
dentry = cached_lookup(base, name, nd);
if (!dentry) {
- struct dentry *new = d_alloc(base, name);
+ struct dentry *new;
+
+ /* Don't create child dentry for a dead directory. */
+ dentry = ERR_PTR(-ENOENT);
+ if (IS_DEADDIR(inode))
+ goto out;
+
+ new = d_alloc(base, name);
dentry = ERR_PTR(-ENOMEM);
if (!new)
goto out;
@@ -1340,7 +1245,7 @@ static struct dentry *lookup_hash(struct nameidata *nd)
{
int err;
- err = permission(nd->path.dentry->d_inode, MAY_EXEC, nd);
+ err = inode_permission(nd->path.dentry->d_inode, MAY_EXEC);
if (err)
return ERR_PTR(err);
return __lookup_hash(&nd->last, nd->path.dentry, nd);
@@ -1388,7 +1293,7 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
if (err)
return ERR_PTR(err);
- err = permission(base->d_inode, MAY_EXEC, NULL);
+ err = inode_permission(base->d_inode, MAY_EXEC);
if (err)
return ERR_PTR(err);
return __lookup_hash(&this, base, NULL);
@@ -1416,22 +1321,40 @@ struct dentry *lookup_one_noperm(const char *name, struct dentry *base)
return __lookup_hash(&this, base, NULL);
}
-int __user_walk_fd(int dfd, const char __user *name, unsigned flags,
- struct nameidata *nd)
+int user_path_at(int dfd, const char __user *name, unsigned flags,
+ struct path *path)
{
+ struct nameidata nd;
char *tmp = getname(name);
int err = PTR_ERR(tmp);
-
if (!IS_ERR(tmp)) {
- err = do_path_lookup(dfd, tmp, flags, nd);
+
+ BUG_ON(flags & LOOKUP_PARENT);
+
+ err = do_path_lookup(dfd, tmp, flags, &nd);
putname(tmp);
+ if (!err)
+ *path = nd.path;
}
return err;
}
-int __user_walk(const char __user *name, unsigned flags, struct nameidata *nd)
+static int user_path_parent(int dfd, const char __user *path,
+ struct nameidata *nd, char **name)
{
- return __user_walk_fd(AT_FDCWD, name, flags, nd);
+ char *s = getname(path);
+ int error;
+
+ if (IS_ERR(s))
+ return PTR_ERR(s);
+
+ error = do_path_lookup(dfd, s, LOOKUP_PARENT, nd);
+ if (error)
+ putname(s);
+ else
+ *name = s;
+
+ return error;
}
/*
@@ -1478,7 +1401,7 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
BUG_ON(victim->d_parent->d_inode != dir);
audit_inode_child(victim->d_name.name, victim, dir);
- error = permission(dir,MAY_WRITE | MAY_EXEC, NULL);
+ error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
if (error)
return error;
if (IS_APPEND(dir))
@@ -1508,14 +1431,13 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
* 3. We should have write and exec permissions on dir
* 4. We can't do it if dir is immutable (done in permission())
*/
-static inline int may_create(struct inode *dir, struct dentry *child,
- struct nameidata *nd)
+static inline int may_create(struct inode *dir, struct dentry *child)
{
if (child->d_inode)
return -EEXIST;
if (IS_DEADDIR(dir))
return -ENOENT;
- return permission(dir,MAY_WRITE | MAY_EXEC, nd);
+ return inode_permission(dir, MAY_WRITE | MAY_EXEC);
}
/*
@@ -1581,7 +1503,7 @@ void unlock_rename(struct dentry *p1, struct dentry *p2)
int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
struct nameidata *nd)
{
- int error = may_create(dir, dentry, nd);
+ int error = may_create(dir, dentry);
if (error)
return error;
@@ -1755,7 +1677,7 @@ struct file *do_filp_open(int dfd, const char *pathname,
int will_write;
int flag = open_to_namei_flags(open_flag);
- acc_mode = ACC_MODE(flag);
+ acc_mode = MAY_OPEN | ACC_MODE(flag);
/* O_TRUNC implies we need access checks for write permissions */
if (flag & O_TRUNC)
@@ -2025,7 +1947,7 @@ EXPORT_SYMBOL_GPL(lookup_create);
int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
{
- int error = may_create(dir, dentry, NULL);
+ int error = may_create(dir, dentry);
if (error)
return error;
@@ -2071,20 +1993,18 @@ static int may_mknod(mode_t mode)
asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode,
unsigned dev)
{
- int error = 0;
- char * tmp;
- struct dentry * dentry;
+ int error;
+ char *tmp;
+ struct dentry *dentry;
struct nameidata nd;
if (S_ISDIR(mode))
return -EPERM;
- tmp = getname(filename);
- if (IS_ERR(tmp))
- return PTR_ERR(tmp);
- error = do_path_lookup(dfd, tmp, LOOKUP_PARENT, &nd);
+ error = user_path_parent(dfd, filename, &nd, &tmp);
if (error)
- goto out;
+ return error;
+
dentry = lookup_create(&nd, 0);
if (IS_ERR(dentry)) {
error = PTR_ERR(dentry);
@@ -2116,7 +2036,6 @@ out_dput:
out_unlock:
mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
path_put(&nd.path);
-out:
putname(tmp);
return error;
@@ -2129,7 +2048,7 @@ asmlinkage long sys_mknod(const char __user *filename, int mode, unsigned dev)
int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
{
- int error = may_create(dir, dentry, NULL);
+ int error = may_create(dir, dentry);
if (error)
return error;
@@ -2156,14 +2075,10 @@ asmlinkage long sys_mkdirat(int dfd, const char __user *pathname, int mode)
struct dentry *dentry;
struct nameidata nd;
- tmp = getname(pathname);
- error = PTR_ERR(tmp);
- if (IS_ERR(tmp))
+ error = user_path_parent(dfd, pathname, &nd, &tmp);
+ if (error)
goto out_err;
- error = do_path_lookup(dfd, tmp, LOOKUP_PARENT, &nd);
- if (error)
- goto out;
dentry = lookup_create(&nd, 1);
error = PTR_ERR(dentry);
if (IS_ERR(dentry))
@@ -2181,7 +2096,6 @@ out_dput:
out_unlock:
mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
path_put(&nd.path);
-out:
putname(tmp);
out_err:
return error;
@@ -2259,13 +2173,9 @@ static long do_rmdir(int dfd, const char __user *pathname)
struct dentry *dentry;
struct nameidata nd;
- name = getname(pathname);
- if(IS_ERR(name))
- return PTR_ERR(name);
-
- error = do_path_lookup(dfd, name, LOOKUP_PARENT, &nd);
+ error = user_path_parent(dfd, pathname, &nd, &name);
if (error)
- goto exit;
+ return error;
switch(nd.last_type) {
case LAST_DOTDOT:
@@ -2294,7 +2204,6 @@ exit2:
mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
exit1:
path_put(&nd.path);
-exit:
putname(name);
return error;
}
@@ -2343,19 +2252,16 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
*/
static long do_unlinkat(int dfd, const char __user *pathname)
{
- int error = 0;
- char * name;
+ int error;
+ char *name;
struct dentry *dentry;
struct nameidata nd;
struct inode *inode = NULL;
- name = getname(pathname);
- if(IS_ERR(name))
- return PTR_ERR(name);
-
- error = do_path_lookup(dfd, name, LOOKUP_PARENT, &nd);
+ error = user_path_parent(dfd, pathname, &nd, &name);
if (error)
- goto exit;
+ return error;
+
error = -EISDIR;
if (nd.last_type != LAST_NORM)
goto exit1;
@@ -2382,7 +2288,6 @@ static long do_unlinkat(int dfd, const char __user *pathname)
iput(inode); /* truncate the inode here */
exit1:
path_put(&nd.path);
-exit:
putname(name);
return error;
@@ -2408,9 +2313,9 @@ asmlinkage long sys_unlink(const char __user *pathname)
return do_unlinkat(AT_FDCWD, pathname);
}
-int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, int mode)
+int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
{
- int error = may_create(dir, dentry, NULL);
+ int error = may_create(dir, dentry);
if (error)
return error;
@@ -2432,23 +2337,20 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, i
asmlinkage long sys_symlinkat(const char __user *oldname,
int newdfd, const char __user *newname)
{
- int error = 0;
- char * from;
- char * to;
+ int error;
+ char *from;
+ char *to;
struct dentry *dentry;
struct nameidata nd;
from = getname(oldname);
- if(IS_ERR(from))
+ if (IS_ERR(from))
return PTR_ERR(from);
- to = getname(newname);
- error = PTR_ERR(to);
- if (IS_ERR(to))
- goto out_putname;
- error = do_path_lookup(newdfd, to, LOOKUP_PARENT, &nd);
+ error = user_path_parent(newdfd, newname, &nd, &to);
if (error)
- goto out;
+ goto out_putname;
+
dentry = lookup_create(&nd, 0);
error = PTR_ERR(dentry);
if (IS_ERR(dentry))
@@ -2457,14 +2359,13 @@ asmlinkage long sys_symlinkat(const char __user *oldname,
error = mnt_want_write(nd.path.mnt);
if (error)
goto out_dput;
- error = vfs_symlink(nd.path.dentry->d_inode, dentry, from, S_IALLUGO);
+ error = vfs_symlink(nd.path.dentry->d_inode, dentry, from);
mnt_drop_write(nd.path.mnt);
out_dput:
dput(dentry);
out_unlock:
mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
path_put(&nd.path);
-out:
putname(to);
out_putname:
putname(from);
@@ -2484,7 +2385,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
if (!inode)
return -ENOENT;
- error = may_create(dir, new_dentry, NULL);
+ error = may_create(dir, new_dentry);
if (error)
return error;
@@ -2498,19 +2399,19 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
return -EPERM;
if (!dir->i_op || !dir->i_op->link)
return -EPERM;
- if (S_ISDIR(old_dentry->d_inode->i_mode))
+ if (S_ISDIR(inode->i_mode))
return -EPERM;
error = security_inode_link(old_dentry, dir, new_dentry);
if (error)
return error;
- mutex_lock(&old_dentry->d_inode->i_mutex);
+ mutex_lock(&inode->i_mutex);
DQUOT_INIT(dir);
error = dir->i_op->link(old_dentry, dir, new_dentry);
- mutex_unlock(&old_dentry->d_inode->i_mutex);
+ mutex_unlock(&inode->i_mutex);
if (!error)
- fsnotify_link(dir, old_dentry->d_inode, new_dentry);
+ fsnotify_link(dir, inode, new_dentry);
return error;
}
@@ -2528,27 +2429,25 @@ asmlinkage long sys_linkat(int olddfd, const char __user *oldname,
int flags)
{
struct dentry *new_dentry;
- struct nameidata nd, old_nd;
+ struct nameidata nd;
+ struct path old_path;
int error;
- char * to;
+ char *to;
if ((flags & ~AT_SYMLINK_FOLLOW) != 0)
return -EINVAL;
- to = getname(newname);
- if (IS_ERR(to))
- return PTR_ERR(to);
-
- error = __user_walk_fd(olddfd, oldname,
- flags & AT_SYMLINK_FOLLOW ? LOOKUP_FOLLOW : 0,
- &old_nd);
+ error = user_path_at(olddfd, oldname,
+ flags & AT_SYMLINK_FOLLOW ? LOOKUP_FOLLOW : 0,
+ &old_path);
if (error)
- goto exit;
- error = do_path_lookup(newdfd, to, LOOKUP_PARENT, &nd);
+ return error;
+
+ error = user_path_parent(newdfd, newname, &nd, &to);
if (error)
goto out;
error = -EXDEV;
- if (old_nd.path.mnt != nd.path.mnt)
+ if (old_path.mnt != nd.path.mnt)
goto out_release;
new_dentry = lookup_create(&nd, 0);
error = PTR_ERR(new_dentry);
@@ -2557,7 +2456,7 @@ asmlinkage long sys_linkat(int olddfd, const char __user *oldname,
error = mnt_want_write(nd.path.mnt);
if (error)
goto out_dput;
- error = vfs_link(old_nd.path.dentry, nd.path.dentry->d_inode, new_dentry);
+ error = vfs_link(old_path.dentry, nd.path.dentry->d_inode, new_dentry);
mnt_drop_write(nd.path.mnt);
out_dput:
dput(new_dentry);
@@ -2565,10 +2464,9 @@ out_unlock:
mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
out_release:
path_put(&nd.path);
-out:
- path_put(&old_nd.path);
-exit:
putname(to);
+out:
+ path_put(&old_path);
return error;
}
@@ -2621,7 +2519,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
* we'll need to flip '..'.
*/
if (new_dir != old_dir) {
- error = permission(old_dentry->d_inode, MAY_WRITE, NULL);
+ error = inode_permission(old_dentry->d_inode, MAY_WRITE);
if (error)
return error;
}
@@ -2696,7 +2594,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
return error;
if (!new_dentry->d_inode)
- error = may_create(new_dir, new_dentry, NULL);
+ error = may_create(new_dir, new_dentry);
else
error = may_delete(new_dir, new_dentry, is_dir);
if (error)
@@ -2724,20 +2622,22 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
return error;
}
-static int do_rename(int olddfd, const char *oldname,
- int newdfd, const char *newname)
+asmlinkage long sys_renameat(int olddfd, const char __user *oldname,
+ int newdfd, const char __user *newname)
{
- int error = 0;
- struct dentry * old_dir, * new_dir;
- struct dentry * old_dentry, *new_dentry;
- struct dentry * trap;
+ struct dentry *old_dir, *new_dir;
+ struct dentry *old_dentry, *new_dentry;
+ struct dentry *trap;
struct nameidata oldnd, newnd;
+ char *from;
+ char *to;
+ int error;
- error = do_path_lookup(olddfd, oldname, LOOKUP_PARENT, &oldnd);
+ error = user_path_parent(olddfd, oldname, &oldnd, &from);
if (error)
goto exit;
- error = do_path_lookup(newdfd, newname, LOOKUP_PARENT, &newnd);
+ error = user_path_parent(newdfd, newname, &newnd, &to);
if (error)
goto exit1;
@@ -2799,29 +2699,11 @@ exit3:
unlock_rename(new_dir, old_dir);
exit2:
path_put(&newnd.path);
+ putname(to);
exit1:
path_put(&oldnd.path);
-exit:
- return error;
-}
-
-asmlinkage long sys_renameat(int olddfd, const char __user *oldname,
- int newdfd, const char __user *newname)
-{
- int error;
- char * from;
- char * to;
-
- from = getname(oldname);
- if(IS_ERR(from))
- return PTR_ERR(from);
- to = getname(newname);
- error = PTR_ERR(to);
- if (!IS_ERR(to)) {
- error = do_rename(olddfd, from, newdfd, to);
- putname(to);
- }
putname(from);
+exit:
return error;
}
@@ -2959,8 +2841,7 @@ const struct inode_operations page_symlink_inode_operations = {
.put_link = page_put_link,
};
-EXPORT_SYMBOL(__user_walk);
-EXPORT_SYMBOL(__user_walk_fd);
+EXPORT_SYMBOL(user_path_at);
EXPORT_SYMBOL(follow_down);
EXPORT_SYMBOL(follow_up);
EXPORT_SYMBOL(get_write_access); /* binfmt_aout */
@@ -2975,7 +2856,7 @@ EXPORT_SYMBOL(page_symlink);
EXPORT_SYMBOL(page_symlink_inode_operations);
EXPORT_SYMBOL(path_lookup);
EXPORT_SYMBOL(vfs_path_lookup);
-EXPORT_SYMBOL(permission);
+EXPORT_SYMBOL(inode_permission);
EXPORT_SYMBOL(vfs_permission);
EXPORT_SYMBOL(file_permission);
EXPORT_SYMBOL(unlock_rename);
diff --git a/fs/namespace.c b/fs/namespace.c
index 4f6f7635b59..6e283c93b50 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -112,9 +112,13 @@ struct vfsmount *alloc_vfsmnt(const char *name)
int err;
err = mnt_alloc_id(mnt);
- if (err) {
- kmem_cache_free(mnt_cache, mnt);
- return NULL;
+ if (err)
+ goto out_free_cache;
+
+ if (name) {
+ mnt->mnt_devname = kstrdup(name, GFP_KERNEL);
+ if (!mnt->mnt_devname)
+ goto out_free_id;
}
atomic_set(&mnt->mnt_count, 1);
@@ -127,16 +131,14 @@ struct vfsmount *alloc_vfsmnt(const char *name)
INIT_LIST_HEAD(&mnt->mnt_slave_list);
INIT_LIST_HEAD(&mnt->mnt_slave);
atomic_set(&mnt->__mnt_writers, 0);
- if (name) {
- int size = strlen(name) + 1;
- char *newname = kmalloc(size, GFP_KERNEL);
- if (newname) {
- memcpy(newname, name, size);
- mnt->mnt_devname = newname;
- }
- }
}
return mnt;
+
+out_free_id:
+ mnt_free_id(mnt);
+out_free_cache:
+ kmem_cache_free(mnt_cache, mnt);
+ return NULL;
}
/*
@@ -309,10 +311,9 @@ static void handle_write_count_underflow(struct vfsmount *mnt)
*/
if ((atomic_read(&mnt->__mnt_writers) < 0) &&
!(mnt->mnt_flags & MNT_IMBALANCED_WRITE_COUNT)) {
- printk(KERN_DEBUG "leak detected on mount(%p) writers "
+ WARN(1, KERN_DEBUG "leak detected on mount(%p) writers "
"count: %d\n",
mnt, atomic_read(&mnt->__mnt_writers));
- WARN_ON(1);
/* use the flag to keep the dmesg spam down */
mnt->mnt_flags |= MNT_IMBALANCED_WRITE_COUNT;
}
@@ -1129,27 +1130,27 @@ static int do_umount(struct vfsmount *mnt, int flags)
asmlinkage long sys_umount(char __user * name, int flags)
{
- struct nameidata nd;
+ struct path path;
int retval;
- retval = __user_walk(name, LOOKUP_FOLLOW, &nd);
+ retval = user_path(name, &path);
if (retval)
goto out;
retval = -EINVAL;
- if (nd.path.dentry != nd.path.mnt->mnt_root)
+ if (path.dentry != path.mnt->mnt_root)
goto dput_and_out;
- if (!check_mnt(nd.path.mnt))
+ if (!check_mnt(path.mnt))
goto dput_and_out;
retval = -EPERM;
if (!capable(CAP_SYS_ADMIN))
goto dput_and_out;
- retval = do_umount(nd.path.mnt, flags);
+ retval = do_umount(path.mnt, flags);
dput_and_out:
/* we mustn't call path_put() as that would clear mnt_expiry_mark */
- dput(nd.path.dentry);
- mntput_no_expire(nd.path.mnt);
+ dput(path.dentry);
+ mntput_no_expire(path.mnt);
out:
return retval;
}
@@ -1666,31 +1667,31 @@ static noinline int do_new_mount(struct nameidata *nd, char *type, int flags,
if (IS_ERR(mnt))
return PTR_ERR(mnt);
- return do_add_mount(mnt, nd, mnt_flags, NULL);
+ return do_add_mount(mnt, &nd->path, mnt_flags, NULL);
}
/*
* add a mount into a namespace's mount tree
* - provide the option of adding the new mount to an expiration list
*/
-int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd,
+int do_add_mount(struct vfsmount *newmnt, struct path *path,
int mnt_flags, struct list_head *fslist)
{
int err;
down_write(&namespace_sem);
/* Something was mounted here while we slept */
- while (d_mountpoint(nd->path.dentry) &&
- follow_down(&nd->path.mnt, &nd->path.dentry))
+ while (d_mountpoint(path->dentry) &&
+ follow_down(&path->mnt, &path->dentry))
;
err = -EINVAL;
- if (!check_mnt(nd->path.mnt))
+ if (!check_mnt(path->mnt))
goto unlock;
/* Refuse the same filesystem on the same mount point */
err = -EBUSY;
- if (nd->path.mnt->mnt_sb == newmnt->mnt_sb &&
- nd->path.mnt->mnt_root == nd->path.dentry)
+ if (path->mnt->mnt_sb == newmnt->mnt_sb &&
+ path->mnt->mnt_root == path->dentry)
goto unlock;
err = -EINVAL;
@@ -1698,7 +1699,7 @@ int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd,
goto unlock;
newmnt->mnt_flags = mnt_flags;
- if ((err = graft_tree(newmnt, &nd->path)))
+ if ((err = graft_tree(newmnt, path)))
goto unlock;
if (fslist) /* add to the specified expiration list */
@@ -1973,7 +1974,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
struct fs_struct *fs)
{
struct mnt_namespace *new_ns;
- struct vfsmount *rootmnt = NULL, *pwdmnt = NULL, *altrootmnt = NULL;
+ struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
struct vfsmount *p, *q;
new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
@@ -2016,10 +2017,6 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
pwdmnt = p;
fs->pwd.mnt = mntget(q);
}
- if (p == fs->altroot.mnt) {
- altrootmnt = p;
- fs->altroot.mnt = mntget(q);
- }
}
p = next_mnt(p, mnt_ns->root);
q = next_mnt(q, new_ns->root);
@@ -2030,8 +2027,6 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
mntput(rootmnt);
if (pwdmnt)
mntput(pwdmnt);
- if (altrootmnt)
- mntput(altrootmnt);
return new_ns;
}
@@ -2184,28 +2179,26 @@ asmlinkage long sys_pivot_root(const char __user * new_root,
const char __user * put_old)
{
struct vfsmount *tmp;
- struct nameidata new_nd, old_nd;
- struct path parent_path, root_parent, root;
+ struct path new, old, parent_path, root_parent, root;
int error;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- error = __user_walk(new_root, LOOKUP_FOLLOW | LOOKUP_DIRECTORY,
- &new_nd);
+ error = user_path_dir(new_root, &new);
if (error)
goto out0;
error = -EINVAL;
- if (!check_mnt(new_nd.path.mnt))
+ if (!check_mnt(new.mnt))
goto out1;
- error = __user_walk(put_old, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &old_nd);
+ error = user_path_dir(put_old, &old);
if (error)
goto out1;
- error = security_sb_pivotroot(&old_nd.path, &new_nd.path);
+ error = security_sb_pivotroot(&old, &new);
if (error) {
- path_put(&old_nd.path);
+ path_put(&old);
goto out1;
}
@@ -2214,69 +2207,69 @@ asmlinkage long sys_pivot_root(const char __user * new_root,
path_get(&current->fs->root);
read_unlock(&current->fs->lock);
down_write(&namespace_sem);
- mutex_lock(&old_nd.path.dentry->d_inode->i_mutex);
+ mutex_lock(&old.dentry->d_inode->i_mutex);
error = -EINVAL;
- if (IS_MNT_SHARED(old_nd.path.mnt) ||
- IS_MNT_SHARED(new_nd.path.mnt->mnt_parent) ||
+ if (IS_MNT_SHARED(old.mnt) ||
+ IS_MNT_SHARED(new.mnt->mnt_parent) ||
IS_MNT_SHARED(root.mnt->mnt_parent))
goto out2;
if (!check_mnt(root.mnt))
goto out2;
error = -ENOENT;
- if (IS_DEADDIR(new_nd.path.dentry->d_inode))
+ if (IS_DEADDIR(new.dentry->d_inode))
goto out2;
- if (d_unhashed(new_nd.path.dentry) && !IS_ROOT(new_nd.path.dentry))
+ if (d_unhashed(new.dentry) && !IS_ROOT(new.dentry))
goto out2;
- if (d_unhashed(old_nd.path.dentry) && !IS_ROOT(old_nd.path.dentry))
+ if (d_unhashed(old.dentry) && !IS_ROOT(old.dentry))
goto out2;
error = -EBUSY;
- if (new_nd.path.mnt == root.mnt ||
- old_nd.path.mnt == root.mnt)
+ if (new.mnt == root.mnt ||
+ old.mnt == root.mnt)
goto out2; /* loop, on the same file system */
error = -EINVAL;
if (root.mnt->mnt_root != root.dentry)
goto out2; /* not a mountpoint */
if (root.mnt->mnt_parent == root.mnt)
goto out2; /* not attached */
- if (new_nd.path.mnt->mnt_root != new_nd.path.dentry)
+ if (new.mnt->mnt_root != new.dentry)
goto out2; /* not a mountpoint */
- if (new_nd.path.mnt->mnt_parent == new_nd.path.mnt)
+ if (new.mnt->mnt_parent == new.mnt)
goto out2; /* not attached */
/* make sure we can reach put_old from new_root */
- tmp = old_nd.path.mnt;
+ tmp = old.mnt;
spin_lock(&vfsmount_lock);
- if (tmp != new_nd.path.mnt) {
+ if (tmp != new.mnt) {
for (;;) {
if (tmp->mnt_parent == tmp)
goto out3; /* already mounted on put_old */
- if (tmp->mnt_parent == new_nd.path.mnt)
+ if (tmp->mnt_parent == new.mnt)
break;
tmp = tmp->mnt_parent;
}
- if (!is_subdir(tmp->mnt_mountpoint, new_nd.path.dentry))
+ if (!is_subdir(tmp->mnt_mountpoint, new.dentry))
goto out3;
- } else if (!is_subdir(old_nd.path.dentry, new_nd.path.dentry))
+ } else if (!is_subdir(old.dentry, new.dentry))
goto out3;
- detach_mnt(new_nd.path.mnt, &parent_path);
+ detach_mnt(new.mnt, &parent_path);
detach_mnt(root.mnt, &root_parent);
/* mount old root on put_old */
- attach_mnt(root.mnt, &old_nd.path);
+ attach_mnt(root.mnt, &old);
/* mount new_root on / */
- attach_mnt(new_nd.path.mnt, &root_parent);
+ attach_mnt(new.mnt, &root_parent);
touch_mnt_namespace(current->nsproxy->mnt_ns);
spin_unlock(&vfsmount_lock);
- chroot_fs_refs(&root, &new_nd.path);
- security_sb_post_pivotroot(&root, &new_nd.path);
+ chroot_fs_refs(&root, &new);
+ security_sb_post_pivotroot(&root, &new);
error = 0;
path_put(&root_parent);
path_put(&parent_path);
out2:
- mutex_unlock(&old_nd.path.dentry->d_inode->i_mutex);
+ mutex_unlock(&old.dentry->d_inode->i_mutex);
up_write(&namespace_sem);
path_put(&root);
- path_put(&old_nd.path);
+ path_put(&old);
out1:
- path_put(&new_nd.path);
+ path_put(&new);
out0:
return error;
out3:
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index 011ef0b6d2d..07e9715b865 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -266,7 +266,7 @@ leave_me:;
static int
-__ncp_lookup_validate(struct dentry * dentry, struct nameidata *nd)
+__ncp_lookup_validate(struct dentry *dentry)
{
struct ncp_server *server;
struct dentry *parent;
@@ -340,7 +340,7 @@ ncp_lookup_validate(struct dentry * dentry, struct nameidata *nd)
{
int res;
lock_kernel();
- res = __ncp_lookup_validate(dentry, nd);
+ res = __ncp_lookup_validate(dentry);
unlock_kernel();
return res;
}
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 2e5ab1204de..d642f0e5b36 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -64,7 +64,7 @@ static void ncp_destroy_inode(struct inode *inode)
kmem_cache_free(ncp_inode_cachep, NCP_FINFO(inode));
}
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
{
struct ncp_inode_info *ei = (struct ncp_inode_info *) foo;
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 28a238dab23..74f92b717f7 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1884,7 +1884,7 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
return status;
nfs_access_add_cache(inode, &cache);
out:
- if ((cache.mask & mask) == mask)
+ if ((mask & ~cache.mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
return 0;
return -EACCES;
}
@@ -1907,17 +1907,17 @@ int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags)
return nfs_do_access(inode, cred, nfs_open_permission_mask(openflags));
}
-int nfs_permission(struct inode *inode, int mask, struct nameidata *nd)
+int nfs_permission(struct inode *inode, int mask)
{
struct rpc_cred *cred;
int res = 0;
nfs_inc_stats(inode, NFSIOS_VFSACCESS);
- if (mask == 0)
+ if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
goto out;
/* Is this sys_access() ? */
- if (nd != NULL && (nd->flags & LOOKUP_ACCESS))
+ if (mask & MAY_ACCESS)
goto force_lookup;
switch (inode->i_mode & S_IFMT) {
@@ -1926,8 +1926,7 @@ int nfs_permission(struct inode *inode, int mask, struct nameidata *nd)
case S_IFREG:
/* NFSv4 has atomic_open... */
if (nfs_server_capable(inode, NFS_CAP_ATOMIC_OPEN)
- && nd != NULL
- && (nd->flags & LOOKUP_OPEN))
+ && (mask & MAY_OPEN))
goto out;
break;
case S_IFDIR:
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index df23f987da6..52daefa2f52 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1242,7 +1242,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi)
#endif
}
-static void init_once(struct kmem_cache * cachep, void *foo)
+static void init_once(void *foo)
{
struct nfs_inode *nfsi = (struct nfs_inode *) foo;
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 2f285ef7639..66df08dd1ca 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -129,7 +129,7 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
goto out_err;
mntget(mnt);
- err = do_add_mount(mnt, nd, nd->path.mnt->mnt_flags|MNT_SHRINKABLE,
+ err = do_add_mount(mnt, &nd->path, nd->path.mnt->mnt_flags|MNT_SHRINKABLE,
&nfs_automount_list);
if (err < 0) {
mntput(mnt);
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 1b94e3650f5..9abcd2b329f 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1718,9 +1718,9 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data)
* ones were explicitly specified. Fall back to legacy behavior and
* just return success.
*/
- if ((nfsvers == 4 && options4->version == 1) ||
- (nfsvers <= 3 && options->version >= 1 &&
- options->version <= 6))
+ if ((nfsvers == 4 && (!options4 || options4->version == 1)) ||
+ (nfsvers <= 3 && (!options || (options->version >= 1 &&
+ options->version <= 6))))
return 0;
data = kzalloc(sizeof(*data), GFP_KERNEL);
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 3adf8b26646..f089e5839d7 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -95,10 +95,11 @@ static void nfs_async_unlink_done(struct rpc_task *task, void *calldata)
static void nfs_async_unlink_release(void *calldata)
{
struct nfs_unlinkdata *data = calldata;
+ struct super_block *sb = data->dir->i_sb;
nfs_dec_sillycount(data->dir);
- nfs_sb_deactive(NFS_SERVER(data->dir));
nfs_free_unlinkdata(data);
+ nfs_sb_deactive(NFS_SB(sb));
}
static const struct rpc_call_ops nfs_unlink_ops = {
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 33bfcf09db4..9dc036f1835 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1023,7 +1023,7 @@ exp_export(struct nfsctl_export *nxp)
/* Look up the dentry */
err = path_lookup(nxp->ex_path, 0, &nd);
if (err)
- goto out_unlock;
+ goto out_put_clp;
err = -EINVAL;
exp = exp_get_by_name(clp, nd.path.mnt, nd.path.dentry, NULL);
@@ -1090,9 +1090,9 @@ finish:
exp_put(exp);
if (fsid_key && !IS_ERR(fsid_key))
cache_put(&fsid_key->h, &svc_expkey_cache);
- if (clp)
- auth_domain_put(clp);
path_put(&nd.path);
+out_put_clp:
+ auth_domain_put(clp);
out_unlock:
exp_writeunlock();
out:
diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c
index 6b6225ac492..15c6faeec77 100644
--- a/fs/nfsd/lockd.c
+++ b/fs/nfsd/lockd.c
@@ -19,6 +19,13 @@
#define NFSDDBG_FACILITY NFSDDBG_LOCKD
+#ifdef CONFIG_LOCKD_V4
+#define nlm_stale_fh nlm4_stale_fh
+#define nlm_failed nlm4_failed
+#else
+#define nlm_stale_fh nlm_lck_denied_nolocks
+#define nlm_failed nlm_lck_denied_nolocks
+#endif
/*
* Note: we hold the dentry use count while the file is open.
*/
@@ -47,12 +54,10 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp)
return 0;
case nfserr_dropit:
return nlm_drop_reply;
-#ifdef CONFIG_LOCKD_V4
case nfserr_stale:
- return nlm4_stale_fh;
-#endif
+ return nlm_stale_fh;
default:
- return nlm_lck_denied;
+ return nlm_failed;
}
}
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index eef1629806f..2e51adac65d 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -851,7 +851,7 @@ struct nfsd4_operation {
static struct nfsd4_operation nfsd4_ops[];
-static inline char *nfsd4_op_name(unsigned opnum);
+static const char *nfsd4_op_name(unsigned opnum);
/*
* COMPOUND call.
@@ -1116,8 +1116,7 @@ static struct nfsd4_operation nfsd4_ops[OP_RELEASE_LOCKOWNER+1] = {
},
};
-static inline char *
-nfsd4_op_name(unsigned opnum)
+static const char *nfsd4_op_name(unsigned opnum)
{
if (opnum < ARRAY_SIZE(nfsd4_ops))
return nfsd4_ops[opnum].op_name;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 1955a2702e6..c53e65f8f3a 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -12,6 +12,7 @@
#include <linux/time.h>
#include <linux/errno.h>
#include <linux/fs.h>
+#include <linux/namei.h>
#include <linux/fcntl.h>
#include <linux/net.h>
#include <linux/in.h>
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index f45451eb1e3..ea37c96f044 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -51,7 +51,7 @@ static int nfsd_acceptable(void *expv, struct dentry *dentry)
/* make sure parents give x permission to user */
int err;
parent = dget_parent(tdentry);
- err = permission(parent->d_inode, MAY_EXEC, NULL);
+ err = inode_permission(parent->d_inode, MAY_EXEC);
if (err < 0) {
dput(parent);
break;
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 0f4481e0502..18060bed526 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1516,7 +1516,6 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct dentry *dentry, *dnew;
__be32 err, cerr;
int host_err;
- umode_t mode;
err = nfserr_noent;
if (!flen || !plen)
@@ -1535,11 +1534,6 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (IS_ERR(dnew))
goto out_nfserr;
- mode = S_IALLUGO;
- /* Only the MODE ATTRibute is even vaguely meaningful */
- if (iap && (iap->ia_valid & ATTR_MODE))
- mode = iap->ia_mode & S_IALLUGO;
-
host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
if (host_err)
goto out_nfserr;
@@ -1551,11 +1545,11 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
else {
strncpy(path_alloced, path, plen);
path_alloced[plen] = 0;
- host_err = vfs_symlink(dentry->d_inode, dnew, path_alloced, mode);
+ host_err = vfs_symlink(dentry->d_inode, dnew, path_alloced);
kfree(path_alloced);
}
} else
- host_err = vfs_symlink(dentry->d_inode, dnew, path, mode);
+ host_err = vfs_symlink(dentry->d_inode, dnew, path);
if (!host_err) {
if (EX_ISSYNC(fhp->fh_export))
@@ -1959,12 +1953,12 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
return 0;
/* This assumes NFSD_MAY_{READ,WRITE,EXEC} == MAY_{READ,WRITE,EXEC} */
- err = permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC), NULL);
+ err = inode_permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC));
/* Allow read access to binaries even when mode 111 */
if (err == -EACCES && S_ISREG(inode->i_mode) &&
acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE))
- err = permission(inode, MAY_EXEC, NULL);
+ err = inode_permission(inode, MAY_EXEC);
return err? nfserrno(err) : 0;
}
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 00e9ccde8e4..b38f944f066 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -1194,7 +1194,7 @@ lock_retry_remap:
tbh = bhs[i];
if (!tbh)
continue;
- if (unlikely(test_set_buffer_locked(tbh)))
+ if (!trylock_buffer(tbh))
BUG();
/* The buffer dirty state is now irrelevant, just clean it. */
clear_buffer_dirty(tbh);
diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c
index 33ff314cc50..9669541d011 100644
--- a/fs/ntfs/compress.c
+++ b/fs/ntfs/compress.c
@@ -665,7 +665,7 @@ lock_retry_remap:
for (i = 0; i < nr_bhs; i++) {
struct buffer_head *tbh = bhs[i];
- if (unlikely(test_set_buffer_locked(tbh)))
+ if (!trylock_buffer(tbh))
continue;
if (unlikely(buffer_uptodate(tbh))) {
unlock_buffer(tbh);
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 3c5550cd11d..d020866d423 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -2118,7 +2118,7 @@ static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb,
goto out;
if (!count)
goto out;
- err = remove_suid(file->f_path.dentry);
+ err = file_remove_suid(file);
if (err)
goto out;
file_update_time(file);
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 790defb847e..17d32ca6bc3 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -586,7 +586,7 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no,
for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) {
struct buffer_head *tbh = bhs[i_bhs];
- if (unlikely(test_set_buffer_locked(tbh)))
+ if (!trylock_buffer(tbh))
BUG();
BUG_ON(!buffer_uptodate(tbh));
clear_buffer_dirty(tbh);
@@ -779,7 +779,7 @@ int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync)
for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) {
struct buffer_head *tbh = bhs[i_bhs];
- if (unlikely(test_set_buffer_locked(tbh)))
+ if (!trylock_buffer(tbh))
BUG();
BUG_ON(!buffer_uptodate(tbh));
clear_buffer_dirty(tbh);
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 3e76f3b216b..4a46743b507 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -3080,7 +3080,7 @@ struct kmem_cache *ntfs_inode_cache;
struct kmem_cache *ntfs_big_inode_cache;
/* Init once constructor for the inode slab cache. */
-static void ntfs_big_inode_init_once(struct kmem_cache *cachep, void *foo)
+static void ntfs_big_inode_init_once(void *foo)
{
ntfs_inode *ni = (ntfs_inode *)foo;
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 1db080135c6..506c24fb507 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1073,12 +1073,15 @@ static void ocfs2_write_failure(struct inode *inode,
for(i = 0; i < wc->w_num_pages; i++) {
tmppage = wc->w_pages[i];
- if (ocfs2_should_order_data(inode))
- walk_page_buffers(wc->w_handle, page_buffers(tmppage),
- from, to, NULL,
- ocfs2_journal_dirty_data);
-
- block_commit_write(tmppage, from, to);
+ if (page_has_buffers(tmppage)) {
+ if (ocfs2_should_order_data(inode))
+ walk_page_buffers(wc->w_handle,
+ page_buffers(tmppage),
+ from, to, NULL,
+ ocfs2_journal_dirty_data);
+
+ block_commit_write(tmppage, from, to);
+ }
}
}
@@ -1901,12 +1904,14 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
to = PAGE_CACHE_SIZE;
}
- if (ocfs2_should_order_data(inode))
- walk_page_buffers(wc->w_handle, page_buffers(tmppage),
- from, to, NULL,
- ocfs2_journal_dirty_data);
-
- block_commit_write(tmppage, from, to);
+ if (page_has_buffers(tmppage)) {
+ if (ocfs2_should_order_data(inode))
+ walk_page_buffers(wc->w_handle,
+ page_buffers(tmppage),
+ from, to, NULL,
+ ocfs2_journal_dirty_data);
+ block_commit_write(tmppage, from, to);
+ }
}
out_write_size:
diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c
index e48aba698b7..533a789c3ef 100644
--- a/fs/ocfs2/dlm/dlmfs.c
+++ b/fs/ocfs2/dlm/dlmfs.c
@@ -267,8 +267,7 @@ static ssize_t dlmfs_file_write(struct file *filp,
return writelen;
}
-static void dlmfs_init_once(struct kmem_cache *cachep,
- void *foo)
+static void dlmfs_init_once(void *foo)
{
struct dlmfs_inode_private *ip =
(struct dlmfs_inode_private *) foo;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index e8514e8b6ce..ec2ed15c3da 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1176,7 +1176,7 @@ bail:
return err;
}
-int ocfs2_permission(struct inode *inode, int mask, struct nameidata *nd)
+int ocfs2_permission(struct inode *inode, int mask)
{
int ret;
@@ -1766,8 +1766,8 @@ out_inode_unlock:
out_rw_unlock:
ocfs2_rw_unlock(inode, 1);
- mutex_unlock(&inode->i_mutex);
out:
+ mutex_unlock(&inode->i_mutex);
return ret;
}
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index 048ddcaf5c8..1e27b4d017e 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -62,8 +62,7 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di,
int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
struct kstat *stat);
-int ocfs2_permission(struct inode *inode, int mask,
- struct nameidata *nd);
+int ocfs2_permission(struct inode *inode, int mask);
int ocfs2_should_update_atime(struct inode *inode,
struct vfsmount *vfsmnt);
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index a8c19cb3cfd..7a37240f7a3 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -57,7 +57,7 @@ static int __ocfs2_recovery_thread(void *arg);
static int ocfs2_commit_cache(struct ocfs2_super *osb);
static int ocfs2_wait_on_mount(struct ocfs2_super *osb);
static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
- int dirty);
+ int dirty, int replayed);
static int ocfs2_trylock_journal(struct ocfs2_super *osb,
int slot_num);
static int ocfs2_recover_orphans(struct ocfs2_super *osb,
@@ -562,8 +562,18 @@ done:
return status;
}
+/* Adds one to the little-endian ij_recovery_generation field of the
+ * journal dinode @di.  Only the structure passed in is modified; this
+ * helper itself performs no I/O. */
+static void ocfs2_bump_recovery_generation(struct ocfs2_dinode *di)
+{
+ le32_add_cpu(&(di->id1.journal1.ij_recovery_generation), 1);
+}
+
+/* Returns the ij_recovery_generation field of the journal dinode @di,
+ * converted from little-endian to CPU byte order. */
+static u32 ocfs2_get_recovery_generation(struct ocfs2_dinode *di)
+{
+ return le32_to_cpu(di->id1.journal1.ij_recovery_generation);
+}
+
static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
- int dirty)
+ int dirty, int replayed)
{
int status;
unsigned int flags;
@@ -593,6 +603,9 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
flags &= ~OCFS2_JOURNAL_DIRTY_FL;
fe->id1.journal1.ij_flags = cpu_to_le32(flags);
+ if (replayed)
+ ocfs2_bump_recovery_generation(fe);
+
status = ocfs2_write_block(osb, bh, journal->j_inode);
if (status < 0)
mlog_errno(status);
@@ -667,7 +680,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
* Do not toggle if flush was unsuccessful otherwise
* will leave dirty metadata in a "clean" journal
*/
- status = ocfs2_journal_toggle_dirty(osb, 0);
+ status = ocfs2_journal_toggle_dirty(osb, 0, 0);
if (status < 0)
mlog_errno(status);
}
@@ -710,7 +723,7 @@ static void ocfs2_clear_journal_error(struct super_block *sb,
}
}
-int ocfs2_journal_load(struct ocfs2_journal *journal, int local)
+int ocfs2_journal_load(struct ocfs2_journal *journal, int local, int replayed)
{
int status = 0;
struct ocfs2_super *osb;
@@ -729,7 +742,7 @@ int ocfs2_journal_load(struct ocfs2_journal *journal, int local)
ocfs2_clear_journal_error(osb->sb, journal->j_journal, osb->slot_num);
- status = ocfs2_journal_toggle_dirty(osb, 1);
+ status = ocfs2_journal_toggle_dirty(osb, 1, replayed);
if (status < 0) {
mlog_errno(status);
goto done;
@@ -771,7 +784,7 @@ int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full)
goto bail;
}
- status = ocfs2_journal_toggle_dirty(journal->j_osb, 0);
+ status = ocfs2_journal_toggle_dirty(journal->j_osb, 0, 0);
if (status < 0)
mlog_errno(status);
@@ -1034,6 +1047,12 @@ restart:
spin_unlock(&osb->osb_lock);
mlog(0, "All nodes recovered\n");
+ /* Refresh all journal recovery generations from disk */
+ status = ocfs2_check_journals_nolocks(osb);
+ status = (status == -EROFS) ? 0 : status;
+ if (status < 0)
+ mlog_errno(status);
+
ocfs2_super_unlock(osb, 1);
/* We always run recovery on our own orphan dir - the dead
@@ -1096,6 +1115,42 @@ out:
mlog_exit_void();
}
+/*
+ * Looks up the journal system inode for @slot_num and reads its dinode
+ * block into *@bh.
+ *
+ * On success 0 is returned.  If @ret_inode is non-NULL the inode reference
+ * is handed to the caller through it; otherwise the reference is dropped
+ * before returning.  On failure the reference (if any) is always dropped
+ * and a negative error is returned: -EACCES when the inode is missing or
+ * bad, or the error from ocfs2_read_block().
+ */
+static int ocfs2_read_journal_inode(struct ocfs2_super *osb,
+				    int slot_num,
+				    struct buffer_head **bh,
+				    struct inode **ret_inode)
+{
+	struct inode *jinode;
+	int err;
+
+	BUG_ON(slot_num >= osb->max_slots);
+
+	jinode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
+					     slot_num);
+	if (!jinode || is_bad_inode(jinode)) {
+		err = -EACCES;
+		mlog_errno(err);
+		goto drop_ref;
+	}
+	SET_INODE_JOURNAL(jinode);
+
+	err = ocfs2_read_block(osb, OCFS2_I(jinode)->ip_blkno, bh, 0, jinode);
+	if (err < 0) {
+		mlog_errno(err);
+		goto drop_ref;
+	}
+
+	/* Success: pass the reference on if the caller asked for it. */
+	if (ret_inode) {
+		*ret_inode = jinode;
+		return 0;
+	}
+	err = 0;
+
+drop_ref:
+	if (jinode)
+		iput(jinode);
+	return err;
+}
+
/* Does the actual journal replay and marks the journal inode as
* clean. Will only replay if the journal inode is marked dirty. */
static int ocfs2_replay_journal(struct ocfs2_super *osb,
@@ -1109,22 +1164,36 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
struct ocfs2_dinode *fe;
journal_t *journal = NULL;
struct buffer_head *bh = NULL;
+ u32 slot_reco_gen;
- inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
- slot_num);
- if (inode == NULL) {
- status = -EACCES;
+ status = ocfs2_read_journal_inode(osb, slot_num, &bh, &inode);
+ if (status) {
mlog_errno(status);
goto done;
}
- if (is_bad_inode(inode)) {
- status = -EACCES;
- iput(inode);
- inode = NULL;
- mlog_errno(status);
+
+ fe = (struct ocfs2_dinode *)bh->b_data;
+ slot_reco_gen = ocfs2_get_recovery_generation(fe);
+ brelse(bh);
+ bh = NULL;
+
+ /*
+ * As the fs recovery is asynchronous, there is a small chance that
+ * another node mounted (and recovered) the slot before the recovery
+ * thread could get the lock. To handle that, we dirty read the journal
+ * inode for that slot to get the recovery generation. If it is
+ * different than what we expected, the slot has been recovered.
+ * If not, it needs recovery.
+ */
+ if (osb->slot_recovery_generations[slot_num] != slot_reco_gen) {
+ mlog(0, "Slot %u already recovered (old/new=%u/%u)\n", slot_num,
+ osb->slot_recovery_generations[slot_num], slot_reco_gen);
+ osb->slot_recovery_generations[slot_num] = slot_reco_gen;
+ status = -EBUSY;
goto done;
}
- SET_INODE_JOURNAL(inode);
+
+ /* Continue with recovery as the journal has not yet been recovered */
status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
if (status < 0) {
@@ -1138,9 +1207,12 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
fe = (struct ocfs2_dinode *) bh->b_data;
flags = le32_to_cpu(fe->id1.journal1.ij_flags);
+ slot_reco_gen = ocfs2_get_recovery_generation(fe);
if (!(flags & OCFS2_JOURNAL_DIRTY_FL)) {
mlog(0, "No recovery required for node %d\n", node_num);
+ /* Refresh recovery generation for the slot */
+ osb->slot_recovery_generations[slot_num] = slot_reco_gen;
goto done;
}
@@ -1188,6 +1260,11 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
flags &= ~OCFS2_JOURNAL_DIRTY_FL;
fe->id1.journal1.ij_flags = cpu_to_le32(flags);
+ /* Increment recovery generation to indicate successful recovery */
+ ocfs2_bump_recovery_generation(fe);
+ osb->slot_recovery_generations[slot_num] =
+ ocfs2_get_recovery_generation(fe);
+
status = ocfs2_write_block(osb, bh, inode);
if (status < 0)
mlog_errno(status);
@@ -1252,6 +1329,13 @@ static int ocfs2_recover_node(struct ocfs2_super *osb,
status = ocfs2_replay_journal(osb, node_num, slot_num);
if (status < 0) {
+ if (status == -EBUSY) {
+ mlog(0, "Skipping recovery for slot %u (node %u) "
+ "as another node has recovered it\n", slot_num,
+ node_num);
+ status = 0;
+ goto done;
+ }
mlog_errno(status);
goto done;
}
@@ -1334,12 +1418,29 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb)
{
unsigned int node_num;
int status, i;
+ struct buffer_head *bh = NULL;
+ struct ocfs2_dinode *di;
/* This is called with the super block cluster lock, so we
* know that the slot map can't change underneath us. */
spin_lock(&osb->osb_lock);
for (i = 0; i < osb->max_slots; i++) {
+ /* Read journal inode to get the recovery generation */
+ status = ocfs2_read_journal_inode(osb, i, &bh, NULL);
+ if (status) {
+ mlog_errno(status);
+ goto bail;
+ }
+ di = (struct ocfs2_dinode *)bh->b_data;
+ osb->slot_recovery_generations[i] =
+ ocfs2_get_recovery_generation(di);
+ brelse(bh);
+ bh = NULL;
+
+ mlog(0, "Slot %u recovery generation is %u\n", i,
+ osb->slot_recovery_generations[i]);
+
if (i == osb->slot_num)
continue;
@@ -1603,49 +1704,41 @@ static int ocfs2_commit_thread(void *arg)
return 0;
}
-/* Look for a dirty journal without taking any cluster locks. Used for
- * hard readonly access to determine whether the file system journals
- * require recovery. */
+/* Reads all the journal inodes without taking any cluster locks. Used
+ * for hard readonly access to determine whether any journal requires
+ * recovery. Also used to refresh the recovery generation numbers after
+ * a journal has been recovered by another node.
+ */
int ocfs2_check_journals_nolocks(struct ocfs2_super *osb)
{
int ret = 0;
unsigned int slot;
- struct buffer_head *di_bh;
+ struct buffer_head *di_bh = NULL;
struct ocfs2_dinode *di;
- struct inode *journal = NULL;
+ int journal_dirty = 0;
for(slot = 0; slot < osb->max_slots; slot++) {
- journal = ocfs2_get_system_file_inode(osb,
- JOURNAL_SYSTEM_INODE,
- slot);
- if (!journal || is_bad_inode(journal)) {
- ret = -EACCES;
- mlog_errno(ret);
- goto out;
- }
-
- di_bh = NULL;
- ret = ocfs2_read_block(osb, OCFS2_I(journal)->ip_blkno, &di_bh,
- 0, journal);
- if (ret < 0) {
+ ret = ocfs2_read_journal_inode(osb, slot, &di_bh, NULL);
+ if (ret) {
mlog_errno(ret);
goto out;
}
di = (struct ocfs2_dinode *) di_bh->b_data;
+ osb->slot_recovery_generations[slot] =
+ ocfs2_get_recovery_generation(di);
+
if (le32_to_cpu(di->id1.journal1.ij_flags) &
OCFS2_JOURNAL_DIRTY_FL)
- ret = -EROFS;
+ journal_dirty = 1;
brelse(di_bh);
- if (ret)
- break;
+ di_bh = NULL;
}
out:
- if (journal)
- iput(journal);
-
+ if (journal_dirty)
+ ret = -EROFS;
return ret;
}
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index db82be2532e..2178ebffa05 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -161,7 +161,8 @@ int ocfs2_journal_init(struct ocfs2_journal *journal,
void ocfs2_journal_shutdown(struct ocfs2_super *osb);
int ocfs2_journal_wipe(struct ocfs2_journal *journal,
int full);
-int ocfs2_journal_load(struct ocfs2_journal *journal, int local);
+int ocfs2_journal_load(struct ocfs2_journal *journal, int local,
+ int replayed);
int ocfs2_check_journals_nolocks(struct ocfs2_super *osb);
void ocfs2_recovery_thread(struct ocfs2_super *osb,
int node_num);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 1cb814be8ef..7f625f2b111 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -204,6 +204,8 @@ struct ocfs2_super
struct ocfs2_slot_info *slot_info;
+ u32 *slot_recovery_generations;
+
spinlock_t node_map_lock;
u64 root_blkno;
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 3f194517762..4f619850ccf 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -660,7 +660,10 @@ struct ocfs2_dinode {
struct { /* Info for journal system
inodes */
__le32 ij_flags; /* Mounted, version, etc. */
- __le32 ij_pad;
+ __le32 ij_recovery_generation; /* Incremented when the
+ journal is recovered
+ after an unclean
+ shutdown */
} journal1;
} id1; /* Inode type dependant 1 */
/*C0*/ union {
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index ccecfe5094f..88255d3f52b 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1118,7 +1118,7 @@ bail:
return status;
}
-static void ocfs2_inode_init_once(struct kmem_cache *cachep, void *data)
+static void ocfs2_inode_init_once(void *data)
{
struct ocfs2_inode_info *oi = data;
@@ -1442,6 +1442,15 @@ static int ocfs2_initialize_super(struct super_block *sb,
}
mlog(0, "max_slots for this device: %u\n", osb->max_slots);
+ osb->slot_recovery_generations =
+ kcalloc(osb->max_slots, sizeof(*osb->slot_recovery_generations),
+ GFP_KERNEL);
+ if (!osb->slot_recovery_generations) {
+ status = -ENOMEM;
+ mlog_errno(status);
+ goto bail;
+ }
+
init_waitqueue_head(&osb->osb_wipe_event);
osb->osb_orphan_wipes = kcalloc(osb->max_slots,
sizeof(*osb->osb_orphan_wipes),
@@ -1703,7 +1712,7 @@ static int ocfs2_check_volume(struct ocfs2_super *osb)
local = ocfs2_mount_local(osb);
/* will play back anything left in the journal. */
- status = ocfs2_journal_load(osb->journal, local);
+ status = ocfs2_journal_load(osb->journal, local, dirty);
if (status < 0) {
mlog(ML_ERROR, "ocfs2 journal load failed! %d\n", status);
goto finally;
@@ -1768,6 +1777,7 @@ static void ocfs2_delete_osb(struct ocfs2_super *osb)
ocfs2_free_slot_info(osb);
kfree(osb->osb_orphan_wipes);
+ kfree(osb->slot_recovery_generations);
/* FIXME
* This belongs in journal shutdown, but because we have to
* allocate osb->journal at the start of ocfs2_initalize_osb(),
diff --git a/fs/omfs/Makefile b/fs/omfs/Makefile
new file mode 100644
index 00000000000..8b82b63f112
--- /dev/null
+++ b/fs/omfs/Makefile
@@ -0,0 +1,4 @@
+
+obj-$(CONFIG_OMFS_FS) += omfs.o
+
+omfs-y := bitmap.o dir.o file.o inode.o
diff --git a/fs/omfs/bitmap.c b/fs/omfs/bitmap.c
new file mode 100644
index 00000000000..697663b01ba
--- /dev/null
+++ b/fs/omfs/bitmap.c
@@ -0,0 +1,192 @@
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/buffer_head.h>
+#include <asm/div64.h>
+#include "omfs.h"
+
+/*
+ * Returns the total number of clear bits across all in-memory bitmap
+ * buffers, treating each buffer as sb->s_blocksize * 8 bits long.
+ */
+unsigned long omfs_count_free(struct super_block *sb)
+{
+	struct omfs_sb_info *sbi = OMFS_SB(sb);
+	int bits_per_map = sb->s_blocksize * 8;
+	unsigned long free_bits = 0;
+	unsigned int map;
+
+	for (map = 0; map < sbi->s_imap_size; map++) {
+		int used = bitmap_weight(sbi->s_imap[map], bits_per_map);
+
+		free_bits += bits_per_map - used;
+	}
+
+	return free_bits;
+}
+
+/*
+ * Counts the run of zero bits starting at bit up to max.
+ * It handles the case where a run might spill over a buffer.
+ * Called with bitmap lock.
+ *
+ * addr    - array of bitmap buffers; the scan starts in addr[0]
+ * nbits   - number of bits examined in each buffer
+ * addrlen - number of buffers available starting at addr
+ * bit     - first bit to examine in addr[0]
+ * max     - upper bound on the value returned
+ *
+ * Returns the length of the run, clamped to max.
+ */
+static int count_run(unsigned long **addr, int nbits,
+ int addrlen, int bit, int max)
+{
+ int count = 0;
+ int x;
+
+ for (; addrlen > 0; addrlen--, addr++) {
+ x = find_next_bit(*addr, nbits, bit);
+ count += x - bit;
+
+ /* stop once a set bit ends the run or the run already exceeds max */
+ if (x < nbits || count > max)
+ return min(count, max);
+
+ /* run reached the end of this buffer: continue at bit 0 of the next */
+ bit = 0;
+ }
+ return min(count, max);
+}
+
+/*
+ * Sets or clears the run of count bits starting with bit.
+ * Called with bitmap lock.
+ *
+ * map is the index of the bitmap buffer holding the first bit and nbits
+ * the number of bits per buffer; the run may continue into following
+ * buffers.  Each bit is updated both in the in-memory map (sbi->s_imap)
+ * and in the bitmap block read from disk, which is then marked dirty.
+ *
+ * Returns 0 on success or -ENOMEM if a bitmap block cannot be read.
+ */
+static int set_run(struct super_block *sb, int map,
+ int nbits, int bit, int count, int set)
+{
+ int i;
+ int err;
+ struct buffer_head *bh;
+ struct omfs_sb_info *sbi = OMFS_SB(sb);
+
+ err = -ENOMEM;
+ bh = sb_bread(sb, clus_to_blk(sbi, sbi->s_bitmap_ino) + map);
+ if (!bh)
+ goto out;
+
+ for (i = 0; i < count; i++, bit++) {
+ if (bit >= nbits) {
+ /* crossed into the next bitmap buffer: flush this one, read the next */
+ bit = 0;
+ map++;
+
+ mark_buffer_dirty(bh);
+ brelse(bh);
+ bh = sb_bread(sb,
+ clus_to_blk(sbi, sbi->s_bitmap_ino) + map);
+ if (!bh)
+ goto out;
+ }
+ if (set) {
+ set_bit(bit, sbi->s_imap[map]);
+ set_bit(bit, (unsigned long *)bh->b_data);
+ } else {
+ clear_bit(bit, sbi->s_imap[map]);
+ clear_bit(bit, (unsigned long *)bh->b_data);
+ }
+ }
+ mark_buffer_dirty(bh);
+ brelse(bh);
+ err = 0;
+out:
+ return err;
+}
+
+/*
+ * Tries to allocate exactly one block.  Returns 1 if successful, or 0 if
+ * the block lies beyond the bitmap, is already marked in the in-memory
+ * bitmap, or the on-disk bitmap block cannot be read.
+ *
+ * The on-disk bitmap is only updated when sbi->s_bitmap_ino > 0.
+ *
+ * NOTE(review): when sb_bread() fails, the bit set by test_and_set_bit()
+ * stays set in the in-memory bitmap although 0 is returned - confirm
+ * whether that is intended.
+ */
+int omfs_allocate_block(struct super_block *sb, u64 block)
+{
+ struct buffer_head *bh;
+ struct omfs_sb_info *sbi = OMFS_SB(sb);
+ int bits_per_entry = 8 * sb->s_blocksize;
+ int map, bit;
+ int ret = 0;
+ u64 tmp;
+
+ /* split block into bitmap buffer index (map) and bit within it */
+ tmp = block;
+ bit = do_div(tmp, bits_per_entry);
+ map = tmp;
+
+ mutex_lock(&sbi->s_bitmap_lock);
+ if (map >= sbi->s_imap_size || test_and_set_bit(bit, sbi->s_imap[map]))
+ goto out;
+
+ if (sbi->s_bitmap_ino > 0) {
+ bh = sb_bread(sb, clus_to_blk(sbi, sbi->s_bitmap_ino) + map);
+ if (!bh)
+ goto out;
+
+ set_bit(bit, (unsigned long *)bh->b_data);
+ mark_buffer_dirty(bh);
+ brelse(bh);
+ }
+ ret = 1;
+out:
+ mutex_unlock(&sbi->s_bitmap_lock);
+ return ret;
+}
+
+
+/*
+ * Tries to allocate a set of blocks.  The request size depends on the
+ * type: for inodes, we must allocate sbi->s_mirrors blocks, and for file
+ * blocks, we try to allocate sbi->s_clustersize, but can always get away
+ * with just one block.
+ *
+ * Searches the in-memory bitmap for the first run of at least min_request
+ * free bits (runs are capped at max_request) and marks it allocated.
+ *
+ * On success *return_block holds the first block of the run and
+ * *return_size its length.  Returns 0 on success, -ENOSPC if no suitable
+ * run exists, or the error from set_run().  Note that the two output
+ * values are written before set_run() is called, so they are also set
+ * when set_run() fails.
+ */
+int omfs_allocate_range(struct super_block *sb,
+			int min_request,
+			int max_request,
+			u64 *return_block,
+			int *return_size)
+{
+	struct omfs_sb_info *sbi = OMFS_SB(sb);
+	int bits_per_entry = 8 * sb->s_blocksize;
+	int ret = 0;
+	int i, run, bit;
+
+	mutex_lock(&sbi->s_bitmap_lock);
+	for (i = 0; i < sbi->s_imap_size; i++) {
+		bit = 0;
+		while (bit < bits_per_entry) {
+			bit = find_next_zero_bit(sbi->s_imap[i], bits_per_entry,
+				bit);
+
+			if (bit == bits_per_entry)
+				break;
+
+			/* the run may spill into the following bitmap buffers */
+			run = count_run(&sbi->s_imap[i], bits_per_entry,
+				sbi->s_imap_size-i, bit, max_request);
+
+			if (run >= min_request)
+				goto found;
+			bit += run;
+		}
+	}
+	ret = -ENOSPC;
+	goto out;
+
+found:
+	/*
+	 * Widen to 64 bits before multiplying: i * bits_per_entry is an
+	 * int product and can overflow on large volumes.
+	 */
+	*return_block = (u64) i * bits_per_entry + bit;
+	*return_size = run;
+	ret = set_run(sb, i, bits_per_entry, bit, run, 1);
+
+out:
+	mutex_unlock(&sbi->s_bitmap_lock);
+	return ret;
+}
+
+/*
+ * Marks count blocks, beginning at block, as free in the bitmap.
+ * A starting block that lies beyond the bitmap is ignored and 0 is
+ * returned; otherwise the result of set_run() is returned.
+ */
+int omfs_clear_range(struct super_block *sb, u64 block, int count)
+{
+	struct omfs_sb_info *sbi = OMFS_SB(sb);
+	int bits_per_entry = 8 * sb->s_blocksize;
+	u64 quotient = block;
+	int first_map, first_bit, err;
+
+	/* do_div() leaves the quotient in its argument and yields the remainder */
+	first_bit = do_div(quotient, bits_per_entry);
+	first_map = quotient;
+
+	if (first_map >= sbi->s_imap_size)
+		return 0;
+
+	mutex_lock(&sbi->s_bitmap_lock);
+	err = set_run(sb, first_map, bits_per_entry, first_bit, count, 0);
+	mutex_unlock(&sbi->s_bitmap_lock);
+
+	return err;
+}
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c
new file mode 100644
index 00000000000..c0757e99887
--- /dev/null
+++ b/fs/omfs/dir.c
@@ -0,0 +1,504 @@
+/*
+ * OMFS (as used by RIO Karma) directory operations.
+ * Copyright (C) 2005 Bob Copeland <me@bobcopeland.com>
+ * Released under GPL v2.
+ */
+
+#include <linux/fs.h>
+#include <linux/ctype.h>
+#include <linux/buffer_head.h>
+#include "omfs.h"
+
+/*
+ * Hashes the first namelen bytes of name and reduces the result modulo
+ * mod.  Each byte is passed through tolower(), so names differing only
+ * in case hash to the same value.  omfs_get_bucket() uses the result as
+ * a bucket index.
+ *
+ * NOTE(review): mod must be nonzero; the visible caller derives it from
+ * the directory's i_size.
+ */
+static int omfs_hash(const char *name, int namelen, int mod)
+{
+ int i, hash = 0;
+ for (i = 0; i < namelen; i++)
+ hash ^= tolower(name[i]) << (i % 24);
+ return hash % mod;
+}
+
+/*
+ * Finds the bucket for a given name and reads the containing block;
+ * *ofs is set to the offset of the first list entry.
+ *
+ * The bucket table starts at OMFS_DIR_START within the directory's own
+ * block and holds 8-byte entries; the number of buckets is derived from
+ * dir->i_size.  Returns the buffer head from sb_bread() (NULL on read
+ * failure); callers in this file release it with brelse().
+ */
+static struct buffer_head *omfs_get_bucket(struct inode *dir,
+ const char *name, int namelen, int *ofs)
+{
+ int nbuckets = (dir->i_size - OMFS_DIR_START)/8;
+ int block = clus_to_blk(OMFS_SB(dir->i_sb), dir->i_ino);
+ int bucket = omfs_hash(name, namelen, nbuckets);
+
+ *ofs = OMFS_DIR_START + bucket * 8;
+ return sb_bread(dir->i_sb, block);
+}
+
+/*
+ * Walks the sibling chain that starts at block, looking for an entry
+ * whose i_name matches name over namelen bytes.
+ *
+ * On a match the buffer head of the matching entry is returned (not yet
+ * released) and *prev_block holds the block visited just before it, or
+ * ~0 if the match is the first entry of the chain.
+ *
+ * Returns ERR_PTR(-EIO) if a block cannot be read and ERR_PTR(-ENOENT)
+ * if the chain ends without a match or an entry fails omfs_is_bad().
+ *
+ * NOTE(review): strncmp() compares only namelen bytes, so an entry whose
+ * stored name merely begins with name also matches - confirm intended.
+ */
+static struct buffer_head *omfs_scan_list(struct inode *dir, u64 block,
+ const char *name, int namelen,
+ u64 *prev_block)
+{
+ struct buffer_head *bh;
+ struct omfs_inode *oi;
+ int err = -ENOENT;
+ *prev_block = ~0;
+
+ /* ~0 terminates the chain */
+ while (block != ~0) {
+ bh = sb_bread(dir->i_sb,
+ clus_to_blk(OMFS_SB(dir->i_sb), block));
+ if (!bh) {
+ err = -EIO;
+ goto err;
+ }
+
+ oi = (struct omfs_inode *) bh->b_data;
+ if (omfs_is_bad(OMFS_SB(dir->i_sb), &oi->i_head, block)) {
+ brelse(bh);
+ goto err;
+ }
+
+ if (strncmp(oi->i_name, name, namelen) == 0)
+ return bh;
+
+ *prev_block = block;
+ block = be64_to_cpu(oi->i_sibling);
+ brelse(bh);
+ }
+err:
+ return ERR_PTR(err);
+}
+
+/*
+ * Looks up name in directory dir: reads the head of the matching hash
+ * bucket and scans that bucket's chain.  Returns the buffer head of the
+ * matching entry, ERR_PTR(-EIO) if the directory block cannot be read,
+ * or whatever error pointer omfs_scan_list() produces.
+ */
+static struct buffer_head *omfs_find_entry(struct inode *dir,
+					   const char *name, int namelen)
+{
+	struct buffer_head *bucket_bh;
+	u64 head, unused_prev;
+	int bucket_ofs;
+
+	bucket_bh = omfs_get_bucket(dir, name, namelen, &bucket_ofs);
+	if (bucket_bh == NULL)
+		return ERR_PTR(-EIO);
+
+	/* the bucket slot stores the first block of the chain, big-endian */
+	head = be64_to_cpu(*((__be64 *) &bucket_bh->b_data[bucket_ofs]));
+	brelse(bucket_bh);
+
+	return omfs_scan_list(dir, head, name, namelen, &unused_prev);
+}
+
+/*
+ * Initializes the on-disk block of a newly created inode.
+ *
+ * The omfs_inode header is zeroed.  For a directory the area from
+ * OMFS_DIR_START up to s_sys_blocksize is filled with 0xff bytes (the
+ * value the chain walkers in this file treat as "no entry"); otherwise
+ * an empty extent table is written at OMFS_EXTENT_START.  Finally the
+ * self pointer is set to the inode number and the sibling pointer to
+ * all ones.
+ *
+ * Returns 0 on success or -ENOMEM if the block cannot be read.
+ */
+int omfs_make_empty(struct inode *inode, struct super_block *sb)
+{
+ struct omfs_sb_info *sbi = OMFS_SB(sb);
+ int block = clus_to_blk(sbi, inode->i_ino);
+ struct buffer_head *bh;
+ struct omfs_inode *oi;
+
+ bh = sb_bread(sb, block);
+ if (!bh)
+ return -ENOMEM;
+
+ memset(bh->b_data, 0, sizeof(struct omfs_inode));
+
+ if (inode->i_mode & S_IFDIR) {
+ memset(&bh->b_data[OMFS_DIR_START], 0xff,
+ sbi->s_sys_blocksize - OMFS_DIR_START);
+ } else
+ omfs_make_empty_table(bh, OMFS_EXTENT_START);
+
+ oi = (struct omfs_inode *) bh->b_data;
+ oi->i_head.h_self = cpu_to_be64(inode->i_ino);
+ oi->i_sibling = ~cpu_to_be64(0ULL);
+
+ mark_buffer_dirty(bh);
+ brelse(bh);
+ return 0;
+}
+
+/*
+ * Links inode into the parent directory of dentry under the dentry's
+ * name.  The inode becomes the new head of its hash bucket; the previous
+ * head is stored as the inode's sibling.  The name and parent pointer
+ * are written into the inode's own block.
+ *
+ * Returns 0 on success or -ENOMEM if either block cannot be read.
+ *
+ * NOTE(review): if the second sb_bread() fails, the bucket head has
+ * already been changed to point at inode - confirm this is acceptable.
+ */
+static int omfs_add_link(struct dentry *dentry, struct inode *inode)
+{
+ struct inode *dir = dentry->d_parent->d_inode;
+ const char *name = dentry->d_name.name;
+ int namelen = dentry->d_name.len;
+ struct omfs_inode *oi;
+ struct buffer_head *bh;
+ u64 block;
+ __be64 *entry;
+ int ofs;
+
+ /* just prepend to head of queue in proper bucket */
+ bh = omfs_get_bucket(dir, name, namelen, &ofs);
+ if (!bh)
+ goto out;
+
+ entry = (__be64 *) &bh->b_data[ofs];
+ block = be64_to_cpu(*entry);
+ *entry = cpu_to_be64(inode->i_ino);
+ mark_buffer_dirty(bh);
+ brelse(bh);
+
+ /* now set the sibling and parent pointers on the new inode */
+ bh = sb_bread(dir->i_sb, clus_to_blk(OMFS_SB(dir->i_sb), inode->i_ino));
+ if (!bh)
+ goto out;
+
+ oi = (struct omfs_inode *) bh->b_data;
+ /* store the name zero-padded to OMFS_NAMELEN bytes */
+ memcpy(oi->i_name, name, namelen);
+ memset(oi->i_name + namelen, 0, OMFS_NAMELEN - namelen);
+ oi->i_sibling = cpu_to_be64(block);
+ oi->i_parent = cpu_to_be64(dir->i_ino);
+ mark_buffer_dirty(bh);
+ brelse(bh);
+
+ dir->i_ctime = CURRENT_TIME_SEC;
+
+ /* mark affected inodes dirty to rebuild checksums */
+ mark_inode_dirty(dir);
+ mark_inode_dirty(inode);
+ return 0;
+out:
+ return -ENOMEM;
+}
+
+/*
+ * Unlinks the entry named by dentry from its parent directory's hash
+ * chain.  The pointer that referred to the entry - either the bucket
+ * slot in the directory block or the i_sibling field of the previous
+ * entry in the chain - is overwritten with the entry's own sibling.
+ *
+ * Returns 0 on success, -ENOMEM if a block cannot be read, or the error
+ * from omfs_scan_list() if the entry is not found.
+ */
+static int omfs_delete_entry(struct dentry *dentry)
+{
+ struct inode *dir = dentry->d_parent->d_inode;
+ struct inode *dirty;
+ const char *name = dentry->d_name.name;
+ int namelen = dentry->d_name.len;
+ struct omfs_inode *oi;
+ struct buffer_head *bh, *bh2;
+ __be64 *entry, next;
+ u64 block, prev;
+ int ofs;
+ int err = -ENOMEM;
+
+ /* delete the proper node in the bucket's linked list */
+ bh = omfs_get_bucket(dir, name, namelen, &ofs);
+ if (!bh)
+ goto out;
+
+ entry = (__be64 *) &bh->b_data[ofs];
+ block = be64_to_cpu(*entry);
+
+ bh2 = omfs_scan_list(dir, block, name, namelen, &prev);
+ if (IS_ERR(bh2)) {
+ err = PTR_ERR(bh2);
+ goto out_free_bh;
+ }
+
+ /* next is kept in on-disk (big-endian) form and copied verbatim below */
+ oi = (struct omfs_inode *) bh2->b_data;
+ next = oi->i_sibling;
+ brelse(bh2);
+
+ if (prev != ~0) {
+ /* found in middle of list, get list ptr */
+ brelse(bh);
+ bh = sb_bread(dir->i_sb,
+ clus_to_blk(OMFS_SB(dir->i_sb), prev));
+ if (!bh)
+ goto out;
+
+ oi = (struct omfs_inode *) bh->b_data;
+ entry = &oi->i_sibling;
+ }
+
+ *entry = next;
+ mark_buffer_dirty(bh);
+
+ /* the previous entry's block changed, so mark its inode dirty as well */
+ if (prev != ~0) {
+ dirty = omfs_iget(dir->i_sb, prev);
+ if (!IS_ERR(dirty)) {
+ mark_inode_dirty(dirty);
+ iput(dirty);
+ }
+ }
+
+ err = 0;
+out_free_bh:
+ brelse(bh);
+out:
+ return err;
+}
+
+/*
+ * Returns nonzero if every hash bucket of the directory is unused (all
+ * ones), i.e. the directory contains no entries, and 0 otherwise.
+ * A directory whose block cannot be read is reported as not empty.
+ */
+static int omfs_dir_is_empty(struct inode *inode)
+{
+	int nbuckets = (inode->i_size - OMFS_DIR_START) / 8;
+	struct buffer_head *bh;
+	u64 *ptr;
+	int i;
+
+	bh = sb_bread(inode->i_sb, clus_to_blk(OMFS_SB(inode->i_sb),
+			inode->i_ino));
+
+	if (!bh)
+		return 0;
+
+	ptr = (u64 *) &bh->b_data[OMFS_DIR_START];
+
+	/* stop at the first bucket that heads a chain */
+	for (i = 0; i < nbuckets; i++, ptr++)
+		if (*ptr != ~0)
+			break;
+
+	brelse(bh);
+
+	/*
+	 * Empty only if the scan ran off the end of the table.  Decide
+	 * from the loop index rather than *ptr: ptr may point one slot
+	 * past the table here, and the buffer has been released.
+	 */
+	return i == nbuckets;
+}
+
+/*
+ * Removes the directory entry for dentry and, if that succeeds, drops
+ * one link from the target inode and marks the directory dirty.
+ * Returns 0 or the error from omfs_delete_entry().
+ */
+static int omfs_unlink(struct inode *dir, struct dentry *dentry)
+{
+	struct inode *victim = dentry->d_inode;
+	int err;
+
+	err = omfs_delete_entry(dentry);
+	if (err)
+		return err;
+
+	inode_dec_link_count(victim);
+	mark_inode_dirty(dir);
+	return 0;
+}
+
+/*
+ * Removes a directory.  Fails with -ENOTEMPTY unless omfs_dir_is_empty()
+ * reports the directory as empty; otherwise unlinks it and drops one
+ * further link from the inode on success.
+ */
+static int omfs_rmdir(struct inode *dir, struct dentry *dentry)
+{
+	struct inode *victim = dentry->d_inode;
+	int err;
+
+	if (!omfs_dir_is_empty(victim))
+		return -ENOTEMPTY;
+
+	err = omfs_unlink(dir, dentry);
+	if (err)
+		return err;
+
+	inode_dec_link_count(victim);
+	return 0;
+}
+
+/*
+ * Common helper for create and mkdir: allocates a new inode with the
+ * given mode, initializes its on-disk block, links it into dir under
+ * the dentry's name and instantiates the dentry.  On any failure after
+ * allocation the new inode reference is dropped and the error returned.
+ */
+static int omfs_add_node(struct inode *dir, struct dentry *dentry, int mode)
+{
+	struct inode *node;
+	int err;
+
+	node = omfs_new_inode(dir, mode);
+	if (IS_ERR(node))
+		return PTR_ERR(node);
+
+	err = omfs_make_empty(node, dir->i_sb);
+	if (!err)
+		err = omfs_add_link(dentry, node);
+
+	if (err) {
+		iput(node);
+		return err;
+	}
+
+	d_instantiate(dentry, node);
+	return 0;
+}
+
+/* mkdir inode operation: creates a node with S_IFDIR added to mode. */
+static int omfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+{
+ return omfs_add_node(dir, dentry, mode | S_IFDIR);
+}
+
+/* create inode operation: creates a node with S_IFREG added to mode.
+ * The nameidata argument is not used. */
+static int omfs_create(struct inode *dir, struct dentry *dentry, int mode,
+ struct nameidata *nd)
+{
+ return omfs_add_node(dir, dentry, mode | S_IFREG);
+}
+
+/*
+ * lookup inode operation.  Names longer than OMFS_NAMELEN are rejected
+ * with -ENAMETOOLONG.  If the name is found, the inode named by the
+ * entry's self pointer is loaded and attached to dentry; any error from
+ * omfs_find_entry() is treated as "not found" and the dentry is added
+ * with a NULL inode.  Returns NULL, or an error pointer if omfs_iget()
+ * fails.
+ */
+static struct dentry *omfs_lookup(struct inode *dir, struct dentry *dentry,
+ struct nameidata *nd)
+{
+ struct buffer_head *bh;
+ struct inode *inode = NULL;
+
+ if (dentry->d_name.len > OMFS_NAMELEN)
+ return ERR_PTR(-ENAMETOOLONG);
+
+ bh = omfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len);
+ if (!IS_ERR(bh)) {
+ struct omfs_inode *oi = (struct omfs_inode *)bh->b_data;
+ ino_t ino = be64_to_cpu(oi->i_head.h_self);
+ brelse(bh);
+ inode = omfs_iget(dir->i_sb, ino);
+ if (IS_ERR(inode))
+ return ERR_CAST(inode);
+ }
+ d_add(dentry, inode);
+ return NULL;
+}
+
+/*
+ * Sanity check of a block header's self pointer.  The header is good
+ * when h_self equals the block it was read from (fsblock) and lies in
+ * the range [s_root_ino, s_num_blocks].  Returns 0 for a good header;
+ * otherwise logs a warning and returns 1.
+ */
+int omfs_is_bad(struct omfs_sb_info *sbi, struct omfs_header *header,
+	u64 fsblock)
+{
+	u64 self = be64_to_cpu(header->h_self);
+
+	if (self == fsblock && self >= sbi->s_root_ino &&
+	    self <= sbi->s_num_blocks)
+		return 0;
+
+	printk(KERN_WARNING "omfs: bad hash chain detected\n");
+	return 1;
+}
+
+/*
+ * Emits the entries of one hash chain, starting at fsblock, through
+ * filldir.  The first hindex entries of the chain are skipped (they
+ * were reported by an earlier call).  filp->f_pos is advanced by one
+ * for every entry filldir accepts.
+ *
+ * Returns the result of the last filldir call, or 0 if none was made,
+ * including when a block cannot be read or fails omfs_is_bad().
+ *
+ * NOTE(review): the loop keeps walking the chain after filldir returns
+ * nonzero - confirm whether it should stop there.
+ */
+static int omfs_fill_chain(struct file *filp, void *dirent, filldir_t filldir,
+ u64 fsblock, int hindex)
+{
+ struct inode *dir = filp->f_dentry->d_inode;
+ struct buffer_head *bh;
+ struct omfs_inode *oi;
+ u64 self;
+ int res = 0;
+ unsigned char d_type;
+
+ /* follow chain in this bucket */
+ while (fsblock != ~0) {
+ bh = sb_bread(dir->i_sb, clus_to_blk(OMFS_SB(dir->i_sb),
+ fsblock));
+ if (!bh)
+ goto out;
+
+ oi = (struct omfs_inode *) bh->b_data;
+ if (omfs_is_bad(OMFS_SB(dir->i_sb), &oi->i_head, fsblock)) {
+ brelse(bh);
+ goto out;
+ }
+
+ /* remember this entry's block and advance to its sibling */
+ self = fsblock;
+ fsblock = be64_to_cpu(oi->i_sibling);
+
+ /* skip visited nodes */
+ if (hindex) {
+ hindex--;
+ brelse(bh);
+ continue;
+ }
+
+ d_type = (oi->i_type == OMFS_DIR) ? DT_DIR : DT_REG;
+
+ res = filldir(dirent, oi->i_name, strnlen(oi->i_name,
+ OMFS_NAMELEN), filp->f_pos, self, d_type);
+ if (res == 0)
+ filp->f_pos++;
+ brelse(bh);
+ }
+out:
+ return res;
+}
+
+/*
+ * rename inode operation.
+ *
+ * If the target name already exists it is unlinked first; a directory
+ * source may only replace a target that omfs_dir_is_empty() reports as
+ * empty (-ENOTEMPTY otherwise).  The old name is then unlinked and the
+ * inode linked under the new name.  An extra link is taken on the inode
+ * before unlinking the old name, because omfs_unlink() drops one.
+ *
+ * Returns 0 on success or a negative error.
+ *
+ * NOTE(review): if omfs_add_link() fails, the old name has already been
+ * removed and is not restored - confirm.
+ */
+static int omfs_rename(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry)
+{
+ struct inode *new_inode = new_dentry->d_inode;
+ struct inode *old_inode = old_dentry->d_inode;
+ struct buffer_head *bh;
+ int is_dir;
+ int err;
+
+ is_dir = S_ISDIR(old_inode->i_mode);
+
+ if (new_inode) {
+ /* overwriting existing file/dir */
+ err = -ENOTEMPTY;
+ if (is_dir && !omfs_dir_is_empty(new_inode))
+ goto out;
+
+ /* make sure the target really is present on disk before unlinking */
+ err = -ENOENT;
+ bh = omfs_find_entry(new_dir, new_dentry->d_name.name,
+ new_dentry->d_name.len);
+ if (IS_ERR(bh))
+ goto out;
+ brelse(bh);
+
+ err = omfs_unlink(new_dir, new_dentry);
+ if (err)
+ goto out;
+ }
+
+ /* since omfs locates files by name, we need to unlink _before_
+ * adding the new link or we won't find the old one */
+ inode_inc_link_count(old_inode);
+ err = omfs_unlink(old_dir, old_dentry);
+ if (err) {
+ inode_dec_link_count(old_inode);
+ goto out;
+ }
+
+ err = omfs_add_link(new_dentry, old_inode);
+ if (err)
+ goto out;
+
+ old_inode->i_ctime = CURRENT_TIME_SEC;
+out:
+ return err;
+}
+
+/*
+ * readdir file operation.
+ *
+ * Positions 0 and 1 yield "." and "..".  After that f_pos encodes the
+ * iteration state: (bucket + 1) in the bits above bit 20 and the index
+ * within that bucket's chain in the low 20 bits.  Buckets are walked in
+ * order, each chain being emitted by omfs_fill_chain().
+ *
+ * Returns 0, or -EINVAL if the directory block cannot be read.
+ */
+static int omfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
+{
+ struct inode *dir = filp->f_dentry->d_inode;
+ struct buffer_head *bh;
+ loff_t offset, res;
+ unsigned int hchain, hindex;
+ int nbuckets;
+ u64 fsblock;
+ int ret = -EINVAL;
+
+ /* positions that do not fit in 32 bits are treated as end of directory */
+ if (filp->f_pos >> 32)
+ goto success;
+
+ switch ((unsigned long) filp->f_pos) {
+ case 0:
+ if (filldir(dirent, ".", 1, 0, dir->i_ino, DT_DIR) < 0)
+ goto success;
+ filp->f_pos++;
+ /* fall through */
+ case 1:
+ if (filldir(dirent, "..", 2, 1,
+ parent_ino(filp->f_dentry), DT_DIR) < 0)
+ goto success;
+ /* first real entry: bucket 0, index 0 */
+ filp->f_pos = 1 << 20;
+ /* fall through */
+ }
+
+ nbuckets = (dir->i_size - OMFS_DIR_START) / 8;
+
+ /* high 12 bits store bucket + 1 and low 20 bits store hash index */
+ hchain = (filp->f_pos >> 20) - 1;
+ hindex = filp->f_pos & 0xfffff;
+
+ bh = sb_bread(dir->i_sb, clus_to_blk(OMFS_SB(dir->i_sb), dir->i_ino));
+ if (!bh)
+ goto out;
+
+ offset = OMFS_DIR_START + hchain * 8;
+
+ for (; hchain < nbuckets; hchain++, offset += 8) {
+ fsblock = be64_to_cpu(*((__be64 *) &bh->b_data[offset]));
+
+ res = omfs_fill_chain(filp, dirent, filldir, fsblock, hindex);
+ /* only the first bucket resumes mid-chain */
+ hindex = 0;
+ if (res < 0)
+ break;
+
+ /* bucket done: point f_pos at the start of the next bucket */
+ filp->f_pos = (hchain+2) << 20;
+ }
+ brelse(bh);
+success:
+ ret = 0;
+out:
+ return ret;
+}
+
+/* Inode operations for OMFS directories, wired to the handlers above. */
+struct inode_operations omfs_dir_inops = {
+ .lookup = omfs_lookup,
+ .mkdir = omfs_mkdir,
+ .rename = omfs_rename,
+ .create = omfs_create,
+ .unlink = omfs_unlink,
+ .rmdir = omfs_rmdir,
+};
+
+/* File operations for OMFS directories: read() is handed to
+ * generic_read_dir, directory listing goes through omfs_readdir(). */
+struct file_operations omfs_dir_operations = {
+ .read = generic_read_dir,
+ .readdir = omfs_readdir,
+};
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
new file mode 100644
index 00000000000..7e2499053e4
--- /dev/null
+++ b/fs/omfs/file.c
@@ -0,0 +1,346 @@
+/*
+ * OMFS (as used by RIO Karma) file operations.
+ * Copyright (C) 2005 Bob Copeland <me@bobcopeland.com>
+ * Released under GPL v2.
+ */
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/buffer_head.h>
+#include <linux/mpage.h>
+#include "omfs.h"
+
+/*
+ * fsync for OMFS files: flush the mapping's buffers and, when the inode
+ * itself is dirty (and not skipped by a datasync request), write the inode.
+ *
+ * When the inode write is skipped the result of sync_mapping_buffers() is
+ * returned unchanged; otherwise any failure is reported as -EIO.
+ */
+static int omfs_sync_file(struct file *file, struct dentry *dentry,
+		int datasync)
+{
+	struct inode *inode = dentry->d_inode;
+	int err = sync_mapping_buffers(inode->i_mapping);
+
+	/* nothing about the inode needs writing */
+	if (!(inode->i_state & I_DIRTY))
+		return err;
+
+	/* datasync only cares about changes needed to retrieve the data */
+	if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
+		return err;
+
+	err |= omfs_sync_inode(inode);
+	if (err)
+		return -EIO;
+	return 0;
+}
+
+/*
+ * Initialise an empty extent table inside @bh at byte @offset.
+ *
+ * The table has no successor (e_next is all ones) and holds exactly one
+ * entry, the terminator, whose cluster and block fields are all ones.
+ * The caller is responsible for marking @bh dirty.
+ */
+void omfs_make_empty_table(struct buffer_head *bh, int offset)
+{
+	struct omfs_extent *oe = (struct omfs_extent *) &bh->b_data[offset];
+
+	oe->e_next = ~cpu_to_be64(0ULL);
+	oe->e_extent_count = cpu_to_be32(1);
+	oe->e_fill = cpu_to_be32(0x22);
+	oe->e_entry.e_cluster = ~cpu_to_be64(0ULL);
+	oe->e_entry.e_blocks = ~cpu_to_be64(0ULL);
+}
+
+/*
+ * Release every extent owned by @inode and reset its extent tables.
+ *
+ * Only truncation to zero is supported: the function fails with -EIO when
+ * inode->i_size is non-zero. It walks the extent table in the inode block
+ * and then each continuation block linked through e_next, clearing the
+ * bitmap range of every non-terminator entry, rewriting the table as
+ * empty, and freeing the continuation blocks themselves.
+ *
+ * Returns 0 on success, or -EIO if a block cannot be read, fails
+ * omfs_is_bad(), or carries an extent count that cannot fit in the block.
+ */
+int omfs_shrink_inode(struct inode *inode)
+{
+	struct omfs_sb_info *sbi = OMFS_SB(inode->i_sb);
+	struct omfs_extent *oe;
+	struct omfs_extent_entry *entry;
+	struct buffer_head *bh;
+	u64 next, last;
+	u32 extent_count;
+	u32 max_extents;
+	int table_offset = OMFS_EXTENT_START;
+	int ret;
+
+	/* traverse extent table, freeing each entry that is greater
+	 * than inode->i_size;
+	 */
+	next = inode->i_ino;
+
+	/* only support truncate -> 0 for now */
+	ret = -EIO;
+	if (inode->i_size != 0)
+		goto out;
+
+	bh = sb_bread(inode->i_sb, clus_to_blk(sbi, next));
+	if (!bh)
+		goto out;
+
+	for (;;) {
+
+		if (omfs_is_bad(sbi, (struct omfs_header *) bh->b_data, next)) {
+			brelse(bh);
+			goto out;
+		}
+
+		/* the block must at least hold the table header + terminator */
+		if (sbi->s_sys_blocksize <
+		    table_offset + sizeof(struct omfs_extent)) {
+			brelse(bh);
+			goto out;
+		}
+		max_extents = (sbi->s_sys_blocksize - table_offset -
+			sizeof(struct omfs_extent)) /
+			sizeof(struct omfs_extent_entry) + 1;
+
+		oe = (struct omfs_extent *)(&bh->b_data[table_offset]);
+		extent_count = be32_to_cpu(oe->e_extent_count);
+
+		/* a corrupt count would walk entries past the end of bh */
+		if (extent_count > max_extents) {
+			brelse(bh);
+			goto out;
+		}
+
+		last = next;
+		next = be64_to_cpu(oe->e_next);
+		entry = &oe->e_entry;
+
+		/* ignore last entry as it is the terminator */
+		for (; extent_count > 1; extent_count--) {
+			u64 start, count;
+			start = be64_to_cpu(entry->e_cluster);
+			count = be64_to_cpu(entry->e_blocks);
+
+			omfs_clear_range(inode->i_sb, start, (int) count);
+			entry++;
+		}
+		omfs_make_empty_table(bh, table_offset);
+		mark_buffer_dirty(bh);
+		brelse(bh);
+
+		/* continuation blocks are freed; the inode block is kept */
+		if (last != inode->i_ino)
+			omfs_clear_range(inode->i_sb, last, sbi->s_mirrors);
+
+		/* all ones marks the end of the chain */
+		if (next == ~0)
+			break;
+
+		bh = sb_bread(inode->i_sb, clus_to_blk(sbi, next));
+		if (!bh)
+			goto out;
+		/* continuation blocks keep their table at a different offset */
+		table_offset = OMFS_EXTENT_CONT;
+	}
+	ret = 0;
+out:
+	return ret;
+}
+
+/*
+ * ->truncate hook: drop the file's extents and mark the inode dirty.
+ * The hook returns void, so a failure of omfs_shrink_inode() is not
+ * reported to the caller.
+ */
+static void omfs_truncate(struct inode *inode)
+{
+	(void) omfs_shrink_inode(inode);
+	mark_inode_dirty(inode);
+}
+
+/*
+ * Add new blocks to the current extent, or create new entries/continuations
+ * as necessary.
+ *
+ * @oe is the extent table to extend. On success (return 0) *ret_block holds
+ * the cluster that was added. Returns -EIO when the table has no terminator
+ * or has no room for another entry, or the error from
+ * omfs_allocate_range(); *ret_block is not written on failure.
+ * The caller must mark the buffer holding @oe dirty.
+ */
+static int omfs_grow_extent(struct inode *inode, struct omfs_extent *oe,
+ u64 *ret_block)
+{
+ struct omfs_extent_entry *terminator;
+ struct omfs_extent_entry *entry = &oe->e_entry;
+ struct omfs_sb_info *sbi = OMFS_SB(inode->i_sb);
+ u32 extent_count = be32_to_cpu(oe->e_extent_count);
+ u64 new_block = 0;
+ u32 max_count;
+ int new_count;
+ int ret = 0;
+
+ /* reached the end of the extent table with no blocks mapped.
+ * there are three possibilities for adding: grow last extent,
+ * add a new extent to the current extent table, and add a
+ * continuation inode. in last two cases need an allocator for
+ * sbi->s_cluster_size
+ */
+
+ /* TODO: handle holes */
+
+ /* should always have a terminator */
+ if (extent_count < 1)
+ return -EIO;
+
+ /* trivially grow current extent, if next block is not taken */
+ terminator = entry + extent_count - 1;
+ if (extent_count > 1) {
+ entry = terminator-1;
+ /* first cluster past the end of the last real extent */
+ new_block = be64_to_cpu(entry->e_cluster) +
+ be64_to_cpu(entry->e_blocks);
+
+ /* a non-zero return is treated as "block was free and is now ours" */
+ if (omfs_allocate_block(inode->i_sb, new_block)) {
+ entry->e_blocks =
+ cpu_to_be64(be64_to_cpu(entry->e_blocks) + 1);
+ /* terminator keeps the bitwise complement of a running block total */
+ terminator->e_blocks = ~(cpu_to_be64(
+ be64_to_cpu(~terminator->e_blocks) + 1));
+ goto out;
+ }
+ }
+ /* NOTE(review): capacity is computed from OMFS_EXTENT_START even if oe
+ * lives in a continuation block - confirm this is intended */
+ max_count = (sbi->s_sys_blocksize - OMFS_EXTENT_START -
+ sizeof(struct omfs_extent)) /
+ sizeof(struct omfs_extent_entry) + 1;
+
+ /* TODO: add a continuation block here */
+ if (be32_to_cpu(oe->e_extent_count) > max_count-1)
+ return -EIO;
+
+ /* try to allocate a new cluster */
+ ret = omfs_allocate_range(inode->i_sb, 1, sbi->s_clustersize,
+ &new_block, &new_count);
+ if (ret)
+ goto out_fail;
+
+ /* copy terminator down an entry */
+ entry = terminator;
+ terminator++;
+ memcpy(terminator, entry, sizeof(struct omfs_extent_entry));
+
+ /* the old terminator slot becomes the new extent */
+ entry->e_cluster = cpu_to_be64(new_block);
+ entry->e_blocks = cpu_to_be64((u64) new_count);
+
+ terminator->e_blocks = ~(cpu_to_be64(
+ be64_to_cpu(~terminator->e_blocks) + (u64) new_count));
+
+ /* write in new entry */
+ oe->e_extent_count = cpu_to_be32(1 + be32_to_cpu(oe->e_extent_count));
+
+out:
+ *ret_block = new_block;
+out_fail:
+ return ret;
+}
+
+/*
+ * Look up file block @block in an extent table.
+ *
+ * @ent points at the first of @count entries, the last of which is the
+ * terminator and is never examined. On a hit the device block number is
+ * returned and *left is set to the number of blocks from @block to the
+ * end of the matching extent. Returns 0 (and leaves *left untouched) when
+ * the block is not covered by this table.
+ */
+static sector_t find_block(struct inode *inode, struct omfs_extent_entry *ent,
+			sector_t block, int count, int *left)
+{
+	struct omfs_sb_info *sbi = OMFS_SB(inode->i_sb);
+	sector_t base = 0;	/* file block at which the current extent starts */
+
+	/* stop before the final entry: it is the terminator */
+	while (count > 1) {
+		int len = clus_to_blk(sbi, be64_to_cpu(ent->e_blocks));
+
+		if (block >= base && block < base + len) {
+			sector_t skip = block - base;
+
+			*left = len - skip;
+			return clus_to_blk(sbi, be64_to_cpu(ent->e_cluster)) +
+				skip;
+		}
+
+		base += len;
+		ent++;
+		count--;
+	}
+	return 0;
+}
+
+/*
+ * get_block callback: map file block @block of @inode into @bh_result.
+ *
+ * Walks the extent table in the inode block and then every continuation
+ * block linked through e_next. If the block is found, bh_result is mapped
+ * and its b_size is set to the run available in that extent, capped at the
+ * size requested. If it is not found and @create is set, the last table
+ * visited is grown by omfs_grow_extent().
+ *
+ * Returns 0 on success. Returns -EIO when a block cannot be read, fails
+ * omfs_is_bad(), or the block is unmapped and @create is zero; otherwise
+ * the error from omfs_grow_extent().
+ */
+static int omfs_get_block(struct inode *inode, sector_t block,
+ struct buffer_head *bh_result, int create)
+{
+ struct buffer_head *bh;
+ sector_t next, offset;
+ int ret;
+ u64 new_block;
+ int extent_count;
+ struct omfs_extent *oe;
+ struct omfs_extent_entry *entry;
+ struct omfs_sb_info *sbi = OMFS_SB(inode->i_sb);
+ /* number of blocks the caller asked to map */
+ int max_blocks = bh_result->b_size >> inode->i_blkbits;
+ int remain;
+
+ ret = -EIO;
+ bh = sb_bread(inode->i_sb, clus_to_blk(sbi, inode->i_ino));
+ if (!bh)
+ goto out;
+
+ oe = (struct omfs_extent *)(&bh->b_data[OMFS_EXTENT_START]);
+ next = inode->i_ino;
+
+ for (;;) {
+
+ if (omfs_is_bad(sbi, (struct omfs_header *) bh->b_data, next))
+ goto out_brelse;
+
+ extent_count = be32_to_cpu(oe->e_extent_count);
+ next = be64_to_cpu(oe->e_next);
+ entry = &oe->e_entry;
+
+ /* find_block() returns 0 for "not in this table" */
+ offset = find_block(inode, entry, block, extent_count, &remain);
+ if (offset > 0) {
+ ret = 0;
+ map_bh(bh_result, inode->i_sb, offset);
+ if (remain > max_blocks)
+ remain = max_blocks;
+ bh_result->b_size = (remain << inode->i_blkbits);
+ goto out_brelse;
+ }
+ /* all ones marks the last table; bh/oe stay valid for the grow below */
+ if (next == ~0)
+ break;
+
+ brelse(bh);
+ bh = sb_bread(inode->i_sb, clus_to_blk(sbi, next));
+ if (!bh)
+ goto out;
+ oe = (struct omfs_extent *) (&bh->b_data[OMFS_EXTENT_CONT]);
+ }
+ if (create) {
+ ret = omfs_grow_extent(inode, oe, &new_block);
+ if (ret == 0) {
+ /* the extent table in bh was modified in place */
+ mark_buffer_dirty(bh);
+ mark_inode_dirty(inode);
+ map_bh(bh_result, inode->i_sb,
+ clus_to_blk(sbi, new_block));
+ }
+ }
+out_brelse:
+ brelse(bh);
+out:
+ return ret;
+}
+
+/* ->readpage: read one page through the buffer layer using omfs_get_block. */
+static int omfs_readpage(struct file *file, struct page *page)
+{
+	int err = block_read_full_page(page, omfs_get_block);
+
+	return err;
+}
+
+/* ->readpages: multi-page read via mpage, mapping blocks with omfs_get_block. */
+static int omfs_readpages(struct file *file, struct address_space *mapping,
+		struct list_head *pages, unsigned nr_pages)
+{
+	int err = mpage_readpages(mapping, pages, nr_pages, omfs_get_block);
+
+	return err;
+}
+
+/* ->writepage: write one page through the buffer layer using omfs_get_block. */
+static int omfs_writepage(struct page *page, struct writeback_control *wbc)
+{
+	int err = block_write_full_page(page, omfs_get_block, wbc);
+
+	return err;
+}
+
+/* ->writepages: multi-page writeback via mpage, mapping blocks with omfs_get_block. */
+static int
+omfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
+{
+	int err = mpage_writepages(mapping, wbc, omfs_get_block);
+
+	return err;
+}
+
+/*
+ * ->write_begin: clear *pagep so block_write_begin() allocates the page
+ * itself, then let it prepare the range using omfs_get_block.
+ */
+static int omfs_write_begin(struct file *file, struct address_space *mapping,
+		loff_t pos, unsigned len, unsigned flags,
+		struct page **pagep, void **fsdata)
+{
+	int err;
+
+	*pagep = NULL;
+	err = block_write_begin(file, mapping, pos, len, flags,
+				pagep, fsdata, omfs_get_block);
+	return err;
+}
+
+/* ->bmap: translate a file block to a device block using omfs_get_block. */
+static sector_t omfs_bmap(struct address_space *mapping, sector_t block)
+{
+	sector_t phys = generic_block_bmap(mapping, block, omfs_get_block);
+
+	return phys;
+}
+
+/* File operations for regular OMFS files: generic page-cache I/O plus OMFS fsync. */
+struct file_operations omfs_file_operations = {
+	.read = do_sync_read,
+	.aio_read = generic_file_aio_read,
+	.write = do_sync_write,
+	.aio_write = generic_file_aio_write,
+	.llseek = generic_file_llseek,
+	.mmap = generic_file_mmap,
+	.splice_read = generic_file_splice_read,
+	.fsync = omfs_sync_file,
+};
+
+/* Inode operations for regular OMFS files; only truncation is provided. */
+struct inode_operations omfs_file_inops = {
+	.truncate = omfs_truncate,
+};
+
+/* Address-space operations: block-based page cache I/O mapped by omfs_get_block. */
+struct address_space_operations omfs_aops = {
+	.readpage = omfs_readpage,
+	.writepage = omfs_writepage,
+	.readpages = omfs_readpages,
+	.writepages = omfs_writepages,
+	.write_begin = omfs_write_begin,
+	.write_end = generic_write_end,
+	.sync_page = block_sync_page,
+	.bmap = omfs_bmap,
+};
+
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
new file mode 100644
index 00000000000..a95fe5984f4
--- /dev/null
+++ b/fs/omfs/inode.c
@@ -0,0 +1,554 @@
+/*
+ * Optimized MPEG FS - inode and super operations.
+ * Copyright (C) 2006 Bob Copeland <me@bobcopeland.com>
+ * Released under GPL v2.
+ */
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/vfs.h>
+#include <linux/parser.h>
+#include <linux/buffer_head.h>
+#include <linux/vmalloc.h>
+#include <linux/crc-itu-t.h>
+#include "omfs.h"
+
+MODULE_AUTHOR("Bob Copeland <me@bobcopeland.com>");
+MODULE_DESCRIPTION("OMFS (ReplayTV/Karma) Filesystem for Linux");
+MODULE_LICENSE("GPL");
+
+/*
+ * Allocate and initialise a new in-core inode under directory @dir.
+ *
+ * s_mirrors consecutive blocks are reserved in the bitmap and the first
+ * one becomes the inode number. Directories and regular files get their
+ * respective operation tables; other modes are left without them.
+ *
+ * Returns the new inode, hashed and marked dirty, or an ERR_PTR()
+ * (-ENOMEM, or the omfs_allocate_range() error).
+ */
+struct inode *omfs_new_inode(struct inode *dir, int mode)
+{
+	struct super_block *sb = dir->i_sb;
+	struct omfs_sb_info *sbi = OMFS_SB(sb);
+	struct inode *inode;
+	u64 new_block;
+	int len;
+	int err;
+
+	inode = new_inode(sb);
+	if (!inode)
+		return ERR_PTR(-ENOMEM);
+
+	err = omfs_allocate_range(sb, sbi->s_mirrors, sbi->s_mirrors,
+			&new_block, &len);
+	if (err) {
+		/* no blocks available: discard the half-built inode */
+		make_bad_inode(inode);
+		iput(inode);
+		return ERR_PTR(err);
+	}
+
+	inode->i_ino = new_block;
+	inode->i_mode = mode;
+	inode->i_uid = current->fsuid;
+	inode->i_gid = current->fsgid;
+	inode->i_blocks = 0;
+	inode->i_mapping->a_ops = &omfs_aops;
+
+	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+
+	if (S_ISDIR(mode)) {
+		inode->i_op = &omfs_dir_inops;
+		inode->i_fop = &omfs_dir_operations;
+		/* a directory occupies exactly one system block */
+		inode->i_size = sbi->s_sys_blocksize;
+		inc_nlink(inode);
+	} else if (S_ISREG(mode)) {
+		inode->i_op = &omfs_file_inops;
+		inode->i_fop = &omfs_file_operations;
+		inode->i_size = 0;
+	}
+
+	insert_inode_hash(inode);
+	mark_inode_dirty(inode);
+	return inode;
+}
+
+/*
+ * Recompute both header checksums of an on-disk inode image.
+ *
+ * The CRC covers h_body_size bytes following the header and is stored
+ * first; the XOR check then covers the first OMFS_XOR_COUNT bytes of the
+ * header, which include the freshly stored CRC. The caller owns the
+ * buffer head backing @oi and must mark it dirty.
+ */
+static void omfs_update_checksums(struct omfs_inode *oi)
+{
+	unsigned char *raw = (unsigned char *) oi;
+	int body_len = be32_to_cpu(oi->i_head.h_body_size);
+	int check = 0;
+	int pos;
+	u16 crc;
+
+	crc = crc_itu_t(0, raw + sizeof(struct omfs_header), body_len);
+	oi->i_head.h_crc = cpu_to_be16(crc);
+
+	/* must run after h_crc is written: those bytes are part of the XOR */
+	for (pos = 0; pos < OMFS_XOR_COUNT; pos++)
+		check ^= raw[pos];
+
+	oi->i_head.h_check_xor = check;
+}
+
+/*
+ * Write the in-core inode back to its on-disk block and to each mirror.
+ *
+ * The existing block is read first so fields this function does not own
+ * (sibling pointers etc.) are preserved. Header fields, type, size and
+ * ctime are refreshed, checksums are recomputed, and the block is then
+ * copied to the s_mirrors - 1 mirror locations. When @wait is set every
+ * buffer is written synchronously.
+ *
+ * Returns 0 on success, or -EIO if a block cannot be read, the inode is
+ * neither a directory nor a regular file, or a synchronous write failed.
+ */
+static int omfs_write_inode(struct inode *inode, int wait)
+{
+	struct omfs_inode *oi;
+	struct omfs_sb_info *sbi = OMFS_SB(inode->i_sb);
+	struct buffer_head *bh, *bh2;
+	unsigned int block;
+	u64 ctime;
+	int i;
+	int ret = -EIO;
+	int sync_failed = 0;
+
+	/* get current inode since we may have written sibling ptrs etc. */
+	block = clus_to_blk(sbi, inode->i_ino);
+	bh = sb_bread(inode->i_sb, block);
+	if (!bh)
+		goto out;
+
+	oi = (struct omfs_inode *) bh->b_data;
+
+	oi->i_head.h_self = cpu_to_be64(inode->i_ino);
+	if (S_ISDIR(inode->i_mode))
+		oi->i_type = OMFS_DIR;
+	else if (S_ISREG(inode->i_mode))
+		oi->i_type = OMFS_FILE;
+	else {
+		printk(KERN_WARNING "omfs: unknown file type: %d\n",
+			inode->i_mode);
+		goto out_brelse;
+	}
+
+	oi->i_head.h_body_size = cpu_to_be32(sbi->s_sys_blocksize -
+		sizeof(struct omfs_header));
+	oi->i_head.h_version = 1;
+	oi->i_head.h_type = OMFS_INODE_NORMAL;
+	oi->i_head.h_magic = OMFS_IMAGIC;
+	oi->i_size = cpu_to_be64(inode->i_size);
+
+	/*
+	 * i_ctime is stored in milliseconds: scale seconds by 1000 and
+	 * round the nanosecond part up to whole milliseconds.
+	 */
+	ctime = inode->i_ctime.tv_sec * 1000LL +
+		((inode->i_ctime.tv_nsec + 999999) / 1000000);
+	oi->i_ctime = cpu_to_be64(ctime);
+
+	omfs_update_checksums(oi);
+
+	mark_buffer_dirty(bh);
+	if (wait) {
+		sync_dirty_buffer(bh);
+		/* I/O was submitted but the buffer did not come back valid */
+		if (buffer_req(bh) && !buffer_uptodate(bh))
+			sync_failed = 1;
+	}
+
+	/* if mirroring writes, copy to next fsblock */
+	for (i = 1; i < sbi->s_mirrors; i++) {
+		/* mirrors are one fs block apart, expressed in system blocks */
+		bh2 = sb_bread(inode->i_sb, block + i *
+			(sbi->s_blocksize / sbi->s_sys_blocksize));
+		if (!bh2)
+			goto out_brelse;
+
+		memcpy(bh2->b_data, bh->b_data, bh->b_size);
+		mark_buffer_dirty(bh2);
+		if (wait) {
+			sync_dirty_buffer(bh2);
+			if (buffer_req(bh2) && !buffer_uptodate(bh2))
+				sync_failed = 1;
+		}
+		brelse(bh2);
+	}
+	ret = (sync_failed) ? -EIO : 0;
+out_brelse:
+	brelse(bh);
+out:
+	return ret;
+}
+
+/* Write @inode to disk synchronously; returns the omfs_write_inode() result. */
+int omfs_sync_inode(struct inode *inode)
+{
+	const int wait = 1;
+
+	return omfs_write_inode(inode, wait);
+}
+
+/*
+ * ->delete_inode: called when the last reference to an unlinked inode is
+ * dropped. Discards cached pages, releases a regular file's extents and
+ * clears the inode's own blocks in the allocation bitmap.
+ */
+static void omfs_delete_inode(struct inode *inode)
+{
+	struct super_block *sb = inode->i_sb;
+
+	truncate_inode_pages(&inode->i_data, 0);
+
+	if (S_ISREG(inode->i_mode)) {
+		/* omfs_shrink_inode() only handles truncation to zero */
+		inode->i_size = 0;
+		omfs_shrink_inode(inode);
+	}
+
+	/* NOTE(review): fixed count of 2, while allocation uses s_mirrors - confirm */
+	omfs_clear_range(sb, inode->i_ino, 2);
+	clear_inode(inode);
+}
+
+/*
+ * Look up inode @ino, reading it from disk if it is not already cached.
+ *
+ * Ownership comes from the mount options (s_uid/s_gid) and permissions
+ * from s_dmask/s_fmask. All three timestamps are taken from the single
+ * on-disk ctime, which is stored in milliseconds.
+ *
+ * Returns the inode, ERR_PTR(-ENOMEM) if none could be allocated, or
+ * ERR_PTR(-EIO) if the block cannot be read or its h_self field does not
+ * match @ino.
+ */
+struct inode *omfs_iget(struct super_block *sb, ino_t ino)
+{
+	struct omfs_sb_info *sbi = OMFS_SB(sb);
+	struct omfs_inode *oi;
+	struct buffer_head *bh;
+	unsigned int block;
+	u64 ctime;
+	unsigned long nsecs;
+	struct inode *inode;
+
+	inode = iget_locked(sb, ino);
+	if (!inode)
+		return ERR_PTR(-ENOMEM);
+	/* already in the inode cache and fully set up */
+	if (!(inode->i_state & I_NEW))
+		return inode;
+
+	block = clus_to_blk(sbi, ino);
+	bh = sb_bread(inode->i_sb, block);
+	if (!bh)
+		goto iget_failed;
+
+	oi = (struct omfs_inode *)bh->b_data;
+
+	/* check self */
+	if (ino != be64_to_cpu(oi->i_head.h_self))
+		goto fail_bh;
+
+	inode->i_uid = sbi->s_uid;
+	inode->i_gid = sbi->s_gid;
+
+	/*
+	 * do_div() leaves whole seconds in ctime and returns the leftover
+	 * milliseconds, which are scaled to nanoseconds.
+	 */
+	ctime = be64_to_cpu(oi->i_ctime);
+	nsecs = do_div(ctime, 1000) * 1000000L;
+
+	inode->i_atime.tv_sec = ctime;
+	inode->i_mtime.tv_sec = ctime;
+	inode->i_ctime.tv_sec = ctime;
+	inode->i_atime.tv_nsec = nsecs;
+	inode->i_mtime.tv_nsec = nsecs;
+	inode->i_ctime.tv_nsec = nsecs;
+
+	inode->i_mapping->a_ops = &omfs_aops;
+
+	switch (oi->i_type) {
+	case OMFS_DIR:
+		inode->i_mode = S_IFDIR | (S_IRWXUGO & ~sbi->s_dmask);
+		inode->i_op = &omfs_dir_inops;
+		inode->i_fop = &omfs_dir_operations;
+		inode->i_size = be32_to_cpu(oi->i_head.h_body_size) +
+			sizeof(struct omfs_header);
+		inc_nlink(inode);
+		break;
+	case OMFS_FILE:
+		inode->i_mode = S_IFREG | (S_IRWXUGO & ~sbi->s_fmask);
+		/* same tables as omfs_new_inode(), so truncate works here too */
+		inode->i_op = &omfs_file_inops;
+		inode->i_fop = &omfs_file_operations;
+		inode->i_size = be64_to_cpu(oi->i_size);
+		break;
+	}
+	brelse(bh);
+	unlock_new_inode(inode);
+	return inode;
+fail_bh:
+	brelse(bh);
+iget_failed:
+	iget_failed(inode);
+	return ERR_PTR(-EIO);
+}
+
+/*
+ * ->put_super: release the per-mount state.
+ *
+ * omfs_get_imap() allocates one buffer per bitmap block plus the array of
+ * pointers to them, so both levels are freed here. s_imap is NULL (and
+ * s_imap_size 0) when the volume has no on-disk bitmap.
+ */
+static void omfs_put_super(struct super_block *sb)
+{
+	struct omfs_sb_info *sbi = OMFS_SB(sb);
+	int i;
+
+	if (sbi->s_imap) {
+		for (i = 0; i < sbi->s_imap_size; i++)
+			kfree(sbi->s_imap[i]);
+	}
+	kfree(sbi->s_imap);
+	kfree(sbi);
+	sb->s_fs_info = NULL;
+}
+
+/*
+ * ->statfs: report volume geometry. Inodes and data share one allocation
+ * map, so the file counts mirror the block counts. Always returns 0.
+ */
+static int omfs_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+	struct super_block *sb = dentry->d_sb;
+	struct omfs_sb_info *sbi = OMFS_SB(sb);
+
+	buf->f_type = OMFS_MAGIC;
+	buf->f_namelen = OMFS_NAMELEN;
+	buf->f_bsize = sbi->s_blocksize;
+
+	buf->f_files = sbi->s_num_blocks;
+	buf->f_blocks = sbi->s_num_blocks;
+
+	buf->f_bfree = buf->f_bavail = buf->f_ffree =
+		omfs_count_free(sb);
+	return 0;
+}
+
+/* Superblock operations for OMFS. */
+static struct super_operations omfs_sops = {
+	.statfs = omfs_statfs,
+	.show_options = generic_show_options,
+	.put_super = omfs_put_super,
+	.write_inode = omfs_write_inode,
+	.delete_inode = omfs_delete_inode,
+};
+
+/*
+ * For Rio Karma, there is an on-disk free bitmap whose location is
+ * stored in the root block. For ReplayTV, there is no such free bitmap
+ * so we have to walk the tree. Both inodes and file data are allocated
+ * from the same map. This array can be big (300k) so we allocate
+ * in units of the blocksize.
+ *
+ * Loads the bitmap into sbi->s_imap, an array of s_imap_size pointers to
+ * blocksize-sized buffers. When s_bitmap_ino is all ones nothing is
+ * loaded and s_imap is left untouched. Returns 0 on success, or -ENOMEM
+ * on allocation failure or when a bitmap block cannot be read.
+ */
+static int omfs_get_imap(struct super_block *sb)
+{
+ int bitmap_size;
+ int array_size;
+ int count;
+ struct omfs_sb_info *sbi = OMFS_SB(sb);
+ struct buffer_head *bh;
+ unsigned long **ptr;
+ sector_t block;
+
+ /* one bit per block -> bytes, then bytes -> number of buffer blocks */
+ bitmap_size = DIV_ROUND_UP(sbi->s_num_blocks, 8);
+ array_size = DIV_ROUND_UP(bitmap_size, sb->s_blocksize);
+
+ /* all ones means the volume has no on-disk bitmap */
+ if (sbi->s_bitmap_ino == ~0ULL)
+ goto out;
+
+ sbi->s_imap_size = array_size;
+ sbi->s_imap = kzalloc(array_size * sizeof(unsigned long *), GFP_KERNEL);
+ if (!sbi->s_imap)
+ goto nomem;
+
+ block = clus_to_blk(sbi, sbi->s_bitmap_ino);
+ ptr = sbi->s_imap;
+ /* count is the number of bitmap bytes still to be copied */
+ for (count = bitmap_size; count > 0; count -= sb->s_blocksize) {
+ bh = sb_bread(sb, block++);
+ if (!bh)
+ goto nomem_free;
+ *ptr = kmalloc(sb->s_blocksize, GFP_KERNEL);
+ if (!*ptr) {
+ brelse(bh);
+ goto nomem_free;
+ }
+ memcpy(*ptr, bh->b_data, sb->s_blocksize);
+ /* last block: set the bits past the end of the bitmap */
+ if (count < sb->s_blocksize)
+ memset((void *)*ptr + count, 0xff,
+ sb->s_blocksize - count);
+ brelse(bh);
+ ptr++;
+ }
+out:
+ return 0;
+
+nomem_free:
+ /* slots never filled are still NULL from kzalloc, which kfree accepts */
+ for (count = 0; count < array_size; count++)
+ kfree(sbi->s_imap[count]);
+
+ kfree(sbi->s_imap);
+nomem:
+ sbi->s_imap = NULL;
+ sbi->s_imap_size = 0;
+ return -ENOMEM;
+}
+
+/* Mount option identifiers; Opt_err is returned for unrecognised options. */
+enum {
+	Opt_uid, Opt_gid, Opt_umask, Opt_dmask, Opt_fmask, Opt_err
+};
+
+/*
+ * Mount option patterns for match_token(). The table must end with a
+ * NULL-pattern entry: match_token() scans until it finds a match or that
+ * sentinel, so without it an unknown option reads past the array.
+ */
+static match_table_t tokens = {
+	{Opt_uid, "uid=%u"},
+	{Opt_gid, "gid=%u"},
+	{Opt_umask, "umask=%o"},
+	{Opt_dmask, "dmask=%o"},
+	{Opt_fmask, "fmask=%o"},
+	{Opt_err, NULL},
+};
+
+/*
+ * Parse the comma-separated mount option string into @sbi.
+ *
+ * Recognised options are uid=, gid=, umask= (sets both masks), dmask= and
+ * fmask=. Empty items are skipped. Returns 1 on success (including a NULL
+ * option string) and 0 on an unknown option or malformed value.
+ */
+static int parse_options(char *options, struct omfs_sb_info *sbi)
+{
+	substring_t args[MAX_OPT_ARGS];
+	char *item;
+	int option;
+
+	if (!options)
+		return 1;
+
+	while ((item = strsep(&options, ",")) != NULL) {
+		int token;
+
+		if (!*item)
+			continue;
+
+		token = match_token(item, tokens, args);
+		switch (token) {
+		case Opt_uid:
+		case Opt_gid:
+			/* decimal identifiers */
+			if (match_int(&args[0], &option))
+				return 0;
+			if (token == Opt_uid)
+				sbi->s_uid = option;
+			else
+				sbi->s_gid = option;
+			break;
+		case Opt_umask:
+		case Opt_dmask:
+		case Opt_fmask:
+			/* octal masks; umask applies to both kinds */
+			if (match_octal(&args[0], &option))
+				return 0;
+			if (token != Opt_fmask)
+				sbi->s_dmask = option;
+			if (token != Opt_dmask)
+				sbi->s_fmask = option;
+			break;
+		default:
+			return 0;
+		}
+	}
+	return 1;
+}
+
+/*
+ * Read and validate the OMFS superblock and root block, load the
+ * allocation bitmap and instantiate the root directory.
+ *
+ * The superblock is read with a 512-byte block size; the block size is
+ * then switched to the volume's system block size.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, -EINVAL for bad
+ * options or an invalid/unreadable superblock or root block, or the error
+ * from omfs_get_imap()/omfs_iget().
+ *
+ * NOTE(review): sbi (and any loaded bitmap) do not appear to be released
+ * on the failure paths - confirm whether the caller cleans them up.
+ */
+static int omfs_fill_super(struct super_block *sb, void *data, int silent)
+{
+	struct buffer_head *bh, *bh2;
+	struct omfs_super_block *omfs_sb;
+	struct omfs_root_block *omfs_rb;
+	struct omfs_sb_info *sbi;
+	struct inode *root;
+	sector_t start;
+	int ret = -EINVAL;
+
+	save_mount_options(sb, (char *) data);
+
+	sbi = kzalloc(sizeof(struct omfs_sb_info), GFP_KERNEL);
+	if (!sbi)
+		return -ENOMEM;
+
+	sb->s_fs_info = sbi;
+
+	/* defaults, possibly overridden by mount options below */
+	sbi->s_uid = current->uid;
+	sbi->s_gid = current->gid;
+	sbi->s_dmask = sbi->s_fmask = current->fs->umask;
+
+	if (!parse_options((char *) data, sbi))
+		goto end;
+
+	sb->s_maxbytes = 0xffffffff;
+
+	sb_set_blocksize(sb, 0x200);
+
+	bh = sb_bread(sb, 0);
+	if (!bh)
+		goto end;
+
+	omfs_sb = (struct omfs_super_block *)bh->b_data;
+
+	if (omfs_sb->s_magic != cpu_to_be32(OMFS_MAGIC)) {
+		if (!silent)
+			printk(KERN_ERR "omfs: Invalid superblock (%x)\n",
+				omfs_sb->s_magic);
+		goto out_brelse_bh;
+	}
+	sb->s_magic = OMFS_MAGIC;
+
+	sbi->s_num_blocks = be64_to_cpu(omfs_sb->s_num_blocks);
+	sbi->s_blocksize = be32_to_cpu(omfs_sb->s_blocksize);
+	sbi->s_mirrors = be32_to_cpu(omfs_sb->s_mirrors);
+	sbi->s_root_ino = be64_to_cpu(omfs_sb->s_root_block);
+	sbi->s_sys_blocksize = be32_to_cpu(omfs_sb->s_sys_blocksize);
+	mutex_init(&sbi->s_bitmap_lock);
+
+	if (sbi->s_sys_blocksize > PAGE_SIZE) {
+		printk(KERN_ERR "omfs: sysblock size (%d) is out of range\n",
+			sbi->s_sys_blocksize);
+		goto out_brelse_bh;
+	}
+
+	if (sbi->s_blocksize < sbi->s_sys_blocksize ||
+	    sbi->s_blocksize > OMFS_MAX_BLOCK_SIZE) {
+		printk(KERN_ERR "omfs: block size (%d) is out of range\n",
+			sbi->s_blocksize);
+		goto out_brelse_bh;
+	}
+
+	/*
+	 * Use sys_blocksize as the fs block since it is smaller than a
+	 * page while the fs blocksize can be larger.
+	 */
+	sb_set_blocksize(sb, sbi->s_sys_blocksize);
+
+	/*
+	 * ...and the difference goes into a shift. sys_blocksize is always
+	 * a power of two factor of blocksize.
+	 */
+	sbi->s_block_shift = get_bitmask_order(sbi->s_blocksize) -
+		get_bitmask_order(sbi->s_sys_blocksize);
+
+	start = clus_to_blk(sbi, be64_to_cpu(omfs_sb->s_root_block));
+	bh2 = sb_bread(sb, start);
+	if (!bh2)
+		goto out_brelse_bh;
+
+	omfs_rb = (struct omfs_root_block *)bh2->b_data;
+
+	sbi->s_bitmap_ino = be64_to_cpu(omfs_rb->r_bitmap);
+	sbi->s_clustersize = be32_to_cpu(omfs_rb->r_clustersize);
+
+	if (sbi->s_num_blocks != be64_to_cpu(omfs_rb->r_num_blocks)) {
+		printk(KERN_ERR "omfs: block count discrepancy between "
+			"super and root blocks (%llx, %llx)\n",
+			(unsigned long long)sbi->s_num_blocks,
+			(unsigned long long)be64_to_cpu(omfs_rb->r_num_blocks));
+		goto out_brelse_bh2;
+	}
+
+	ret = omfs_get_imap(sb);
+	if (ret)
+		goto out_brelse_bh2;
+
+	sb->s_op = &omfs_sops;
+
+	root = omfs_iget(sb, be64_to_cpu(omfs_rb->r_root_dir));
+	if (IS_ERR(root)) {
+		ret = PTR_ERR(root);
+		goto out_brelse_bh2;
+	}
+
+	sb->s_root = d_alloc_root(root);
+	if (!sb->s_root) {
+		iput(root);
+		/* ret is 0 here from omfs_get_imap(); report the failure */
+		ret = -ENOMEM;
+		goto out_brelse_bh2;
+	}
+	printk(KERN_DEBUG "omfs: Mounted volume %s\n", omfs_rb->r_name);
+
+	ret = 0;
+out_brelse_bh2:
+	brelse(bh2);
+out_brelse_bh:
+	brelse(bh);
+end:
+	return ret;
+}
+
+/* ->get_sb: mount an OMFS volume from a block device via omfs_fill_super. */
+static int omfs_get_sb(struct file_system_type *fs_type,
+		int flags, const char *dev_name,
+		void *data, struct vfsmount *m)
+{
+	int err;
+
+	err = get_sb_bdev(fs_type, flags, dev_name, data, omfs_fill_super, m);
+	return err;
+}
+
+/* Filesystem type registration record; OMFS requires a backing block device. */
+static struct file_system_type omfs_fs_type = {
+	.name = "omfs",
+	.owner = THIS_MODULE,
+	.fs_flags = FS_REQUIRES_DEV,
+	.get_sb = omfs_get_sb,
+	.kill_sb = kill_block_super,
+};
+
+/* Module entry point: register the "omfs" filesystem type. */
+static int __init init_omfs_fs(void)
+{
+	int err = register_filesystem(&omfs_fs_type);
+
+	return err;
+}
+
+/* Module exit point: unregister the "omfs" filesystem type. */
+static void __exit exit_omfs_fs(void)
+{
+	struct file_system_type *fst = &omfs_fs_type;
+
+	unregister_filesystem(fst);
+}
+
+module_init(init_omfs_fs);
+module_exit(exit_omfs_fs);
diff --git a/fs/omfs/omfs.h b/fs/omfs/omfs.h
new file mode 100644
index 00000000000..2bc0f067040
--- /dev/null
+++ b/fs/omfs/omfs.h
@@ -0,0 +1,67 @@
+#ifndef _OMFS_H
+#define _OMFS_H
+
+#include <linux/module.h>
+#include <linux/fs.h>
+
+#include "omfs_fs.h"
+
+/* In-memory structures */
+
+/*
+ * Per-mount OMFS state, hung off sb->s_fs_info. Geometry fields are
+ * copied from the on-disk super and root blocks at mount time; the
+ * uid/gid/mask fields come from mount options.
+ */
+struct omfs_sb_info {
+ u64 s_num_blocks; /* total number of FS blocks */
+ u64 s_bitmap_ino; /* block of the free bitmap, all ones if none */
+ u64 s_root_ino; /* block number of the root block */
+ u32 s_blocksize; /* size of a (data) block */
+ u32 s_mirrors; /* # of mirrors of system blocks */
+ u32 s_sys_blocksize; /* size of non-data blocks; used as sb block size */
+ u32 s_clustersize; /* max blocks requested per new extent */
+ int s_block_shift; /* log2(s_blocksize / s_sys_blocksize) */
+ unsigned long **s_imap; /* in-memory copy of the bitmap, one buffer per block */
+ int s_imap_size; /* number of entries in s_imap */
+ struct mutex s_bitmap_lock;
+ int s_uid; /* owner applied to inodes read from disk */
+ int s_gid; /* group applied to inodes read from disk */
+ int s_dmask; /* permission bits removed from directories */
+ int s_fmask; /* permission bits removed from regular files */
+};
+
+/*
+ * Convert a cluster (fs block) number into a block number in units of
+ * the system block size by shifting left by s_block_shift.
+ */
+static inline sector_t clus_to_blk(struct omfs_sb_info *sbi, sector_t block)
+{
+	const int shift = sbi->s_block_shift;
+
+	return block << shift;
+}
+
+/* Fetch the OMFS private data attached to a VFS super block. */
+static inline struct omfs_sb_info *OMFS_SB(struct super_block *sb)
+{
+	struct omfs_sb_info *sbi = sb->s_fs_info;
+
+	return sbi;
+}
+
+/* bitmap.c */
+extern unsigned long omfs_count_free(struct super_block *sb);
+extern int omfs_allocate_block(struct super_block *sb, u64 block);
+extern int omfs_allocate_range(struct super_block *sb, int min_request,
+ int max_request, u64 *return_block, int *return_size);
+extern int omfs_clear_range(struct super_block *sb, u64 block, int count);
+
+/* dir.c */
+extern struct file_operations omfs_dir_operations;
+extern struct inode_operations omfs_dir_inops;
+extern int omfs_make_empty(struct inode *inode, struct super_block *sb);
+extern int omfs_is_bad(struct omfs_sb_info *sbi, struct omfs_header *header,
+ u64 fsblock);
+
+/* file.c */
+extern struct file_operations omfs_file_operations;
+extern struct inode_operations omfs_file_inops;
+extern struct address_space_operations omfs_aops;
+extern void omfs_make_empty_table(struct buffer_head *bh, int offset);
+extern int omfs_shrink_inode(struct inode *inode);
+
+/* inode.c */
+extern struct inode *omfs_iget(struct super_block *sb, ino_t inode);
+extern struct inode *omfs_new_inode(struct inode *dir, int mode);
+extern int omfs_reserve_block(struct super_block *sb, sector_t block);
+extern int omfs_find_empty_block(struct super_block *sb, int mode, ino_t *ino);
+extern int omfs_sync_inode(struct inode *inode);
+
+#endif
diff --git a/fs/omfs/omfs_fs.h b/fs/omfs/omfs_fs.h
new file mode 100644
index 00000000000..12cca245d6e
--- /dev/null
+++ b/fs/omfs/omfs_fs.h
@@ -0,0 +1,80 @@
+#ifndef _OMFS_FS_H
+#define _OMFS_FS_H
+
+/* OMFS On-disk structures */
+
+/* superblock magic (s_magic) and per-block header magic (h_magic) */
+#define OMFS_MAGIC 0xC2993D87
+#define OMFS_IMAGIC 0xD2
+
+/* values of omfs_inode.i_type */
+#define OMFS_DIR 'D'
+#define OMFS_FILE 'F'
+/* values of omfs_header.h_type */
+#define OMFS_INODE_NORMAL 'e'
+#define OMFS_INODE_CONTINUATION 'c'
+#define OMFS_INODE_SYSTEM 's'
+/* size of the on-disk name fields */
+#define OMFS_NAMELEN 256
+/* byte offset of the hash bucket table inside a directory block */
+#define OMFS_DIR_START 0x1b8
+/* byte offset of the extent table inside a file's inode block */
+#define OMFS_EXTENT_START 0x1d0
+/* byte offset of the extent table inside a continuation block */
+#define OMFS_EXTENT_CONT 0x40
+/* number of leading header bytes covered by h_check_xor */
+#define OMFS_XOR_COUNT 19
+/* largest fs block size accepted at mount time */
+#define OMFS_MAX_BLOCK_SIZE 8192
+
+/*
+ * On-disk superblock, read from block 0 at mount time. All multi-byte
+ * fields are big-endian.
+ */
+struct omfs_super_block {
+ char s_fill1[256];
+ __be64 s_root_block; /* block number of omfs_root_block */
+ __be64 s_num_blocks; /* total number of FS blocks */
+ __be32 s_magic; /* OMFS_MAGIC */
+ __be32 s_blocksize; /* size of a block */
+ __be32 s_mirrors; /* # of mirrors of system blocks */
+ __be32 s_sys_blocksize; /* size of non-data blocks */
+};
+
+/*
+ * Common header at the start of the root block and of every inode block.
+ * h_crc covers the h_body_size bytes following the header; h_check_xor
+ * covers the first OMFS_XOR_COUNT header bytes.
+ */
+struct omfs_header {
+ __be64 h_self; /* FS block where this is located */
+ __be32 h_body_size; /* size of useful data after header */
+ __be16 h_crc; /* crc-ccitt of body_size bytes */
+ char h_fill1[2];
+ u8 h_version; /* version, always 1 */
+ char h_type; /* OMFS_INODE_X */
+ u8 h_magic; /* OMFS_IMAGIC */
+ u8 h_check_xor; /* XOR of header bytes before this */
+ __be32 h_fill2;
+};
+
+/*
+ * On-disk root block, located at omfs_super_block.s_root_block. Its
+ * r_num_blocks must agree with the superblock for a mount to succeed.
+ */
+struct omfs_root_block {
+ struct omfs_header r_head; /* header */
+ __be64 r_fill1;
+ __be64 r_num_blocks; /* total number of FS blocks */
+ __be64 r_root_dir; /* block # of root directory */
+ __be64 r_bitmap; /* block # of free space bitmap */
+ __be32 r_blocksize; /* size of a block */
+ __be32 r_clustersize; /* size allocated for data blocks */
+ __be64 r_mirrors; /* # of mirrors of system blocks */
+ char r_name[OMFS_NAMELEN]; /* partition label */
+};
+
+/*
+ * On-disk inode, stored at the start of the block whose number is the
+ * inode number. Only a single timestamp (i_ctime) is stored.
+ */
+struct omfs_inode {
+ struct omfs_header i_head; /* header */
+ __be64 i_parent; /* parent containing this inode */
+ __be64 i_sibling; /* next inode in hash bucket */
+ __be64 i_ctime; /* ctime, in milliseconds */
+ char i_fill1[35];
+ char i_type; /* OMFS_[DIR,FILE] */
+ __be32 i_fill2;
+ char i_fill3[64];
+ char i_name[OMFS_NAMELEN]; /* filename */
+ __be64 i_size; /* size of file, in bytes */
+};
+
+/*
+ * One run of contiguous blocks in an extent table. The final entry of
+ * every table is a terminator rather than a real extent.
+ */
+struct omfs_extent_entry {
+ __be64 e_cluster; /* start location of a set of blocks */
+ __be64 e_blocks; /* number of blocks after e_cluster */
+};
+
+/*
+ * Extent table header. It sits at OMFS_EXTENT_START in an inode block
+ * and at OMFS_EXTENT_CONT in a continuation block; e_entry is the first
+ * of e_extent_count consecutive entries (terminator included).
+ */
+struct omfs_extent {
+ __be64 e_next; /* next extent table location */
+ __be32 e_extent_count; /* total # extents in this table */
+ __be32 e_fill;
+ struct omfs_extent_entry e_entry; /* start of extent entries */
+};
+
+#endif
diff --git a/fs/open.c b/fs/open.c
index a99ad09c319..07da9359481 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -64,7 +64,8 @@ static int vfs_statfs_native(struct dentry *dentry, struct statfs *buf)
memcpy(buf, &st, sizeof(st));
else {
if (sizeof buf->f_blocks == 4) {
- if ((st.f_blocks | st.f_bfree | st.f_bavail) &
+ if ((st.f_blocks | st.f_bfree | st.f_bavail |
+ st.f_bsize | st.f_frsize) &
0xffffffff00000000ULL)
return -EOVERFLOW;
/*
@@ -121,37 +122,37 @@ static int vfs_statfs64(struct dentry *dentry, struct statfs64 *buf)
return 0;
}
-asmlinkage long sys_statfs(const char __user * path, struct statfs __user * buf)
+asmlinkage long sys_statfs(const char __user *pathname, struct statfs __user * buf)
{
- struct nameidata nd;
+ struct path path;
int error;
- error = user_path_walk(path, &nd);
+ error = user_path(pathname, &path);
if (!error) {
struct statfs tmp;
- error = vfs_statfs_native(nd.path.dentry, &tmp);
+ error = vfs_statfs_native(path.dentry, &tmp);
if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
error = -EFAULT;
- path_put(&nd.path);
+ path_put(&path);
}
return error;
}
-asmlinkage long sys_statfs64(const char __user *path, size_t sz, struct statfs64 __user *buf)
+asmlinkage long sys_statfs64(const char __user *pathname, size_t sz, struct statfs64 __user *buf)
{
- struct nameidata nd;
+ struct path path;
long error;
if (sz != sizeof(*buf))
return -EINVAL;
- error = user_path_walk(path, &nd);
+ error = user_path(pathname, &path);
if (!error) {
struct statfs64 tmp;
- error = vfs_statfs64(nd.path.dentry, &tmp);
+ error = vfs_statfs64(path.dentry, &tmp);
if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
error = -EFAULT;
- path_put(&nd.path);
+ path_put(&path);
}
return error;
}
@@ -222,20 +223,20 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
return err;
}
-static long do_sys_truncate(const char __user * path, loff_t length)
+static long do_sys_truncate(const char __user *pathname, loff_t length)
{
- struct nameidata nd;
- struct inode * inode;
+ struct path path;
+ struct inode *inode;
int error;
error = -EINVAL;
if (length < 0) /* sorry, but loff_t says... */
goto out;
- error = user_path_walk(path, &nd);
+ error = user_path(pathname, &path);
if (error)
goto out;
- inode = nd.path.dentry->d_inode;
+ inode = path.dentry->d_inode;
/* For directories it's -EISDIR, for other non-regulars - -EINVAL */
error = -EISDIR;
@@ -246,16 +247,16 @@ static long do_sys_truncate(const char __user * path, loff_t length)
if (!S_ISREG(inode->i_mode))
goto dput_and_out;
- error = mnt_want_write(nd.path.mnt);
+ error = mnt_want_write(path.mnt);
if (error)
goto dput_and_out;
- error = vfs_permission(&nd, MAY_WRITE);
+ error = inode_permission(inode, MAY_WRITE);
if (error)
goto mnt_drop_write_and_out;
error = -EPERM;
- if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+ if (IS_APPEND(inode))
goto mnt_drop_write_and_out;
error = get_write_access(inode);
@@ -273,15 +274,15 @@ static long do_sys_truncate(const char __user * path, loff_t length)
error = locks_verify_truncate(inode, NULL, length);
if (!error) {
DQUOT_INIT(inode);
- error = do_truncate(nd.path.dentry, length, 0, NULL);
+ error = do_truncate(path.dentry, length, 0, NULL);
}
put_write_and_out:
put_write_access(inode);
mnt_drop_write_and_out:
- mnt_drop_write(nd.path.mnt);
+ mnt_drop_write(path.mnt);
dput_and_out:
- path_put(&nd.path);
+ path_put(&path);
out:
return error;
}
@@ -424,7 +425,8 @@ out:
*/
asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
{
- struct nameidata nd;
+ struct path path;
+ struct inode *inode;
int old_fsuid, old_fsgid;
kernel_cap_t uninitialized_var(old_cap); /* !SECURE_NO_SETUID_FIXUP */
int res;
@@ -447,7 +449,7 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
* FIXME: There is a race here against sys_capset. The
* capabilities can change yet we will restore the old
* value below. We should hold task_capabilities_lock,
- * but we cannot because user_path_walk can sleep.
+ * but we cannot because user_path_at can sleep.
*/
#endif /* ndef CONFIG_SECURITY_FILE_CAPABILITIES */
if (current->uid)
@@ -456,14 +458,25 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
old_cap = cap_set_effective(current->cap_permitted);
}
- res = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd);
+ res = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path);
if (res)
goto out;
- res = vfs_permission(&nd, mode);
+ inode = path.dentry->d_inode;
+
+ if ((mode & MAY_EXEC) && S_ISREG(inode->i_mode)) {
+ /*
+ * MAY_EXEC on regular files is denied if the fs is mounted
+ * with the "noexec" flag.
+ */
+ res = -EACCES;
+ if (path.mnt->mnt_flags & MNT_NOEXEC)
+ goto out_path_release;
+ }
+
+ res = inode_permission(inode, mode | MAY_ACCESS);
/* SuS v2 requires we report a read only fs too */
- if(res || !(mode & S_IWOTH) ||
- special_file(nd.path.dentry->d_inode->i_mode))
+ if (res || !(mode & S_IWOTH) || special_file(inode->i_mode))
goto out_path_release;
/*
* This is a rare case where using __mnt_is_readonly()
@@ -475,11 +488,11 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
* inherently racy and know that the fs may change
* state before we even see this result.
*/
- if (__mnt_is_readonly(nd.path.mnt))
+ if (__mnt_is_readonly(path.mnt))
res = -EROFS;
out_path_release:
- path_put(&nd.path);
+ path_put(&path);
out:
current->fsuid = old_fsuid;
current->fsgid = old_fsgid;
@@ -497,22 +510,21 @@ asmlinkage long sys_access(const char __user *filename, int mode)
asmlinkage long sys_chdir(const char __user * filename)
{
- struct nameidata nd;
+ struct path path;
int error;
- error = __user_walk(filename,
- LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_CHDIR, &nd);
+ error = user_path_dir(filename, &path);
if (error)
goto out;
- error = vfs_permission(&nd, MAY_EXEC);
+ error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_ACCESS);
if (error)
goto dput_and_out;
- set_fs_pwd(current->fs, &nd.path);
+ set_fs_pwd(current->fs, &path);
dput_and_out:
- path_put(&nd.path);
+ path_put(&path);
out:
return error;
}
@@ -534,7 +546,7 @@ asmlinkage long sys_fchdir(unsigned int fd)
if (!S_ISDIR(inode->i_mode))
goto out_putf;
- error = file_permission(file, MAY_EXEC);
+ error = inode_permission(inode, MAY_EXEC | MAY_ACCESS);
if (!error)
set_fs_pwd(current->fs, &file->f_path);
out_putf:
@@ -545,14 +557,14 @@ out:
asmlinkage long sys_chroot(const char __user * filename)
{
- struct nameidata nd;
+ struct path path;
int error;
- error = __user_walk(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd);
+ error = user_path_dir(filename, &path);
if (error)
goto out;
- error = vfs_permission(&nd, MAY_EXEC);
+ error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_ACCESS);
if (error)
goto dput_and_out;
@@ -560,11 +572,10 @@ asmlinkage long sys_chroot(const char __user * filename)
if (!capable(CAP_SYS_CHROOT))
goto dput_and_out;
- set_fs_root(current->fs, &nd.path);
- set_fs_altroot();
+ set_fs_root(current->fs, &path);
error = 0;
dput_and_out:
- path_put(&nd.path);
+ path_put(&path);
out:
return error;
}
@@ -589,9 +600,6 @@ asmlinkage long sys_fchmod(unsigned int fd, mode_t mode)
err = mnt_want_write(file->f_path.mnt);
if (err)
goto out_putf;
- err = -EPERM;
- if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
- goto out_drop_write;
mutex_lock(&inode->i_mutex);
if (mode == (mode_t) -1)
mode = inode->i_mode;
@@ -599,8 +607,6 @@ asmlinkage long sys_fchmod(unsigned int fd, mode_t mode)
newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
err = notify_change(dentry, &newattrs);
mutex_unlock(&inode->i_mutex);
-
-out_drop_write:
mnt_drop_write(file->f_path.mnt);
out_putf:
fput(file);
@@ -611,36 +617,29 @@ out:
asmlinkage long sys_fchmodat(int dfd, const char __user *filename,
mode_t mode)
{
- struct nameidata nd;
- struct inode * inode;
+ struct path path;
+ struct inode *inode;
int error;
struct iattr newattrs;
- error = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW, &nd);
+ error = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path);
if (error)
goto out;
- inode = nd.path.dentry->d_inode;
+ inode = path.dentry->d_inode;
- error = mnt_want_write(nd.path.mnt);
+ error = mnt_want_write(path.mnt);
if (error)
goto dput_and_out;
-
- error = -EPERM;
- if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
- goto out_drop_write;
-
mutex_lock(&inode->i_mutex);
if (mode == (mode_t) -1)
mode = inode->i_mode;
newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
- error = notify_change(nd.path.dentry, &newattrs);
+ error = notify_change(path.dentry, &newattrs);
mutex_unlock(&inode->i_mutex);
-
-out_drop_write:
- mnt_drop_write(nd.path.mnt);
+ mnt_drop_write(path.mnt);
dput_and_out:
- path_put(&nd.path);
+ path_put(&path);
out:
return error;
}
@@ -652,18 +651,10 @@ asmlinkage long sys_chmod(const char __user *filename, mode_t mode)
static int chown_common(struct dentry * dentry, uid_t user, gid_t group)
{
- struct inode * inode;
+ struct inode *inode = dentry->d_inode;
int error;
struct iattr newattrs;
- error = -ENOENT;
- if (!(inode = dentry->d_inode)) {
- printk(KERN_ERR "chown_common: NULL inode\n");
- goto out;
- }
- error = -EPERM;
- if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
- goto out;
newattrs.ia_valid = ATTR_CTIME;
if (user != (uid_t) -1) {
newattrs.ia_valid |= ATTR_UID;
@@ -679,25 +670,25 @@ static int chown_common(struct dentry * dentry, uid_t user, gid_t group)
mutex_lock(&inode->i_mutex);
error = notify_change(dentry, &newattrs);
mutex_unlock(&inode->i_mutex);
-out:
+
return error;
}
asmlinkage long sys_chown(const char __user * filename, uid_t user, gid_t group)
{
- struct nameidata nd;
+ struct path path;
int error;
- error = user_path_walk(filename, &nd);
+ error = user_path(filename, &path);
if (error)
goto out;
- error = mnt_want_write(nd.path.mnt);
+ error = mnt_want_write(path.mnt);
if (error)
goto out_release;
- error = chown_common(nd.path.dentry, user, group);
- mnt_drop_write(nd.path.mnt);
+ error = chown_common(path.dentry, user, group);
+ mnt_drop_write(path.mnt);
out_release:
- path_put(&nd.path);
+ path_put(&path);
out:
return error;
}
@@ -705,7 +696,7 @@ out:
asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user,
gid_t group, int flag)
{
- struct nameidata nd;
+ struct path path;
int error = -EINVAL;
int follow;
@@ -713,35 +704,35 @@ asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user,
goto out;
follow = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
- error = __user_walk_fd(dfd, filename, follow, &nd);
+ error = user_path_at(dfd, filename, follow, &path);
if (error)
goto out;
- error = mnt_want_write(nd.path.mnt);
+ error = mnt_want_write(path.mnt);
if (error)
goto out_release;
- error = chown_common(nd.path.dentry, user, group);
- mnt_drop_write(nd.path.mnt);
+ error = chown_common(path.dentry, user, group);
+ mnt_drop_write(path.mnt);
out_release:
- path_put(&nd.path);
+ path_put(&path);
out:
return error;
}
asmlinkage long sys_lchown(const char __user * filename, uid_t user, gid_t group)
{
- struct nameidata nd;
+ struct path path;
int error;
- error = user_path_walk_link(filename, &nd);
+ error = user_lpath(filename, &path);
if (error)
goto out;
- error = mnt_want_write(nd.path.mnt);
+ error = mnt_want_write(path.mnt);
if (error)
goto out_release;
- error = chown_common(nd.path.dentry, user, group);
- mnt_drop_write(nd.path.mnt);
+ error = chown_common(path.dentry, user, group);
+ mnt_drop_write(path.mnt);
out_release:
- path_put(&nd.path);
+ path_put(&path);
out:
return error;
}
@@ -972,71 +963,6 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
}
EXPORT_SYMBOL(dentry_open);
-/*
- * Find an empty file descriptor entry, and mark it busy.
- */
-int get_unused_fd_flags(int flags)
-{
- struct files_struct * files = current->files;
- int fd, error;
- struct fdtable *fdt;
-
- error = -EMFILE;
- spin_lock(&files->file_lock);
-
-repeat:
- fdt = files_fdtable(files);
- fd = find_next_zero_bit(fdt->open_fds->fds_bits, fdt->max_fds,
- files->next_fd);
-
- /*
- * N.B. For clone tasks sharing a files structure, this test
- * will limit the total number of files that can be opened.
- */
- if (fd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
- goto out;
-
- /* Do we need to expand the fd array or fd set? */
- error = expand_files(files, fd);
- if (error < 0)
- goto out;
-
- if (error) {
- /*
- * If we needed to expand the fs array we
- * might have blocked - try again.
- */
- error = -EMFILE;
- goto repeat;
- }
-
- FD_SET(fd, fdt->open_fds);
- if (flags & O_CLOEXEC)
- FD_SET(fd, fdt->close_on_exec);
- else
- FD_CLR(fd, fdt->close_on_exec);
- files->next_fd = fd + 1;
-#if 1
- /* Sanity check */
- if (fdt->fd[fd] != NULL) {
- printk(KERN_WARNING "get_unused_fd: slot %d not NULL!\n", fd);
- fdt->fd[fd] = NULL;
- }
-#endif
- error = fd;
-
-out:
- spin_unlock(&files->file_lock);
- return error;
-}
-
-int get_unused_fd(void)
-{
- return get_unused_fd_flags(0);
-}
-
-EXPORT_SYMBOL(get_unused_fd);
-
static void __put_unused_fd(struct files_struct *files, unsigned int fd)
{
struct fdtable *fdt = files_fdtable(files);
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index d17b4fd204e..9f5b054f06b 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -430,7 +430,7 @@ static struct file_system_type openprom_fs_type = {
.kill_sb = kill_anon_super,
};
-static void op_inode_init_once(struct kmem_cache * cachep, void *data)
+static void op_inode_init_once(void *data)
{
struct op_inode_info *oi = (struct op_inode_info *) data;
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 6149e4b58c8..7d6b34e201d 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -344,18 +344,18 @@ static ssize_t whole_disk_show(struct device *dev,
static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH,
whole_disk_show, NULL);
-void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags)
+int add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags)
{
struct hd_struct *p;
int err;
p = kzalloc(sizeof(*p), GFP_KERNEL);
if (!p)
- return;
+ return -ENOMEM;
if (!init_part_stats(p)) {
- kfree(p);
- return;
+ err = -ENOMEM;
+ goto out0;
}
p->start_sect = start;
p->nr_sects = len;
@@ -378,15 +378,31 @@ void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len,
/* delay uevent until 'holders' subdir is created */
p->dev.uevent_suppress = 1;
- device_add(&p->dev);
+ err = device_add(&p->dev);
+ if (err)
+ goto out1;
partition_sysfs_add_subdir(p);
p->dev.uevent_suppress = 0;
- if (flags & ADDPART_FLAG_WHOLEDISK)
+ if (flags & ADDPART_FLAG_WHOLEDISK) {
err = device_create_file(&p->dev, &dev_attr_whole_disk);
+ if (err)
+ goto out2;
+ }
/* suppress uevent if the disk suppresses it */
if (!disk->dev.uevent_suppress)
kobject_uevent(&p->dev.kobj, KOBJ_ADD);
+
+ return 0;
+
+out2:
+ device_del(&p->dev);
+out1:
+ put_device(&p->dev);
+ free_part_stats(p);
+out0:
+ kfree(p);
+ return err;
}
/* Not exported, helper to add_disk(). */
@@ -401,7 +417,7 @@ void register_disk(struct gendisk *disk)
disk->dev.parent = disk->driverfs_dev;
disk->dev.devt = MKDEV(disk->major, disk->first_minor);
- strlcpy(disk->dev.bus_id, disk->disk_name, KOBJ_NAME_LEN);
+ strlcpy(disk->dev.bus_id, disk->disk_name, BUS_ID_SIZE);
/* ewww... some of these buggers have / in the name... */
s = strchr(disk->dev.bus_id, '/');
if (s)
@@ -483,10 +499,16 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
if (!size)
continue;
if (from + size > get_capacity(disk)) {
- printk(" %s: p%d exceeds device capacity\n",
+ printk(KERN_ERR " %s: p%d exceeds device capacity\n",
disk->disk_name, p);
+ continue;
+ }
+ res = add_partition(disk, p, from, size, state->parts[p].flags);
+ if (res) {
+ printk(KERN_ERR " %s: p%d could not be added: %d\n",
+ disk->disk_name, p, -res);
+ continue;
}
- add_partition(disk, p, from, size, state->parts[p].flags);
#ifdef CONFIG_BLK_DEV_MD
if (state->parts[p].flags & ADDPART_FLAG_RAID)
md_autodetect_dev(bdev->bd_dev+p);
diff --git a/fs/partitions/efi.c b/fs/partitions/efi.c
index e7b07006bc4..038a6022152 100644
--- a/fs/partitions/efi.c
+++ b/fs/partitions/efi.c
@@ -95,13 +95,6 @@
#include "check.h"
#include "efi.h"
-#undef EFI_DEBUG
-#ifdef EFI_DEBUG
-#define Dprintk(x...) printk(KERN_DEBUG x)
-#else
-#define Dprintk(x...)
-#endif
-
/* This allows a kernel command line option 'gpt' to override
* the test for invalid PMBR. Not __initdata because reloading
* the partition tables happens after init too.
@@ -305,10 +298,10 @@ is_gpt_valid(struct block_device *bdev, u64 lba,
/* Check the GUID Partition Table signature */
if (le64_to_cpu((*gpt)->signature) != GPT_HEADER_SIGNATURE) {
- Dprintk("GUID Partition Table Header signature is wrong:"
- "%lld != %lld\n",
- (unsigned long long)le64_to_cpu((*gpt)->signature),
- (unsigned long long)GPT_HEADER_SIGNATURE);
+ pr_debug("GUID Partition Table Header signature is wrong:"
+ "%lld != %lld\n",
+ (unsigned long long)le64_to_cpu((*gpt)->signature),
+ (unsigned long long)GPT_HEADER_SIGNATURE);
goto fail;
}
@@ -318,9 +311,8 @@ is_gpt_valid(struct block_device *bdev, u64 lba,
crc = efi_crc32((const unsigned char *) (*gpt), le32_to_cpu((*gpt)->header_size));
if (crc != origcrc) {
- Dprintk
- ("GUID Partition Table Header CRC is wrong: %x != %x\n",
- crc, origcrc);
+ pr_debug("GUID Partition Table Header CRC is wrong: %x != %x\n",
+ crc, origcrc);
goto fail;
}
(*gpt)->header_crc32 = cpu_to_le32(origcrc);
@@ -328,9 +320,9 @@ is_gpt_valid(struct block_device *bdev, u64 lba,
/* Check that the my_lba entry points to the LBA that contains
* the GUID Partition Table */
if (le64_to_cpu((*gpt)->my_lba) != lba) {
- Dprintk("GPT my_lba incorrect: %lld != %lld\n",
- (unsigned long long)le64_to_cpu((*gpt)->my_lba),
- (unsigned long long)lba);
+ pr_debug("GPT my_lba incorrect: %lld != %lld\n",
+ (unsigned long long)le64_to_cpu((*gpt)->my_lba),
+ (unsigned long long)lba);
goto fail;
}
@@ -339,15 +331,15 @@ is_gpt_valid(struct block_device *bdev, u64 lba,
*/
lastlba = last_lba(bdev);
if (le64_to_cpu((*gpt)->first_usable_lba) > lastlba) {
- Dprintk("GPT: first_usable_lba incorrect: %lld > %lld\n",
- (unsigned long long)le64_to_cpu((*gpt)->first_usable_lba),
- (unsigned long long)lastlba);
+ pr_debug("GPT: first_usable_lba incorrect: %lld > %lld\n",
+ (unsigned long long)le64_to_cpu((*gpt)->first_usable_lba),
+ (unsigned long long)lastlba);
goto fail;
}
if (le64_to_cpu((*gpt)->last_usable_lba) > lastlba) {
- Dprintk("GPT: last_usable_lba incorrect: %lld > %lld\n",
- (unsigned long long)le64_to_cpu((*gpt)->last_usable_lba),
- (unsigned long long)lastlba);
+ pr_debug("GPT: last_usable_lba incorrect: %lld > %lld\n",
+ (unsigned long long)le64_to_cpu((*gpt)->last_usable_lba),
+ (unsigned long long)lastlba);
goto fail;
}
@@ -360,7 +352,7 @@ is_gpt_valid(struct block_device *bdev, u64 lba,
le32_to_cpu((*gpt)->sizeof_partition_entry));
if (crc != le32_to_cpu((*gpt)->partition_entry_array_crc32)) {
- Dprintk("GUID Partitition Entry Array CRC check failed.\n");
+ pr_debug("GUID Partitition Entry Array CRC check failed.\n");
goto fail_ptes;
}
@@ -616,7 +608,7 @@ efi_partition(struct parsed_partitions *state, struct block_device *bdev)
return 0;
}
- Dprintk("GUID Partition Table is valid! Yea!\n");
+ pr_debug("GUID Partition Table is valid! Yea!\n");
for (i = 0; i < le32_to_cpu(gpt->num_partition_entries) && i < state->limit-1; i++) {
if (!is_pte_valid(&ptes[i], last_lba(bdev)))
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c
index 0fdda2e8a4c..8652fb99e96 100644
--- a/fs/partitions/ldm.c
+++ b/fs/partitions/ldm.c
@@ -133,17 +133,17 @@ static bool ldm_parse_privhead(const u8 *data, struct privhead *ph)
bool is_vista = false;
BUG_ON(!data || !ph);
- if (MAGIC_PRIVHEAD != BE64(data)) {
+ if (MAGIC_PRIVHEAD != get_unaligned_be64(data)) {
ldm_error("Cannot find PRIVHEAD structure. LDM database is"
" corrupt. Aborting.");
return false;
}
- ph->ver_major = BE16(data + 0x000C);
- ph->ver_minor = BE16(data + 0x000E);
- ph->logical_disk_start = BE64(data + 0x011B);
- ph->logical_disk_size = BE64(data + 0x0123);
- ph->config_start = BE64(data + 0x012B);
- ph->config_size = BE64(data + 0x0133);
+ ph->ver_major = get_unaligned_be16(data + 0x000C);
+ ph->ver_minor = get_unaligned_be16(data + 0x000E);
+ ph->logical_disk_start = get_unaligned_be64(data + 0x011B);
+ ph->logical_disk_size = get_unaligned_be64(data + 0x0123);
+ ph->config_start = get_unaligned_be64(data + 0x012B);
+ ph->config_size = get_unaligned_be64(data + 0x0133);
/* Version 2.11 is Win2k/XP and version 2.12 is Vista. */
if (ph->ver_major == 2 && ph->ver_minor == 12)
is_vista = true;
@@ -191,14 +191,14 @@ static bool ldm_parse_tocblock (const u8 *data, struct tocblock *toc)
{
BUG_ON (!data || !toc);
- if (MAGIC_TOCBLOCK != BE64 (data)) {
+ if (MAGIC_TOCBLOCK != get_unaligned_be64(data)) {
ldm_crit ("Cannot find TOCBLOCK, database may be corrupt.");
return false;
}
strncpy (toc->bitmap1_name, data + 0x24, sizeof (toc->bitmap1_name));
toc->bitmap1_name[sizeof (toc->bitmap1_name) - 1] = 0;
- toc->bitmap1_start = BE64 (data + 0x2E);
- toc->bitmap1_size = BE64 (data + 0x36);
+ toc->bitmap1_start = get_unaligned_be64(data + 0x2E);
+ toc->bitmap1_size = get_unaligned_be64(data + 0x36);
if (strncmp (toc->bitmap1_name, TOC_BITMAP1,
sizeof (toc->bitmap1_name)) != 0) {
@@ -208,8 +208,8 @@ static bool ldm_parse_tocblock (const u8 *data, struct tocblock *toc)
}
strncpy (toc->bitmap2_name, data + 0x46, sizeof (toc->bitmap2_name));
toc->bitmap2_name[sizeof (toc->bitmap2_name) - 1] = 0;
- toc->bitmap2_start = BE64 (data + 0x50);
- toc->bitmap2_size = BE64 (data + 0x58);
+ toc->bitmap2_start = get_unaligned_be64(data + 0x50);
+ toc->bitmap2_size = get_unaligned_be64(data + 0x58);
if (strncmp (toc->bitmap2_name, TOC_BITMAP2,
sizeof (toc->bitmap2_name)) != 0) {
ldm_crit ("TOCBLOCK's second bitmap is '%s', should be '%s'.",
@@ -237,22 +237,22 @@ static bool ldm_parse_vmdb (const u8 *data, struct vmdb *vm)
{
BUG_ON (!data || !vm);
- if (MAGIC_VMDB != BE32 (data)) {
+ if (MAGIC_VMDB != get_unaligned_be32(data)) {
ldm_crit ("Cannot find the VMDB, database may be corrupt.");
return false;
}
- vm->ver_major = BE16 (data + 0x12);
- vm->ver_minor = BE16 (data + 0x14);
+ vm->ver_major = get_unaligned_be16(data + 0x12);
+ vm->ver_minor = get_unaligned_be16(data + 0x14);
if ((vm->ver_major != 4) || (vm->ver_minor != 10)) {
ldm_error ("Expected VMDB version %d.%d, got %d.%d. "
"Aborting.", 4, 10, vm->ver_major, vm->ver_minor);
return false;
}
- vm->vblk_size = BE32 (data + 0x08);
- vm->vblk_offset = BE32 (data + 0x0C);
- vm->last_vblk_seq = BE32 (data + 0x04);
+ vm->vblk_size = get_unaligned_be32(data + 0x08);
+ vm->vblk_offset = get_unaligned_be32(data + 0x0C);
+ vm->last_vblk_seq = get_unaligned_be32(data + 0x04);
ldm_debug ("Parsed VMDB successfully.");
return true;
@@ -507,7 +507,7 @@ static bool ldm_validate_vmdb (struct block_device *bdev, unsigned long base,
goto out; /* Already logged */
/* Are there uncommitted transactions? */
- if (BE16(data + 0x10) != 0x01) {
+ if (get_unaligned_be16(data + 0x10) != 0x01) {
ldm_crit ("Database is not in a consistent state. Aborting.");
goto out;
}
@@ -802,7 +802,7 @@ static bool ldm_parse_cmp3 (const u8 *buffer, int buflen, struct vblk *vb)
return false;
len += VBLK_SIZE_CMP3;
- if (len != BE32 (buffer + 0x14))
+ if (len != get_unaligned_be32(buffer + 0x14))
return false;
comp = &vb->vblk.comp;
@@ -851,7 +851,7 @@ static int ldm_parse_dgr3 (const u8 *buffer, int buflen, struct vblk *vb)
return false;
len += VBLK_SIZE_DGR3;
- if (len != BE32 (buffer + 0x14))
+ if (len != get_unaligned_be32(buffer + 0x14))
return false;
dgrp = &vb->vblk.dgrp;
@@ -895,7 +895,7 @@ static bool ldm_parse_dgr4 (const u8 *buffer, int buflen, struct vblk *vb)
return false;
len += VBLK_SIZE_DGR4;
- if (len != BE32 (buffer + 0x14))
+ if (len != get_unaligned_be32(buffer + 0x14))
return false;
dgrp = &vb->vblk.dgrp;
@@ -931,7 +931,7 @@ static bool ldm_parse_dsk3 (const u8 *buffer, int buflen, struct vblk *vb)
return false;
len += VBLK_SIZE_DSK3;
- if (len != BE32 (buffer + 0x14))
+ if (len != get_unaligned_be32(buffer + 0x14))
return false;
disk = &vb->vblk.disk;
@@ -968,7 +968,7 @@ static bool ldm_parse_dsk4 (const u8 *buffer, int buflen, struct vblk *vb)
return false;
len += VBLK_SIZE_DSK4;
- if (len != BE32 (buffer + 0x14))
+ if (len != get_unaligned_be32(buffer + 0x14))
return false;
disk = &vb->vblk.disk;
@@ -1034,14 +1034,14 @@ static bool ldm_parse_prt3(const u8 *buffer, int buflen, struct vblk *vb)
return false;
}
len += VBLK_SIZE_PRT3;
- if (len > BE32(buffer + 0x14)) {
+ if (len > get_unaligned_be32(buffer + 0x14)) {
ldm_error("len %d > BE32(buffer + 0x14) %d", len,
- BE32(buffer + 0x14));
+ get_unaligned_be32(buffer + 0x14));
return false;
}
part = &vb->vblk.part;
- part->start = BE64(buffer + 0x24 + r_name);
- part->volume_offset = BE64(buffer + 0x2C + r_name);
+ part->start = get_unaligned_be64(buffer + 0x24 + r_name);
+ part->volume_offset = get_unaligned_be64(buffer + 0x2C + r_name);
part->size = ldm_get_vnum(buffer + 0x34 + r_name);
part->parent_id = ldm_get_vnum(buffer + 0x34 + r_size);
part->disk_id = ldm_get_vnum(buffer + 0x34 + r_parent);
@@ -1139,9 +1139,9 @@ static bool ldm_parse_vol5(const u8 *buffer, int buflen, struct vblk *vb)
return false;
}
len += VBLK_SIZE_VOL5;
- if (len > BE32(buffer + 0x14)) {
+ if (len > get_unaligned_be32(buffer + 0x14)) {
ldm_error("len %d > BE32(buffer + 0x14) %d", len,
- BE32(buffer + 0x14));
+ get_unaligned_be32(buffer + 0x14));
return false;
}
volu = &vb->vblk.volu;
@@ -1294,9 +1294,9 @@ static bool ldm_frag_add (const u8 *data, int size, struct list_head *frags)
BUG_ON (!data || !frags);
- group = BE32 (data + 0x08);
- rec = BE16 (data + 0x0C);
- num = BE16 (data + 0x0E);
+ group = get_unaligned_be32(data + 0x08);
+ rec = get_unaligned_be16(data + 0x0C);
+ num = get_unaligned_be16(data + 0x0E);
if ((num < 1) || (num > 4)) {
ldm_error ("A VBLK claims to have %d parts.", num);
return false;
@@ -1425,12 +1425,12 @@ static bool ldm_get_vblks (struct block_device *bdev, unsigned long base,
}
for (v = 0; v < perbuf; v++, data+=size) { /* For each vblk */
- if (MAGIC_VBLK != BE32 (data)) {
+ if (MAGIC_VBLK != get_unaligned_be32(data)) {
ldm_error ("Expected to find a VBLK.");
goto out;
}
- recs = BE16 (data + 0x0E); /* Number of records */
+ recs = get_unaligned_be16(data + 0x0E); /* Number of records */
if (recs == 1) {
if (!ldm_ldmdb_add (data, size, ldb))
goto out; /* Already logged */
diff --git a/fs/partitions/ldm.h b/fs/partitions/ldm.h
index 80f63b5fdd9..30e08e809c1 100644
--- a/fs/partitions/ldm.h
+++ b/fs/partitions/ldm.h
@@ -98,11 +98,6 @@ struct parsed_partitions;
#define TOC_BITMAP1 "config" /* Names of the two defined */
#define TOC_BITMAP2 "log" /* bitmaps in the TOCBLOCK. */
-/* Most numbers we deal with are big-endian and won't be aligned. */
-#define BE16(x) ((u16)be16_to_cpu(get_unaligned((__be16*)(x))))
-#define BE32(x) ((u32)be32_to_cpu(get_unaligned((__be32*)(x))))
-#define BE64(x) ((u64)be64_to_cpu(get_unaligned((__be64*)(x))))
-
/* Borrowed from msdos.c */
#define SYS_IND(p) (get_unaligned(&(p)->sys_ind))
diff --git a/fs/pipe.c b/fs/pipe.c
index 700f4e0d957..fcba6542b8d 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -777,45 +777,10 @@ pipe_rdwr_open(struct inode *inode, struct file *filp)
/*
* The file_operations structs are not static because they
* are also used in linux/fs/fifo.c to do operations on FIFOs.
+ *
+ * Pipes reuse fifos' file_operations structs.
*/
-const struct file_operations read_fifo_fops = {
- .llseek = no_llseek,
- .read = do_sync_read,
- .aio_read = pipe_read,
- .write = bad_pipe_w,
- .poll = pipe_poll,
- .unlocked_ioctl = pipe_ioctl,
- .open = pipe_read_open,
- .release = pipe_read_release,
- .fasync = pipe_read_fasync,
-};
-
-const struct file_operations write_fifo_fops = {
- .llseek = no_llseek,
- .read = bad_pipe_r,
- .write = do_sync_write,
- .aio_write = pipe_write,
- .poll = pipe_poll,
- .unlocked_ioctl = pipe_ioctl,
- .open = pipe_write_open,
- .release = pipe_write_release,
- .fasync = pipe_write_fasync,
-};
-
-const struct file_operations rdwr_fifo_fops = {
- .llseek = no_llseek,
- .read = do_sync_read,
- .aio_read = pipe_read,
- .write = do_sync_write,
- .aio_write = pipe_write,
- .poll = pipe_poll,
- .unlocked_ioctl = pipe_ioctl,
- .open = pipe_rdwr_open,
- .release = pipe_rdwr_release,
- .fasync = pipe_rdwr_fasync,
-};
-
-static const struct file_operations read_pipe_fops = {
+const struct file_operations read_pipefifo_fops = {
.llseek = no_llseek,
.read = do_sync_read,
.aio_read = pipe_read,
@@ -827,7 +792,7 @@ static const struct file_operations read_pipe_fops = {
.fasync = pipe_read_fasync,
};
-static const struct file_operations write_pipe_fops = {
+const struct file_operations write_pipefifo_fops = {
.llseek = no_llseek,
.read = bad_pipe_r,
.write = do_sync_write,
@@ -839,7 +804,7 @@ static const struct file_operations write_pipe_fops = {
.fasync = pipe_write_fasync,
};
-static const struct file_operations rdwr_pipe_fops = {
+const struct file_operations rdwr_pipefifo_fops = {
.llseek = no_llseek,
.read = do_sync_read,
.aio_read = pipe_read,
@@ -927,7 +892,7 @@ static struct inode * get_pipe_inode(void)
inode->i_pipe = pipe;
pipe->readers = pipe->writers = 1;
- inode->i_fop = &rdwr_pipe_fops;
+ inode->i_fop = &rdwr_pipefifo_fops;
/*
* Mark the inode dirty from the very beginning,
@@ -950,7 +915,7 @@ fail_inode:
return NULL;
}
-struct file *create_write_pipe(void)
+struct file *create_write_pipe(int flags)
{
int err;
struct inode *inode;
@@ -978,12 +943,12 @@ struct file *create_write_pipe(void)
d_instantiate(dentry, inode);
err = -ENFILE;
- f = alloc_file(pipe_mnt, dentry, FMODE_WRITE, &write_pipe_fops);
+ f = alloc_file(pipe_mnt, dentry, FMODE_WRITE, &write_pipefifo_fops);
if (!f)
goto err_dentry;
f->f_mapping = inode->i_mapping;
- f->f_flags = O_WRONLY;
+ f->f_flags = O_WRONLY | (flags & O_NONBLOCK);
f->f_version = 0;
return f;
@@ -1007,7 +972,7 @@ void free_write_pipe(struct file *f)
put_filp(f);
}
-struct file *create_read_pipe(struct file *wrf)
+struct file *create_read_pipe(struct file *wrf, int flags)
{
struct file *f = get_empty_filp();
if (!f)
@@ -1019,34 +984,37 @@ struct file *create_read_pipe(struct file *wrf)
f->f_mapping = wrf->f_path.dentry->d_inode->i_mapping;
f->f_pos = 0;
- f->f_flags = O_RDONLY;
- f->f_op = &read_pipe_fops;
+ f->f_flags = O_RDONLY | (flags & O_NONBLOCK);
+ f->f_op = &read_pipefifo_fops;
f->f_mode = FMODE_READ;
f->f_version = 0;
return f;
}
-int do_pipe(int *fd)
+int do_pipe_flags(int *fd, int flags)
{
struct file *fw, *fr;
int error;
int fdw, fdr;
- fw = create_write_pipe();
+ if (flags & ~(O_CLOEXEC | O_NONBLOCK))
+ return -EINVAL;
+
+ fw = create_write_pipe(flags);
if (IS_ERR(fw))
return PTR_ERR(fw);
- fr = create_read_pipe(fw);
+ fr = create_read_pipe(fw, flags);
error = PTR_ERR(fr);
if (IS_ERR(fr))
goto err_write_pipe;
- error = get_unused_fd();
+ error = get_unused_fd_flags(flags);
if (error < 0)
goto err_read_pipe;
fdr = error;
- error = get_unused_fd();
+ error = get_unused_fd_flags(flags);
if (error < 0)
goto err_fdr;
fdw = error;
@@ -1074,16 +1042,21 @@ int do_pipe(int *fd)
return error;
}
+int do_pipe(int *fd)
+{
+ return do_pipe_flags(fd, 0);
+}
+
/*
* sys_pipe() is the normal C calling standard for creating
* a pipe. It's not the way Unix traditionally does this, though.
*/
-asmlinkage long __weak sys_pipe(int __user *fildes)
+asmlinkage long __weak sys_pipe2(int __user *fildes, int flags)
{
int fd[2];
int error;
- error = do_pipe(fd);
+ error = do_pipe_flags(fd, flags);
if (!error) {
if (copy_to_user(fildes, fd, sizeof(fd))) {
sys_close(fd[0]);
@@ -1094,6 +1067,11 @@ asmlinkage long __weak sys_pipe(int __user *fildes)
return error;
}
+asmlinkage long __weak sys_pipe(int __user *fildes)
+{
+ return sys_pipe2(fildes, 0);
+}
+
/*
* pipefs should _never_ be mounted by userland - too much of security hassle,
* no real gain from having the whole whorehouse mounted. So we don't need
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig
new file mode 100644
index 00000000000..73cd7a418f0
--- /dev/null
+++ b/fs/proc/Kconfig
@@ -0,0 +1,59 @@
+config PROC_FS
+ bool "/proc file system support" if EMBEDDED
+ default y
+ help
+ This is a virtual file system providing information about the status
+ of the system. "Virtual" means that it doesn't take up any space on
+ your hard disk: the files are created on the fly by the kernel when
+ you try to access them. Also, you cannot read the files with older
+ versions of the program less: you need to use more or cat.
+
+ It's totally cool; for example, "cat /proc/interrupts" gives
+ information about what the different IRQs are used for at the moment
+ (there is a small number of Interrupt ReQuest lines in your computer
+ that are used by the attached devices to gain the CPU's attention --
+ often a source of trouble if two devices are mistakenly configured
+ to use the same IRQ). The program procinfo can display some
+ information about your system gathered from the /proc file system.
+
+ Before you can use the /proc file system, it has to be mounted,
+ meaning it has to be given a location in the directory hierarchy.
+ That location should be /proc. A command such as "mount -t proc proc
+ /proc" or the equivalent line in /etc/fstab does the job.
+
+ The /proc file system is explained in the file
+ <file:Documentation/filesystems/proc.txt> and on the proc(5) manpage
+ ("man 5 proc").
+
+ This option will enlarge your kernel by about 67 KB. Several
+ programs depend on this, so everyone should say Y here.
+
+config PROC_KCORE
+ bool "/proc/kcore support" if !ARM
+ depends on PROC_FS && MMU
+
+config PROC_VMCORE
+ bool "/proc/vmcore support (EXPERIMENTAL)"
+ depends on PROC_FS && CRASH_DUMP
+ default y
+ help
+ Exports the dump image of crashed kernel in ELF format.
+
+config PROC_SYSCTL
+ bool "Sysctl support (/proc/sys)" if EMBEDDED
+ depends on PROC_FS
+ select SYSCTL
+ default y
+ ---help---
+ The sysctl interface provides a means of dynamically changing
+ certain kernel parameters and variables on the fly without requiring
+ a recompile of the kernel or reboot of the system. The primary
+ interface is through /proc/sys. If you say Y here a tree of
+ modifiable sysctl entries will be generated beneath the
+ /proc/sys directory. They are explained in the files
+ in <file:Documentation/sysctl/>. Note that enabling this
+ option will enlarge the kernel by at least 8 KB.
+
+ As it is generally a good thing, you should say Y here unless
+ building a kernel for install/rescue disks or your system is very
+ limited in memory.
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 797d775e035..0d6eb33597c 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -80,6 +80,7 @@
#include <linux/delayacct.h>
#include <linux/seq_file.h>
#include <linux/pid_namespace.h>
+#include <linux/tracehook.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
@@ -168,8 +169,12 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
rcu_read_lock();
ppid = pid_alive(p) ?
task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0;
- tpid = pid_alive(p) && p->ptrace ?
- task_pid_nr_ns(rcu_dereference(p->parent), ns) : 0;
+ tpid = 0;
+ if (pid_alive(p)) {
+ struct task_struct *tracer = tracehook_tracer_task(p);
+ if (tracer)
+ tpid = task_pid_nr_ns(tracer, ns);
+ }
seq_printf(m,
"State:\t%s\n"
"Tgid:\t%d\n"
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 58c3e6a8e15..a28840b11b8 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -53,6 +53,7 @@
#include <linux/time.h>
#include <linux/proc_fs.h>
#include <linux/stat.h>
+#include <linux/task_io_accounting_ops.h>
#include <linux/init.h>
#include <linux/capability.h>
#include <linux/file.h>
@@ -69,6 +70,7 @@
#include <linux/mount.h>
#include <linux/security.h>
#include <linux/ptrace.h>
+#include <linux/tracehook.h>
#include <linux/cgroup.h>
#include <linux/cpuset.h>
#include <linux/audit.h>
@@ -231,10 +233,14 @@ static int check_mem_permission(struct task_struct *task)
* If current is actively ptrace'ing, and would also be
* permitted to freshly attach with ptrace now, permit it.
*/
- if (task->parent == current && (task->ptrace & PT_PTRACED) &&
- task_is_stopped_or_traced(task) &&
- ptrace_may_access(task, PTRACE_MODE_ATTACH))
- return 0;
+ if (task_is_stopped_or_traced(task)) {
+ int match;
+ rcu_read_lock();
+ match = (tracehook_tracer_task(task) == current);
+ rcu_read_unlock();
+ if (match && ptrace_may_access(task, PTRACE_MODE_ATTACH))
+ return 0;
+ }
/*
* No one else is allowed.
@@ -504,6 +510,26 @@ static int proc_pid_limits(struct task_struct *task, char *buffer)
return count;
}
+#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
+static int proc_pid_syscall(struct task_struct *task, char *buffer)
+{
+ long nr;
+ unsigned long args[6], sp, pc;
+
+ if (task_current_syscall(task, &nr, args, 6, &sp, &pc))
+ return sprintf(buffer, "running\n");
+
+ if (nr < 0)
+ return sprintf(buffer, "%ld 0x%lx 0x%lx\n", nr, sp, pc);
+
+ return sprintf(buffer,
+ "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
+ nr,
+ args[0], args[1], args[2], args[3], args[4], args[5],
+ sp, pc);
+}
+#endif /* CONFIG_HAVE_ARCH_TRACEHOOK */
+
/************************************************************************/
/* Here the fs part begins */
/************************************************************************/
@@ -1834,8 +1860,7 @@ static const struct file_operations proc_fd_operations = {
* /proc/pid/fd needs a special permission handler so that a process can still
* access /proc/self/fd after it has executed a setuid().
*/
-static int proc_fd_permission(struct inode *inode, int mask,
- struct nameidata *nd)
+static int proc_fd_permission(struct inode *inode, int mask)
{
int rv;
@@ -2376,29 +2401,47 @@ static int proc_base_fill_cache(struct file *filp, void *dirent,
}
#ifdef CONFIG_TASK_IO_ACCOUNTING
-static int proc_pid_io_accounting(struct task_struct *task, char *buffer)
+static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
{
+ struct task_io_accounting acct = task->ioac;
+ unsigned long flags;
+
+ if (whole && lock_task_sighand(task, &flags)) {
+ struct task_struct *t = task;
+
+ task_io_accounting_add(&acct, &task->signal->ioac);
+ while_each_thread(task, t)
+ task_io_accounting_add(&acct, &t->ioac);
+
+ unlock_task_sighand(task, &flags);
+ }
return sprintf(buffer,
-#ifdef CONFIG_TASK_XACCT
"rchar: %llu\n"
"wchar: %llu\n"
"syscr: %llu\n"
"syscw: %llu\n"
-#endif
"read_bytes: %llu\n"
"write_bytes: %llu\n"
"cancelled_write_bytes: %llu\n",
-#ifdef CONFIG_TASK_XACCT
- (unsigned long long)task->rchar,
- (unsigned long long)task->wchar,
- (unsigned long long)task->syscr,
- (unsigned long long)task->syscw,
-#endif
- (unsigned long long)task->ioac.read_bytes,
- (unsigned long long)task->ioac.write_bytes,
- (unsigned long long)task->ioac.cancelled_write_bytes);
+ (unsigned long long)acct.rchar,
+ (unsigned long long)acct.wchar,
+ (unsigned long long)acct.syscr,
+ (unsigned long long)acct.syscw,
+ (unsigned long long)acct.read_bytes,
+ (unsigned long long)acct.write_bytes,
+ (unsigned long long)acct.cancelled_write_bytes);
+}
+
+static int proc_tid_io_accounting(struct task_struct *task, char *buffer)
+{
+ return do_io_accounting(task, buffer, 0);
}
-#endif
+
+static int proc_tgid_io_accounting(struct task_struct *task, char *buffer)
+{
+ return do_io_accounting(task, buffer, 1);
+}
+#endif /* CONFIG_TASK_IO_ACCOUNTING */
/*
* Thread groups
@@ -2420,6 +2463,9 @@ static const struct pid_entry tgid_base_stuff[] = {
#ifdef CONFIG_SCHED_DEBUG
REG("sched", S_IRUGO|S_IWUSR, pid_sched),
#endif
+#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
+ INF("syscall", S_IRUSR, pid_syscall),
+#endif
INF("cmdline", S_IRUGO, pid_cmdline),
ONE("stat", S_IRUGO, tgid_stat),
ONE("statm", S_IRUGO, pid_statm),
@@ -2470,7 +2516,7 @@ static const struct pid_entry tgid_base_stuff[] = {
REG("coredump_filter", S_IRUGO|S_IWUSR, coredump_filter),
#endif
#ifdef CONFIG_TASK_IO_ACCOUNTING
- INF("io", S_IRUGO, pid_io_accounting),
+ INF("io", S_IRUGO, tgid_io_accounting),
#endif
};
@@ -2752,6 +2798,9 @@ static const struct pid_entry tid_base_stuff[] = {
#ifdef CONFIG_SCHED_DEBUG
REG("sched", S_IRUGO|S_IWUSR, pid_sched),
#endif
+#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
+ INF("syscall", S_IRUSR, pid_syscall),
+#endif
INF("cmdline", S_IRUGO, pid_cmdline),
ONE("stat", S_IRUGO, tid_stat),
ONE("statm", S_IRUGO, pid_statm),
@@ -2797,6 +2846,9 @@ static const struct pid_entry tid_base_stuff[] = {
#ifdef CONFIG_FAULT_INJECTION
REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject),
#endif
+#ifdef CONFIG_TASK_IO_ACCOUNTING
+ INF("io", S_IRUGO, tid_io_accounting),
+#endif
};
static int proc_tid_base_readdir(struct file * filp,
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 43e54e86cef..4fb81e9c94e 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -300,10 +300,10 @@ out:
return rtn;
}
-static DEFINE_IDR(proc_inum_idr);
+static DEFINE_IDA(proc_inum_ida);
static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */
-#define PROC_DYNAMIC_FIRST 0xF0000000UL
+#define PROC_DYNAMIC_FIRST 0xF0000000U
/*
* Return an inode number between PROC_DYNAMIC_FIRST and
@@ -311,36 +311,33 @@ static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */
*/
static unsigned int get_inode_number(void)
{
- int i, inum = 0;
+ unsigned int i;
int error;
retry:
- if (idr_pre_get(&proc_inum_idr, GFP_KERNEL) == 0)
+ if (ida_pre_get(&proc_inum_ida, GFP_KERNEL) == 0)
return 0;
spin_lock(&proc_inum_lock);
- error = idr_get_new(&proc_inum_idr, NULL, &i);
+ error = ida_get_new(&proc_inum_ida, &i);
spin_unlock(&proc_inum_lock);
if (error == -EAGAIN)
goto retry;
else if (error)
return 0;
- inum = (i & MAX_ID_MASK) + PROC_DYNAMIC_FIRST;
-
- /* inum will never be more than 0xf0ffffff, so no check
- * for overflow.
- */
-
- return inum;
+ if (i > UINT_MAX - PROC_DYNAMIC_FIRST) {
+ spin_lock(&proc_inum_lock);
+ ida_remove(&proc_inum_ida, i);
+ spin_unlock(&proc_inum_lock);
+ }
+ return PROC_DYNAMIC_FIRST + i;
}
static void release_inode_number(unsigned int inum)
{
- int id = (inum - PROC_DYNAMIC_FIRST) | ~MAX_ID_MASK;
-
spin_lock(&proc_inum_lock);
- idr_remove(&proc_inum_idr, id);
+ ida_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST);
spin_unlock(&proc_inum_lock);
}
@@ -597,6 +594,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
ent->pde_users = 0;
spin_lock_init(&ent->pde_unload_lock);
ent->pde_unload_completion = NULL;
+ INIT_LIST_HEAD(&ent->pde_openers);
out:
return ent;
}
@@ -789,15 +787,25 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
spin_unlock(&de->pde_unload_lock);
continue_removing:
+ spin_lock(&de->pde_unload_lock);
+ while (!list_empty(&de->pde_openers)) {
+ struct pde_opener *pdeo;
+
+ pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh);
+ list_del(&pdeo->lh);
+ spin_unlock(&de->pde_unload_lock);
+ pdeo->release(pdeo->inode, pdeo->file);
+ kfree(pdeo);
+ spin_lock(&de->pde_unload_lock);
+ }
+ spin_unlock(&de->pde_unload_lock);
+
if (S_ISDIR(de->mode))
parent->nlink--;
de->nlink = 0;
- if (de->subdir) {
- printk(KERN_WARNING "%s: removing non-empty directory "
+ WARN(de->subdir, KERN_WARNING "%s: removing non-empty directory "
"'%s/%s', leaking at least '%s'\n", __func__,
de->parent->name, de->name, de->subdir->name);
- WARN_ON(1);
- }
if (atomic_dec_and_test(&de->count))
free_proc_entry(de);
}
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index b08d1001791..8bb03f056c2 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -17,6 +17,7 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/smp_lock.h>
+#include <linux/sysctl.h>
#include <asm/system.h>
#include <asm/uaccess.h>
@@ -65,6 +66,8 @@ static void proc_delete_inode(struct inode *inode)
module_put(de->owner);
de_put(de);
}
+ if (PROC_I(inode)->sysctl)
+ sysctl_head_put(PROC_I(inode)->sysctl);
clear_inode(inode);
}
@@ -84,6 +87,8 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
ei->fd = 0;
ei->op.proc_get_link = NULL;
ei->pde = NULL;
+ ei->sysctl = NULL;
+ ei->sysctl_entry = NULL;
inode = &ei->vfs_inode;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
return inode;
@@ -94,7 +99,7 @@ static void proc_destroy_inode(struct inode *inode)
kmem_cache_free(proc_inode_cachep, PROC_I(inode));
}
-static void init_once(struct kmem_cache * cachep, void *foo)
+static void init_once(void *foo)
{
struct proc_inode *ei = (struct proc_inode *) foo;
@@ -111,27 +116,25 @@ int __init proc_init_inodecache(void)
return 0;
}
-static int proc_remount(struct super_block *sb, int *flags, char *data)
-{
- *flags |= MS_NODIRATIME;
- return 0;
-}
-
static const struct super_operations proc_sops = {
.alloc_inode = proc_alloc_inode,
.destroy_inode = proc_destroy_inode,
.drop_inode = generic_delete_inode,
.delete_inode = proc_delete_inode,
.statfs = simple_statfs,
- .remount_fs = proc_remount,
};
-static void pde_users_dec(struct proc_dir_entry *pde)
+static void __pde_users_dec(struct proc_dir_entry *pde)
{
- spin_lock(&pde->pde_unload_lock);
pde->pde_users--;
if (pde->pde_unload_completion && pde->pde_users == 0)
complete(pde->pde_unload_completion);
+}
+
+static void pde_users_dec(struct proc_dir_entry *pde)
+{
+ spin_lock(&pde->pde_unload_lock);
+ __pde_users_dec(pde);
spin_unlock(&pde->pde_unload_lock);
}
@@ -318,36 +321,97 @@ static int proc_reg_open(struct inode *inode, struct file *file)
struct proc_dir_entry *pde = PDE(inode);
int rv = 0;
int (*open)(struct inode *, struct file *);
+ int (*release)(struct inode *, struct file *);
+ struct pde_opener *pdeo;
+
+ /*
+ * What for, you ask? Well, we can have open, rmmod, remove_proc_entry
+ * sequence. ->release won't be called because ->proc_fops will be
+ * cleared. Depending on complexity of ->release, consequences vary.
+ *
+ * We can't wait for mercy when close will be done for real, it's
+ * deadlockable: rmmod foo </proc/foo . So, we're going to do ->release
+ * by hand in remove_proc_entry(). For this, save opener's credentials
+ * for later.
+ */
+ pdeo = kmalloc(sizeof(struct pde_opener), GFP_KERNEL);
+ if (!pdeo)
+ return -ENOMEM;
spin_lock(&pde->pde_unload_lock);
if (!pde->proc_fops) {
spin_unlock(&pde->pde_unload_lock);
+ kfree(pdeo);
return rv;
}
pde->pde_users++;
open = pde->proc_fops->open;
+ release = pde->proc_fops->release;
spin_unlock(&pde->pde_unload_lock);
if (open)
rv = open(inode, file);
- pde_users_dec(pde);
+ spin_lock(&pde->pde_unload_lock);
+ if (rv == 0 && release) {
+ /* To know what to release. */
+ pdeo->inode = inode;
+ pdeo->file = file;
+ /* Strictly for "too late" ->release in proc_reg_release(). */
+ pdeo->release = release;
+ list_add(&pdeo->lh, &pde->pde_openers);
+ } else
+ kfree(pdeo);
+ __pde_users_dec(pde);
+ spin_unlock(&pde->pde_unload_lock);
return rv;
}
+static struct pde_opener *find_pde_opener(struct proc_dir_entry *pde,
+ struct inode *inode, struct file *file)
+{
+ struct pde_opener *pdeo;
+
+ list_for_each_entry(pdeo, &pde->pde_openers, lh) {
+ if (pdeo->inode == inode && pdeo->file == file)
+ return pdeo;
+ }
+ return NULL;
+}
+
static int proc_reg_release(struct inode *inode, struct file *file)
{
struct proc_dir_entry *pde = PDE(inode);
int rv = 0;
int (*release)(struct inode *, struct file *);
+ struct pde_opener *pdeo;
spin_lock(&pde->pde_unload_lock);
+ pdeo = find_pde_opener(pde, inode, file);
if (!pde->proc_fops) {
- spin_unlock(&pde->pde_unload_lock);
+ /*
+ * Can't simply exit, __fput() will think that everything is OK,
+ * and move on to freeing struct file. remove_proc_entry() will
+ * find slacker in opener's list and will try to do non-trivial
+ * things with struct file. Therefore, remove opener from list.
+ *
+ * But if opener is removed from list, who will ->release it?
+ */
+ if (pdeo) {
+ list_del(&pdeo->lh);
+ spin_unlock(&pde->pde_unload_lock);
+ rv = pdeo->release(inode, file);
+ kfree(pdeo);
+ } else
+ spin_unlock(&pde->pde_unload_lock);
return rv;
}
pde->pde_users++;
release = pde->proc_fops->release;
+ if (pdeo) {
+ list_del(&pdeo->lh);
+ kfree(pdeo);
+ }
spin_unlock(&pde->pde_unload_lock);
if (release)
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 28cbca80590..442202314d5 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -63,6 +63,7 @@ extern const struct file_operations proc_smaps_operations;
extern const struct file_operations proc_clear_refs_operations;
extern const struct file_operations proc_pagemap_operations;
extern const struct file_operations proc_net_operations;
+extern const struct file_operations proc_kmsg_operations;
extern const struct inode_operations proc_net_inode_operations;
void free_proc_entry(struct proc_dir_entry *de);
@@ -88,3 +89,10 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino,
struct dentry *dentry);
int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
filldir_t filldir);
+
+struct pde_opener {
+ struct inode *inode;
+ struct file *file;
+ int (*release)(struct inode *, struct file *);
+ struct list_head lh;
+};
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index e78c81fcf54..c2370c76fb7 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -23,6 +23,10 @@
#define CORE_STR "CORE"
+#ifndef ELF_CORE_EFLAGS
+#define ELF_CORE_EFLAGS 0
+#endif
+
static int open_kcore(struct inode * inode, struct file * filp)
{
return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
@@ -164,11 +168,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
elf->e_entry = 0;
elf->e_phoff = sizeof(struct elfhdr);
elf->e_shoff = 0;
-#if defined(CONFIG_H8300)
- elf->e_flags = ELF_FLAGS;
-#else
- elf->e_flags = 0;
-#endif
+ elf->e_flags = ELF_CORE_EFLAGS;
elf->e_ehsize = sizeof(struct elfhdr);
elf->e_phentsize= sizeof(struct elf_phdr);
elf->e_phnum = nphdr;
diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c
index ff3b90b56e9..9fd5df3f40c 100644
--- a/fs/proc/kmsg.c
+++ b/fs/proc/kmsg.c
@@ -15,6 +15,8 @@
#include <asm/uaccess.h>
#include <asm/io.h>
+#include "internal.h"
+
extern wait_queue_head_t log_wait;
extern int do_syslog(int type, char __user *bug, int count);
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index c652d469dc0..ded96986296 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -232,7 +232,6 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
#undef K
}
-extern const struct seq_operations fragmentation_op;
static int fragmentation_open(struct inode *inode, struct file *file)
{
(void)inode;
@@ -246,7 +245,6 @@ static const struct file_operations fragmentation_file_operations = {
.release = seq_release,
};
-extern const struct seq_operations pagetypeinfo_op;
static int pagetypeinfo_open(struct inode *inode, struct file *file)
{
return seq_open(file, &pagetypeinfo_op);
@@ -259,7 +257,6 @@ static const struct file_operations pagetypeinfo_file_ops = {
.release = seq_release,
};
-extern const struct seq_operations zoneinfo_op;
static int zoneinfo_open(struct inode *inode, struct file *file)
{
return seq_open(file, &zoneinfo_op);
@@ -356,7 +353,6 @@ static const struct file_operations proc_devinfo_operations = {
.release = seq_release,
};
-extern const struct seq_operations vmstat_op;
static int vmstat_open(struct inode *inode, struct file *file)
{
return seq_open(file, &vmstat_op);
@@ -468,14 +464,25 @@ static const struct file_operations proc_slabstats_operations = {
#ifdef CONFIG_MMU
static int vmalloc_open(struct inode *inode, struct file *file)
{
- return seq_open(file, &vmalloc_op);
+ unsigned int *ptr = NULL;
+ int ret;
+
+ if (NUMA_BUILD)
+ ptr = kmalloc(nr_node_ids * sizeof(unsigned int), GFP_KERNEL);
+ ret = seq_open(file, &vmalloc_op);
+ if (!ret) {
+ struct seq_file *m = file->private_data;
+ m->private = ptr;
+ } else
+ kfree(ptr);
+ return ret;
}
static const struct file_operations proc_vmalloc_operations = {
.open = vmalloc_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release,
+ .release = seq_release_private,
};
#endif
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index b224a28e0c1..7bc296f424a 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -27,6 +27,11 @@
#include "internal.h"
+static struct net *get_proc_net(const struct inode *inode)
+{
+ return maybe_get_net(PDE_NET(PDE(inode)));
+}
+
int seq_open_net(struct inode *ino, struct file *f,
const struct seq_operations *ops, int size)
{
@@ -185,12 +190,6 @@ void proc_net_remove(struct net *net, const char *name)
}
EXPORT_SYMBOL_GPL(proc_net_remove);
-struct net *get_proc_net(const struct inode *inode)
-{
- return maybe_get_net(PDE_NET(PDE(inode)));
-}
-EXPORT_SYMBOL_GPL(get_proc_net);
-
static __net_init int proc_net_ns_init(struct net *net)
{
struct proc_dir_entry *netd, *net_statd;
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 5acc001d49f..f9a8b892718 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -10,149 +10,110 @@
static struct dentry_operations proc_sys_dentry_operations;
static const struct file_operations proc_sys_file_operations;
static const struct inode_operations proc_sys_inode_operations;
+static const struct file_operations proc_sys_dir_file_operations;
+static const struct inode_operations proc_sys_dir_operations;
-static void proc_sys_refresh_inode(struct inode *inode, struct ctl_table *table)
-{
- /* Refresh the cached information bits in the inode */
- if (table) {
- inode->i_uid = 0;
- inode->i_gid = 0;
- inode->i_mode = table->mode;
- if (table->proc_handler) {
- inode->i_mode |= S_IFREG;
- inode->i_nlink = 1;
- } else {
- inode->i_mode |= S_IFDIR;
- inode->i_nlink = 0; /* It is too hard to figure out */
- }
- }
-}
-
-static struct inode *proc_sys_make_inode(struct inode *dir, struct ctl_table *table)
+static struct inode *proc_sys_make_inode(struct super_block *sb,
+ struct ctl_table_header *head, struct ctl_table *table)
{
struct inode *inode;
- struct proc_inode *dir_ei, *ei;
- int depth;
+ struct proc_inode *ei;
- inode = new_inode(dir->i_sb);
+ inode = new_inode(sb);
if (!inode)
goto out;
- /* A directory is always one deeper than it's parent */
- dir_ei = PROC_I(dir);
- depth = dir_ei->fd + 1;
-
+ sysctl_head_get(head);
ei = PROC_I(inode);
- ei->fd = depth;
+ ei->sysctl = head;
+ ei->sysctl_entry = table;
+
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
- inode->i_op = &proc_sys_inode_operations;
- inode->i_fop = &proc_sys_file_operations;
inode->i_flags |= S_PRIVATE; /* tell selinux to ignore this inode */
- proc_sys_refresh_inode(inode, table);
+ inode->i_mode = table->mode;
+ if (!table->child) {
+ inode->i_mode |= S_IFREG;
+ inode->i_op = &proc_sys_inode_operations;
+ inode->i_fop = &proc_sys_file_operations;
+ } else {
+ inode->i_mode |= S_IFDIR;
+ inode->i_nlink = 0;
+ inode->i_op = &proc_sys_dir_operations;
+ inode->i_fop = &proc_sys_dir_file_operations;
+ }
out:
return inode;
}
-static struct dentry *proc_sys_ancestor(struct dentry *dentry, int depth)
-{
- for (;;) {
- struct proc_inode *ei;
-
- ei = PROC_I(dentry->d_inode);
- if (ei->fd == depth)
- break; /* found */
-
- dentry = dentry->d_parent;
- }
- return dentry;
-}
-
-static struct ctl_table *proc_sys_lookup_table_one(struct ctl_table *table,
- struct qstr *name)
+static struct ctl_table *find_in_table(struct ctl_table *p, struct qstr *name)
{
int len;
- for ( ; table->ctl_name || table->procname; table++) {
+ for ( ; p->ctl_name || p->procname; p++) {
- if (!table->procname)
+ if (!p->procname)
continue;
- len = strlen(table->procname);
+ len = strlen(p->procname);
if (len != name->len)
continue;
- if (memcmp(table->procname, name->name, len) != 0)
+ if (memcmp(p->procname, name->name, len) != 0)
continue;
/* I have a match */
- return table;
+ return p;
}
return NULL;
}
-static struct ctl_table *proc_sys_lookup_table(struct dentry *dentry,
- struct ctl_table *table)
+struct ctl_table_header *grab_header(struct inode *inode)
{
- struct dentry *ancestor;
- struct proc_inode *ei;
- int depth, i;
+ if (PROC_I(inode)->sysctl)
+ return sysctl_head_grab(PROC_I(inode)->sysctl);
+ else
+ return sysctl_head_next(NULL);
+}
- ei = PROC_I(dentry->d_inode);
- depth = ei->fd;
+static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
+ struct nameidata *nd)
+{
+ struct ctl_table_header *head = grab_header(dir);
+ struct ctl_table *table = PROC_I(dir)->sysctl_entry;
+ struct ctl_table_header *h = NULL;
+ struct qstr *name = &dentry->d_name;
+ struct ctl_table *p;
+ struct inode *inode;
+ struct dentry *err = ERR_PTR(-ENOENT);
- if (depth == 0)
- return table;
+ if (IS_ERR(head))
+ return ERR_CAST(head);
- for (i = 1; table && (i <= depth); i++) {
- ancestor = proc_sys_ancestor(dentry, i);
- table = proc_sys_lookup_table_one(table, &ancestor->d_name);
- if (table)
- table = table->child;
+ if (table && !table->child) {
+ WARN_ON(1);
+ goto out;
}
- return table;
-
-}
-static struct ctl_table *proc_sys_lookup_entry(struct dentry *dparent,
- struct qstr *name,
- struct ctl_table *table)
-{
- table = proc_sys_lookup_table(dparent, table);
- if (table)
- table = proc_sys_lookup_table_one(table, name);
- return table;
-}
-static struct ctl_table *do_proc_sys_lookup(struct dentry *parent,
- struct qstr *name,
- struct ctl_table_header **ptr)
-{
- struct ctl_table_header *head;
- struct ctl_table *table = NULL;
+ table = table ? table->child : head->ctl_table;
- for (head = sysctl_head_next(NULL); head;
- head = sysctl_head_next(head)) {
- table = proc_sys_lookup_entry(parent, name, head->ctl_table);
- if (table)
- break;
+ p = find_in_table(table, name);
+ if (!p) {
+ for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) {
+ if (h->attached_to != table)
+ continue;
+ p = find_in_table(h->attached_by, name);
+ if (p)
+ break;
+ }
}
- *ptr = head;
- return table;
-}
-
-static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
- struct nameidata *nd)
-{
- struct ctl_table_header *head;
- struct inode *inode;
- struct dentry *err;
- struct ctl_table *table;
- err = ERR_PTR(-ENOENT);
- table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head);
- if (!table)
+ if (!p)
goto out;
err = ERR_PTR(-ENOMEM);
- inode = proc_sys_make_inode(dir, table);
+ inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p);
+ if (h)
+ sysctl_head_finish(h);
+
if (!inode)
goto out;
@@ -168,22 +129,14 @@ out:
static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
size_t count, loff_t *ppos, int write)
{
- struct dentry *dentry = filp->f_dentry;
- struct ctl_table_header *head;
- struct ctl_table *table;
+ struct inode *inode = filp->f_path.dentry->d_inode;
+ struct ctl_table_header *head = grab_header(inode);
+ struct ctl_table *table = PROC_I(inode)->sysctl_entry;
ssize_t error;
size_t res;
- table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head);
- /* Has the sysctl entry disappeared on us? */
- error = -ENOENT;
- if (!table)
- goto out;
-
- /* Has the sysctl entry been replaced by a directory? */
- error = -EISDIR;
- if (!table->proc_handler)
- goto out;
+ if (IS_ERR(head))
+ return PTR_ERR(head);
/*
* At this point we know that the sysctl was not unregistered
@@ -193,6 +146,11 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
if (sysctl_perm(head->root, table, write ? MAY_WRITE : MAY_READ))
goto out;
+ /* if that can happen at all, it should be -EINVAL, not -EISDIR */
+ error = -EINVAL;
+ if (!table->proc_handler)
+ goto out;
+
/* careful: calling conventions are nasty here */
res = count;
error = table->proc_handler(table, write, filp, buf, &res, ppos);
@@ -218,82 +176,86 @@ static ssize_t proc_sys_write(struct file *filp, const char __user *buf,
static int proc_sys_fill_cache(struct file *filp, void *dirent,
- filldir_t filldir, struct ctl_table *table)
+ filldir_t filldir,
+ struct ctl_table_header *head,
+ struct ctl_table *table)
{
- struct ctl_table_header *head;
- struct ctl_table *child_table = NULL;
struct dentry *child, *dir = filp->f_path.dentry;
struct inode *inode;
struct qstr qname;
ino_t ino = 0;
unsigned type = DT_UNKNOWN;
- int ret;
qname.name = table->procname;
qname.len = strlen(table->procname);
qname.hash = full_name_hash(qname.name, qname.len);
- /* Suppress duplicates.
- * Only fill a directory entry if it is the value that
- * an ordinary lookup of that name returns. Hide all
- * others.
- *
- * If we ever cache this translation in the dcache
- * I should do a dcache lookup first. But for now
- * it is just simpler not to.
- */
- ret = 0;
- child_table = do_proc_sys_lookup(dir, &qname, &head);
- sysctl_head_finish(head);
- if (child_table != table)
- return 0;
-
child = d_lookup(dir, &qname);
if (!child) {
- struct dentry *new;
- new = d_alloc(dir, &qname);
- if (new) {
- inode = proc_sys_make_inode(dir->d_inode, table);
- if (!inode)
- child = ERR_PTR(-ENOMEM);
- else {
- new->d_op = &proc_sys_dentry_operations;
- d_add(new, inode);
+ child = d_alloc(dir, &qname);
+ if (child) {
+ inode = proc_sys_make_inode(dir->d_sb, head, table);
+ if (!inode) {
+ dput(child);
+ return -ENOMEM;
+ } else {
+ child->d_op = &proc_sys_dentry_operations;
+ d_add(child, inode);
}
- if (child)
- dput(new);
- else
- child = new;
+ } else {
+ return -ENOMEM;
}
}
- if (!child || IS_ERR(child) || !child->d_inode)
- goto end_instantiate;
inode = child->d_inode;
- if (inode) {
- ino = inode->i_ino;
- type = inode->i_mode >> 12;
- }
+ ino = inode->i_ino;
+ type = inode->i_mode >> 12;
dput(child);
-end_instantiate:
- if (!ino)
- ino= find_inode_number(dir, &qname);
- if (!ino)
- ino = 1;
- return filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type);
+ return !!filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type);
+}
+
+static int scan(struct ctl_table_header *head, ctl_table *table,
+ unsigned long *pos, struct file *file,
+ void *dirent, filldir_t filldir)
+{
+
+ for (; table->ctl_name || table->procname; table++, (*pos)++) {
+ int res;
+
+ /* Can't do anything without a proc name */
+ if (!table->procname)
+ continue;
+
+ if (*pos < file->f_pos)
+ continue;
+
+ res = proc_sys_fill_cache(file, dirent, filldir, head, table);
+ if (res)
+ return res;
+
+ file->f_pos = *pos + 1;
+ }
+ return 0;
}
static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir)
{
- struct dentry *dentry = filp->f_dentry;
+ struct dentry *dentry = filp->f_path.dentry;
struct inode *inode = dentry->d_inode;
- struct ctl_table_header *head = NULL;
- struct ctl_table *table;
+ struct ctl_table_header *head = grab_header(inode);
+ struct ctl_table *table = PROC_I(inode)->sysctl_entry;
+ struct ctl_table_header *h = NULL;
unsigned long pos;
- int ret;
+ int ret = -EINVAL;
+
+ if (IS_ERR(head))
+ return PTR_ERR(head);
- ret = -ENOTDIR;
- if (!S_ISDIR(inode->i_mode))
+ if (table && !table->child) {
+ WARN_ON(1);
goto out;
+ }
+
+ table = table ? table->child : head->ctl_table;
ret = 0;
/* Avoid a switch here: arm builds fail with missing __cmpdi2 */
@@ -311,30 +273,17 @@ static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir)
}
pos = 2;
- /* - Find each instance of the directory
- * - Read all entries in each instance
- * - Before returning an entry to user space lookup the entry
- * by name and if I find a different entry don't return
- * this one because it means it is a buried dup.
- * For sysctl this should only happen for directory entries.
- */
- for (head = sysctl_head_next(NULL); head; head = sysctl_head_next(head)) {
- table = proc_sys_lookup_table(dentry, head->ctl_table);
+ ret = scan(head, table, &pos, filp, dirent, filldir);
+ if (ret)
+ goto out;
- if (!table)
+ for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) {
+ if (h->attached_to != table)
continue;
-
- for (; table->ctl_name || table->procname; table++, pos++) {
- /* Can't do anything without a proc name */
- if (!table->procname)
- continue;
-
- if (pos < filp->f_pos)
- continue;
-
- if (proc_sys_fill_cache(filp, dirent, filldir, table) < 0)
- goto out;
- filp->f_pos = pos + 1;
+ ret = scan(h, h->attached_by, &pos, filp, dirent, filldir);
+ if (ret) {
+ sysctl_head_finish(h);
+ break;
}
}
ret = 1;
@@ -343,53 +292,24 @@ out:
return ret;
}
-static int proc_sys_permission(struct inode *inode, int mask, struct nameidata *nd)
+static int proc_sys_permission(struct inode *inode, int mask)
{
/*
* sysctl entries that are not writeable,
* are _NOT_ writeable, capabilities or not.
*/
- struct ctl_table_header *head;
- struct ctl_table *table;
- struct dentry *dentry;
- int mode;
- int depth;
+ struct ctl_table_header *head = grab_header(inode);
+ struct ctl_table *table = PROC_I(inode)->sysctl_entry;
int error;
- head = NULL;
- depth = PROC_I(inode)->fd;
-
- /* First check the cached permissions, in case we don't have
- * enough information to lookup the sysctl table entry.
- */
- error = -EACCES;
- mode = inode->i_mode;
-
- if (current->euid == 0)
- mode >>= 6;
- else if (in_group_p(0))
- mode >>= 3;
-
- if ((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask)
- error = 0;
-
- /* If we can't get a sysctl table entry the permission
- * checks on the cached mode will have to be enough.
- */
- if (!nd || !depth)
- goto out;
+ if (IS_ERR(head))
+ return PTR_ERR(head);
- dentry = nd->path.dentry;
- table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head);
+ if (!table) /* global root - r-xr-xr-x */
+ error = mask & MAY_WRITE ? -EACCES : 0;
+ else /* Use the permissions on the sysctl table entry */
+ error = sysctl_perm(head->root, table, mask);
- /* If the entry does not exist deny permission */
- error = -EACCES;
- if (!table)
- goto out;
-
- /* Use the permissions on the sysctl table entry */
- error = sysctl_perm(head->root, table, mask);
-out:
sysctl_head_finish(head);
return error;
}
@@ -409,33 +329,70 @@ static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr)
return error;
}
-/* I'm lazy and don't distinguish between files and directories,
- * until access time.
- */
+static int proc_sys_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
+{
+ struct inode *inode = dentry->d_inode;
+ struct ctl_table_header *head = grab_header(inode);
+ struct ctl_table *table = PROC_I(inode)->sysctl_entry;
+
+ if (IS_ERR(head))
+ return PTR_ERR(head);
+
+ generic_fillattr(inode, stat);
+ if (table)
+ stat->mode = (stat->mode & S_IFMT) | table->mode;
+
+ sysctl_head_finish(head);
+ return 0;
+}
+
static const struct file_operations proc_sys_file_operations = {
.read = proc_sys_read,
.write = proc_sys_write,
+};
+
+static const struct file_operations proc_sys_dir_file_operations = {
.readdir = proc_sys_readdir,
};
static const struct inode_operations proc_sys_inode_operations = {
+ .permission = proc_sys_permission,
+ .setattr = proc_sys_setattr,
+ .getattr = proc_sys_getattr,
+};
+
+static const struct inode_operations proc_sys_dir_operations = {
.lookup = proc_sys_lookup,
.permission = proc_sys_permission,
.setattr = proc_sys_setattr,
+ .getattr = proc_sys_getattr,
};
static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd)
{
- struct ctl_table_header *head;
- struct ctl_table *table;
- table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head);
- proc_sys_refresh_inode(dentry->d_inode, table);
- sysctl_head_finish(head);
- return !!table;
+ return !PROC_I(dentry->d_inode)->sysctl->unregistering;
+}
+
+static int proc_sys_delete(struct dentry *dentry)
+{
+ return !!PROC_I(dentry->d_inode)->sysctl->unregistering;
+}
+
+static int proc_sys_compare(struct dentry *dir, struct qstr *qstr,
+ struct qstr *name)
+{
+ struct dentry *dentry = container_of(qstr, struct dentry, d_name);
+ if (qstr->len != name->len)
+ return 1;
+ if (memcmp(qstr->name, name->name, name->len))
+ return 1;
+ return !sysctl_is_seen(PROC_I(dentry->d_inode)->sysctl);
}
static struct dentry_operations proc_sys_dentry_operations = {
.d_revalidate = proc_sys_revalidate,
+ .d_delete = proc_sys_delete,
+ .d_compare = proc_sys_compare,
};
static struct proc_dir_entry *proc_sys_root;
@@ -443,8 +400,8 @@ static struct proc_dir_entry *proc_sys_root;
int proc_sys_init(void)
{
proc_sys_root = proc_mkdir("sys", NULL);
- proc_sys_root->proc_iops = &proc_sys_inode_operations;
- proc_sys_root->proc_fops = &proc_sys_file_operations;
+ proc_sys_root->proc_iops = &proc_sys_dir_operations;
+ proc_sys_root->proc_fops = &proc_sys_dir_file_operations;
proc_sys_root->nlink = 0;
return 0;
}
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 164bd9f9ede..7546a918f79 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -636,7 +636,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
struct pagemapread pm;
int pagecount;
int ret = -ESRCH;
- struct mm_walk pagemap_walk;
+ struct mm_walk pagemap_walk = {};
unsigned long src;
unsigned long svpfn;
unsigned long start_vaddr;
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index b31ab78052b..2aad1044b84 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -553,7 +553,7 @@ static void qnx4_destroy_inode(struct inode *inode)
kmem_cache_free(qnx4_inode_cachep, qnx4_i(inode));
}
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
{
struct qnx4_inode_info *ei = (struct qnx4_inode_info *) foo;
diff --git a/fs/quota.c b/fs/quota.c
index db1cc9f3c7a..7f4386ebc23 100644
--- a/fs/quota.c
+++ b/fs/quota.c
@@ -186,7 +186,7 @@ static void quota_sync_sb(struct super_block *sb, int type)
void sync_dquots(struct super_block *sb, int type)
{
- int cnt, dirty;
+ int cnt;
if (sb) {
if (sb->s_qcop->quota_sync)
@@ -198,11 +198,17 @@ void sync_dquots(struct super_block *sb, int type)
restart:
list_for_each_entry(sb, &super_blocks, s_list) {
/* This test just improves performance so it needn't be reliable... */
- for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++)
- if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt)
- && info_any_dirty(&sb_dqopt(sb)->info[cnt]))
- dirty = 1;
- if (!dirty)
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+ if (type != -1 && type != cnt)
+ continue;
+ if (!sb_has_quota_enabled(sb, cnt))
+ continue;
+ if (!info_dirty(&sb_dqopt(sb)->info[cnt]) &&
+ list_empty(&sb_dqopt(sb)->info[cnt].dqi_dirty_list))
+ continue;
+ break;
+ }
+ if (cnt == MAXQUOTAS)
continue;
sb->s_count++;
spin_unlock(&sb_lock);
diff --git a/fs/quota_v1.c b/fs/quota_v1.c
index a6cf9269105..5ae15b13eeb 100644
--- a/fs/quota_v1.c
+++ b/fs/quota_v1.c
@@ -1,6 +1,7 @@
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/quota.h>
+#include <linux/quotaops.h>
#include <linux/dqblk_v1.h>
#include <linux/quotaio_v1.h>
#include <linux/kernel.h>
diff --git a/fs/quota_v2.c b/fs/quota_v2.c
index 234ada90363..b53827dc02d 100644
--- a/fs/quota_v2.c
+++ b/fs/quota_v2.c
@@ -11,6 +11,7 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/slab.h>
+#include <linux/quotaops.h>
#include <asm/byteorder.h>
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 192269698a8..5699171212a 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -2435,7 +2435,7 @@ static int reiserfs_write_full_page(struct page *page,
if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
lock_buffer(bh);
} else {
- if (test_set_buffer_locked(bh)) {
+ if (!trylock_buffer(bh)) {
redirty_page_for_writepage(wbc, page);
continue;
}
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index e396b2fa474..c21df71943a 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -34,15 +34,10 @@
** from within kupdate, it will ignore the immediate flag
*/
-#include <asm/uaccess.h>
-#include <asm/system.h>
-
#include <linux/time.h>
#include <linux/semaphore.h>
-
#include <linux/vmalloc.h>
#include <linux/reiserfs_fs.h>
-
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fcntl.h>
@@ -54,6 +49,9 @@
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
+#include <linux/uaccess.h>
+
+#include <asm/system.h>
/* gets a struct reiserfs_journal_list * from a list head */
#define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \
@@ -558,13 +556,13 @@ static inline void insert_journal_hash(struct reiserfs_journal_cnode **table,
static inline void lock_journal(struct super_block *p_s_sb)
{
PROC_INFO_INC(p_s_sb, journal.lock_journal);
- down(&SB_JOURNAL(p_s_sb)->j_lock);
+ mutex_lock(&SB_JOURNAL(p_s_sb)->j_mutex);
}
/* unlock the current transaction */
static inline void unlock_journal(struct super_block *p_s_sb)
{
- up(&SB_JOURNAL(p_s_sb)->j_lock);
+ mutex_unlock(&SB_JOURNAL(p_s_sb)->j_mutex);
}
static inline void get_journal_list(struct reiserfs_journal_list *jl)
@@ -629,7 +627,7 @@ static int journal_list_still_alive(struct super_block *s,
static void release_buffer_page(struct buffer_head *bh)
{
struct page *page = bh->b_page;
- if (!page->mapping && !TestSetPageLocked(page)) {
+ if (!page->mapping && trylock_page(page)) {
page_cache_get(page);
put_bh(bh);
if (!page->mapping)
@@ -857,7 +855,7 @@ static int write_ordered_buffers(spinlock_t * lock,
jh = JH_ENTRY(list->next);
bh = jh->bh;
get_bh(bh);
- if (test_set_buffer_locked(bh)) {
+ if (!trylock_buffer(bh)) {
if (!buffer_dirty(bh)) {
list_move(&jh->list, &tmp);
goto loop_next;
@@ -1045,9 +1043,9 @@ static int flush_commit_list(struct super_block *s,
}
/* make sure nobody is trying to flush this one at the same time */
- down(&jl->j_commit_lock);
+ mutex_lock(&jl->j_commit_mutex);
if (!journal_list_still_alive(s, trans_id)) {
- up(&jl->j_commit_lock);
+ mutex_unlock(&jl->j_commit_mutex);
goto put_jl;
}
BUG_ON(jl->j_trans_id == 0);
@@ -1057,7 +1055,7 @@ static int flush_commit_list(struct super_block *s,
if (flushall) {
atomic_set(&(jl->j_older_commits_done), 1);
}
- up(&jl->j_commit_lock);
+ mutex_unlock(&jl->j_commit_mutex);
goto put_jl;
}
@@ -1181,7 +1179,7 @@ static int flush_commit_list(struct super_block *s,
if (flushall) {
atomic_set(&(jl->j_older_commits_done), 1);
}
- up(&jl->j_commit_lock);
+ mutex_unlock(&jl->j_commit_mutex);
put_jl:
put_journal_list(s, jl);
@@ -1411,8 +1409,8 @@ static int flush_journal_list(struct super_block *s,
/* if flushall == 0, the lock is already held */
if (flushall) {
- down(&journal->j_flush_sem);
- } else if (!down_trylock(&journal->j_flush_sem)) {
+ mutex_lock(&journal->j_flush_mutex);
+ } else if (mutex_trylock(&journal->j_flush_mutex)) {
BUG();
}
@@ -1642,7 +1640,7 @@ static int flush_journal_list(struct super_block *s,
jl->j_state = 0;
put_journal_list(s, jl);
if (flushall)
- up(&journal->j_flush_sem);
+ mutex_unlock(&journal->j_flush_mutex);
put_fs_excl();
return err;
}
@@ -1772,12 +1770,12 @@ static int kupdate_transactions(struct super_block *s,
struct reiserfs_journal *journal = SB_JOURNAL(s);
chunk.nr = 0;
- down(&journal->j_flush_sem);
+ mutex_lock(&journal->j_flush_mutex);
if (!journal_list_still_alive(s, orig_trans_id)) {
goto done;
}
- /* we've got j_flush_sem held, nobody is going to delete any
+ /* we've got j_flush_mutex held, nobody is going to delete any
* of these lists out from underneath us
*/
while ((num_trans && transactions_flushed < num_trans) ||
@@ -1812,7 +1810,7 @@ static int kupdate_transactions(struct super_block *s,
}
done:
- up(&journal->j_flush_sem);
+ mutex_unlock(&journal->j_flush_mutex);
return ret;
}
@@ -2556,7 +2554,7 @@ static struct reiserfs_journal_list *alloc_journal_list(struct super_block *s)
INIT_LIST_HEAD(&jl->j_working_list);
INIT_LIST_HEAD(&jl->j_tail_bh_list);
INIT_LIST_HEAD(&jl->j_bh_list);
- sema_init(&jl->j_commit_lock, 1);
+ mutex_init(&jl->j_commit_mutex);
SB_JOURNAL(s)->j_num_lists++;
get_journal_list(jl);
return jl;
@@ -2837,8 +2835,8 @@ int journal_init(struct super_block *p_s_sb, const char *j_dev_name,
journal->j_last = NULL;
journal->j_first = NULL;
init_waitqueue_head(&(journal->j_join_wait));
- sema_init(&journal->j_lock, 1);
- sema_init(&journal->j_flush_sem, 1);
+ mutex_init(&journal->j_mutex);
+ mutex_init(&journal->j_flush_mutex);
journal->j_trans_id = 10;
journal->j_mount_id = 10;
@@ -3873,7 +3871,7 @@ int reiserfs_prepare_for_journal(struct super_block *p_s_sb,
{
PROC_INFO_INC(p_s_sb, journal.prepare);
- if (test_set_buffer_locked(bh)) {
+ if (!trylock_buffer(bh)) {
if (!wait)
return 0;
lock_buffer(bh);
@@ -4030,7 +4028,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
* the new transaction is fully setup, and we've already flushed the
* ordered bh list
*/
- down(&jl->j_commit_lock);
+ mutex_lock(&jl->j_commit_mutex);
/* save the transaction id in case we need to commit it later */
commit_trans_id = jl->j_trans_id;
@@ -4196,7 +4194,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
lock_kernel();
}
BUG_ON(!list_empty(&jl->j_tail_bh_list));
- up(&jl->j_commit_lock);
+ mutex_unlock(&jl->j_commit_mutex);
/* honor the flush wishes from the caller, simple commits can
** be done outside the journal lock, they are done below
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 1d40f2bd197..d318c7e663f 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -22,11 +22,11 @@
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/exportfs.h>
+#include <linux/quotaops.h>
#include <linux/vfs.h>
#include <linux/mnt_namespace.h>
#include <linux/mount.h>
#include <linux/namei.h>
-#include <linux/quotaops.h>
struct file_system_type reiserfs_fs_type;
@@ -182,7 +182,7 @@ static int finish_unfinished(struct super_block *s)
int ret = reiserfs_quota_on_mount(s, i);
if (ret < 0)
reiserfs_warning(s,
- "reiserfs: cannot turn on journalled quota: error %d",
+ "reiserfs: cannot turn on journaled quota: error %d",
ret);
}
}
@@ -520,7 +520,7 @@ static void reiserfs_destroy_inode(struct inode *inode)
kmem_cache_free(reiserfs_inode_cachep, REISERFS_I(inode));
}
-static void init_once(struct kmem_cache * cachep, void *foo)
+static void init_once(void *foo)
{
struct reiserfs_inode_info *ei = (struct reiserfs_inode_info *)foo;
@@ -876,7 +876,9 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
mount options were selected. */
unsigned long *blocks, /* strtol-ed from NNN of resize=NNN */
char **jdev_name,
- unsigned int *commit_max_age)
+ unsigned int *commit_max_age,
+ char **qf_names,
+ unsigned int *qfmt)
{
int c;
char *arg = NULL;
@@ -992,9 +994,11 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
if (c == 'u' || c == 'g') {
int qtype = c == 'u' ? USRQUOTA : GRPQUOTA;
- if (sb_any_quota_enabled(s)) {
+ if ((sb_any_quota_enabled(s) ||
+ sb_any_quota_suspended(s)) &&
+ (!*arg != !REISERFS_SB(s)->s_qf_names[qtype])) {
reiserfs_warning(s,
- "reiserfs_parse_options: cannot change journalled quota options when quota turned on.");
+ "reiserfs_parse_options: cannot change journaled quota options when quota turned on.");
return 0;
}
if (*arg) { /* Some filename specified? */
@@ -1011,46 +1015,54 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
"reiserfs_parse_options: quotafile must be on filesystem root.");
return 0;
}
- REISERFS_SB(s)->s_qf_names[qtype] =
+ qf_names[qtype] =
kmalloc(strlen(arg) + 1, GFP_KERNEL);
- if (!REISERFS_SB(s)->s_qf_names[qtype]) {
+ if (!qf_names[qtype]) {
reiserfs_warning(s,
"reiserfs_parse_options: not enough memory for storing quotafile name.");
return 0;
}
- strcpy(REISERFS_SB(s)->s_qf_names[qtype], arg);
+ strcpy(qf_names[qtype], arg);
*mount_options |= 1 << REISERFS_QUOTA;
} else {
- kfree(REISERFS_SB(s)->s_qf_names[qtype]);
- REISERFS_SB(s)->s_qf_names[qtype] = NULL;
+ if (qf_names[qtype] !=
+ REISERFS_SB(s)->s_qf_names[qtype])
+ kfree(qf_names[qtype]);
+ qf_names[qtype] = NULL;
}
}
if (c == 'f') {
if (!strcmp(arg, "vfsold"))
- REISERFS_SB(s)->s_jquota_fmt = QFMT_VFS_OLD;
+ *qfmt = QFMT_VFS_OLD;
else if (!strcmp(arg, "vfsv0"))
- REISERFS_SB(s)->s_jquota_fmt = QFMT_VFS_V0;
+ *qfmt = QFMT_VFS_V0;
else {
reiserfs_warning(s,
"reiserfs_parse_options: unknown quota format specified.");
return 0;
}
+ if ((sb_any_quota_enabled(s) ||
+ sb_any_quota_suspended(s)) &&
+ *qfmt != REISERFS_SB(s)->s_jquota_fmt) {
+ reiserfs_warning(s,
+ "reiserfs_parse_options: cannot change journaled quota options when quota turned on.");
+ return 0;
+ }
}
#else
if (c == 'u' || c == 'g' || c == 'f') {
reiserfs_warning(s,
- "reiserfs_parse_options: journalled quota options not supported.");
+ "reiserfs_parse_options: journaled quota options not supported.");
return 0;
}
#endif
}
#ifdef CONFIG_QUOTA
- if (!REISERFS_SB(s)->s_jquota_fmt
- && (REISERFS_SB(s)->s_qf_names[USRQUOTA]
- || REISERFS_SB(s)->s_qf_names[GRPQUOTA])) {
+ if (!REISERFS_SB(s)->s_jquota_fmt && !*qfmt
+ && (qf_names[USRQUOTA] || qf_names[GRPQUOTA])) {
reiserfs_warning(s,
- "reiserfs_parse_options: journalled quota format not specified.");
+ "reiserfs_parse_options: journaled quota format not specified.");
return 0;
}
/* This checking is not precise wrt the quota type but for our purposes it is sufficient */
@@ -1130,6 +1142,21 @@ static void handle_attrs(struct super_block *s)
}
}
+#ifdef CONFIG_QUOTA
+/*
+ * Store the quota file names and quota format handed back by option
+ * parsing in the reiserfs superblock info.  A name already stored there
+ * is freed only when @qf_names carries a different pointer for that
+ * quota type, so an unchanged name is never freed.
+ */
+static void handle_quota_files(struct super_block *s, char **qf_names,
+		unsigned int *qfmt)
+{
+	struct reiserfs_sb_info *sbi = REISERFS_SB(s);
+	int type;
+
+	for (type = 0; type < MAXQUOTAS; type++) {
+		char *old = sbi->s_qf_names[type];
+
+		sbi->s_qf_names[type] = qf_names[type];
+		if (old != qf_names[type])
+			kfree(old);
+	}
+	sbi->s_jquota_fmt = *qfmt;
+}
+#endif
+
static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
{
struct reiserfs_super_block *rs;
@@ -1141,23 +1168,30 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
struct reiserfs_journal *journal = SB_JOURNAL(s);
char *new_opts = kstrdup(arg, GFP_KERNEL);
int err;
+ char *qf_names[MAXQUOTAS];
+ unsigned int qfmt = 0;
#ifdef CONFIG_QUOTA
int i;
+
+ memcpy(qf_names, REISERFS_SB(s)->s_qf_names, sizeof(qf_names));
#endif
rs = SB_DISK_SUPER_BLOCK(s);
if (!reiserfs_parse_options
- (s, arg, &mount_options, &blocks, NULL, &commit_max_age)) {
+ (s, arg, &mount_options, &blocks, NULL, &commit_max_age,
+ qf_names, &qfmt)) {
#ifdef CONFIG_QUOTA
- for (i = 0; i < MAXQUOTAS; i++) {
- kfree(REISERFS_SB(s)->s_qf_names[i]);
- REISERFS_SB(s)->s_qf_names[i] = NULL;
- }
+ for (i = 0; i < MAXQUOTAS; i++)
+ if (qf_names[i] != REISERFS_SB(s)->s_qf_names[i])
+ kfree(qf_names[i]);
#endif
err = -EINVAL;
goto out_err;
}
+#ifdef CONFIG_QUOTA
+ handle_quota_files(s, qf_names, &qfmt);
+#endif
handle_attrs(s);
@@ -1570,6 +1604,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
char *jdev_name;
struct reiserfs_sb_info *sbi;
int errval = -EINVAL;
+ char *qf_names[MAXQUOTAS] = {};
+ unsigned int qfmt = 0;
save_mount_options(s, data);
@@ -1597,9 +1633,12 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
jdev_name = NULL;
if (reiserfs_parse_options
(s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name,
- &commit_max_age) == 0) {
+ &commit_max_age, qf_names, &qfmt) == 0) {
goto error;
}
+#ifdef CONFIG_QUOTA
+ handle_quota_files(s, qf_names, &qfmt);
+#endif
if (blocks) {
SWARN(silent, s, "jmacd-7: reiserfs_fill_super: resize option "
@@ -1819,7 +1858,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
return (0);
- error:
+error:
if (jinit_done) { /* kill the commit thread, free journal ram */
journal_release_error(NULL, s);
}
@@ -1830,10 +1869,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
#ifdef CONFIG_QUOTA
{
int j;
- for (j = 0; j < MAXQUOTAS; j++) {
- kfree(sbi->s_qf_names[j]);
- sbi->s_qf_names[j] = NULL;
- }
+ for (j = 0; j < MAXQUOTAS; j++)
+ kfree(qf_names[j]);
}
#endif
kfree(sbi);
@@ -1980,7 +2017,7 @@ static int reiserfs_release_dquot(struct dquot *dquot)
static int reiserfs_mark_dquot_dirty(struct dquot *dquot)
{
- /* Are we journalling quotas? */
+ /* Are we journaling quotas? */
if (REISERFS_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
REISERFS_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
dquot_mark_dquot_dirty(dquot);
@@ -2026,6 +2063,7 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
int err;
struct nameidata nd;
struct inode *inode;
+ struct reiserfs_transaction_handle th;
if (!(REISERFS_SB(sb)->s_mount_opt & (1 << REISERFS_QUOTA)))
return -EINVAL;
@@ -2037,8 +2075,8 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
return err;
/* Quotafile not on the same filesystem? */
if (nd.path.mnt->mnt_sb != sb) {
- path_put(&nd.path);
- return -EXDEV;
+ err = -EXDEV;
+ goto out;
}
inode = nd.path.dentry->d_inode;
/* We must not pack tails for quota files on reiserfs for quota IO to work */
@@ -2048,24 +2086,37 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
reiserfs_warning(sb,
"reiserfs: Unpacking tail of quota file failed"
" (%d). Cannot turn on quotas.", err);
- path_put(&nd.path);
- return -EINVAL;
+ err = -EINVAL;
+ goto out;
}
mark_inode_dirty(inode);
}
- /* Not journalling quota? No more tests needed... */
- if (!REISERFS_SB(sb)->s_qf_names[USRQUOTA] &&
- !REISERFS_SB(sb)->s_qf_names[GRPQUOTA]) {
- path_put(&nd.path);
- return vfs_quota_on(sb, type, format_id, path, 0);
- }
- /* Quotafile not of fs root? */
- if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
- reiserfs_warning(sb,
+ /* Journaling quota? */
+ if (REISERFS_SB(sb)->s_qf_names[type]) {
+ /* Quotafile not of fs root? */
+ if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
+ reiserfs_warning(sb,
"reiserfs: Quota file not on filesystem root. "
"Journalled quota will not work.");
+ }
+
+ /*
+ * When we journal data on quota file, we have to flush journal to see
+ * all updates to the file when we bypass pagecache...
+ */
+ if (reiserfs_file_data_log(inode)) {
+ /* Just start temporary transaction and finish it */
+ err = journal_begin(&th, sb, 1);
+ if (err)
+ goto out;
+ err = journal_end_sync(&th, sb, 1);
+ if (err)
+ goto out;
+ }
+ err = vfs_quota_on_path(sb, type, format_id, &nd.path);
+out:
path_put(&nd.path);
- return vfs_quota_on(sb, type, format_id, path, 0);
+ return err;
}
/* Read data from quotafile - avoid pagecache and such because we cannot afford
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index d7c4935c103..bb3cb5b7cdb 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -1250,7 +1250,7 @@ static int reiserfs_check_acl(struct inode *inode, int mask)
return error;
}
-int reiserfs_permission(struct inode *inode, int mask, struct nameidata *nd)
+int reiserfs_permission(struct inode *inode, int mask)
{
/*
* We don't do permission checks on the internal objects.
diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
index 5e90a95ad60..056008db137 100644
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -6,8 +6,6 @@
#include <linux/reiserfs_xattr.h>
#include <asm/uaccess.h>
-#define XATTR_SECURITY_PREFIX "security."
-
static int
security_get(struct inode *inode, const char *name, void *buffer, size_t size)
{
diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c
index 024a938ca60..60abe2bb1f9 100644
--- a/fs/reiserfs/xattr_trusted.c
+++ b/fs/reiserfs/xattr_trusted.c
@@ -7,8 +7,6 @@
#include <linux/reiserfs_xattr.h>
#include <asm/uaccess.h>
-#define XATTR_TRUSTED_PREFIX "trusted."
-
static int
trusted_get(struct inode *inode, const char *name, void *buffer, size_t size)
{
diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c
index 073f39364b1..1384efcb938 100644
--- a/fs/reiserfs/xattr_user.c
+++ b/fs/reiserfs/xattr_user.c
@@ -10,8 +10,6 @@
# include <linux/reiserfs_acl.h>
#endif
-#define XATTR_USER_PREFIX "user."
-
static int
user_get(struct inode *inode, const char *name, void *buffer, size_t size)
{
diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c
index 3f13d491c7c..60d2f822e87 100644
--- a/fs/romfs/inode.c
+++ b/fs/romfs/inode.c
@@ -418,7 +418,8 @@ static int
romfs_readpage(struct file *file, struct page * page)
{
struct inode *inode = page->mapping->host;
- loff_t offset, avail, readlen;
+ loff_t offset, size;
+ unsigned long filled;
void *buf;
int result = -EIO;
@@ -430,21 +431,29 @@ romfs_readpage(struct file *file, struct page * page)
/* 32 bit warning -- but not for us :) */
offset = page_offset(page);
- if (offset < i_size_read(inode)) {
- avail = inode->i_size-offset;
- readlen = min_t(unsigned long, avail, PAGE_SIZE);
- if (romfs_copyfrom(inode, buf, ROMFS_I(inode)->i_dataoffset+offset, readlen) == readlen) {
- if (readlen < PAGE_SIZE) {
- memset(buf + readlen,0,PAGE_SIZE-readlen);
- }
- SetPageUptodate(page);
- result = 0;
+ size = i_size_read(inode);
+ filled = 0;
+ result = 0;
+ if (offset < size) {
+ unsigned long readlen;
+
+ size -= offset;
+ readlen = size > PAGE_SIZE ? PAGE_SIZE : size;
+
+ filled = romfs_copyfrom(inode, buf, ROMFS_I(inode)->i_dataoffset+offset, readlen);
+
+ if (filled != readlen) {
+ SetPageError(page);
+ filled = 0;
+ result = -EIO;
}
}
- if (result) {
- memset(buf, 0, PAGE_SIZE);
- SetPageError(page);
- }
+
+ if (filled < PAGE_SIZE)
+ memset(buf + filled, 0, PAGE_SIZE-filled);
+
+ if (!result)
+ SetPageUptodate(page);
flush_dcache_page(page);
unlock_page(page);
@@ -577,7 +586,7 @@ static void romfs_destroy_inode(struct inode *inode)
kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode));
}
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
{
struct romfs_inode_info *ei = foo;
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 3f54dbd6c49..5d54205e486 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -443,6 +443,20 @@ int seq_dentry(struct seq_file *m, struct dentry *dentry, char *esc)
return -1;
}
+/*
+ * seq_bitmap - format a bitmap into a seq_file buffer
+ * @m: seq_file whose buffer is appended to
+ * @bits: bitmap passed to bitmap_scnprintf()
+ * @nr_bits: number of bits of @bits to format
+ *
+ * Returns 0 on success.  If the length estimate from
+ * bitmap_scnprintf_len() does not fit in the remaining space, nothing is
+ * written, the buffer is marked full (m->count = m->size) and -1 is
+ * returned.
+ */
+int seq_bitmap(struct seq_file *m, unsigned long *bits, unsigned int nr_bits)
+{
+	size_t need = bitmap_scnprintf_len(nr_bits);
+
+	if (m->count + need < m->size) {
+		/*
+		 * Advance by what was actually written.  Adding the
+		 * estimate instead would count bytes that were never
+		 * filled in as part of the output.
+		 */
+		m->count += bitmap_scnprintf(m->buf + m->count,
+					     m->size - m->count,
+					     bits, nr_bits);
+		return 0;
+	}
+	m->count = m->size;
+	return -1;
+}
+
static void *single_start(struct seq_file *p, loff_t *pos)
{
return NULL + (*pos == 0);
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 619725644c7..9c39bc7f843 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -205,11 +205,19 @@ static const struct file_operations signalfd_fops = {
.read = signalfd_read,
};
-asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask)
+asmlinkage long sys_signalfd4(int ufd, sigset_t __user *user_mask,
+ size_t sizemask, int flags)
{
sigset_t sigmask;
struct signalfd_ctx *ctx;
+ /* Check the SFD_* constants for consistency. */
+ BUILD_BUG_ON(SFD_CLOEXEC != O_CLOEXEC);
+ BUILD_BUG_ON(SFD_NONBLOCK != O_NONBLOCK);
+
+ if (flags & ~(SFD_CLOEXEC | SFD_NONBLOCK))
+ return -EINVAL;
+
if (sizemask != sizeof(sigset_t) ||
copy_from_user(&sigmask, user_mask, sizeof(sigmask)))
return -EINVAL;
@@ -227,7 +235,8 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas
* When we call this, the initialization must be complete, since
* anon_inode_getfd() will install the fd.
*/
- ufd = anon_inode_getfd("[signalfd]", &signalfd_fops, ctx);
+ ufd = anon_inode_getfd("[signalfd]", &signalfd_fops, ctx,
+ flags & (O_CLOEXEC | O_NONBLOCK));
if (ufd < 0)
kfree(ctx);
} else {
@@ -249,3 +258,9 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas
return ufd;
}
+
+asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask,
+ size_t sizemask)
+{
+ return sys_signalfd4(ufd, user_mask, sizemask, 0);
+}
diff --git a/fs/smbfs/cache.c b/fs/smbfs/cache.c
index 8182f0542a2..8c177eb7e34 100644
--- a/fs/smbfs/cache.c
+++ b/fs/smbfs/cache.c
@@ -13,7 +13,6 @@
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/mm.h>
-#include <linux/dirent.h>
#include <linux/smb_fs.h>
#include <linux/pagemap.h>
#include <linux/net.h>
diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c
index 2294783320c..e4f8d51a555 100644
--- a/fs/smbfs/file.c
+++ b/fs/smbfs/file.c
@@ -408,7 +408,7 @@ smb_file_release(struct inode *inode, struct file * file)
* privileges, so we need our own check for this.
*/
static int
-smb_file_permission(struct inode *inode, int mask, struct nameidata *nd)
+smb_file_permission(struct inode *inode, int mask)
{
int mode = inode->i_mode;
int error = 0;
@@ -417,7 +417,7 @@ smb_file_permission(struct inode *inode, int mask, struct nameidata *nd)
/* Look at user permissions */
mode >>= 6;
- if ((mode & 7 & mask) != mask)
+ if (mask & ~mode & (MAY_READ | MAY_WRITE | MAY_EXEC))
error = -EACCES;
return error;
}
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 376ef3ee6ed..3528f40ffb0 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -67,7 +67,7 @@ static void smb_destroy_inode(struct inode *inode)
kmem_cache_free(smb_inode_cachep, SMB_I(inode));
}
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
{
struct smb_inode_info *ei = (struct smb_inode_info *) foo;
diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c
index d517a27b7f4..ee536e8a649 100644
--- a/fs/smbfs/proc.c
+++ b/fs/smbfs/proc.c
@@ -16,7 +16,6 @@
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/dcache.h>
-#include <linux/dirent.h>
#include <linux/nls.h>
#include <linux/smp_lock.h>
#include <linux/net.h>
diff --git a/fs/splice.c b/fs/splice.c
index 399442179d8..1bbc6f4bb09 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -371,7 +371,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
* for an in-flight io page
*/
if (flags & SPLICE_F_NONBLOCK) {
- if (TestSetPageLocked(page)) {
+ if (!trylock_page(page)) {
error = -EAGAIN;
break;
}
@@ -772,7 +772,7 @@ generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out,
ssize_t ret;
int err;
- err = remove_suid(out->f_path.dentry);
+ err = file_remove_suid(out);
if (unlikely(err))
return err;
@@ -830,7 +830,7 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
ssize_t ret;
inode_double_lock(inode, pipe->inode);
- ret = remove_suid(out->f_path.dentry);
+ ret = file_remove_suid(out);
if (likely(!ret))
ret = __splice_from_pipe(pipe, &sd, pipe_to_file);
inode_double_unlock(inode, pipe->inode);
@@ -1161,36 +1161,6 @@ static long do_splice(struct file *in, loff_t __user *off_in,
}
/*
- * Do a copy-from-user while holding the mmap_semaphore for reading, in a
- * manner safe from deadlocking with simultaneous mmap() (grabbing mmap_sem
- * for writing) and page faulting on the user memory pointed to by src.
- * This assumes that we will very rarely hit the partial != 0 path, or this
- * will not be a win.
- */
-static int copy_from_user_mmap_sem(void *dst, const void __user *src, size_t n)
-{
- int partial;
-
- if (!access_ok(VERIFY_READ, src, n))
- return -EFAULT;
-
- pagefault_disable();
- partial = __copy_from_user_inatomic(dst, src, n);
- pagefault_enable();
-
- /*
- * Didn't copy everything, drop the mmap_sem and do a faulting copy
- */
- if (unlikely(partial)) {
- up_read(&current->mm->mmap_sem);
- partial = copy_from_user(dst, src, n);
- down_read(&current->mm->mmap_sem);
- }
-
- return partial;
-}
-
-/*
* Map an iov into an array of pages and offset/length tupples. With the
* partial_page structure, we can map several non-contiguous ranges into
* our ones pages[] map instead of splitting that operation into pieces.
@@ -1203,8 +1173,6 @@ static int get_iovec_page_array(const struct iovec __user *iov,
{
int buffers = 0, error = 0;
- down_read(&current->mm->mmap_sem);
-
while (nr_vecs) {
unsigned long off, npages;
struct iovec entry;
@@ -1213,7 +1181,7 @@ static int get_iovec_page_array(const struct iovec __user *iov,
int i;
error = -EFAULT;
- if (copy_from_user_mmap_sem(&entry, iov, sizeof(entry)))
+ if (copy_from_user(&entry, iov, sizeof(entry)))
break;
base = entry.iov_base;
@@ -1247,9 +1215,8 @@ static int get_iovec_page_array(const struct iovec __user *iov,
if (npages > PIPE_BUFFERS - buffers)
npages = PIPE_BUFFERS - buffers;
- error = get_user_pages(current, current->mm,
- (unsigned long) base, npages, 0, 0,
- &pages[buffers], NULL);
+ error = get_user_pages_fast((unsigned long)base, npages,
+ 0, &pages[buffers]);
if (unlikely(error <= 0))
break;
@@ -1288,8 +1255,6 @@ static int get_iovec_page_array(const struct iovec __user *iov,
iov++;
}
- up_read(&current->mm->mmap_sem);
-
if (buffers)
return buffers;
diff --git a/fs/stat.c b/fs/stat.c
index 9cf41f719d5..7c46fbeb8b7 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -57,13 +57,13 @@ EXPORT_SYMBOL(vfs_getattr);
int vfs_stat_fd(int dfd, char __user *name, struct kstat *stat)
{
- struct nameidata nd;
+ struct path path;
int error;
- error = __user_walk_fd(dfd, name, LOOKUP_FOLLOW, &nd);
+ error = user_path_at(dfd, name, LOOKUP_FOLLOW, &path);
if (!error) {
- error = vfs_getattr(nd.path.mnt, nd.path.dentry, stat);
- path_put(&nd.path);
+ error = vfs_getattr(path.mnt, path.dentry, stat);
+ path_put(&path);
}
return error;
}
@@ -77,13 +77,13 @@ EXPORT_SYMBOL(vfs_stat);
int vfs_lstat_fd(int dfd, char __user *name, struct kstat *stat)
{
- struct nameidata nd;
+ struct path path;
int error;
- error = __user_walk_fd(dfd, name, 0, &nd);
+ error = user_path_at(dfd, name, 0, &path);
if (!error) {
- error = vfs_getattr(nd.path.mnt, nd.path.dentry, stat);
- path_put(&nd.path);
+ error = vfs_getattr(path.mnt, path.dentry, stat);
+ path_put(&path);
}
return error;
}
@@ -291,29 +291,29 @@ asmlinkage long sys_newfstat(unsigned int fd, struct stat __user *statbuf)
return error;
}
-asmlinkage long sys_readlinkat(int dfd, const char __user *path,
+asmlinkage long sys_readlinkat(int dfd, const char __user *pathname,
char __user *buf, int bufsiz)
{
- struct nameidata nd;
+ struct path path;
int error;
if (bufsiz <= 0)
return -EINVAL;
- error = __user_walk_fd(dfd, path, 0, &nd);
+ error = user_path_at(dfd, pathname, 0, &path);
if (!error) {
- struct inode *inode = nd.path.dentry->d_inode;
+ struct inode *inode = path.dentry->d_inode;
error = -EINVAL;
if (inode->i_op && inode->i_op->readlink) {
- error = security_inode_readlink(nd.path.dentry);
+ error = security_inode_readlink(path.dentry);
if (!error) {
- touch_atime(nd.path.mnt, nd.path.dentry);
- error = inode->i_op->readlink(nd.path.dentry,
+ touch_atime(path.mnt, path.dentry);
+ error = inode->i_op->readlink(path.dentry,
buf, bufsiz);
}
}
- path_put(&nd.path);
+ path_put(&path);
}
return error;
}
diff --git a/fs/super.c b/fs/super.c
index 453877c5697..e931ae9511f 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -70,6 +70,7 @@ static struct super_block *alloc_super(struct file_system_type *type)
INIT_LIST_HEAD(&s->s_instances);
INIT_HLIST_HEAD(&s->s_anon);
INIT_LIST_HEAD(&s->s_inodes);
+ INIT_LIST_HEAD(&s->s_dentry_lru);
init_rwsem(&s->s_umount);
mutex_init(&s->s_lock);
lockdep_set_class(&s->s_umount, &type->s_umount_key);
diff --git a/fs/sync.c b/fs/sync.c
index 228e17b5e9e..2967562d416 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -139,7 +139,8 @@ asmlinkage long sys_fdatasync(unsigned int fd)
* before performing the write.
*
* SYNC_FILE_RANGE_WRITE: initiate writeout of all those dirty pages in the
- * range which are not presently under writeback.
+ * range which are not presently under writeback. Note that this may block for
+ * significant periods due to exhaustion of disk request structures.
*
* SYNC_FILE_RANGE_WAIT_AFTER: wait upon writeout of all pages in the range
* after performing the write.
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 8c0e4b92574..aedaeba82ae 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -398,7 +398,7 @@ void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
}
/**
- * sysfs_add_one - add sysfs_dirent to parent
+ * __sysfs_add_one - add sysfs_dirent to parent without warning
* @acxt: addrm context to use
* @sd: sysfs_dirent to be added
*
@@ -417,7 +417,7 @@ void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
* 0 on success, -EEXIST if entry with the given name already
* exists.
*/
-int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
+int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
{
if (sysfs_find_dirent(acxt->parent_sd, sd->s_name))
return -EEXIST;
@@ -435,6 +435,36 @@ int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
}
/**
+ * sysfs_add_one - add sysfs_dirent to parent
+ * @acxt: addrm context to use
+ * @sd: sysfs_dirent to be added
+ *
+ * Get @acxt->parent_sd and set sd->s_parent to it and increment
+ * nlink of parent inode if @sd is a directory and link into the
+ * children list of the parent.
+ *
+ * This function should be called between calls to
+ * sysfs_addrm_start() and sysfs_addrm_finish() and should be
+ * passed the same @acxt as passed to sysfs_addrm_start().
+ *
+ * LOCKING:
+ * Determined by sysfs_addrm_start().
+ *
+ * RETURNS:
+ * 0 on success, -EEXIST if entry with the given name already
+ * exists, in which case a warning is also printed.
+ */
+int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
+{
+	const int err = __sysfs_add_one(acxt, sd);
+
+	/* __sysfs_add_one() adds without warning; the duplicate-name WARN is here */
+	WARN(err == -EEXIST, KERN_WARNING "sysfs: duplicate filename '%s' "
+	     "can not be created\n", sd->s_name);
+	return err;
+}
+
+/**
* sysfs_remove_one - remove sysfs_dirent from parent
* @acxt: addrm context to use
* @sd: sysfs_dirent to be removed
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index e7735f643cd..c9e4e5091da 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -14,6 +14,7 @@
#include <linux/kobject.h>
#include <linux/kallsyms.h>
#include <linux/slab.h>
+#include <linux/fsnotify.h>
#include <linux/namei.h>
#include <linux/poll.h>
#include <linux/list.h>
@@ -336,9 +337,8 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
if (kobj->ktype && kobj->ktype->sysfs_ops)
ops = kobj->ktype->sysfs_ops;
else {
- printk(KERN_ERR "missing sysfs attribute operations for "
+ WARN(1, KERN_ERR "missing sysfs attribute operations for "
"kobject: %s\n", kobject_name(kobj));
- WARN_ON(1);
goto err_out;
}
@@ -585,9 +585,11 @@ int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode)
newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
- rc = notify_change(victim, &newattrs);
+ newattrs.ia_ctime = current_fs_time(inode->i_sb);
+ rc = sysfs_setattr(victim, &newattrs);
if (rc == 0) {
+ fsnotify_change(victim, newattrs.ia_valid);
mutex_lock(&sysfs_mutex);
victim_sd->s_mode = newattrs.ia_mode;
mutex_unlock(&sysfs_mutex);
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index eeba38417b1..fe611949a7f 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -134,9 +134,8 @@ void sysfs_remove_group(struct kobject * kobj,
if (grp->name) {
sd = sysfs_get_dirent(dir_sd, grp->name);
if (!sd) {
- printk(KERN_WARNING "sysfs group %p not found for "
+ WARN(!sd, KERN_WARNING "sysfs group %p not found for "
"kobject '%s'\n", grp, kobject_name(kobj));
- WARN_ON(!sd);
return;
}
} else
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 817f5966edc..a3ba217fbe7 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -19,13 +19,8 @@
#include "sysfs.h"
-/**
- * sysfs_create_link - create symlink between two objects.
- * @kobj: object whose directory we're creating the link in.
- * @target: object we're pointing to.
- * @name: name of the symlink.
- */
-int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char * name)
+static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target,
+ const char *name, int warn)
{
struct sysfs_dirent *parent_sd = NULL;
struct sysfs_dirent *target_sd = NULL;
@@ -65,7 +60,10 @@ int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char
target_sd = NULL; /* reference is now owned by the symlink */
sysfs_addrm_start(&acxt, parent_sd);
- error = sysfs_add_one(&acxt, sd);
+ if (warn)
+ error = sysfs_add_one(&acxt, sd);
+ else
+ error = __sysfs_add_one(&acxt, sd);
sysfs_addrm_finish(&acxt);
if (error)
@@ -80,6 +78,33 @@ int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char
}
/**
+ * sysfs_create_link - create symlink between two objects.
+ * @kobj: object whose directory we're creating the link in.
+ * @target: object we're pointing to.
+ * @name: name of the symlink.
+ */
+int sysfs_create_link(struct kobject *kobj, struct kobject *target,
+ const char *name)
+{
+ return sysfs_do_create_link(kobj, target, name, 1);
+}
+
+/**
+ * sysfs_create_link_nowarn - create symlink between two objects.
+ * @kobj: object whose directory we're creating the link in.
+ * @target: object we're pointing to.
+ * @name: name of the symlink.
+ *
+ * This function does the same as sysfs_create_link(), but it
+ * doesn't warn if the link already exists.
+ */
+int sysfs_create_link_nowarn(struct kobject *kobj, struct kobject *target,
+ const char *name)
+{
+ return sysfs_do_create_link(kobj, target, name, 0);
+}
+
+/**
* sysfs_remove_link - remove symlink in object's directory.
* @kobj: object we're acting for.
* @name: name of the symlink to remove.
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index ce4e15f8aae..a5db496f71c 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -107,6 +107,7 @@ struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd);
void sysfs_put_active_two(struct sysfs_dirent *sd);
void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
struct sysfs_dirent *parent_sd);
+int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd);
int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd);
void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd);
void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt);
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index c5d60de0658..df0d435baa4 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -326,7 +326,7 @@ static void sysv_destroy_inode(struct inode *inode)
kmem_cache_free(sysv_inode_cachep, SYSV_I(inode));
}
-static void init_once(struct kmem_cache *cachep, void *p)
+static void init_once(void *p)
{
struct sysv_inode_info *si = (struct sysv_inode_info *)p;
diff --git a/fs/timerfd.c b/fs/timerfd.c
index d87d354ec42..c502c60e4f5 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -184,7 +184,11 @@ asmlinkage long sys_timerfd_create(int clockid, int flags)
int ufd;
struct timerfd_ctx *ctx;
- if (flags)
+ /* Check the TFD_* constants for consistency. */
+ BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC);
+ BUILD_BUG_ON(TFD_NONBLOCK != O_NONBLOCK);
+
+ if (flags & ~(TFD_CLOEXEC | TFD_NONBLOCK))
return -EINVAL;
if (clockid != CLOCK_MONOTONIC &&
clockid != CLOCK_REALTIME)
@@ -198,7 +202,8 @@ asmlinkage long sys_timerfd_create(int clockid, int flags)
ctx->clockid = clockid;
hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS);
- ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx);
+ ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx,
+ flags & (O_CLOEXEC | O_NONBLOCK));
if (ufd < 0)
kfree(ctx);
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 005a3b854d9..8565e586e53 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -53,6 +53,7 @@
#include "ubifs.h"
#include <linux/mount.h>
+#include <linux/namei.h>
static int read_block(struct inode *inode, void *addr, unsigned int block,
struct ubifs_data_node *dn)
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 00eb9c68ad0..ca1e2d4e03c 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1841,7 +1841,7 @@ static struct file_system_type ubifs_fs_type = {
/*
* Inode slab cache constructor.
*/
-static void inode_slab_ctor(struct kmem_cache *cachep, void *obj)
+static void inode_slab_ctor(void *obj)
{
struct ubifs_inode *ui = obj;
inode_init_once(&ui->vfs_inode);
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 44cc702f96c..5698bbf83bb 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -148,7 +148,7 @@ static void udf_destroy_inode(struct inode *inode)
kmem_cache_free(udf_inode_cachep, UDF_I(inode));
}
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
{
struct udf_inode_info *ei = (struct udf_inode_info *)foo;
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 85b22b5977f..3141969b456 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -76,6 +76,7 @@
#include <linux/errno.h>
#include <linux/fs.h>
+#include <linux/quotaops.h>
#include <linux/slab.h>
#include <linux/time.h>
#include <linux/stat.h>
@@ -1301,7 +1302,7 @@ static void ufs_destroy_inode(struct inode *inode)
kmem_cache_free(ufs_inode_cachep, UFS_I(inode));
}
-static void init_once(struct kmem_cache * cachep, void *foo)
+static void init_once(void *foo)
{
struct ufs_inode_info *ei = (struct ufs_inode_info *) foo;
diff --git a/fs/utimes.c b/fs/utimes.c
index b6b664e7145..6929e3e91d0 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -48,66 +48,22 @@ static bool nsec_valid(long nsec)
return nsec >= 0 && nsec <= 999999999;
}
-/* If times==NULL, set access and modification to current time,
- * must be owner or have write permission.
- * Else, update from *times, must be owner or super user.
- */
-long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags)
+static int utimes_common(struct path *path, struct timespec *times)
{
int error;
- struct nameidata nd;
- struct dentry *dentry;
- struct inode *inode;
struct iattr newattrs;
- struct file *f = NULL;
- struct vfsmount *mnt;
-
- error = -EINVAL;
- if (times && (!nsec_valid(times[0].tv_nsec) ||
- !nsec_valid(times[1].tv_nsec))) {
- goto out;
- }
-
- if (flags & ~AT_SYMLINK_NOFOLLOW)
- goto out;
-
- if (filename == NULL && dfd != AT_FDCWD) {
- error = -EINVAL;
- if (flags & AT_SYMLINK_NOFOLLOW)
- goto out;
+ struct inode *inode = path->dentry->d_inode;
- error = -EBADF;
- f = fget(dfd);
- if (!f)
- goto out;
- dentry = f->f_path.dentry;
- mnt = f->f_path.mnt;
- } else {
- error = __user_walk_fd(dfd, filename, (flags & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW, &nd);
- if (error)
- goto out;
-
- dentry = nd.path.dentry;
- mnt = nd.path.mnt;
- }
-
- inode = dentry->d_inode;
-
- error = mnt_want_write(mnt);
+ error = mnt_want_write(path->mnt);
if (error)
- goto dput_and_out;
+ goto out;
if (times && times[0].tv_nsec == UTIME_NOW &&
times[1].tv_nsec == UTIME_NOW)
times = NULL;
- /* In most cases, the checks are done in inode_change_ok() */
newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME;
if (times) {
- error = -EPERM;
- if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
- goto mnt_drop_write_and_out;
-
if (times[0].tv_nsec == UTIME_OMIT)
newattrs.ia_valid &= ~ATTR_ATIME;
else if (times[0].tv_nsec != UTIME_NOW) {
@@ -123,21 +79,13 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags
newattrs.ia_mtime.tv_nsec = times[1].tv_nsec;
newattrs.ia_valid |= ATTR_MTIME_SET;
}
-
/*
- * For the UTIME_OMIT/UTIME_NOW and UTIME_NOW/UTIME_OMIT
- * cases, we need to make an extra check that is not done by
- * inode_change_ok().
+ * Tell inode_change_ok() that this is an explicit time
+ * update, even if neither ATTR_ATIME_SET nor ATTR_MTIME_SET
+ * were used.
*/
- if (((times[0].tv_nsec == UTIME_NOW &&
- times[1].tv_nsec == UTIME_OMIT)
- ||
- (times[0].tv_nsec == UTIME_OMIT &&
- times[1].tv_nsec == UTIME_NOW))
- && !is_owner_or_cap(inode))
- goto mnt_drop_write_and_out;
+ newattrs.ia_valid |= ATTR_TIMES_SET;
} else {
-
/*
* If times is NULL (or both times are UTIME_NOW),
* then we need to check permissions, because
@@ -148,21 +96,76 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags
goto mnt_drop_write_and_out;
if (!is_owner_or_cap(inode)) {
- error = permission(inode, MAY_WRITE, NULL);
+ error = inode_permission(inode, MAY_WRITE);
if (error)
goto mnt_drop_write_and_out;
}
}
mutex_lock(&inode->i_mutex);
- error = notify_change(dentry, &newattrs);
+ error = notify_change(path->dentry, &newattrs);
mutex_unlock(&inode->i_mutex);
+
mnt_drop_write_and_out:
- mnt_drop_write(mnt);
-dput_and_out:
- if (f)
- fput(f);
- else
- path_put(&nd.path);
+ mnt_drop_write(path->mnt);
+out:
+ return error;
+}
+
+/*
+ * do_utimes - change times on filename or file descriptor
+ * @dfd: open file descriptor, -1 or AT_FDCWD
+ * @filename: path name or NULL
+ * @times: new times or NULL
+ * @flags: zero or more flags (only AT_SYMLINK_NOFOLLOW for the moment)
+ *
+ * If filename is NULL and dfd refers to an open file, then operate on
+ * the file. Otherwise look up filename, possibly using dfd as a
+ * starting point.
+ *
+ * If times==NULL, set access and modification to current time,
+ * must be owner or have write permission.
+ * Else, update from *times, must be owner or super user.
+ */
+long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags)
+{
+ int error = -EINVAL;
+
+ if (times && (!nsec_valid(times[0].tv_nsec) ||
+ !nsec_valid(times[1].tv_nsec))) {
+ goto out;
+ }
+
+ if (flags & ~AT_SYMLINK_NOFOLLOW)
+ goto out;
+
+ if (filename == NULL && dfd != AT_FDCWD) {
+ struct file *file;
+
+ if (flags & AT_SYMLINK_NOFOLLOW)
+ goto out;
+
+ file = fget(dfd);
+ error = -EBADF;
+ if (!file)
+ goto out;
+
+ error = utimes_common(&file->f_path, times);
+ fput(file);
+ } else {
+ struct path path;
+ int lookup_flags = 0;
+
+ if (!(flags & AT_SYMLINK_NOFOLLOW))
+ lookup_flags |= LOOKUP_FOLLOW;
+
+ error = user_path_at(dfd, filename, lookup_flags, &path);
+ if (error)
+ goto out;
+
+ error = utimes_common(&path, times);
+ path_put(&path);
+ }
+
out:
return error;
}
diff --git a/fs/vfat/namei.c b/fs/vfat/namei.c
index b546ba69be8..155c10b4adb 100644
--- a/fs/vfat/namei.c
+++ b/fs/vfat/namei.c
@@ -621,7 +621,7 @@ shortname:
memcpy(de->name, msdos_name, MSDOS_NAME);
de->attr = is_dir ? ATTR_DIR : ATTR_ARCH;
de->lcase = lcase;
- fat_date_unix2dos(ts->tv_sec, &time, &date);
+ fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc);
de->time = de->ctime = time;
de->date = de->cdate = de->adate = date;
de->ctime_cs = 0;
diff --git a/fs/xattr.c b/fs/xattr.c
index 4706a8b1f49..468377e6653 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -63,7 +63,7 @@ xattr_permission(struct inode *inode, const char *name, int mask)
return -EPERM;
}
- return permission(inode, mask, NULL);
+ return inode_permission(inode, mask);
}
int
@@ -252,40 +252,40 @@ setxattr(struct dentry *d, const char __user *name, const void __user *value,
}
asmlinkage long
-sys_setxattr(const char __user *path, const char __user *name,
+sys_setxattr(const char __user *pathname, const char __user *name,
const void __user *value, size_t size, int flags)
{
- struct nameidata nd;
+ struct path path;
int error;
- error = user_path_walk(path, &nd);
+ error = user_path(pathname, &path);
if (error)
return error;
- error = mnt_want_write(nd.path.mnt);
+ error = mnt_want_write(path.mnt);
if (!error) {
- error = setxattr(nd.path.dentry, name, value, size, flags);
- mnt_drop_write(nd.path.mnt);
+ error = setxattr(path.dentry, name, value, size, flags);
+ mnt_drop_write(path.mnt);
}
- path_put(&nd.path);
+ path_put(&path);
return error;
}
asmlinkage long
-sys_lsetxattr(const char __user *path, const char __user *name,
+sys_lsetxattr(const char __user *pathname, const char __user *name,
const void __user *value, size_t size, int flags)
{
- struct nameidata nd;
+ struct path path;
int error;
- error = user_path_walk_link(path, &nd);
+ error = user_lpath(pathname, &path);
if (error)
return error;
- error = mnt_want_write(nd.path.mnt);
+ error = mnt_want_write(path.mnt);
if (!error) {
- error = setxattr(nd.path.dentry, name, value, size, flags);
- mnt_drop_write(nd.path.mnt);
+ error = setxattr(path.dentry, name, value, size, flags);
+ mnt_drop_write(path.mnt);
}
- path_put(&nd.path);
+ path_put(&path);
return error;
}
@@ -350,32 +350,32 @@ getxattr(struct dentry *d, const char __user *name, void __user *value,
}
asmlinkage ssize_t
-sys_getxattr(const char __user *path, const char __user *name,
+sys_getxattr(const char __user *pathname, const char __user *name,
void __user *value, size_t size)
{
- struct nameidata nd;
+ struct path path;
ssize_t error;
- error = user_path_walk(path, &nd);
+ error = user_path(pathname, &path);
if (error)
return error;
- error = getxattr(nd.path.dentry, name, value, size);
- path_put(&nd.path);
+ error = getxattr(path.dentry, name, value, size);
+ path_put(&path);
return error;
}
asmlinkage ssize_t
-sys_lgetxattr(const char __user *path, const char __user *name, void __user *value,
+sys_lgetxattr(const char __user *pathname, const char __user *name, void __user *value,
size_t size)
{
- struct nameidata nd;
+ struct path path;
ssize_t error;
- error = user_path_walk_link(path, &nd);
+ error = user_lpath(pathname, &path);
if (error)
return error;
- error = getxattr(nd.path.dentry, name, value, size);
- path_put(&nd.path);
+ error = getxattr(path.dentry, name, value, size);
+ path_put(&path);
return error;
}
@@ -425,30 +425,30 @@ listxattr(struct dentry *d, char __user *list, size_t size)
}
asmlinkage ssize_t
-sys_listxattr(const char __user *path, char __user *list, size_t size)
+sys_listxattr(const char __user *pathname, char __user *list, size_t size)
{
- struct nameidata nd;
+ struct path path;
ssize_t error;
- error = user_path_walk(path, &nd);
+ error = user_path(pathname, &path);
if (error)
return error;
- error = listxattr(nd.path.dentry, list, size);
- path_put(&nd.path);
+ error = listxattr(path.dentry, list, size);
+ path_put(&path);
return error;
}
asmlinkage ssize_t
-sys_llistxattr(const char __user *path, char __user *list, size_t size)
+sys_llistxattr(const char __user *pathname, char __user *list, size_t size)
{
- struct nameidata nd;
+ struct path path;
ssize_t error;
- error = user_path_walk_link(path, &nd);
+ error = user_lpath(pathname, &path);
if (error)
return error;
- error = listxattr(nd.path.dentry, list, size);
- path_put(&nd.path);
+ error = listxattr(path.dentry, list, size);
+ path_put(&path);
return error;
}
@@ -486,38 +486,38 @@ removexattr(struct dentry *d, const char __user *name)
}
asmlinkage long
-sys_removexattr(const char __user *path, const char __user *name)
+sys_removexattr(const char __user *pathname, const char __user *name)
{
- struct nameidata nd;
+ struct path path;
int error;
- error = user_path_walk(path, &nd);
+ error = user_path(pathname, &path);
if (error)
return error;
- error = mnt_want_write(nd.path.mnt);
+ error = mnt_want_write(path.mnt);
if (!error) {
- error = removexattr(nd.path.dentry, name);
- mnt_drop_write(nd.path.mnt);
+ error = removexattr(path.dentry, name);
+ mnt_drop_write(path.mnt);
}
- path_put(&nd.path);
+ path_put(&path);
return error;
}
asmlinkage long
-sys_lremovexattr(const char __user *path, const char __user *name)
+sys_lremovexattr(const char __user *pathname, const char __user *name)
{
- struct nameidata nd;
+ struct path path;
int error;
- error = user_path_walk_link(path, &nd);
+ error = user_lpath(pathname, &path);
if (error)
return error;
- error = mnt_want_write(nd.path.mnt);
+ error = mnt_want_write(path.mnt);
if (!error) {
- error = removexattr(nd.path.dentry, name);
- mnt_drop_write(nd.path.mnt);
+ error = removexattr(path.dentry, name);
+ mnt_drop_write(path.mnt);
}
- path_put(&nd.path);
+ path_put(&path);
return error;
}
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 36ec614e699..737c9a42536 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -106,7 +106,8 @@ xfs-y += $(addprefix $(XFS_LINUX)/, \
xfs_iops.o \
xfs_lrw.o \
xfs_super.o \
- xfs_vnode.o)
+ xfs_vnode.o \
+ xfs_xattr.o)
# Objects in support/
xfs-y += $(addprefix support/, \
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c
index 9b1bb17a050..1cd3b55ee3d 100644
--- a/fs/xfs/linux-2.6/kmem.c
+++ b/fs/xfs/linux-2.6/kmem.c
@@ -90,7 +90,7 @@ kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize,
}
void
-kmem_free(void *ptr, size_t size)
+kmem_free(const void *ptr)
{
if (!is_vmalloc_addr(ptr)) {
kfree(ptr);
@@ -100,7 +100,7 @@ kmem_free(void *ptr, size_t size)
}
void *
-kmem_realloc(void *ptr, size_t newsize, size_t oldsize,
+kmem_realloc(const void *ptr, size_t newsize, size_t oldsize,
unsigned int __nocast flags)
{
void *new;
@@ -110,7 +110,7 @@ kmem_realloc(void *ptr, size_t newsize, size_t oldsize,
if (new)
memcpy(new, ptr,
((oldsize < newsize) ? oldsize : newsize));
- kmem_free(ptr, oldsize);
+ kmem_free(ptr);
}
return new;
}
diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h
index 5e956490297..af6843c7ee4 100644
--- a/fs/xfs/linux-2.6/kmem.h
+++ b/fs/xfs/linux-2.6/kmem.h
@@ -57,8 +57,8 @@ kmem_flags_convert(unsigned int __nocast flags)
extern void *kmem_alloc(size_t, unsigned int __nocast);
extern void *kmem_zalloc(size_t, unsigned int __nocast);
extern void *kmem_zalloc_greedy(size_t *, size_t, size_t, unsigned int __nocast);
-extern void *kmem_realloc(void *, size_t, size_t, unsigned int __nocast);
-extern void kmem_free(void *, size_t);
+extern void *kmem_realloc(const void *, size_t, size_t, unsigned int __nocast);
+extern void kmem_free(const void *);
/*
* Zone interfaces
@@ -79,7 +79,7 @@ kmem_zone_init(int size, char *zone_name)
static inline kmem_zone_t *
kmem_zone_init_flags(int size, char *zone_name, unsigned long flags,
- void (*construct)(kmem_zone_t *, void *))
+ void (*construct)(void *))
{
return kmem_cache_create(zone_name, size, 0, flags, construct);
}
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index a55c3b26d84..fa47e43b8b4 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -409,7 +409,6 @@ xfs_start_buffer_writeback(
STATIC void
xfs_start_page_writeback(
struct page *page,
- struct writeback_control *wbc,
int clear_dirty,
int buffers)
{
@@ -676,7 +675,7 @@ xfs_probe_cluster(
} else
pg_offset = PAGE_CACHE_SIZE;
- if (page->index == tindex && !TestSetPageLocked(page)) {
+ if (page->index == tindex && trylock_page(page)) {
pg_len = xfs_probe_page(page, pg_offset, mapped);
unlock_page(page);
}
@@ -760,7 +759,7 @@ xfs_convert_page(
if (page->index != tindex)
goto fail;
- if (TestSetPageLocked(page))
+ if (!trylock_page(page))
goto fail;
if (PageWriteback(page))
goto fail_unlock_page;
@@ -858,7 +857,7 @@ xfs_convert_page(
done = 1;
}
}
- xfs_start_page_writeback(page, wbc, !page_dirty, count);
+ xfs_start_page_writeback(page, !page_dirty, count);
}
return done;
@@ -1105,7 +1104,7 @@ xfs_page_state_convert(
* that we are writing into for the first time.
*/
type = IOMAP_NEW;
- if (!test_and_set_bit(BH_Lock, &bh->b_state)) {
+ if (trylock_buffer(bh)) {
ASSERT(buffer_mapped(bh));
if (iomap_valid)
all_bh = 1;
@@ -1130,7 +1129,7 @@ xfs_page_state_convert(
SetPageUptodate(page);
if (startio)
- xfs_start_page_writeback(page, wbc, 1, count);
+ xfs_start_page_writeback(page, 1, count);
if (ioend && iomap_valid) {
offset = (iomap.iomap_offset + iomap.iomap_bsize - 1) >>
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 98e0e86093b..9cc8f021309 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -310,8 +310,7 @@ _xfs_buf_free_pages(
xfs_buf_t *bp)
{
if (bp->b_pages != bp->b_page_array) {
- kmem_free(bp->b_pages,
- bp->b_page_count * sizeof(struct page *));
+ kmem_free(bp->b_pages);
}
}
@@ -1398,7 +1397,7 @@ STATIC void
xfs_free_bufhash(
xfs_buftarg_t *btp)
{
- kmem_free(btp->bt_hash, (1<<btp->bt_hashshift) * sizeof(xfs_bufhash_t));
+ kmem_free(btp->bt_hash);
btp->bt_hash = NULL;
}
@@ -1428,13 +1427,10 @@ xfs_unregister_buftarg(
void
xfs_free_buftarg(
- xfs_buftarg_t *btp,
- int external)
+ xfs_buftarg_t *btp)
{
xfs_flush_buftarg(btp, 1);
xfs_blkdev_issue_flush(btp);
- if (external)
- xfs_blkdev_put(btp->bt_bdev);
xfs_free_bufhash(btp);
iput(btp->bt_mapping->host);
@@ -1444,7 +1440,7 @@ xfs_free_buftarg(
xfs_unregister_buftarg(btp);
kthread_stop(btp->bt_task);
- kmem_free(btp, sizeof(*btp));
+ kmem_free(btp);
}
STATIC int
@@ -1575,7 +1571,7 @@ xfs_alloc_buftarg(
return btp;
error:
- kmem_free(btp, sizeof(*btp));
+ kmem_free(btp);
return NULL;
}
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index f948ec7ba9a..29d1d4adc07 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -429,7 +429,7 @@ static inline void xfs_bdwrite(void *mp, xfs_buf_t *bp)
* Handling of buftargs.
*/
extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int);
-extern void xfs_free_buftarg(xfs_buftarg_t *, int);
+extern void xfs_free_buftarg(xfs_buftarg_t *);
extern void xfs_wait_buftarg(xfs_buftarg_t *);
extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
extern int xfs_flush_buftarg(xfs_buftarg_t *, int);
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index c672b3238b1..987fe84f7b1 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -215,7 +215,7 @@ xfs_fs_get_parent(
struct xfs_inode *cip;
struct dentry *parent;
- error = xfs_lookup(XFS_I(child->d_inode), &xfs_name_dotdot, &cip);
+ error = xfs_lookup(XFS_I(child->d_inode), &xfs_name_dotdot, &cip, NULL);
if (unlikely(error))
return ERR_PTR(-error);
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index a42ba9d7115..acb978d9d08 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -48,6 +48,8 @@
#include "xfs_dfrag.h"
#include "xfs_fsops.h"
#include "xfs_vnodeops.h"
+#include "xfs_quota.h"
+#include "xfs_inode_item.h"
#include <linux/capability.h>
#include <linux/dcache.h>
@@ -84,17 +86,15 @@ xfs_find_handle(
switch (cmd) {
case XFS_IOC_PATH_TO_FSHANDLE:
case XFS_IOC_PATH_TO_HANDLE: {
- struct nameidata nd;
- int error;
-
- error = user_path_walk_link((const char __user *)hreq.path, &nd);
+ struct path path;
+ int error = user_lpath((const char __user *)hreq.path, &path);
if (error)
return error;
- ASSERT(nd.path.dentry);
- ASSERT(nd.path.dentry->d_inode);
- inode = igrab(nd.path.dentry->d_inode);
- path_put(&nd.path);
+ ASSERT(path.dentry);
+ ASSERT(path.dentry->d_inode);
+ inode = igrab(path.dentry->d_inode);
+ path_put(&path);
break;
}
@@ -470,6 +470,12 @@ xfs_attrlist_by_handle(
if (al_hreq.buflen > XATTR_LIST_MAX)
return -XFS_ERROR(EINVAL);
+ /*
+ * Reject flags, only allow namespaces.
+ */
+ if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE))
+ return -XFS_ERROR(EINVAL);
+
error = xfs_vget_fsop_handlereq(mp, parinode, &al_hreq.hreq, &inode);
if (error)
goto out;
@@ -589,7 +595,7 @@ xfs_attrmulti_by_handle(
goto out;
error = E2BIG;
- size = am_hreq.opcount * sizeof(attr_multiop_t);
+ size = am_hreq.opcount * sizeof(xfs_attr_multiop_t);
if (!size || size > 16 * PAGE_SIZE)
goto out_vn_rele;
@@ -682,9 +688,9 @@ xfs_ioc_space(
return -XFS_ERROR(EFAULT);
if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
- attr_flags |= ATTR_NONBLOCK;
+ attr_flags |= XFS_ATTR_NONBLOCK;
if (ioflags & IO_INVIS)
- attr_flags |= ATTR_DMI;
+ attr_flags |= XFS_ATTR_DMI;
error = xfs_change_file_space(ip, cmd, &bf, filp->f_pos,
NULL, attr_flags);
@@ -875,6 +881,322 @@ xfs_ioc_fsgetxattr(
return 0;
}
+STATIC void
+xfs_set_diflags(
+ struct xfs_inode *ip,
+ unsigned int xflags)
+{
+ unsigned int di_flags;
+
+ /* can't set PREALLOC this way, just preserve it */
+ di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
+ if (xflags & XFS_XFLAG_IMMUTABLE)
+ di_flags |= XFS_DIFLAG_IMMUTABLE;
+ if (xflags & XFS_XFLAG_APPEND)
+ di_flags |= XFS_DIFLAG_APPEND;
+ if (xflags & XFS_XFLAG_SYNC)
+ di_flags |= XFS_DIFLAG_SYNC;
+ if (xflags & XFS_XFLAG_NOATIME)
+ di_flags |= XFS_DIFLAG_NOATIME;
+ if (xflags & XFS_XFLAG_NODUMP)
+ di_flags |= XFS_DIFLAG_NODUMP;
+ if (xflags & XFS_XFLAG_PROJINHERIT)
+ di_flags |= XFS_DIFLAG_PROJINHERIT;
+ if (xflags & XFS_XFLAG_NODEFRAG)
+ di_flags |= XFS_DIFLAG_NODEFRAG;
+ if (xflags & XFS_XFLAG_FILESTREAM)
+ di_flags |= XFS_DIFLAG_FILESTREAM;
+ if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
+ if (xflags & XFS_XFLAG_RTINHERIT)
+ di_flags |= XFS_DIFLAG_RTINHERIT;
+ if (xflags & XFS_XFLAG_NOSYMLINKS)
+ di_flags |= XFS_DIFLAG_NOSYMLINKS;
+ if (xflags & XFS_XFLAG_EXTSZINHERIT)
+ di_flags |= XFS_DIFLAG_EXTSZINHERIT;
+ } else if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) {
+ if (xflags & XFS_XFLAG_REALTIME)
+ di_flags |= XFS_DIFLAG_REALTIME;
+ if (xflags & XFS_XFLAG_EXTSIZE)
+ di_flags |= XFS_DIFLAG_EXTSIZE;
+ }
+
+ ip->i_d.di_flags = di_flags;
+}
+
+STATIC void
+xfs_diflags_to_linux(
+ struct xfs_inode *ip)
+{
+ struct inode *inode = XFS_ITOV(ip);
+ unsigned int xflags = xfs_ip2xflags(ip);
+
+ if (xflags & XFS_XFLAG_IMMUTABLE)
+ inode->i_flags |= S_IMMUTABLE;
+ else
+ inode->i_flags &= ~S_IMMUTABLE;
+ if (xflags & XFS_XFLAG_APPEND)
+ inode->i_flags |= S_APPEND;
+ else
+ inode->i_flags &= ~S_APPEND;
+ if (xflags & XFS_XFLAG_SYNC)
+ inode->i_flags |= S_SYNC;
+ else
+ inode->i_flags &= ~S_SYNC;
+ if (xflags & XFS_XFLAG_NOATIME)
+ inode->i_flags |= S_NOATIME;
+ else
+ inode->i_flags &= ~S_NOATIME;
+}
+
+#define FSX_PROJID 1
+#define FSX_EXTSIZE 2
+#define FSX_XFLAGS 4
+#define FSX_NONBLOCK 8
+
+STATIC int
+xfs_ioctl_setattr(
+ xfs_inode_t *ip,
+ struct fsxattr *fa,
+ int mask)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_trans *tp;
+ unsigned int lock_flags = 0;
+ struct xfs_dquot *udqp = NULL, *gdqp = NULL;
+ struct xfs_dquot *olddquot = NULL;
+ int code;
+
+ xfs_itrace_entry(ip);
+
+ if (mp->m_flags & XFS_MOUNT_RDONLY)
+ return XFS_ERROR(EROFS);
+ if (XFS_FORCED_SHUTDOWN(mp))
+ return XFS_ERROR(EIO);
+
+ /*
+ * If disk quotas are on, we make sure that the dquots do exist on disk,
+ * before we start any other transactions. Trying to do this later
+ * is messy. We don't care to take a readlock to look at the ids
+ * in inode here, because we can't hold it across the trans_reserve.
+ * If the IDs do change before we take the ilock, we're covered
+ * because the i_*dquot fields will get updated anyway.
+ */
+ if (XFS_IS_QUOTA_ON(mp) && (mask & FSX_PROJID)) {
+ code = XFS_QM_DQVOPALLOC(mp, ip, ip->i_d.di_uid,
+ ip->i_d.di_gid, fa->fsx_projid,
+ XFS_QMOPT_PQUOTA, &udqp, &gdqp);
+ if (code)
+ return code;
+ }
+
+ /*
+ * For the other attributes, we acquire the inode lock and
+ * first do an error checking pass.
+ */
+ tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
+ code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
+ if (code)
+ goto error_return;
+
+ lock_flags = XFS_ILOCK_EXCL;
+ xfs_ilock(ip, lock_flags);
+
+ /*
+ * CAP_FOWNER overrides the following restrictions:
+ *
+ * The user ID of the calling process must be equal
+ * to the file owner ID, except in cases where the
+ * CAP_FSETID capability is applicable.
+ */
+ if (current->fsuid != ip->i_d.di_uid && !capable(CAP_FOWNER)) {
+ code = XFS_ERROR(EPERM);
+ goto error_return;
+ }
+
+ /*
+ * Do a quota reservation only if projid is actually going to change.
+ */
+ if (mask & FSX_PROJID) {
+ if (XFS_IS_PQUOTA_ON(mp) &&
+ ip->i_d.di_projid != fa->fsx_projid) {
+ ASSERT(tp);
+ code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp,
+ capable(CAP_FOWNER) ?
+ XFS_QMOPT_FORCE_RES : 0);
+ if (code) /* out of quota */
+ goto error_return;
+ }
+ }
+
+ if (mask & FSX_EXTSIZE) {
+ /*
+ * Can't change extent size if any extents are allocated.
+ */
+ if (ip->i_d.di_nextents &&
+ ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) !=
+ fa->fsx_extsize)) {
+ code = XFS_ERROR(EINVAL); /* EFBIG? */
+ goto error_return;
+ }
+
+ /*
+ * Extent size must be a multiple of the appropriate block
+ * size, if set at all.
+ */
+ if (fa->fsx_extsize != 0) {
+ xfs_extlen_t size;
+
+ if (XFS_IS_REALTIME_INODE(ip) ||
+ ((mask & FSX_XFLAGS) &&
+ (fa->fsx_xflags & XFS_XFLAG_REALTIME))) {
+ size = mp->m_sb.sb_rextsize <<
+ mp->m_sb.sb_blocklog;
+ } else {
+ size = mp->m_sb.sb_blocksize;
+ }
+
+ if (fa->fsx_extsize % size) {
+ code = XFS_ERROR(EINVAL);
+ goto error_return;
+ }
+ }
+ }
+
+
+ if (mask & FSX_XFLAGS) {
+ /*
+ * Can't change realtime flag if any extents are allocated.
+ */
+ if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
+ (XFS_IS_REALTIME_INODE(ip)) !=
+ (fa->fsx_xflags & XFS_XFLAG_REALTIME)) {
+ code = XFS_ERROR(EINVAL); /* EFBIG? */
+ goto error_return;
+ }
+
+ /*
+ * If realtime flag is set then must have realtime data.
+ */
+ if ((fa->fsx_xflags & XFS_XFLAG_REALTIME)) {
+ if ((mp->m_sb.sb_rblocks == 0) ||
+ (mp->m_sb.sb_rextsize == 0) ||
+ (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) {
+ code = XFS_ERROR(EINVAL);
+ goto error_return;
+ }
+ }
+
+ /*
+ * Can't modify an immutable/append-only file unless
+ * we have appropriate permission.
+ */
+ if ((ip->i_d.di_flags &
+ (XFS_DIFLAG_IMMUTABLE|XFS_DIFLAG_APPEND) ||
+ (fa->fsx_xflags &
+ (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) &&
+ !capable(CAP_LINUX_IMMUTABLE)) {
+ code = XFS_ERROR(EPERM);
+ goto error_return;
+ }
+ }
+
+ xfs_trans_ijoin(tp, ip, lock_flags);
+ xfs_trans_ihold(tp, ip);
+
+ /*
+ * Change file ownership. Must be the owner or privileged.
+ * If the system was configured with the "restricted_chown"
+ * option, the owner is not permitted to give away the file,
+ * and can change the group id only to a group of which he
+ * or she is a member.
+ */
+ if (mask & FSX_PROJID) {
+ /*
+ * CAP_FSETID overrides the following restrictions:
+ *
+ * The set-user-ID and set-group-ID bits of a file will be
+ * cleared upon successful return from chown()
+ */
+ if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
+ !capable(CAP_FSETID))
+ ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
+
+ /*
+ * Change the ownerships and register quota modifications
+ * in the transaction.
+ */
+ if (ip->i_d.di_projid != fa->fsx_projid) {
+ if (XFS_IS_PQUOTA_ON(mp)) {
+ olddquot = XFS_QM_DQVOPCHOWN(mp, tp, ip,
+ &ip->i_gdquot, gdqp);
+ }
+ ip->i_d.di_projid = fa->fsx_projid;
+
+ /*
+ * We may have to rev the inode as well as
+ * the superblock version number since projids didn't
+ * exist before DINODE_VERSION_2 and SB_VERSION_NLINK.
+ */
+ if (ip->i_d.di_version == XFS_DINODE_VERSION_1)
+ xfs_bump_ino_vers2(tp, ip);
+ }
+
+ }
+
+ if (mask & FSX_EXTSIZE)
+ ip->i_d.di_extsize = fa->fsx_extsize >> mp->m_sb.sb_blocklog;
+ if (mask & FSX_XFLAGS) {
+ xfs_set_diflags(ip, fa->fsx_xflags);
+ xfs_diflags_to_linux(ip);
+ }
+
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+ xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
+
+ XFS_STATS_INC(xs_ig_attrchg);
+
+ /*
+ * If this is a synchronous mount, make sure that the
+ * transaction goes to disk before returning to the user.
+ * This is slightly sub-optimal in that truncates require
+ * two sync transactions instead of one for wsync filesystems.
+ * One for the truncate and one for the timestamps since we
+ * don't want to change the timestamps unless we're sure the
+ * truncate worked. Truncates are less than 1% of the laddis
+ * mix so this probably isn't worth the trouble to optimize.
+ */
+ if (mp->m_flags & XFS_MOUNT_WSYNC)
+ xfs_trans_set_sync(tp);
+ code = xfs_trans_commit(tp, 0);
+ xfs_iunlock(ip, lock_flags);
+
+ /*
+ * Release any dquot(s) the inode had kept before chown.
+ */
+ XFS_QM_DQRELE(mp, olddquot);
+ XFS_QM_DQRELE(mp, udqp);
+ XFS_QM_DQRELE(mp, gdqp);
+
+ if (code)
+ return code;
+
+ if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE)) {
+ XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, ip, DM_RIGHT_NULL,
+ NULL, DM_RIGHT_NULL, NULL, NULL, 0, 0,
+ (mask & FSX_NONBLOCK) ? DM_FLAGS_NDELAY : 0);
+ }
+
+ return 0;
+
+ error_return:
+ XFS_QM_DQRELE(mp, udqp);
+ XFS_QM_DQRELE(mp, gdqp);
+ xfs_trans_cancel(tp, 0);
+ if (lock_flags)
+ xfs_iunlock(ip, lock_flags);
+ return code;
+}
+
STATIC int
xfs_ioc_fssetxattr(
xfs_inode_t *ip,
@@ -882,31 +1204,16 @@ xfs_ioc_fssetxattr(
void __user *arg)
{
struct fsxattr fa;
- struct bhv_vattr *vattr;
- int error;
- int attr_flags;
+ unsigned int mask;
if (copy_from_user(&fa, arg, sizeof(fa)))
return -EFAULT;
- vattr = kmalloc(sizeof(*vattr), GFP_KERNEL);
- if (unlikely(!vattr))
- return -ENOMEM;
-
- attr_flags = 0;
+ mask = FSX_XFLAGS | FSX_EXTSIZE | FSX_PROJID;
if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
- attr_flags |= ATTR_NONBLOCK;
-
- vattr->va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | XFS_AT_PROJID;
- vattr->va_xflags = fa.fsx_xflags;
- vattr->va_extsize = fa.fsx_extsize;
- vattr->va_projid = fa.fsx_projid;
+ mask |= FSX_NONBLOCK;
- error = -xfs_setattr(ip, vattr, attr_flags, NULL);
- if (!error)
- vn_revalidate(XFS_ITOV(ip)); /* update flags */
- kfree(vattr);
- return 0;
+ return -xfs_ioctl_setattr(ip, &fa, mask);
}
STATIC int
@@ -928,10 +1235,9 @@ xfs_ioc_setxflags(
struct file *filp,
void __user *arg)
{
- struct bhv_vattr *vattr;
+ struct fsxattr fa;
unsigned int flags;
- int attr_flags;
- int error;
+ unsigned int mask;
if (copy_from_user(&flags, arg, sizeof(flags)))
return -EFAULT;
@@ -941,22 +1247,12 @@ xfs_ioc_setxflags(
FS_SYNC_FL))
return -EOPNOTSUPP;
- vattr = kmalloc(sizeof(*vattr), GFP_KERNEL);
- if (unlikely(!vattr))
- return -ENOMEM;
-
- attr_flags = 0;
+ mask = FSX_XFLAGS;
if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
- attr_flags |= ATTR_NONBLOCK;
-
- vattr->va_mask = XFS_AT_XFLAGS;
- vattr->va_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip));
+ mask |= FSX_NONBLOCK;
+ fa.fsx_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip));
- error = -xfs_setattr(ip, vattr, attr_flags, NULL);
- if (likely(!error))
- vn_revalidate(XFS_ITOV(ip)); /* update flags */
- kfree(vattr);
- return error;
+ return -xfs_ioctl_setattr(ip, &fa, mask);
}
STATIC int
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 2bf287ef548..e88f5102808 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -181,23 +181,6 @@ xfs_ichgtime_fast(
mark_inode_dirty_sync(inode);
}
-
-/*
- * Pull the link count and size up from the xfs inode to the linux inode
- */
-STATIC void
-xfs_validate_fields(
- struct inode *inode)
-{
- struct xfs_inode *ip = XFS_I(inode);
- loff_t size;
-
- /* we're under i_sem so i_size can't change under us */
- size = XFS_ISIZE(ip);
- if (i_size_read(inode) != size)
- i_size_write(inode, size);
-}
-
/*
* Hook in SELinux. This is not quite correct yet, what we really need
* here (as we do for default ACLs) is a mechanism by which creation of
@@ -245,8 +228,7 @@ STATIC void
xfs_cleanup_inode(
struct inode *dir,
struct inode *inode,
- struct dentry *dentry,
- int mode)
+ struct dentry *dentry)
{
struct xfs_name teardown;
@@ -257,10 +239,7 @@ xfs_cleanup_inode(
*/
xfs_dentry_to_name(&teardown, dentry);
- if (S_ISDIR(mode))
- xfs_rmdir(XFS_I(dir), &teardown, XFS_I(inode));
- else
- xfs_remove(XFS_I(dir), &teardown, XFS_I(inode));
+ xfs_remove(XFS_I(dir), &teardown, XFS_I(inode));
iput(inode);
}
@@ -275,7 +254,7 @@ xfs_vn_mknod(
struct xfs_inode *ip = NULL;
xfs_acl_t *default_acl = NULL;
struct xfs_name name;
- attrexists_t test_default_acl = _ACL_DEFAULT_EXISTS;
+ int (*test_default_acl)(struct inode *) = _ACL_DEFAULT_EXISTS;
int error;
/*
@@ -335,14 +314,11 @@ xfs_vn_mknod(
}
- if (S_ISDIR(mode))
- xfs_validate_fields(inode);
d_instantiate(dentry, inode);
- xfs_validate_fields(dir);
return -error;
out_cleanup_inode:
- xfs_cleanup_inode(dir, inode, dentry, mode);
+ xfs_cleanup_inode(dir, inode, dentry);
out_free_acl:
if (default_acl)
_ACL_FREE(default_acl);
@@ -382,7 +358,7 @@ xfs_vn_lookup(
return ERR_PTR(-ENAMETOOLONG);
xfs_dentry_to_name(&name, dentry);
- error = xfs_lookup(XFS_I(dir), &name, &cip);
+ error = xfs_lookup(XFS_I(dir), &name, &cip, NULL);
if (unlikely(error)) {
if (unlikely(error != ENOENT))
return ERR_PTR(-error);
@@ -393,6 +369,46 @@ xfs_vn_lookup(
return d_splice_alias(cip->i_vnode, dentry);
}
+/*
+ * Case-insensitive directory lookup; installed as the .lookup method of
+ * xfs_dir_ci_inode_operations.
+ *
+ * Names of MAXNAMELEN bytes or more are rejected with -ENAMETOOLONG.
+ * xfs_lookup() is called with a ci_name out-parameter.  Errors other
+ * than ENOENT are negated and returned through ERR_PTR(); ENOENT returns
+ * NULL without instantiating a negative dentry.  On success the dentry is
+ * either spliced directly (ci_name.name is NULL, treated as an exact
+ * match) or added through d_add_ci() using the name handed back by
+ * xfs_lookup(), which is freed here afterwards.
+ *
+ * The nd argument is not used.
+ */
+STATIC struct dentry *
+xfs_vn_ci_lookup(
+ struct inode *dir,
+ struct dentry *dentry,
+ struct nameidata *nd)
+{
+ struct xfs_inode *ip;
+ struct xfs_name xname;
+ struct xfs_name ci_name;
+ struct qstr dname;
+ int error;
+
+ if (dentry->d_name.len >= MAXNAMELEN)
+ return ERR_PTR(-ENAMETOOLONG);
+
+ xfs_dentry_to_name(&xname, dentry);
+ error = xfs_lookup(XFS_I(dir), &xname, &ip, &ci_name);
+ if (unlikely(error)) {
+ /* xfs_lookup() reports positive errnos; negate for the VFS */
+ if (unlikely(error != ENOENT))
+ return ERR_PTR(-error);
+ /*
+ * call d_add(dentry, NULL) here when d_drop_negative_children
+ * is called in xfs_vn_mknod (ie. allow negative dentries
+ * with CI filesystems).
+ */
+ return NULL;
+ }
+
+ /* if exact match, just splice and exit */
+ if (!ci_name.name)
+ return d_splice_alias(ip->i_vnode, dentry);
+
+ /* else case-insensitive match... */
+ dname.name = ci_name.name;
+ dname.len = ci_name.len;
+ dentry = d_add_ci(ip->i_vnode, dentry, &dname);
+ /* the name buffer returned in ci_name is released here, after use */
+ kmem_free(ci_name.name);
+ return dentry;
+}
+
+
STATIC int
xfs_vn_link(
struct dentry *old_dentry,
@@ -414,7 +430,6 @@ xfs_vn_link(
}
xfs_iflags_set(XFS_I(dir), XFS_IMODIFIED);
- xfs_validate_fields(inode);
d_instantiate(dentry, inode);
return 0;
}
@@ -424,19 +439,23 @@ xfs_vn_unlink(
struct inode *dir,
struct dentry *dentry)
{
- struct inode *inode;
struct xfs_name name;
int error;
- inode = dentry->d_inode;
xfs_dentry_to_name(&name, dentry);
- error = xfs_remove(XFS_I(dir), &name, XFS_I(inode));
- if (likely(!error)) {
- xfs_validate_fields(dir); /* size needs update */
- xfs_validate_fields(inode);
- }
- return -error;
+ error = -xfs_remove(XFS_I(dir), &name, XFS_I(dentry->d_inode));
+ if (error)
+ return error;
+
+ /*
+ * With unlink, the VFS makes the dentry "negative": no inode,
+ * but still hashed. This is incompatible with case-insensitive
+ * mode, so invalidate (unhash) the dentry in CI-mode.
+ */
+ if (xfs_sb_version_hasasciici(&XFS_M(dir->i_sb)->m_sb))
+ d_invalidate(dentry);
+ return 0;
}
STATIC int
@@ -466,36 +485,15 @@ xfs_vn_symlink(
goto out_cleanup_inode;
d_instantiate(dentry, inode);
- xfs_validate_fields(dir);
- xfs_validate_fields(inode);
return 0;
out_cleanup_inode:
- xfs_cleanup_inode(dir, inode, dentry, 0);
+ xfs_cleanup_inode(dir, inode, dentry);
out:
return -error;
}
STATIC int
-xfs_vn_rmdir(
- struct inode *dir,
- struct dentry *dentry)
-{
- struct inode *inode = dentry->d_inode;
- struct xfs_name name;
- int error;
-
- xfs_dentry_to_name(&name, dentry);
-
- error = xfs_rmdir(XFS_I(dir), &name, XFS_I(inode));
- if (likely(!error)) {
- xfs_validate_fields(inode);
- xfs_validate_fields(dir);
- }
- return -error;
-}
-
-STATIC int
xfs_vn_rename(
struct inode *odir,
struct dentry *odentry,
@@ -505,22 +503,13 @@ xfs_vn_rename(
struct inode *new_inode = ndentry->d_inode;
struct xfs_name oname;
struct xfs_name nname;
- int error;
xfs_dentry_to_name(&oname, odentry);
xfs_dentry_to_name(&nname, ndentry);
- error = xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode),
+ return -xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode),
XFS_I(ndir), &nname, new_inode ?
XFS_I(new_inode) : NULL);
- if (likely(!error)) {
- if (new_inode)
- xfs_validate_fields(new_inode);
- xfs_validate_fields(odir);
- if (ndir != odir)
- xfs_validate_fields(ndir);
- }
- return -error;
}
/*
@@ -589,8 +578,7 @@ xfs_check_acl(
STATIC int
xfs_vn_permission(
struct inode *inode,
- int mask,
- struct nameidata *nd)
+ int mask)
{
return generic_permission(inode, mask, xfs_check_acl);
}
@@ -660,57 +648,9 @@ xfs_vn_getattr(
STATIC int
xfs_vn_setattr(
struct dentry *dentry,
- struct iattr *attr)
+ struct iattr *iattr)
{
- struct inode *inode = dentry->d_inode;
- unsigned int ia_valid = attr->ia_valid;
- bhv_vattr_t vattr = { 0 };
- int flags = 0;
- int error;
-
- if (ia_valid & ATTR_UID) {
- vattr.va_mask |= XFS_AT_UID;
- vattr.va_uid = attr->ia_uid;
- }
- if (ia_valid & ATTR_GID) {
- vattr.va_mask |= XFS_AT_GID;
- vattr.va_gid = attr->ia_gid;
- }
- if (ia_valid & ATTR_SIZE) {
- vattr.va_mask |= XFS_AT_SIZE;
- vattr.va_size = attr->ia_size;
- }
- if (ia_valid & ATTR_ATIME) {
- vattr.va_mask |= XFS_AT_ATIME;
- vattr.va_atime = attr->ia_atime;
- inode->i_atime = attr->ia_atime;
- }
- if (ia_valid & ATTR_MTIME) {
- vattr.va_mask |= XFS_AT_MTIME;
- vattr.va_mtime = attr->ia_mtime;
- }
- if (ia_valid & ATTR_CTIME) {
- vattr.va_mask |= XFS_AT_CTIME;
- vattr.va_ctime = attr->ia_ctime;
- }
- if (ia_valid & ATTR_MODE) {
- vattr.va_mask |= XFS_AT_MODE;
- vattr.va_mode = attr->ia_mode;
- if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
- inode->i_mode &= ~S_ISGID;
- }
-
- if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET))
- flags |= ATTR_UTIME;
-#ifdef ATTR_NO_BLOCK
- if ((ia_valid & ATTR_NO_BLOCK))
- flags |= ATTR_NONBLOCK;
-#endif
-
- error = xfs_setattr(XFS_I(inode), &vattr, flags, NULL);
- if (likely(!error))
- vn_revalidate(vn_from_inode(inode));
- return -error;
+ return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0, NULL);
}
/*
@@ -728,109 +668,6 @@ xfs_vn_truncate(
WARN_ON(error);
}
-STATIC int
-xfs_vn_setxattr(
- struct dentry *dentry,
- const char *name,
- const void *data,
- size_t size,
- int flags)
-{
- bhv_vnode_t *vp = vn_from_inode(dentry->d_inode);
- char *attr = (char *)name;
- attrnames_t *namesp;
- int xflags = 0;
- int error;
-
- namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT);
- if (!namesp)
- return -EOPNOTSUPP;
- attr += namesp->attr_namelen;
- error = namesp->attr_capable(vp, NULL);
- if (error)
- return error;
-
- /* Convert Linux syscall to XFS internal ATTR flags */
- if (flags & XATTR_CREATE)
- xflags |= ATTR_CREATE;
- if (flags & XATTR_REPLACE)
- xflags |= ATTR_REPLACE;
- xflags |= namesp->attr_flag;
- return namesp->attr_set(vp, attr, (void *)data, size, xflags);
-}
-
-STATIC ssize_t
-xfs_vn_getxattr(
- struct dentry *dentry,
- const char *name,
- void *data,
- size_t size)
-{
- bhv_vnode_t *vp = vn_from_inode(dentry->d_inode);
- char *attr = (char *)name;
- attrnames_t *namesp;
- int xflags = 0;
- ssize_t error;
-
- namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT);
- if (!namesp)
- return -EOPNOTSUPP;
- attr += namesp->attr_namelen;
- error = namesp->attr_capable(vp, NULL);
- if (error)
- return error;
-
- /* Convert Linux syscall to XFS internal ATTR flags */
- if (!size) {
- xflags |= ATTR_KERNOVAL;
- data = NULL;
- }
- xflags |= namesp->attr_flag;
- return namesp->attr_get(vp, attr, (void *)data, size, xflags);
-}
-
-STATIC ssize_t
-xfs_vn_listxattr(
- struct dentry *dentry,
- char *data,
- size_t size)
-{
- bhv_vnode_t *vp = vn_from_inode(dentry->d_inode);
- int error, xflags = ATTR_KERNAMELS;
- ssize_t result;
-
- if (!size)
- xflags |= ATTR_KERNOVAL;
- xflags |= capable(CAP_SYS_ADMIN) ? ATTR_KERNFULLS : ATTR_KERNORMALS;
-
- error = attr_generic_list(vp, data, size, xflags, &result);
- if (error < 0)
- return error;
- return result;
-}
-
-STATIC int
-xfs_vn_removexattr(
- struct dentry *dentry,
- const char *name)
-{
- bhv_vnode_t *vp = vn_from_inode(dentry->d_inode);
- char *attr = (char *)name;
- attrnames_t *namesp;
- int xflags = 0;
- int error;
-
- namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT);
- if (!namesp)
- return -EOPNOTSUPP;
- attr += namesp->attr_namelen;
- error = namesp->attr_capable(vp, NULL);
- if (error)
- return error;
- xflags |= namesp->attr_flag;
- return namesp->attr_remove(vp, attr, xflags);
-}
-
STATIC long
xfs_vn_fallocate(
struct inode *inode,
@@ -854,18 +691,18 @@ xfs_vn_fallocate(
xfs_ilock(ip, XFS_IOLOCK_EXCL);
error = xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf,
- 0, NULL, ATTR_NOLOCK);
+ 0, NULL, XFS_ATTR_NOLOCK);
if (!error && !(mode & FALLOC_FL_KEEP_SIZE) &&
offset + len > i_size_read(inode))
new_size = offset + len;
/* Change file size if needed */
if (new_size) {
- bhv_vattr_t va;
+ struct iattr iattr;
- va.va_mask = XFS_AT_SIZE;
- va.va_size = new_size;
- error = xfs_setattr(ip, &va, ATTR_NOLOCK, NULL);
+ iattr.ia_valid = ATTR_SIZE;
+ iattr.ia_size = new_size;
+ error = xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK, NULL);
}
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
@@ -878,10 +715,10 @@ const struct inode_operations xfs_inode_operations = {
.truncate = xfs_vn_truncate,
.getattr = xfs_vn_getattr,
.setattr = xfs_vn_setattr,
- .setxattr = xfs_vn_setxattr,
- .getxattr = xfs_vn_getxattr,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
+ .removexattr = generic_removexattr,
.listxattr = xfs_vn_listxattr,
- .removexattr = xfs_vn_removexattr,
.fallocate = xfs_vn_fallocate,
};
@@ -892,16 +729,47 @@ const struct inode_operations xfs_dir_inode_operations = {
.unlink = xfs_vn_unlink,
.symlink = xfs_vn_symlink,
.mkdir = xfs_vn_mkdir,
- .rmdir = xfs_vn_rmdir,
+ /*
+ * Yes, XFS uses the same method for rmdir and unlink.
+ *
+ * There are some subtle differences deeper in the code,
+ * but we use S_ISDIR to check for those.
+ */
+ .rmdir = xfs_vn_unlink,
+ .mknod = xfs_vn_mknod,
+ .rename = xfs_vn_rename,
+ .permission = xfs_vn_permission,
+ .getattr = xfs_vn_getattr,
+ .setattr = xfs_vn_setattr,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
+ .removexattr = generic_removexattr,
+ .listxattr = xfs_vn_listxattr,
+};
+
+const struct inode_operations xfs_dir_ci_inode_operations = {
+ .create = xfs_vn_create,
+ .lookup = xfs_vn_ci_lookup,
+ .link = xfs_vn_link,
+ .unlink = xfs_vn_unlink,
+ .symlink = xfs_vn_symlink,
+ .mkdir = xfs_vn_mkdir,
+ /*
+ * Yes, XFS uses the same method for rmdir and unlink.
+ *
+ * There are some subtle differences deeper in the code,
+ * but we use S_ISDIR to check for those.
+ */
+ .rmdir = xfs_vn_unlink,
.mknod = xfs_vn_mknod,
.rename = xfs_vn_rename,
.permission = xfs_vn_permission,
.getattr = xfs_vn_getattr,
.setattr = xfs_vn_setattr,
- .setxattr = xfs_vn_setxattr,
- .getxattr = xfs_vn_getxattr,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
+ .removexattr = generic_removexattr,
.listxattr = xfs_vn_listxattr,
- .removexattr = xfs_vn_removexattr,
};
const struct inode_operations xfs_symlink_inode_operations = {
@@ -911,8 +779,8 @@ const struct inode_operations xfs_symlink_inode_operations = {
.permission = xfs_vn_permission,
.getattr = xfs_vn_getattr,
.setattr = xfs_vn_setattr,
- .setxattr = xfs_vn_setxattr,
- .getxattr = xfs_vn_getxattr,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
+ .removexattr = generic_removexattr,
.listxattr = xfs_vn_listxattr,
- .removexattr = xfs_vn_removexattr,
};
diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h
index 14d0deb7aff..d97ba934a2a 100644
--- a/fs/xfs/linux-2.6/xfs_iops.h
+++ b/fs/xfs/linux-2.6/xfs_iops.h
@@ -20,12 +20,14 @@
extern const struct inode_operations xfs_inode_operations;
extern const struct inode_operations xfs_dir_inode_operations;
+extern const struct inode_operations xfs_dir_ci_inode_operations;
extern const struct inode_operations xfs_symlink_inode_operations;
extern const struct file_operations xfs_file_operations;
extern const struct file_operations xfs_dir_file_operations;
extern const struct file_operations xfs_invis_file_operations;
+extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size);
struct xfs_inode;
extern void xfs_ichgtime(struct xfs_inode *, int);
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 4edc46915b5..4d45d9351a6 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -76,6 +76,7 @@
#include <linux/log2.h>
#include <linux/spinlock.h>
#include <linux/random.h>
+#include <linux/ctype.h>
#include <asm/page.h>
#include <asm/div64.h>
@@ -299,4 +300,11 @@ static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y)
return x;
}
+/* ARM old ABI has some weird alignment/padding */
+#if defined(__arm__) && !defined(__ARM_EABI__)
+#define __arch_pack __attribute__((packed))
+#else
+#define __arch_pack
+#endif
+
#endif /* __XFS_LINUX__ */
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 5e3b57516ec..82333b3e118 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -711,7 +711,7 @@ start:
!capable(CAP_FSETID)) {
error = xfs_write_clear_setuid(xip);
if (likely(!error))
- error = -remove_suid(file->f_path.dentry);
+ error = -file_remove_suid(file);
if (unlikely(error)) {
goto out_unlock_internal;
}
diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c
index e480b610205..3d5b67c075c 100644
--- a/fs/xfs/linux-2.6/xfs_stats.c
+++ b/fs/xfs/linux-2.6/xfs_stats.c
@@ -98,12 +98,21 @@ xfs_read_xfsstats(
return len;
}
-void
+int
xfs_init_procfs(void)
{
if (!proc_mkdir("fs/xfs", NULL))
- return;
- create_proc_read_entry("fs/xfs/stat", 0, NULL, xfs_read_xfsstats, NULL);
+ goto out;
+
+ if (!create_proc_read_entry("fs/xfs/stat", 0, NULL,
+ xfs_read_xfsstats, NULL))
+ goto out_remove_entry;
+ return 0;
+
+ out_remove_entry:
+ remove_proc_entry("fs/xfs", NULL);
+ out:
+ return -ENOMEM;
}
void
diff --git a/fs/xfs/linux-2.6/xfs_stats.h b/fs/xfs/linux-2.6/xfs_stats.h
index afd0b0d5fdb..e83820febc9 100644
--- a/fs/xfs/linux-2.6/xfs_stats.h
+++ b/fs/xfs/linux-2.6/xfs_stats.h
@@ -134,7 +134,7 @@ DECLARE_PER_CPU(struct xfsstats, xfsstats);
#define XFS_STATS_DEC(v) (per_cpu(xfsstats, current_cpu()).v--)
#define XFS_STATS_ADD(v, inc) (per_cpu(xfsstats, current_cpu()).v += (inc))
-extern void xfs_init_procfs(void);
+extern int xfs_init_procfs(void);
extern void xfs_cleanup_procfs(void);
@@ -144,8 +144,14 @@ extern void xfs_cleanup_procfs(void);
# define XFS_STATS_DEC(count)
# define XFS_STATS_ADD(count, inc)
-static inline void xfs_init_procfs(void) { };
-static inline void xfs_cleanup_procfs(void) { };
+static inline int xfs_init_procfs(void)
+{
+ return 0;
+}
+
+static inline void xfs_cleanup_procfs(void)
+{
+}
#endif /* !CONFIG_PROC_FS */
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 742b2c7852c..30ae96397e3 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -52,6 +52,12 @@
#include "xfs_version.h"
#include "xfs_log_priv.h"
#include "xfs_trans_priv.h"
+#include "xfs_filestream.h"
+#include "xfs_da_btree.h"
+#include "xfs_dir2_trace.h"
+#include "xfs_extfree_item.h"
+#include "xfs_mru_cache.h"
+#include "xfs_inode_item.h"
#include <linux/namei.h>
#include <linux/init.h>
@@ -60,6 +66,7 @@
#include <linux/writeback.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
+#include <linux/parser.h>
static struct quotactl_ops xfs_quotactl_operations;
static struct super_operations xfs_super_operations;
@@ -74,7 +81,10 @@ xfs_args_allocate(
{
struct xfs_mount_args *args;
- args = kmem_zalloc(sizeof(struct xfs_mount_args), KM_SLEEP);
+ args = kzalloc(sizeof(struct xfs_mount_args), GFP_KERNEL);
+ if (!args)
+ return NULL;
+
args->logbufs = args->logbufsize = -1;
strncpy(args->fsname, sb->s_id, MAXNAMELEN);
@@ -138,6 +148,23 @@ xfs_args_allocate(
#define MNTOPT_XDSM "xdsm" /* DMI enabled (DMAPI / XDSM) */
#define MNTOPT_DMI "dmi" /* DMI enabled (DMAPI / XDSM) */
+/*
+ * Table driven mount option parser.
+ *
+ * Currently only used for remount, but it will be used for mount
+ * in the future, too.
+ */
+enum {
+ Opt_barrier, Opt_nobarrier, Opt_err
+};
+
+static match_table_t tokens = {
+ {Opt_barrier, "barrier"},
+ {Opt_nobarrier, "nobarrier"},
+ {Opt_err, NULL}
+};
+
+
STATIC unsigned long
suffix_strtoul(char *s, char **endp, unsigned int base)
{
@@ -314,6 +341,7 @@ xfs_parseargs(
args->flags |= XFSMNT_ATTR2;
} else if (!strcmp(this_char, MNTOPT_NOATTR2)) {
args->flags &= ~XFSMNT_ATTR2;
+ args->flags |= XFSMNT_NOATTR2;
} else if (!strcmp(this_char, MNTOPT_FILESTREAM)) {
args->flags2 |= XFSMNT2_FILESTREAMS;
} else if (!strcmp(this_char, MNTOPT_NOQUOTA)) {
@@ -564,7 +592,10 @@ xfs_set_inodeops(
inode->i_mapping->a_ops = &xfs_address_space_operations;
break;
case S_IFDIR:
- inode->i_op = &xfs_dir_inode_operations;
+ if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
+ inode->i_op = &xfs_dir_ci_inode_operations;
+ else
+ inode->i_op = &xfs_dir_inode_operations;
inode->i_fop = &xfs_dir_file_operations;
break;
case S_IFLNK:
@@ -733,14 +764,6 @@ xfs_mountfs_check_barriers(xfs_mount_t *mp)
return;
}
- if (mp->m_ddev_targp->bt_bdev->bd_disk->queue->ordered ==
- QUEUE_ORDERED_NONE) {
- xfs_fs_cmn_err(CE_NOTE, mp,
- "Disabling barriers, not supported by the underlying device");
- mp->m_flags &= ~XFS_MOUNT_BARRIER;
- return;
- }
-
if (xfs_readonly_buftarg(mp->m_ddev_targp)) {
xfs_fs_cmn_err(CE_NOTE, mp,
"Disabling barriers, underlying device is readonly");
@@ -764,6 +787,139 @@ xfs_blkdev_issue_flush(
blkdev_issue_flush(buftarg->bt_bdev, NULL);
}
+/*
+ * Free the buffer targets attached to the mount and put the log and
+ * realtime block devices.
+ *
+ * The log target is only freed (and its block device put) when it is set
+ * and is not the same target as the data device.  The data device target
+ * is freed last; its block device is not put by this function.
+ */
+STATIC void
+xfs_close_devices(
+ struct xfs_mount *mp)
+{
+ if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
+ /* read bt_bdev before the buftarg that holds it is freed */
+ struct block_device *logdev = mp->m_logdev_targp->bt_bdev;
+ xfs_free_buftarg(mp->m_logdev_targp);
+ xfs_blkdev_put(logdev);
+ }
+ if (mp->m_rtdev_targp) {
+ /* same ordering as above: save the bdev, free the target, put */
+ struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;
+ xfs_free_buftarg(mp->m_rtdev_targp);
+ xfs_blkdev_put(rtdev);
+ }
+ xfs_free_buftarg(mp->m_ddev_targp);
+}
+
+
+/*
+ * The file system configurations are:
+ * (1) device (partition) with data and internal log
+ * (2) logical volume with data and log subvolumes.
+ * (3) logical volume with data, log, and realtime subvolumes.
+ *
+ * We only have to handle opening the log and realtime volumes here if
+ * they are present. The data subvolume has already been opened by
+ * get_sb_bdev() and is stored in sb->s_bdev.
+ *
+ * On success returns 0 with mp->m_ddev_targp, mp->m_logdev_targp and
+ * (when a realtime device was named) mp->m_rtdev_targp set up.  On
+ * failure returns a positive errno: the value from xfs_blkdev_get(),
+ * EINVAL when the realtime device is the same as the data or log device,
+ * or ENOMEM when a buffer target cannot be allocated.  Anything opened
+ * or allocated here is released again through the labels at the end.
+ *
+ * NOTE(review): when the named log device turns out to be the data
+ * device (logdev == ddev), no path visible here puts the reference taken
+ * by xfs_blkdev_get() - confirm whether that is intended.
+ */
+STATIC int
+xfs_open_devices(
+ struct xfs_mount *mp,
+ struct xfs_mount_args *args)
+{
+ struct block_device *ddev = mp->m_super->s_bdev;
+ struct block_device *logdev = NULL, *rtdev = NULL;
+ int error;
+
+ /*
+ * Open real time and log devices - order is important.
+ */
+ if (args->logname[0]) {
+ error = xfs_blkdev_get(mp, args->logname, &logdev);
+ if (error)
+ goto out;
+ }
+
+ if (args->rtname[0]) {
+ error = xfs_blkdev_get(mp, args->rtname, &rtdev);
+ if (error)
+ goto out_close_logdev;
+
+ if (rtdev == ddev || rtdev == logdev) {
+ cmn_err(CE_WARN,
+ "XFS: Cannot mount filesystem with identical rtdev and ddev/logdev.");
+ error = EINVAL;
+ goto out_close_rtdev;
+ }
+ }
+
+ /*
+ * Setup xfs_mount buffer target pointers
+ *
+ * error is preset to ENOMEM so every allocation failure below can
+ * jump straight to its cleanup label.
+ */
+ error = ENOMEM;
+ mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0);
+ if (!mp->m_ddev_targp)
+ goto out_close_rtdev;
+
+ if (rtdev) {
+ mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1);
+ if (!mp->m_rtdev_targp)
+ goto out_free_ddev_targ;
+ }
+
+ if (logdev && logdev != ddev) {
+ mp->m_logdev_targp = xfs_alloc_buftarg(logdev, 1);
+ if (!mp->m_logdev_targp)
+ goto out_free_rtdev_targ;
+ } else {
+ /* no separate log device: the log target aliases the data target */
+ mp->m_logdev_targp = mp->m_ddev_targp;
+ }
+
+ return 0;
+
+ /* error unwinding: labels fall through, undoing the steps above in reverse */
+ out_free_rtdev_targ:
+ if (mp->m_rtdev_targp)
+ xfs_free_buftarg(mp->m_rtdev_targp);
+ out_free_ddev_targ:
+ xfs_free_buftarg(mp->m_ddev_targp);
+ out_close_rtdev:
+ if (rtdev)
+ xfs_blkdev_put(rtdev);
+ out_close_logdev:
+ if (logdev && logdev != ddev)
+ xfs_blkdev_put(logdev);
+ out:
+ return error;
+}
+
+
+/*
+ * Setup xfs_mount buffer target pointers based on superblock
+ *
+ * Applies the superblock block size and sector size to the data target,
+ * to the log target when it is distinct from the data target, and to the
+ * realtime target when one exists.  Returns 0, or the first error
+ * reported by xfs_setsize_buftarg().
+ */
+STATIC int
+xfs_setup_devices(
+ struct xfs_mount *mp)
+{
+ int error;
+
+ error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_blocksize,
+ mp->m_sb.sb_sectsize);
+ if (error)
+ return error;
+
+ if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
+ /*
+ * The log sector size defaults to BBSIZE and is taken from
+ * sb_logsectsize only when xfs_sb_version_hassector() is true.
+ */
+ unsigned int log_sector_size = BBSIZE;
+
+ if (xfs_sb_version_hassector(&mp->m_sb))
+ log_sector_size = mp->m_sb.sb_logsectsize;
+ error = xfs_setsize_buftarg(mp->m_logdev_targp,
+ mp->m_sb.sb_blocksize,
+ log_sector_size);
+ if (error)
+ return error;
+ }
+ if (mp->m_rtdev_targp) {
+ error = xfs_setsize_buftarg(mp->m_rtdev_targp,
+ mp->m_sb.sb_blocksize,
+ mp->m_sb.sb_sectsize);
+ if (error)
+ return error;
+ }
+
+ return 0;
+}
+
+
/*
* XFS AIL push thread support
*/
@@ -843,48 +999,11 @@ xfs_fs_destroy_inode(
STATIC void
xfs_fs_inode_init_once(
- kmem_zone_t *zonep,
void *vnode)
{
inode_init_once(vn_to_inode((bhv_vnode_t *)vnode));
}
-STATIC int __init
-xfs_init_zones(void)
-{
- xfs_vnode_zone = kmem_zone_init_flags(sizeof(bhv_vnode_t), "xfs_vnode",
- KM_ZONE_HWALIGN | KM_ZONE_RECLAIM |
- KM_ZONE_SPREAD,
- xfs_fs_inode_init_once);
- if (!xfs_vnode_zone)
- goto out;
-
- xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend");
- if (!xfs_ioend_zone)
- goto out_destroy_vnode_zone;
-
- xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE,
- xfs_ioend_zone);
- if (!xfs_ioend_pool)
- goto out_free_ioend_zone;
- return 0;
-
- out_free_ioend_zone:
- kmem_zone_destroy(xfs_ioend_zone);
- out_destroy_vnode_zone:
- kmem_zone_destroy(xfs_vnode_zone);
- out:
- return -ENOMEM;
-}
-
-STATIC void
-xfs_destroy_zones(void)
-{
- mempool_destroy(xfs_ioend_pool);
- kmem_zone_destroy(xfs_vnode_zone);
- kmem_zone_destroy(xfs_ioend_zone);
-}
-
/*
* Attempt to flush the inode, this will actually fail
* if the inode is pinned, but we dirty the inode again
@@ -1074,7 +1193,7 @@ xfssyncd(
list_del(&work->w_list);
if (work == &mp->m_sync_work)
continue;
- kmem_free(work, sizeof(struct bhv_vfs_sync_work));
+ kmem_free(work);
}
}
@@ -1086,14 +1205,63 @@ xfs_fs_put_super(
struct super_block *sb)
{
struct xfs_mount *mp = XFS_M(sb);
+ struct xfs_inode *rip = mp->m_rootip;
+ int unmount_event_flags = 0;
int error;
kthread_stop(mp->m_sync_task);
xfs_sync(mp, SYNC_ATTR | SYNC_DELWRI);
- error = xfs_unmount(mp, 0, NULL);
- if (error)
- printk("XFS: unmount got error=%d\n", error);
+
+#ifdef HAVE_DMAPI
+ if (mp->m_flags & XFS_MOUNT_DMAPI) {
+ unmount_event_flags =
+ (mp->m_dmevmask & (1 << DM_EVENT_UNMOUNT)) ?
+ 0 : DM_FLAGS_UNWANTED;
+ /*
+ * Ignore error from dmapi here, first unmount is not allowed
+ * to fail anyway, and second we wouldn't want to fail a
+ * unmount because of dmapi.
+ */
+ XFS_SEND_PREUNMOUNT(mp, rip, DM_RIGHT_NULL, rip, DM_RIGHT_NULL,
+ NULL, NULL, 0, 0, unmount_event_flags);
+ }
+#endif
+
+ /*
+ * Blow away any referenced inode in the filestreams cache.
+ * This can and will cause log traffic as inodes go inactive
+ * here.
+ */
+ xfs_filestream_unmount(mp);
+
+ XFS_bflush(mp->m_ddev_targp);
+ error = xfs_unmount_flush(mp, 0);
+ WARN_ON(error);
+
+ IRELE(rip);
+
+ /*
+ * If we're forcing a shutdown, typically because of a media error,
+ * we want to make sure we invalidate dirty pages that belong to
+ * referenced vnodes as well.
+ */
+ if (XFS_FORCED_SHUTDOWN(mp)) {
+ error = xfs_sync(mp, SYNC_WAIT | SYNC_CLOSE);
+ ASSERT(error != EFSCORRUPTED);
+ }
+
+ if (mp->m_flags & XFS_MOUNT_DMAPI) {
+ XFS_SEND_UNMOUNT(mp, rip, DM_RIGHT_NULL, 0, 0,
+ unmount_event_flags);
+ }
+
+ xfs_unmountfs(mp);
+ xfs_icsb_destroy_counters(mp);
+ xfs_close_devices(mp);
+ xfs_qmops_put(mp);
+ xfs_dmops_put(mp);
+ kfree(mp);
}
STATIC void
@@ -1216,14 +1384,54 @@ xfs_fs_remount(
char *options)
{
struct xfs_mount *mp = XFS_M(sb);
- struct xfs_mount_args *args = xfs_args_allocate(sb, 0);
- int error;
+ substring_t args[MAX_OPT_ARGS];
+ char *p;
- error = xfs_parseargs(mp, options, args, 1);
- if (!error)
- error = xfs_mntupdate(mp, flags, args);
- kmem_free(args, sizeof(*args));
- return -error;
+ while ((p = strsep(&options, ",")) != NULL) {
+ int token;
+
+ if (!*p)
+ continue;
+
+ token = match_token(p, tokens, args);
+ switch (token) {
+ case Opt_barrier:
+ mp->m_flags |= XFS_MOUNT_BARRIER;
+
+ /*
+ * Test if barriers are actually working if we can,
+ * else delay this check until the filesystem is
+ * marked writeable.
+ */
+ if (!(mp->m_flags & XFS_MOUNT_RDONLY))
+ xfs_mountfs_check_barriers(mp);
+ break;
+ case Opt_nobarrier:
+ mp->m_flags &= ~XFS_MOUNT_BARRIER;
+ break;
+ default:
+ printk(KERN_INFO
+ "XFS: mount option \"%s\" not supported for remount\n", p);
+ return -EINVAL;
+ }
+ }
+
+ /* rw/ro -> rw */
+ if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) {
+ mp->m_flags &= ~XFS_MOUNT_RDONLY;
+ if (mp->m_flags & XFS_MOUNT_BARRIER)
+ xfs_mountfs_check_barriers(mp);
+ }
+
+ /* rw -> ro */
+ if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) {
+ xfs_filestream_flush(mp);
+ xfs_sync(mp, SYNC_DATA_QUIESCE);
+ xfs_attr_quiesce(mp);
+ mp->m_flags |= XFS_MOUNT_RDONLY;
+ }
+
+ return 0;
}
/*
@@ -1300,6 +1508,225 @@ xfs_fs_setxquota(
Q_XSETPQLIM), id, (caddr_t)fdq);
}
+/*
+ * This function fills in xfs_mount_t fields based on mount args.
+ * Note: the superblock has _not_ yet been read in.
+ *
+ * Copies stripe geometry, log buffer settings and device/filesystem
+ * names from the mount args into the mount structure and translates the
+ * XFSMNT_* / XFSMNT2_* option bits into XFS_MOUNT_* flags.
+ *
+ * Returns 0 on success or XFS_ERROR(EINVAL) when logbufs, logbufsize or
+ * iosizelog is out of range, or when norecovery is requested on a mount
+ * that is not flagged read-only.
+ *
+ * NOTE(review): the EINVAL returns that follow the name copies leave
+ * m_fsname/m_rtname/m_logname allocated; whether the caller frees them
+ * is not visible from here - confirm.
+ */
+STATIC int
+xfs_start_flags(
+ struct xfs_mount_args *ap,
+ struct xfs_mount *mp)
+{
+ /* Values are in BBs */
+ if ((ap->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) {
+ /*
+ * At this point the superblock has not been read
+ * in, therefore we do not know the block size.
+ * Before the mount call ends we will convert
+ * these to FSBs.
+ */
+ mp->m_dalign = ap->sunit;
+ mp->m_swidth = ap->swidth;
+ }
+
+ /* -1 and 0 skip the range check and are stored as given */
+ if (ap->logbufs != -1 &&
+ ap->logbufs != 0 &&
+ (ap->logbufs < XLOG_MIN_ICLOGS ||
+ ap->logbufs > XLOG_MAX_ICLOGS)) {
+ cmn_err(CE_WARN,
+ "XFS: invalid logbufs value: %d [not %d-%d]",
+ ap->logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
+ return XFS_ERROR(EINVAL);
+ }
+ mp->m_logbufs = ap->logbufs;
+ /* same convention: -1 and 0 pass; otherwise in range and a power of two */
+ if (ap->logbufsize != -1 &&
+ ap->logbufsize != 0 &&
+ (ap->logbufsize < XLOG_MIN_RECORD_BSIZE ||
+ ap->logbufsize > XLOG_MAX_RECORD_BSIZE ||
+ !is_power_of_2(ap->logbufsize))) {
+ cmn_err(CE_WARN,
+ "XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
+ ap->logbufsize);
+ return XFS_ERROR(EINVAL);
+ }
+ mp->m_logbsize = ap->logbufsize;
+ /* m_fsname_len counts the terminating NUL, so the strcpy below fits */
+ mp->m_fsname_len = strlen(ap->fsname) + 1;
+ mp->m_fsname = kmem_alloc(mp->m_fsname_len, KM_SLEEP);
+ strcpy(mp->m_fsname, ap->fsname);
+ if (ap->rtname[0]) {
+ mp->m_rtname = kmem_alloc(strlen(ap->rtname) + 1, KM_SLEEP);
+ strcpy(mp->m_rtname, ap->rtname);
+ }
+ if (ap->logname[0]) {
+ mp->m_logname = kmem_alloc(strlen(ap->logname) + 1, KM_SLEEP);
+ strcpy(mp->m_logname, ap->logname);
+ }
+
+ if (ap->flags & XFSMNT_WSYNC)
+ mp->m_flags |= XFS_MOUNT_WSYNC;
+#if XFS_BIG_INUMS
+ if (ap->flags & XFSMNT_INO64) {
+ mp->m_flags |= XFS_MOUNT_INO64;
+ mp->m_inoadd = XFS_INO64_OFFSET;
+ }
+#endif
+ if (ap->flags & XFSMNT_RETERR)
+ mp->m_flags |= XFS_MOUNT_RETERR;
+ if (ap->flags & XFSMNT_NOALIGN)
+ mp->m_flags |= XFS_MOUNT_NOALIGN;
+ if (ap->flags & XFSMNT_SWALLOC)
+ mp->m_flags |= XFS_MOUNT_SWALLOC;
+ if (ap->flags & XFSMNT_OSYNCISOSYNC)
+ mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC;
+ if (ap->flags & XFSMNT_32BITINODES)
+ mp->m_flags |= XFS_MOUNT_32BITINODES;
+
+ if (ap->flags & XFSMNT_IOSIZE) {
+ if (ap->iosizelog > XFS_MAX_IO_LOG ||
+ ap->iosizelog < XFS_MIN_IO_LOG) {
+ cmn_err(CE_WARN,
+ "XFS: invalid log iosize: %d [not %d-%d]",
+ ap->iosizelog, XFS_MIN_IO_LOG,
+ XFS_MAX_IO_LOG);
+ return XFS_ERROR(EINVAL);
+ }
+
+ mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
+ mp->m_readio_log = mp->m_writeio_log = ap->iosizelog;
+ }
+
+ if (ap->flags & XFSMNT_IKEEP)
+ mp->m_flags |= XFS_MOUNT_IKEEP;
+ if (ap->flags & XFSMNT_DIRSYNC)
+ mp->m_flags |= XFS_MOUNT_DIRSYNC;
+ if (ap->flags & XFSMNT_ATTR2)
+ mp->m_flags |= XFS_MOUNT_ATTR2;
+ if (ap->flags & XFSMNT_NOATTR2)
+ mp->m_flags |= XFS_MOUNT_NOATTR2;
+
+ if (ap->flags2 & XFSMNT2_COMPAT_IOSIZE)
+ mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
+
+ /*
+ * no recovery flag requires a read-only mount
+ *
+ * NOTE(review): this tests XFS_MOUNT_RDONLY in mp->m_flags, which
+ * is not set anywhere in this function - presumably the caller
+ * sets it beforehand; verify.
+ */
+ if (ap->flags & XFSMNT_NORECOVERY) {
+ if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
+ cmn_err(CE_WARN,
+ "XFS: tried to mount a FS read-write without recovery!");
+ return XFS_ERROR(EINVAL);
+ }
+ mp->m_flags |= XFS_MOUNT_NORECOVERY;
+ }
+
+ if (ap->flags & XFSMNT_NOUUID)
+ mp->m_flags |= XFS_MOUNT_NOUUID;
+ /* the barrier flag is explicitly cleared when not requested */
+ if (ap->flags & XFSMNT_BARRIER)
+ mp->m_flags |= XFS_MOUNT_BARRIER;
+ else
+ mp->m_flags &= ~XFS_MOUNT_BARRIER;
+
+ if (ap->flags2 & XFSMNT2_FILESTREAMS)
+ mp->m_flags |= XFS_MOUNT_FILESTREAMS;
+
+ if (ap->flags & XFSMNT_DMAPI)
+ mp->m_flags |= XFS_MOUNT_DMAPI;
+ return 0;
+}
+
+
+/*
+ * This function fills in xfs_mount_t fields based on mount args.
+ * Note: the superblock _has_ now been read in.
+ *
+ * Checks the requested log buffer size against the superblock, enables
+ * attr2 when the superblock has it and noattr2 was not given, rejects
+ * read-write mounts of filesystems marked read-only, validates shared
+ * mounts and sets the quota accounting/enforcement bits in m_qflags.
+ *
+ * Returns 0 on success, XFS_ERROR(EINVAL) for an invalid combination of
+ * options and superblock state, or XFS_ERROR(EROFS) for a read-write
+ * mount of a read-only filesystem.
+ */
+STATIC int
+xfs_finish_flags(
+ struct xfs_mount_args *ap,
+ struct xfs_mount *mp)
+{
+ int ronly = (mp->m_flags & XFS_MOUNT_RDONLY);
+
+ /* Fail a mount where the logbuf is smaller than the log stripe */
+ if (xfs_sb_version_haslogv2(&mp->m_sb)) {
+ /* no size requested and a large stripe unit: use the stripe unit */
+ if ((ap->logbufsize <= 0) &&
+ (mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE)) {
+ mp->m_logbsize = mp->m_sb.sb_logsunit;
+ } else if (ap->logbufsize > 0 &&
+ ap->logbufsize < mp->m_sb.sb_logsunit) {
+ cmn_err(CE_WARN,
+ "XFS: logbuf size must be greater than or equal to log stripe size");
+ return XFS_ERROR(EINVAL);
+ }
+ } else {
+ /* Fail a mount if the logbuf is larger than 32K */
+ if (ap->logbufsize > XLOG_BIG_RECORD_BSIZE) {
+ cmn_err(CE_WARN,
+ "XFS: logbuf size for version 1 logs must be 16K or 32K");
+ return XFS_ERROR(EINVAL);
+ }
+ }
+
+ /*
+ * mkfs'ed attr2 will turn on attr2 mount unless explicitly
+ * told by noattr2 to turn it off
+ */
+ if (xfs_sb_version_hasattr2(&mp->m_sb) &&
+ !(ap->flags & XFSMNT_NOATTR2))
+ mp->m_flags |= XFS_MOUNT_ATTR2;
+
+ /*
+ * prohibit r/w mounts of read-only filesystems
+ */
+ if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) {
+ cmn_err(CE_WARN,
+ "XFS: cannot mount a read-only filesystem as read-write");
+ return XFS_ERROR(EROFS);
+ }
+
+ /*
+ * check for shared mount.
+ */
+ if (ap->flags & XFSMNT_SHARED) {
+ if (!xfs_sb_version_hasshared(&mp->m_sb))
+ return XFS_ERROR(EINVAL);
+
+ /*
+ * For IRIX 6.5, shared mounts must have the shared
+ * version bit set, have the persistent readonly
+ * field set, must be version 0 and can only be mounted
+ * read-only.
+ */
+ if (!ronly || !(mp->m_sb.sb_flags & XFS_SBF_READONLY) ||
+ (mp->m_sb.sb_shared_vn != 0))
+ return XFS_ERROR(EINVAL);
+
+ mp->m_flags |= XFS_MOUNT_SHARED;
+
+ /*
+ * Shared XFS V0 can't deal with DMI. Return EINVAL.
+ *
+ * sb_shared_vn is already known to be 0 here (a non-zero value
+ * was rejected above), so this rejects any shared mount that
+ * also requests DMAPI.
+ */
+ if (mp->m_sb.sb_shared_vn == 0 && (ap->flags & XFSMNT_DMAPI))
+ return XFS_ERROR(EINVAL);
+ }
+
+ if (ap->flags & XFSMNT_UQUOTA) {
+ mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
+ if (ap->flags & XFSMNT_UQUOTAENF)
+ mp->m_qflags |= XFS_UQUOTA_ENFD;
+ }
+
+ /*
+ * Group quota takes precedence: project quota is only considered
+ * when group quota was not requested.  Both share the
+ * XFS_OQUOTA_ENFD enforcement bit.
+ */
+ if (ap->flags & XFSMNT_GQUOTA) {
+ mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
+ if (ap->flags & XFSMNT_GQUOTAENF)
+ mp->m_qflags |= XFS_OQUOTA_ENFD;
+ } else if (ap->flags & XFSMNT_PQUOTA) {
+ mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
+ if (ap->flags & XFSMNT_PQUOTAENF)
+ mp->m_qflags |= XFS_OQUOTA_ENFD;
+ }
+
+ return 0;
+}
+
+
STATIC int
xfs_fs_fill_super(
struct super_block *sb,
@@ -1308,11 +1735,21 @@ xfs_fs_fill_super(
{
struct inode *root;
struct xfs_mount *mp = NULL;
- struct xfs_mount_args *args = xfs_args_allocate(sb, silent);
- int error;
+ struct xfs_mount_args *args;
+ int flags = 0, error = ENOMEM;
+
+ args = xfs_args_allocate(sb, silent);
+ if (!args)
+ return -ENOMEM;
- mp = xfs_mount_init();
+ mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL);
+ if (!mp)
+ goto out_free_args;
+ spin_lock_init(&mp->m_sb_lock);
+ mutex_init(&mp->m_ilock);
+ mutex_init(&mp->m_growlock);
+ atomic_set(&mp->m_active_trans, 0);
INIT_LIST_HEAD(&mp->m_sync_list);
spin_lock_init(&mp->m_sync_lock);
init_waitqueue_head(&mp->m_wait_single_sync_task);
@@ -1325,16 +1762,60 @@ xfs_fs_fill_super(
error = xfs_parseargs(mp, (char *)data, args, 0);
if (error)
- goto fail_vfsop;
+ goto out_free_mp;
sb_min_blocksize(sb, BBSIZE);
+ sb->s_xattr = xfs_xattr_handlers;
sb->s_export_op = &xfs_export_operations;
sb->s_qcop = &xfs_quotactl_operations;
sb->s_op = &xfs_super_operations;
- error = xfs_mount(mp, args, NULL);
+ error = xfs_dmops_get(mp, args);
if (error)
- goto fail_vfsop;
+ goto out_free_mp;
+ error = xfs_qmops_get(mp, args);
+ if (error)
+ goto out_put_dmops;
+
+ if (args->flags & XFSMNT_QUIET)
+ flags |= XFS_MFSI_QUIET;
+
+ error = xfs_open_devices(mp, args);
+ if (error)
+ goto out_put_qmops;
+
+ if (xfs_icsb_init_counters(mp))
+ mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB;
+
+ /*
+ * Setup flags based on mount(2) options and then the superblock
+ */
+ error = xfs_start_flags(args, mp);
+ if (error)
+ goto out_destroy_counters;
+ error = xfs_readsb(mp, flags);
+ if (error)
+ goto out_destroy_counters;
+ error = xfs_finish_flags(args, mp);
+ if (error)
+ goto out_free_sb;
+
+ error = xfs_setup_devices(mp);
+ if (error)
+ goto out_free_sb;
+
+ if (mp->m_flags & XFS_MOUNT_BARRIER)
+ xfs_mountfs_check_barriers(mp);
+
+ error = xfs_filestream_mount(mp);
+ if (error)
+ goto out_free_sb;
+
+ error = xfs_mountfs(mp, flags);
+ if (error)
+ goto out_filestream_unmount;
+
+ XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, args->mtpt, args->fsname);
sb->s_dirt = 1;
sb->s_magic = XFS_SB_MAGIC;
@@ -1369,10 +1850,27 @@ xfs_fs_fill_super(
xfs_itrace_exit(XFS_I(sb->s_root->d_inode));
- kmem_free(args, sizeof(*args));
+ kfree(args);
return 0;
-fail_vnrele:
+ out_filestream_unmount:
+ xfs_filestream_unmount(mp);
+ out_free_sb:
+ xfs_freesb(mp);
+ out_destroy_counters:
+ xfs_icsb_destroy_counters(mp);
+ xfs_close_devices(mp);
+ out_put_qmops:
+ xfs_qmops_put(mp);
+ out_put_dmops:
+ xfs_dmops_put(mp);
+ out_free_mp:
+ kfree(mp);
+ out_free_args:
+ kfree(args);
+ return -error;
+
+ fail_vnrele:
if (sb->s_root) {
dput(sb->s_root);
sb->s_root = NULL;
@@ -1380,12 +1878,22 @@ fail_vnrele:
iput(root);
}
-fail_unmount:
- xfs_unmount(mp, 0, NULL);
+ fail_unmount:
+ /*
+ * Blow away any referenced inode in the filestreams cache.
+ * This can and will cause log traffic as inodes go inactive
+ * here.
+ */
+ xfs_filestream_unmount(mp);
-fail_vfsop:
- kmem_free(args, sizeof(*args));
- return -error;
+ XFS_bflush(mp->m_ddev_targp);
+ error = xfs_unmount_flush(mp, 0);
+ WARN_ON(error);
+
+ IRELE(mp->m_rootip);
+
+ xfs_unmountfs(mp);
+ goto out_destroy_counters;
}
STATIC int
@@ -1430,9 +1938,235 @@ static struct file_system_type xfs_fs_type = {
.fs_flags = FS_REQUIRES_DEV,
};
+STATIC int __init
+xfs_alloc_trace_bufs(void)
+{ /*
+ * Allocate one ktrace buffer for each XFS_*_TRACE option that is
+ * compiled in; every ktrace_alloc() result is checked for NULL.
+ * Returns 0 if all allocations succeeded, otherwise frees the buffers
+ * allocated so far and returns -ENOMEM.
+ */
+#ifdef XFS_ALLOC_TRACE
+ xfs_alloc_trace_buf = ktrace_alloc(XFS_ALLOC_TRACE_SIZE, KM_MAYFAIL);
+ if (!xfs_alloc_trace_buf)
+ goto out;
+#endif
+#ifdef XFS_BMAP_TRACE
+ xfs_bmap_trace_buf = ktrace_alloc(XFS_BMAP_TRACE_SIZE, KM_MAYFAIL);
+ if (!xfs_bmap_trace_buf)
+ goto out_free_alloc_trace;
+#endif
+#ifdef XFS_BMBT_TRACE
+ xfs_bmbt_trace_buf = ktrace_alloc(XFS_BMBT_TRACE_SIZE, KM_MAYFAIL);
+ if (!xfs_bmbt_trace_buf)
+ goto out_free_bmap_trace;
+#endif
+#ifdef XFS_ATTR_TRACE
+ xfs_attr_trace_buf = ktrace_alloc(XFS_ATTR_TRACE_SIZE, KM_MAYFAIL);
+ if (!xfs_attr_trace_buf)
+ goto out_free_bmbt_trace;
+#endif
+#ifdef XFS_DIR2_TRACE
+ xfs_dir2_trace_buf = ktrace_alloc(XFS_DIR2_GTRACE_SIZE, KM_MAYFAIL);
+ if (!xfs_dir2_trace_buf)
+ goto out_free_attr_trace;
+#endif
+
+ return 0;
+/*
+ * Error unwind, entered at the label that matches the failed allocation.
+ * Each label sits under the same #ifdef as the goto that targets it, and
+ * control falls through the remaining ktrace_free() calls so the earlier
+ * buffers are released in reverse order of allocation.
+ */
+#ifdef XFS_DIR2_TRACE
+ out_free_attr_trace:
+#endif
+#ifdef XFS_ATTR_TRACE
+ ktrace_free(xfs_attr_trace_buf);
+ out_free_bmbt_trace:
+#endif
+#ifdef XFS_BMBT_TRACE
+ ktrace_free(xfs_bmbt_trace_buf);
+ out_free_bmap_trace:
+#endif
+#ifdef XFS_BMAP_TRACE
+ ktrace_free(xfs_bmap_trace_buf);
+ out_free_alloc_trace:
+#endif
+#ifdef XFS_ALLOC_TRACE
+ ktrace_free(xfs_alloc_trace_buf);
+ out:
+#endif
+ return -ENOMEM;
+}
+/*
+ * Release every trace buffer that is compiled in, in the reverse of the
+ * order in which xfs_alloc_trace_bufs() allocates them.
+ */
+STATIC void
+xfs_free_trace_bufs(void)
+{
+#ifdef XFS_DIR2_TRACE
+ ktrace_free(xfs_dir2_trace_buf);
+#endif
+#ifdef XFS_ATTR_TRACE
+ ktrace_free(xfs_attr_trace_buf);
+#endif
+#ifdef XFS_BMBT_TRACE
+ ktrace_free(xfs_bmbt_trace_buf);
+#endif
+#ifdef XFS_BMAP_TRACE
+ ktrace_free(xfs_bmap_trace_buf);
+#endif
+#ifdef XFS_ALLOC_TRACE
+ ktrace_free(xfs_alloc_trace_buf);
+#endif
+}
STATIC int __init
-init_xfs_fs( void )
+xfs_init_zones(void)
+{ /*
+ * Create the kmem zones, and the ioend mempool, listed below.
+ * Returns 0 once all of them exist.  If any creation fails, everything
+ * created before it is destroyed again in reverse order and -ENOMEM
+ * is returned.
+ */
+ xfs_vnode_zone = kmem_zone_init_flags(sizeof(bhv_vnode_t), "xfs_vnode",
+ KM_ZONE_HWALIGN | KM_ZONE_RECLAIM |
+ KM_ZONE_SPREAD,
+ xfs_fs_inode_init_once);
+ if (!xfs_vnode_zone)
+ goto out;
+
+ xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend");
+ if (!xfs_ioend_zone)
+ goto out_destroy_vnode_zone;
+
+ xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE,
+ xfs_ioend_zone); /* mempool backed by the ioend zone created just above */
+ if (!xfs_ioend_pool)
+ goto out_destroy_ioend_zone;
+
+ xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
+ "xfs_log_ticket");
+ if (!xfs_log_ticket_zone)
+ goto out_destroy_ioend_pool;
+
+ xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t),
+ "xfs_bmap_free_item");
+ if (!xfs_bmap_free_item_zone)
+ goto out_destroy_log_ticket_zone;
+ xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t),
+ "xfs_btree_cur");
+ if (!xfs_btree_cur_zone)
+ goto out_destroy_bmap_free_item_zone;
+
+ xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t),
+ "xfs_da_state");
+ if (!xfs_da_state_zone)
+ goto out_destroy_btree_cur_zone;
+
+ xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf");
+ if (!xfs_dabuf_zone)
+ goto out_destroy_da_state_zone;
+
+ xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork");
+ if (!xfs_ifork_zone)
+ goto out_destroy_dabuf_zone;
+
+ xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans");
+ if (!xfs_trans_zone)
+ goto out_destroy_ifork_zone;
+
+ /*
+ * The size of the zone allocated buf log item is the maximum
+ * size possible under XFS. This wastes a little bit of memory,
+ * but it is much faster.
+ */
+ xfs_buf_item_zone = kmem_zone_init((sizeof(xfs_buf_log_item_t) +
+ (((XFS_MAX_BLOCKSIZE / XFS_BLI_CHUNK) /
+ NBWORD) * sizeof(int))), "xfs_buf_item");
+ if (!xfs_buf_item_zone)
+ goto out_destroy_trans_zone;
+
+ xfs_efd_zone = kmem_zone_init((sizeof(xfs_efd_log_item_t) +
+ ((XFS_EFD_MAX_FAST_EXTENTS - 1) *
+ sizeof(xfs_extent_t))), "xfs_efd_item");
+ if (!xfs_efd_zone)
+ goto out_destroy_buf_item_zone;
+
+ xfs_efi_zone = kmem_zone_init((sizeof(xfs_efi_log_item_t) +
+ ((XFS_EFI_MAX_FAST_EXTENTS - 1) *
+ sizeof(xfs_extent_t))), "xfs_efi_item");
+ if (!xfs_efi_zone)
+ goto out_destroy_efd_zone;
+
+ xfs_inode_zone =
+ kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode",
+ KM_ZONE_HWALIGN | KM_ZONE_RECLAIM |
+ KM_ZONE_SPREAD, NULL);
+ if (!xfs_inode_zone)
+ goto out_destroy_efi_zone;
+
+ xfs_ili_zone =
+ kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili",
+ KM_ZONE_SPREAD, NULL);
+ if (!xfs_ili_zone)
+ goto out_destroy_inode_zone;
+
+#ifdef CONFIG_XFS_POSIX_ACL
+ xfs_acl_zone = kmem_zone_init(sizeof(xfs_acl_t), "xfs_acl");
+ if (!xfs_acl_zone)
+ goto out_destroy_ili_zone;
+#endif
+
+ return 0;
+/*
+ * Error unwind: enter at the label for the step that failed and fall
+ * through, destroying the earlier objects in reverse order of creation.
+ * out_destroy_ili_zone is only defined with CONFIG_XFS_POSIX_ACL because
+ * the ACL zone allocation is the only step that jumps to it.
+ */
+#ifdef CONFIG_XFS_POSIX_ACL
+ out_destroy_ili_zone:
+#endif
+ kmem_zone_destroy(xfs_ili_zone);
+ out_destroy_inode_zone:
+ kmem_zone_destroy(xfs_inode_zone);
+ out_destroy_efi_zone:
+ kmem_zone_destroy(xfs_efi_zone);
+ out_destroy_efd_zone:
+ kmem_zone_destroy(xfs_efd_zone);
+ out_destroy_buf_item_zone:
+ kmem_zone_destroy(xfs_buf_item_zone);
+ out_destroy_trans_zone:
+ kmem_zone_destroy(xfs_trans_zone);
+ out_destroy_ifork_zone:
+ kmem_zone_destroy(xfs_ifork_zone);
+ out_destroy_dabuf_zone:
+ kmem_zone_destroy(xfs_dabuf_zone);
+ out_destroy_da_state_zone:
+ kmem_zone_destroy(xfs_da_state_zone);
+ out_destroy_btree_cur_zone:
+ kmem_zone_destroy(xfs_btree_cur_zone);
+ out_destroy_bmap_free_item_zone:
+ kmem_zone_destroy(xfs_bmap_free_item_zone);
+ out_destroy_log_ticket_zone:
+ kmem_zone_destroy(xfs_log_ticket_zone);
+ out_destroy_ioend_pool:
+ mempool_destroy(xfs_ioend_pool);
+ out_destroy_ioend_zone:
+ kmem_zone_destroy(xfs_ioend_zone);
+ out_destroy_vnode_zone:
+ kmem_zone_destroy(xfs_vnode_zone);
+ out:
+ return -ENOMEM;
+}
+/*
+ * Destroy the kmem zones and the ioend mempool set up by
+ * xfs_init_zones(), in the reverse of the order they are created there.
+ */
+STATIC void
+xfs_destroy_zones(void)
+{
+#ifdef CONFIG_XFS_POSIX_ACL
+ kmem_zone_destroy(xfs_acl_zone);
+#endif
+ kmem_zone_destroy(xfs_ili_zone);
+ kmem_zone_destroy(xfs_inode_zone);
+ kmem_zone_destroy(xfs_efi_zone);
+ kmem_zone_destroy(xfs_efd_zone);
+ kmem_zone_destroy(xfs_buf_item_zone);
+ kmem_zone_destroy(xfs_trans_zone);
+ kmem_zone_destroy(xfs_ifork_zone);
+ kmem_zone_destroy(xfs_dabuf_zone);
+ kmem_zone_destroy(xfs_da_state_zone);
+ kmem_zone_destroy(xfs_btree_cur_zone);
+ kmem_zone_destroy(xfs_bmap_free_item_zone);
+ kmem_zone_destroy(xfs_log_ticket_zone);
+ mempool_destroy(xfs_ioend_pool); /* pool goes before the zone it was created from */
+ kmem_zone_destroy(xfs_ioend_zone);
+ kmem_zone_destroy(xfs_vnode_zone);
+
+}
+
+STATIC int __init
+init_xfs_fs(void)
{
int error;
static char message[] __initdata = KERN_INFO \
@@ -1441,42 +2175,73 @@ init_xfs_fs( void )
printk(message);
ktrace_init(64);
+ vn_init();
+ xfs_dir_startup();
error = xfs_init_zones();
- if (error < 0)
- goto undo_zones;
+ if (error)
+ goto out;
+
+ error = xfs_alloc_trace_bufs();
+ if (error)
+ goto out_destroy_zones;
+
+ error = xfs_mru_cache_init();
+ if (error)
+ goto out_free_trace_buffers;
+
+ error = xfs_filestream_init();
+ if (error)
+ goto out_mru_cache_uninit;
error = xfs_buf_init();
- if (error < 0)
- goto undo_buffers;
+ if (error)
+ goto out_filestream_uninit;
+
+ error = xfs_init_procfs();
+ if (error)
+ goto out_buf_terminate;
+
+ error = xfs_sysctl_register();
+ if (error)
+ goto out_cleanup_procfs;
- vn_init();
- xfs_init();
- uuid_init();
vfs_initquota();
error = register_filesystem(&xfs_fs_type);
if (error)
- goto undo_register;
+ goto out_sysctl_unregister;
return 0;
-undo_register:
+ out_sysctl_unregister:
+ xfs_sysctl_unregister();
+ out_cleanup_procfs:
+ xfs_cleanup_procfs();
+ out_buf_terminate:
xfs_buf_terminate();
-
-undo_buffers:
+ out_filestream_uninit:
+ xfs_filestream_uninit();
+ out_mru_cache_uninit:
+ xfs_mru_cache_uninit();
+ out_free_trace_buffers:
+ xfs_free_trace_bufs();
+ out_destroy_zones:
xfs_destroy_zones();
-
-undo_zones:
+ out:
return error;
}
STATIC void __exit
-exit_xfs_fs( void )
+exit_xfs_fs(void)
{
vfs_exitquota();
unregister_filesystem(&xfs_fs_type);
- xfs_cleanup();
+ xfs_sysctl_unregister();
+ xfs_cleanup_procfs();
xfs_buf_terminate();
+ xfs_filestream_uninit();
+ xfs_mru_cache_uninit();
+ xfs_free_trace_bufs();
xfs_destroy_zones();
ktrace_uninit();
}
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
index 3efb7c6d330..b7d13da01bd 100644
--- a/fs/xfs/linux-2.6/xfs_super.h
+++ b/fs/xfs/linux-2.6/xfs_super.h
@@ -107,12 +107,10 @@ extern void xfs_initialize_vnode(struct xfs_mount *mp, bhv_vnode_t *vp,
extern void xfs_flush_inode(struct xfs_inode *);
extern void xfs_flush_device(struct xfs_inode *);
-extern int xfs_blkdev_get(struct xfs_mount *, const char *,
- struct block_device **);
-extern void xfs_blkdev_put(struct block_device *);
extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
extern const struct export_operations xfs_export_operations;
+extern struct xattr_handler *xfs_xattr_handlers[];
#define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info))
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c
index bb997d75c05..7dacb5bbde3 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.c
+++ b/fs/xfs/linux-2.6/xfs_sysctl.c
@@ -259,15 +259,17 @@ static ctl_table xfs_root_table[] = {
{}
};
-void
+int
xfs_sysctl_register(void)
{
xfs_table_header = register_sysctl_table(xfs_root_table);
+ if (!xfs_table_header)
+ return -ENOMEM;
+ return 0;
}
void
xfs_sysctl_unregister(void)
{
- if (xfs_table_header)
- unregister_sysctl_table(xfs_table_header);
+ unregister_sysctl_table(xfs_table_header);
}
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.h b/fs/xfs/linux-2.6/xfs_sysctl.h
index 98b97e399d6..4aadb8056c3 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.h
+++ b/fs/xfs/linux-2.6/xfs_sysctl.h
@@ -93,10 +93,10 @@ enum {
extern xfs_param_t xfs_params;
#ifdef CONFIG_SYSCTL
-extern void xfs_sysctl_register(void);
+extern int xfs_sysctl_register(void);
extern void xfs_sysctl_unregister(void);
#else
-# define xfs_sysctl_register() do { } while (0)
+# define xfs_sysctl_register() (0)
# define xfs_sysctl_unregister() do { } while (0)
#endif /* CONFIG_SYSCTL */
diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c
index bc7afe00733..25488b6d988 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.c
+++ b/fs/xfs/linux-2.6/xfs_vnode.c
@@ -82,56 +82,6 @@ vn_ioerror(
xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ, f, l);
}
-/*
- * Revalidate the Linux inode from the XFS inode.
- * Note: i_size _not_ updated; we must hold the inode
- * semaphore when doing that - callers responsibility.
- */
-int
-vn_revalidate(
- bhv_vnode_t *vp)
-{
- struct inode *inode = vn_to_inode(vp);
- struct xfs_inode *ip = XFS_I(inode);
- struct xfs_mount *mp = ip->i_mount;
- unsigned long xflags;
-
- xfs_itrace_entry(ip);
-
- if (XFS_FORCED_SHUTDOWN(mp))
- return -EIO;
-
- xfs_ilock(ip, XFS_ILOCK_SHARED);
- inode->i_mode = ip->i_d.di_mode;
- inode->i_uid = ip->i_d.di_uid;
- inode->i_gid = ip->i_d.di_gid;
- inode->i_mtime.tv_sec = ip->i_d.di_mtime.t_sec;
- inode->i_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec;
- inode->i_ctime.tv_sec = ip->i_d.di_ctime.t_sec;
- inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec;
-
- xflags = xfs_ip2xflags(ip);
- if (xflags & XFS_XFLAG_IMMUTABLE)
- inode->i_flags |= S_IMMUTABLE;
- else
- inode->i_flags &= ~S_IMMUTABLE;
- if (xflags & XFS_XFLAG_APPEND)
- inode->i_flags |= S_APPEND;
- else
- inode->i_flags &= ~S_APPEND;
- if (xflags & XFS_XFLAG_SYNC)
- inode->i_flags |= S_SYNC;
- else
- inode->i_flags &= ~S_SYNC;
- if (xflags & XFS_XFLAG_NOATIME)
- inode->i_flags |= S_NOATIME;
- else
- inode->i_flags &= ~S_NOATIME;
- xfs_iunlock(ip, XFS_ILOCK_SHARED);
-
- xfs_iflags_clear(ip, XFS_IMODIFIED);
- return 0;
-}
/*
* Add a reference to a referenced vnode.
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 25eb2a9e8d9..41ca2cec5d3 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -19,7 +19,6 @@
#define __XFS_VNODE_H__
struct file;
-struct bhv_vattr;
struct xfs_iomap;
struct attrlist_cursor_kern;
@@ -66,87 +65,8 @@ static inline struct inode *vn_to_inode(bhv_vnode_t *vnode)
Prevent VM access to the pages until
the operation completes. */
-/*
- * Vnode attributes. va_mask indicates those attributes the caller
- * wants to set or extract.
- */
-typedef struct bhv_vattr {
- int va_mask; /* bit-mask of attributes present */
- mode_t va_mode; /* file access mode and type */
- xfs_nlink_t va_nlink; /* number of references to file */
- uid_t va_uid; /* owner user id */
- gid_t va_gid; /* owner group id */
- xfs_ino_t va_nodeid; /* file id */
- xfs_off_t va_size; /* file size in bytes */
- u_long va_blocksize; /* blocksize preferred for i/o */
- struct timespec va_atime; /* time of last access */
- struct timespec va_mtime; /* time of last modification */
- struct timespec va_ctime; /* time file changed */
- u_int va_gen; /* generation number of file */
- xfs_dev_t va_rdev; /* device the special file represents */
- __int64_t va_nblocks; /* number of blocks allocated */
- u_long va_xflags; /* random extended file flags */
- u_long va_extsize; /* file extent size */
- u_long va_nextents; /* number of extents in file */
- u_long va_anextents; /* number of attr extents in file */
- prid_t va_projid; /* project id */
-} bhv_vattr_t;
-
-/*
- * setattr or getattr attributes
- */
-#define XFS_AT_TYPE 0x00000001
-#define XFS_AT_MODE 0x00000002
-#define XFS_AT_UID 0x00000004
-#define XFS_AT_GID 0x00000008
-#define XFS_AT_FSID 0x00000010
-#define XFS_AT_NODEID 0x00000020
-#define XFS_AT_NLINK 0x00000040
-#define XFS_AT_SIZE 0x00000080
-#define XFS_AT_ATIME 0x00000100
-#define XFS_AT_MTIME 0x00000200
-#define XFS_AT_CTIME 0x00000400
-#define XFS_AT_RDEV 0x00000800
-#define XFS_AT_BLKSIZE 0x00001000
-#define XFS_AT_NBLOCKS 0x00002000
-#define XFS_AT_VCODE 0x00004000
-#define XFS_AT_MAC 0x00008000
-#define XFS_AT_UPDATIME 0x00010000
-#define XFS_AT_UPDMTIME 0x00020000
-#define XFS_AT_UPDCTIME 0x00040000
-#define XFS_AT_ACL 0x00080000
-#define XFS_AT_CAP 0x00100000
-#define XFS_AT_INF 0x00200000
-#define XFS_AT_XFLAGS 0x00400000
-#define XFS_AT_EXTSIZE 0x00800000
-#define XFS_AT_NEXTENTS 0x01000000
-#define XFS_AT_ANEXTENTS 0x02000000
-#define XFS_AT_PROJID 0x04000000
-#define XFS_AT_SIZE_NOPERM 0x08000000
-#define XFS_AT_GENCOUNT 0x10000000
-
-#define XFS_AT_ALL (XFS_AT_TYPE|XFS_AT_MODE|XFS_AT_UID|XFS_AT_GID|\
- XFS_AT_FSID|XFS_AT_NODEID|XFS_AT_NLINK|XFS_AT_SIZE|\
- XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME|XFS_AT_RDEV|\
- XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_VCODE|XFS_AT_MAC|\
- XFS_AT_ACL|XFS_AT_CAP|XFS_AT_INF|XFS_AT_XFLAGS|XFS_AT_EXTSIZE|\
- XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_PROJID|XFS_AT_GENCOUNT)
-
-#define XFS_AT_STAT (XFS_AT_TYPE|XFS_AT_MODE|XFS_AT_UID|XFS_AT_GID|\
- XFS_AT_FSID|XFS_AT_NODEID|XFS_AT_NLINK|XFS_AT_SIZE|\
- XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME|XFS_AT_RDEV|\
- XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_PROJID)
-
-#define XFS_AT_TIMES (XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME)
-
-#define XFS_AT_UPDTIMES (XFS_AT_UPDATIME|XFS_AT_UPDMTIME|XFS_AT_UPDCTIME)
-
-#define XFS_AT_NOSET (XFS_AT_NLINK|XFS_AT_RDEV|XFS_AT_FSID|XFS_AT_NODEID|\
- XFS_AT_TYPE|XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_VCODE|\
- XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_GENCOUNT)
extern void vn_init(void);
-extern int vn_revalidate(bhv_vnode_t *);
/*
* Yeah, these don't take vnode anymore at all, all this should be
@@ -219,15 +139,6 @@ static inline void vn_atime_to_time_t(bhv_vnode_t *vp, time_t *tt)
#define VN_DIRTY(vp) mapping_tagged(vn_to_inode(vp)->i_mapping, \
PAGECACHE_TAG_DIRTY)
-/*
- * Flags to vop_setattr/getattr.
- */
-#define ATTR_UTIME 0x01 /* non-default utime(2) request */
-#define ATTR_DMI 0x08 /* invocation from a DMI function */
-#define ATTR_LAZY 0x80 /* set/get attributes lazily */
-#define ATTR_NONBLOCK 0x100 /* return EAGAIN if operation would block */
-#define ATTR_NOLOCK 0x200 /* Don't grab any conflicting locks */
-#define ATTR_NOSIZETOK 0x400 /* Don't get the SIZE token */
/*
* Tracking vnode activity.
diff --git a/fs/xfs/linux-2.6/xfs_xattr.c b/fs/xfs/linux-2.6/xfs_xattr.c
new file mode 100644
index 00000000000..964621fde6e
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_xattr.c
@@ -0,0 +1,330 @@
+/*
+ * Copyright (C) 2008 Christoph Hellwig.
+ * Portions Copyright (C) 2000-2008 Silicon Graphics, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "xfs.h"
+#include "xfs_da_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_attr.h"
+#include "xfs_attr_leaf.h"
+#include "xfs_acl.h"
+#include "xfs_vnodeops.h"
+
+#include <linux/posix_acl_xattr.h>
+#include <linux/xattr.h>
+
+
+/*
+ * ACL handling. Should eventually be moved into xfs_acl.c
+ */
+/*
+ * Map the attribute name posix_acl_access or posix_acl_default to
+ * _ACL_TYPE_ACCESS or _ACL_TYPE_DEFAULT.  Any other name yields -EINVAL.
+ */
+static int
+xfs_decode_acl(const char *name)
+{
+ if (strcmp(name, "posix_acl_access") == 0)
+ return _ACL_TYPE_ACCESS;
+ else if (strcmp(name, "posix_acl_default") == 0)
+ return _ACL_TYPE_DEFAULT;
+ return -EINVAL;
+}
+
+/*
+ * Get system extended attributes which at the moment only
+ * includes Posix ACLs.
+ *
+ * A name that xfs_decode_acl() does not recognise fails with its
+ * negative error code; otherwise the result of xfs_acl_vget() for the
+ * decoded ACL type is returned unchanged.
+ */
+static int
+xfs_xattr_system_get(struct inode *inode, const char *name,
+ void *buffer, size_t size)
+{
+ int acl;
+
+ acl = xfs_decode_acl(name);
+ if (acl < 0)
+ return acl;
+
+ return xfs_acl_vget(inode, buffer, size, acl);
+}
+/*
+ * Set or remove a Posix ACL through the system namespace.  Unknown names
+ * fail with the error from xfs_decode_acl(), and XATTR_CREATE is rejected
+ * with -EINVAL.  A NULL value removes the ACL via xfs_acl_vremove();
+ * any other value is handed to xfs_acl_vset().
+ */
+static int
+xfs_xattr_system_set(struct inode *inode, const char *name,
+ const void *value, size_t size, int flags)
+{
+ int acl;
+
+ acl = xfs_decode_acl(name);
+ if (acl < 0)
+ return acl;
+ if (flags & XATTR_CREATE)
+ return -EINVAL;
+
+ if (!value)
+ return xfs_acl_vremove(inode, acl);
+
+ return xfs_acl_vset(inode, (void *)value, size, acl); /* cast drops const for the callee */
+}
+/* Handler for names under XATTR_SYSTEM_PREFIX, served by the ACL helpers. */
+static struct xattr_handler xfs_xattr_system_handler = {
+ .prefix = XATTR_SYSTEM_PREFIX,
+ .get = xfs_xattr_system_get,
+ .set = xfs_xattr_system_set,
+};
+
+
+/*
+ * Real xattr handling. The only difference between the namespaces is
+ * a flag passed to the low-level attr code.
+ */
+/*
+ * Common getter behind the user, trusted and security handlers; xflags
+ * carries the namespace flag chosen by the wrapper.  An empty name is
+ * rejected with -EINVAL.  Returns the negated xfs_attr_get() error on
+ * failure, otherwise asize as left by xfs_attr_get().
+ */
+static int
+__xfs_xattr_get(struct inode *inode, const char *name,
+ void *value, size_t size, int xflags)
+{
+ struct xfs_inode *ip = XFS_I(inode);
+ int error, asize = size; /* NOTE(review): size_t is narrowed to int here */
+
+ if (strcmp(name, "") == 0)
+ return -EINVAL;
+
+ /* Convert Linux syscall to XFS internal ATTR flags */
+ if (!size) { /* no buffer given: presumably a length-only query, confirm ATTR_KERNOVAL semantics */
+ xflags |= ATTR_KERNOVAL;
+ value = NULL;
+ }
+
+ error = -xfs_attr_get(ip, name, value, &asize, xflags); /* negated: result is returned as a negative errno */
+ if (error)
+ return error;
+ return asize;
+}
+/*
+ * Common setter behind the user, trusted and security handlers; xflags
+ * carries the namespace flag chosen by the wrapper.  An empty name is
+ * rejected with -EINVAL.  XATTR_CREATE and XATTR_REPLACE are translated
+ * to ATTR_CREATE and ATTR_REPLACE.  A NULL value removes the attribute,
+ * anything else sets it; the negated XFS return value is passed back.
+ */
+static int
+__xfs_xattr_set(struct inode *inode, const char *name, const void *value,
+ size_t size, int flags, int xflags)
+{
+ struct xfs_inode *ip = XFS_I(inode);
+
+ if (strcmp(name, "") == 0)
+ return -EINVAL;
+
+ /* Convert Linux syscall to XFS internal ATTR flags */
+ if (flags & XATTR_CREATE)
+ xflags |= ATTR_CREATE;
+ if (flags & XATTR_REPLACE)
+ xflags |= ATTR_REPLACE;
+
+ if (!value)
+ return -xfs_attr_remove(ip, name, xflags);
+ return -xfs_attr_set(ip, name, (void *)value, size, xflags); /* cast drops const for the callee */
+}
+/* user namespace: common getter with no extra namespace flag (xflags 0). */
+static int
+xfs_xattr_user_get(struct inode *inode, const char *name,
+ void *value, size_t size)
+{
+ return __xfs_xattr_get(inode, name, value, size, 0);
+}
+/* user namespace: common setter with no extra namespace flag (xflags 0). */
+static int
+xfs_xattr_user_set(struct inode *inode, const char *name,
+ const void *value, size_t size, int flags)
+{
+ return __xfs_xattr_set(inode, name, value, size, flags, 0);
+}
+/* Handler for names under XATTR_USER_PREFIX. */
+static struct xattr_handler xfs_xattr_user_handler = {
+ .prefix = XATTR_USER_PREFIX,
+ .get = xfs_xattr_user_get,
+ .set = xfs_xattr_user_set,
+};
+
+/* trusted namespace: common getter called with ATTR_ROOT. */
+static int
+xfs_xattr_trusted_get(struct inode *inode, const char *name,
+ void *value, size_t size)
+{
+ return __xfs_xattr_get(inode, name, value, size, ATTR_ROOT);
+}
+/* trusted namespace: common setter called with ATTR_ROOT. */
+static int
+xfs_xattr_trusted_set(struct inode *inode, const char *name,
+ const void *value, size_t size, int flags)
+{
+ return __xfs_xattr_set(inode, name, value, size, flags, ATTR_ROOT);
+}
+/* Handler for names under XATTR_TRUSTED_PREFIX. */
+static struct xattr_handler xfs_xattr_trusted_handler = {
+ .prefix = XATTR_TRUSTED_PREFIX,
+ .get = xfs_xattr_trusted_get,
+ .set = xfs_xattr_trusted_set,
+};
+
+/* security namespace: common getter called with ATTR_SECURE. */
+static int
+xfs_xattr_secure_get(struct inode *inode, const char *name,
+ void *value, size_t size)
+{
+ return __xfs_xattr_get(inode, name, value, size, ATTR_SECURE);
+}
+/* security namespace: common setter called with ATTR_SECURE. */
+static int
+xfs_xattr_secure_set(struct inode *inode, const char *name,
+ const void *value, size_t size, int flags)
+{
+ return __xfs_xattr_set(inode, name, value, size, flags, ATTR_SECURE);
+}
+/* Handler for names under XATTR_SECURITY_PREFIX. */
+static struct xattr_handler xfs_xattr_security_handler = {
+ .prefix = XATTR_SECURITY_PREFIX,
+ .get = xfs_xattr_secure_get,
+ .set = xfs_xattr_secure_set,
+};
+
+/* NULL-terminated handler table; xfs_fs_fill_super() stores it in sb->s_xattr. */
+struct xattr_handler *xfs_xattr_handlers[] = {
+ &xfs_xattr_user_handler,
+ &xfs_xattr_trusted_handler,
+ &xfs_xattr_security_handler,
+ &xfs_xattr_system_handler,
+ NULL
+};
+/*
+ * Number of bytes the namespace prefix occupies in a listing, selected
+ * from the on-disk flags (XFS_ATTR_SECURE first, then XFS_ATTR_ROOT,
+ * else user).  sizeof() of the bare word counts its terminating NUL,
+ * which presumably stands in for the trailing dot of the matching
+ * XATTR_*_PREFIX string -- confirm against the prefix definitions.
+ */
+static unsigned int xfs_xattr_prefix_len(int flags)
+{
+ if (flags & XFS_ATTR_SECURE)
+ return sizeof("security");
+ else if (flags & XFS_ATTR_ROOT)
+ return sizeof("trusted");
+ else
+ return sizeof("user");
+}
+/*
+ * Prefix string for a listed attribute, taken from the handler of the
+ * namespace the flags select; same precedence as xfs_xattr_prefix_len().
+ */
+static const char *xfs_xattr_prefix(int flags)
+{
+ if (flags & XFS_ATTR_SECURE)
+ return xfs_xattr_security_handler.prefix;
+ else if (flags & XFS_ATTR_ROOT)
+ return xfs_xattr_trusted_handler.prefix;
+ else
+ return xfs_xattr_user_handler.prefix;
+}
+/*
+ * put_listent callback installed by xfs_vn_listxattr() when the caller
+ * supplied a buffer.  Appends prefix, name and a NUL terminator at offset
+ * context->count in context->alist and advances context->count.
+ * Returns 0 to carry on (also when an entry is skipped), or sets
+ * context->count to -1 and returns 1 when the entry does not fit.
+ * valuelen and value are not used here.
+ */
+static int
+xfs_xattr_put_listent(struct xfs_attr_list_context *context, int flags,
+ char *name, int namelen, int valuelen, char *value)
+{
+ unsigned int prefix_len = xfs_xattr_prefix_len(flags);
+ char *offset;
+ int arraytop;
+
+ ASSERT(context->count >= 0);
+
+ /*
+ * Only show root namespace entries if we are actually allowed to
+ * see them.
+ */
+ if ((flags & XFS_ATTR_ROOT) && !capable(CAP_SYS_ADMIN))
+ return 0;
+
+ arraytop = context->count + prefix_len + namelen + 1; /* end offset if this entry is added, +1 for the NUL */
+ if (arraytop > context->firstu) {
+ context->count = -1; /* insufficient space */
+ return 1;
+ }
+ offset = (char *)context->alist + context->count;
+ strncpy(offset, xfs_xattr_prefix(flags), prefix_len); /* at most prefix_len bytes; terminator is written below */
+ offset += prefix_len;
+ strncpy(offset, name, namelen); /* real name */
+ offset += namelen;
+ *offset = '\0';
+ context->count += prefix_len + namelen + 1;
+ return 0;
+}
+/*
+ * put_listent callback installed by xfs_vn_listxattr() when size is 0:
+ * nothing is copied, context->count just accumulates the bytes each
+ * entry would need (prefix, name and NUL).
+ * NOTE(review): unlike xfs_xattr_put_listent() there is no CAP_SYS_ADMIN
+ * filtering of XFS_ATTR_ROOT entries here, so the probed size can be
+ * larger than what the copying pass emits.
+ */
+static int
+xfs_xattr_put_listent_sizes(struct xfs_attr_list_context *context, int flags,
+ char *name, int namelen, int valuelen, char *value)
+{
+ context->count += xfs_xattr_prefix_len(flags) + namelen + 1;
+ return 0;
+}
+/*
+ * Account for, and if a buffer was supplied copy, one attribute name.
+ * len is added to *result unconditionally (the callers in this file pass
+ * strlen + 1).  With size == 0 nothing is copied and 0 is returned.
+ * If the new total exceeds size, -ERANGE is returned with *result already
+ * advanced; otherwise name is copied to data at the old *result offset.
+ */
+static int
+list_one_attr(const char *name, const size_t len, void *data,
+ size_t size, ssize_t *result)
+{
+ char *p = data + *result; /* destination taken before *result is advanced */
+
+ *result += len;
+ if (!size)
+ return 0;
+ if (*result > size)
+ return -ERANGE;
+
+ strcpy(p, name);
+ return 0;
+}
+/*
+ * List the extended attribute names of the inode behind dentry.
+ * With size == 0 only the number of bytes needed is computed; otherwise
+ * the names are written into data.  Returns the accumulated byte count,
+ * or -ERANGE when the names do not fit into size bytes.
+ */
+ssize_t
+xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size)
+{
+ struct xfs_attr_list_context context;
+ struct attrlist_cursor_kern cursor = { 0 };
+ struct inode *inode = dentry->d_inode;
+ int error;
+
+ /*
+ * First read the regular on-disk attributes.
+ */
+ memset(&context, 0, sizeof(context));
+ context.dp = XFS_I(inode);
+ context.cursor = &cursor;
+ context.resynch = 1;
+ context.alist = data;
+ context.bufsize = size;
+ context.firstu = context.bufsize; /* limit checked by xfs_xattr_put_listent() */
+
+ if (size)
+ context.put_listent = xfs_xattr_put_listent;
+ else
+ context.put_listent = xfs_xattr_put_listent_sizes;
+
+ xfs_attr_list_int(&context); /* NOTE(review): return value ignored, only context.count is examined */
+ if (context.count < 0) /* set to -1 by xfs_xattr_put_listent() when out of space */
+ return -ERANGE;
+
+ /*
+ * Then add the two synthetic ACL attributes.
+ */
+ if (xfs_acl_vhasacl_access(inode)) {
+ error = list_one_attr(POSIX_ACL_XATTR_ACCESS,
+ strlen(POSIX_ACL_XATTR_ACCESS) + 1,
+ data, size, &context.count);
+ if (error)
+ return error;
+ }
+
+ if (xfs_acl_vhasacl_default(inode)) {
+ error = list_one_attr(POSIX_ACL_XATTR_DEFAULT,
+ strlen(POSIX_ACL_XATTR_DEFAULT) + 1,
+ data, size, &context.count);
+ if (error)
+ return error;
+ }
+
+ return context.count;
+}
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 85df3288efd..fc9f3fb39b7 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -1435,8 +1435,7 @@ xfs_dqlock2(
/* ARGSUSED */
int
xfs_qm_dqpurge(
- xfs_dquot_t *dqp,
- uint flags)
+ xfs_dquot_t *dqp)
{
xfs_dqhash_t *thishash;
xfs_mount_t *mp = dqp->q_mount;
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h
index 5c371a92e3e..f7393bba4e9 100644
--- a/fs/xfs/quota/xfs_dquot.h
+++ b/fs/xfs/quota/xfs_dquot.h
@@ -164,7 +164,7 @@ extern void xfs_qm_dqprint(xfs_dquot_t *);
extern void xfs_qm_dqdestroy(xfs_dquot_t *);
extern int xfs_qm_dqflush(xfs_dquot_t *, uint);
-extern int xfs_qm_dqpurge(xfs_dquot_t *, uint);
+extern int xfs_qm_dqpurge(xfs_dquot_t *);
extern void xfs_qm_dqunpin_wait(xfs_dquot_t *);
extern int xfs_qm_dqlock_nowait(xfs_dquot_t *);
extern int xfs_qm_dqflock_nowait(xfs_dquot_t *);
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index 36e05ca7841..08d2fc89e6a 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -576,8 +576,8 @@ xfs_qm_qoffend_logitem_committed(
* xfs_trans_delete_ail() drops the AIL lock.
*/
xfs_trans_delete_ail(qfs->qql_item.li_mountp, (xfs_log_item_t *)qfs);
- kmem_free(qfs, sizeof(xfs_qoff_logitem_t));
- kmem_free(qfe, sizeof(xfs_qoff_logitem_t));
+ kmem_free(qfs);
+ kmem_free(qfe);
return (xfs_lsn_t)-1;
}
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index d31cce1165c..021934a3d45 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -192,8 +192,8 @@ xfs_qm_destroy(
xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i]));
}
- kmem_free(xqm->qm_usr_dqhtable, hsize * sizeof(xfs_dqhash_t));
- kmem_free(xqm->qm_grp_dqhtable, hsize * sizeof(xfs_dqhash_t));
+ kmem_free(xqm->qm_usr_dqhtable);
+ kmem_free(xqm->qm_grp_dqhtable);
xqm->qm_usr_dqhtable = NULL;
xqm->qm_grp_dqhtable = NULL;
xqm->qm_dqhashmask = 0;
@@ -201,7 +201,7 @@ xfs_qm_destroy(
#ifdef DEBUG
mutex_destroy(&qcheck_lock);
#endif
- kmem_free(xqm, sizeof(xfs_qm_t));
+ kmem_free(xqm);
}
/*
@@ -445,11 +445,11 @@ xfs_qm_unmount_quotas(
}
}
if (uqp) {
- XFS_PURGE_INODE(uqp);
+ IRELE(uqp);
mp->m_quotainfo->qi_uquotaip = NULL;
}
if (gqp) {
- XFS_PURGE_INODE(gqp);
+ IRELE(gqp);
mp->m_quotainfo->qi_gquotaip = NULL;
}
out:
@@ -631,7 +631,7 @@ xfs_qm_dqpurge_int(
* freelist in INACTIVE state.
*/
nextdqp = dqp->MPL_NEXT;
- nmisses += xfs_qm_dqpurge(dqp, flags);
+ nmisses += xfs_qm_dqpurge(dqp);
dqp = nextdqp;
}
xfs_qm_mplist_unlock(mp);
@@ -1134,7 +1134,7 @@ xfs_qm_init_quotainfo(
* and change the superblock accordingly.
*/
if ((error = xfs_qm_init_quotainos(mp))) {
- kmem_free(qinf, sizeof(xfs_quotainfo_t));
+ kmem_free(qinf);
mp->m_quotainfo = NULL;
return error;
}
@@ -1240,15 +1240,15 @@ xfs_qm_destroy_quotainfo(
xfs_qm_list_destroy(&qi->qi_dqlist);
if (qi->qi_uquotaip) {
- XFS_PURGE_INODE(qi->qi_uquotaip);
+ IRELE(qi->qi_uquotaip);
qi->qi_uquotaip = NULL; /* paranoia */
}
if (qi->qi_gquotaip) {
- XFS_PURGE_INODE(qi->qi_gquotaip);
+ IRELE(qi->qi_gquotaip);
qi->qi_gquotaip = NULL;
}
mutex_destroy(&qi->qi_quotaofflock);
- kmem_free(qi, sizeof(xfs_quotainfo_t));
+ kmem_free(qi);
mp->m_quotainfo = NULL;
}
@@ -1394,7 +1394,7 @@ xfs_qm_qino_alloc(
* locked exclusively and joined to the transaction already.
*/
ASSERT(xfs_isilocked(*ip, XFS_ILOCK_EXCL));
- VN_HOLD(XFS_ITOV((*ip)));
+ IHOLD(*ip);
/*
* Make the changes in the superblock, and log those too.
@@ -1623,7 +1623,7 @@ xfs_qm_dqiterate(
break;
} while (nmaps > 0);
- kmem_free(map, XFS_DQITER_MAP_SIZE * sizeof(*map));
+ kmem_free(map);
return error;
}
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 768a3b27d2b..adfb8723f65 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -362,11 +362,11 @@ xfs_qm_scall_quotaoff(
* if we don't need them anymore.
*/
if ((dqtype & XFS_QMOPT_UQUOTA) && XFS_QI_UQIP(mp)) {
- XFS_PURGE_INODE(XFS_QI_UQIP(mp));
+ IRELE(XFS_QI_UQIP(mp));
XFS_QI_UQIP(mp) = NULL;
}
if ((dqtype & (XFS_QMOPT_GQUOTA|XFS_QMOPT_PQUOTA)) && XFS_QI_GQIP(mp)) {
- XFS_PURGE_INODE(XFS_QI_GQIP(mp));
+ IRELE(XFS_QI_GQIP(mp));
XFS_QI_GQIP(mp) = NULL;
}
out_error:
@@ -1449,14 +1449,14 @@ xfs_qm_internalqcheck(
for (d = (xfs_dqtest_t *) h1->qh_next; d != NULL; ) {
xfs_dqtest_cmp(d);
e = (xfs_dqtest_t *) d->HL_NEXT;
- kmem_free(d, sizeof(xfs_dqtest_t));
+ kmem_free(d);
d = e;
}
h1 = &qmtest_gdqtab[i];
for (d = (xfs_dqtest_t *) h1->qh_next; d != NULL; ) {
xfs_dqtest_cmp(d);
e = (xfs_dqtest_t *) d->HL_NEXT;
- kmem_free(d, sizeof(xfs_dqtest_t));
+ kmem_free(d);
d = e;
}
}
@@ -1467,8 +1467,8 @@ xfs_qm_internalqcheck(
} else {
cmn_err(CE_DEBUG, "******** quotacheck successful! ********");
}
- kmem_free(qmtest_udqtab, qmtest_hashmask * sizeof(xfs_dqhash_t));
- kmem_free(qmtest_gdqtab, qmtest_hashmask * sizeof(xfs_dqhash_t));
+ kmem_free(qmtest_udqtab);
+ kmem_free(qmtest_gdqtab);
mutex_unlock(&qcheck_lock);
return (qmtest_nfails);
}
diff --git a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/quota/xfs_quota_priv.h
index 5e4a40b1c56..c4fcea600bc 100644
--- a/fs/xfs/quota/xfs_quota_priv.h
+++ b/fs/xfs/quota/xfs_quota_priv.h
@@ -158,9 +158,6 @@ for ((dqp) = (qlist)->qh_next; (dqp) != (xfs_dquot_t *)(qlist); \
#define XFS_IS_SUSER_DQUOT(dqp) \
(!((dqp)->q_core.d_id))
-#define XFS_PURGE_INODE(ip) \
- IRELE(ip);
-
#define DQFLAGTO_TYPESTR(d) (((d)->dq_flags & XFS_DQ_USER) ? "USR" : \
(((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : \
(((d)->dq_flags & XFS_DQ_PROJ) ? "PRJ":"???")))
diff --git a/fs/xfs/support/ktrace.c b/fs/xfs/support/ktrace.c
index 0b75d302508..a34ef05489b 100644
--- a/fs/xfs/support/ktrace.c
+++ b/fs/xfs/support/ktrace.c
@@ -89,7 +89,7 @@ ktrace_alloc(int nentries, unsigned int __nocast sleep)
if (sleep & KM_SLEEP)
panic("ktrace_alloc: NULL memory on KM_SLEEP request!");
- kmem_free(ktp, sizeof(*ktp));
+ kmem_free(ktp);
return NULL;
}
@@ -126,7 +126,7 @@ ktrace_free(ktrace_t *ktp)
} else {
entries_size = (int)(ktp->kt_nentries * sizeof(ktrace_entry_t));
- kmem_free(ktp->kt_entries, entries_size);
+ kmem_free(ktp->kt_entries);
}
kmem_zone_free(ktrace_hdr_zone, ktp);
diff --git a/fs/xfs/support/uuid.c b/fs/xfs/support/uuid.c
index 493a6ecf859..5830c040ea7 100644
--- a/fs/xfs/support/uuid.c
+++ b/fs/xfs/support/uuid.c
@@ -17,7 +17,7 @@
*/
#include <xfs.h>
-static mutex_t uuid_monitor;
+static DEFINE_MUTEX(uuid_monitor);
static int uuid_table_size;
static uuid_t *uuid_table;
@@ -132,9 +132,3 @@ uuid_table_remove(uuid_t *uuid)
ASSERT(i < uuid_table_size);
mutex_unlock(&uuid_monitor);
}
-
-void __init
-uuid_init(void)
-{
- mutex_init(&uuid_monitor);
-}
diff --git a/fs/xfs/support/uuid.h b/fs/xfs/support/uuid.h
index b6f5922199b..cff5b607d44 100644
--- a/fs/xfs/support/uuid.h
+++ b/fs/xfs/support/uuid.h
@@ -22,7 +22,6 @@ typedef struct {
unsigned char __u_bits[16];
} uuid_t;
-extern void uuid_init(void);
extern void uuid_create_nil(uuid_t *uuid);
extern int uuid_is_nil(uuid_t *uuid);
extern int uuid_equal(uuid_t *uuid1, uuid_t *uuid2);
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index ebee3a4f703..3e4648ad9cf 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -341,8 +341,7 @@ xfs_acl_iaccess(
/* If the file has no ACL return -1. */
rval = sizeof(xfs_acl_t);
- if (xfs_attr_fetch(ip, &acl_name, (char *)acl, &rval,
- ATTR_ROOT | ATTR_KERNACCESS)) {
+ if (xfs_attr_fetch(ip, &acl_name, (char *)acl, &rval, ATTR_ROOT)) {
_ACL_FREE(acl);
return -1;
}
@@ -720,7 +719,7 @@ xfs_acl_setmode(
xfs_acl_t *acl,
int *basicperms)
{
- bhv_vattr_t va;
+ struct iattr iattr;
xfs_acl_entry_t *ap;
xfs_acl_entry_t *gap = NULL;
int i, nomask = 1;
@@ -734,25 +733,25 @@ xfs_acl_setmode(
* Copy the u::, g::, o::, and m:: bits from the ACL into the
* mode. The m:: bits take precedence over the g:: bits.
*/
- va.va_mask = XFS_AT_MODE;
- va.va_mode = xfs_vtoi(vp)->i_d.di_mode;
- va.va_mode &= ~(S_IRWXU|S_IRWXG|S_IRWXO);
+ iattr.ia_valid = ATTR_MODE;
+ iattr.ia_mode = xfs_vtoi(vp)->i_d.di_mode;
+ iattr.ia_mode &= ~(S_IRWXU|S_IRWXG|S_IRWXO);
ap = acl->acl_entry;
for (i = 0; i < acl->acl_cnt; ++i) {
switch (ap->ae_tag) {
case ACL_USER_OBJ:
- va.va_mode |= ap->ae_perm << 6;
+ iattr.ia_mode |= ap->ae_perm << 6;
break;
case ACL_GROUP_OBJ:
gap = ap;
break;
case ACL_MASK: /* more than just standard modes */
nomask = 0;
- va.va_mode |= ap->ae_perm << 3;
+ iattr.ia_mode |= ap->ae_perm << 3;
*basicperms = 0;
break;
case ACL_OTHER:
- va.va_mode |= ap->ae_perm;
+ iattr.ia_mode |= ap->ae_perm;
break;
default: /* more than just standard modes */
*basicperms = 0;
@@ -763,9 +762,9 @@ xfs_acl_setmode(
/* Set the group bits from ACL_GROUP_OBJ if there's no ACL_MASK */
if (gap && nomask)
- va.va_mode |= gap->ae_perm << 3;
+ iattr.ia_mode |= gap->ae_perm << 3;
- return xfs_setattr(xfs_vtoi(vp), &va, 0, sys_cred);
+ return xfs_setattr(xfs_vtoi(vp), &iattr, 0, sys_cred);
}
/*
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 332a772461c..323ee94cf83 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -46,6 +46,8 @@ typedef struct xfs_acl {
#define SGI_ACL_FILE_SIZE (sizeof(SGI_ACL_FILE)-1)
#define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1)
+#define _ACL_TYPE_ACCESS 1
+#define _ACL_TYPE_DEFAULT 2
#ifdef CONFIG_XFS_POSIX_ACL
@@ -66,8 +68,6 @@ extern int xfs_acl_vset(bhv_vnode_t *, void *, size_t, int);
extern int xfs_acl_vget(bhv_vnode_t *, void *, size_t, int);
extern int xfs_acl_vremove(bhv_vnode_t *, int);
-#define _ACL_TYPE_ACCESS 1
-#define _ACL_TYPE_DEFAULT 2
#define _ACL_PERM_INVALID(perm) ((perm) & ~(ACL_READ|ACL_WRITE|ACL_EXECUTE))
#define _ACL_INHERIT(c,m,d) (xfs_acl_inherit(c,m,d))
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index df151a85918..78de80e3caa 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -16,8 +16,6 @@
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
-#include <linux/capability.h>
-
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_types.h"
@@ -57,11 +55,6 @@
* Provide the external interfaces to manage attribute lists.
*/
-#define ATTR_SYSCOUNT 2
-static struct attrnames posix_acl_access;
-static struct attrnames posix_acl_default;
-static struct attrnames *attr_system_names[ATTR_SYSCOUNT];
-
/*========================================================================
* Function prototypes for the kernel.
*========================================================================*/
@@ -116,6 +109,17 @@ xfs_attr_name_to_xname(
return 0;
}
+STATIC int
+xfs_inode_hasattr(
+ struct xfs_inode *ip)
+{
+ if (!XFS_IFORK_Q(ip) ||
+ (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
+ ip->i_d.di_anextents == 0))
+ return 0;
+ return 1;
+}
+
/*========================================================================
* Overall external interface routines.
*========================================================================*/
@@ -127,10 +131,8 @@ xfs_attr_fetch(xfs_inode_t *ip, struct xfs_name *name,
xfs_da_args_t args;
int error;
- if ((XFS_IFORK_Q(ip) == 0) ||
- (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
- ip->i_d.di_anextents == 0))
- return(ENOATTR);
+ if (!xfs_inode_hasattr(ip))
+ return ENOATTR;
/*
* Fill in the arg structure for this request.
@@ -148,11 +150,7 @@ xfs_attr_fetch(xfs_inode_t *ip, struct xfs_name *name,
/*
* Decide on what work routines to call based on the inode size.
*/
- if (XFS_IFORK_Q(ip) == 0 ||
- (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
- ip->i_d.di_anextents == 0)) {
- error = XFS_ERROR(ENOATTR);
- } else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
+ if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
error = xfs_attr_shortform_getvalue(&args);
} else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK)) {
error = xfs_attr_leaf_get(&args);
@@ -241,8 +239,7 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name,
args.firstblock = &firstblock;
args.flist = &flist;
args.whichfork = XFS_ATTR_FORK;
- args.addname = 1;
- args.oknoent = 1;
+ args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
/*
* Determine space new attribute will use, and if it would be
@@ -529,9 +526,7 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
/*
* Decide on what work routines to call based on the inode size.
*/
- if (XFS_IFORK_Q(dp) == 0 ||
- (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
- dp->i_d.di_anextents == 0)) {
+ if (!xfs_inode_hasattr(dp)) {
error = XFS_ERROR(ENOATTR);
goto out;
}
@@ -601,29 +596,33 @@ xfs_attr_remove(
return error;
xfs_ilock(dp, XFS_ILOCK_SHARED);
- if (XFS_IFORK_Q(dp) == 0 ||
- (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
- dp->i_d.di_anextents == 0)) {
+ if (!xfs_inode_hasattr(dp)) {
xfs_iunlock(dp, XFS_ILOCK_SHARED);
- return(XFS_ERROR(ENOATTR));
+ return XFS_ERROR(ENOATTR);
}
xfs_iunlock(dp, XFS_ILOCK_SHARED);
return xfs_attr_remove_int(dp, &xname, flags);
}
-STATIC int
+int
xfs_attr_list_int(xfs_attr_list_context_t *context)
{
int error;
xfs_inode_t *dp = context->dp;
+ XFS_STATS_INC(xs_attr_list);
+
+ if (XFS_FORCED_SHUTDOWN(dp->i_mount))
+ return EIO;
+
+ xfs_ilock(dp, XFS_ILOCK_SHARED);
+ xfs_attr_trace_l_c("syscall start", context);
+
/*
* Decide on what work routines to call based on the inode size.
*/
- if (XFS_IFORK_Q(dp) == 0 ||
- (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
- dp->i_d.di_anextents == 0)) {
+ if (!xfs_inode_hasattr(dp)) {
error = 0;
} else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
error = xfs_attr_shortform_list(context);
@@ -632,6 +631,10 @@ xfs_attr_list_int(xfs_attr_list_context_t *context)
} else {
error = xfs_attr_node_list(context);
}
+
+ xfs_iunlock(dp, XFS_ILOCK_SHARED);
+ xfs_attr_trace_l_c("syscall end", context);
+
return error;
}
@@ -648,74 +651,50 @@ xfs_attr_list_int(xfs_attr_list_context_t *context)
*/
/*ARGSUSED*/
STATIC int
-xfs_attr_put_listent(xfs_attr_list_context_t *context, attrnames_t *namesp,
+xfs_attr_put_listent(xfs_attr_list_context_t *context, int flags,
char *name, int namelen,
int valuelen, char *value)
{
+ struct attrlist *alist = (struct attrlist *)context->alist;
attrlist_ent_t *aep;
int arraytop;
ASSERT(!(context->flags & ATTR_KERNOVAL));
ASSERT(context->count >= 0);
ASSERT(context->count < (ATTR_MAX_VALUELEN/8));
- ASSERT(context->firstu >= sizeof(*context->alist));
+ ASSERT(context->firstu >= sizeof(*alist));
ASSERT(context->firstu <= context->bufsize);
- arraytop = sizeof(*context->alist) +
- context->count * sizeof(context->alist->al_offset[0]);
+ /*
+ * Only list entries in the right namespace.
+ */
+ if (((context->flags & ATTR_SECURE) == 0) !=
+ ((flags & XFS_ATTR_SECURE) == 0))
+ return 0;
+ if (((context->flags & ATTR_ROOT) == 0) !=
+ ((flags & XFS_ATTR_ROOT) == 0))
+ return 0;
+
+ arraytop = sizeof(*alist) +
+ context->count * sizeof(alist->al_offset[0]);
context->firstu -= ATTR_ENTSIZE(namelen);
if (context->firstu < arraytop) {
xfs_attr_trace_l_c("buffer full", context);
- context->alist->al_more = 1;
+ alist->al_more = 1;
context->seen_enough = 1;
return 1;
}
- aep = (attrlist_ent_t *)&(((char *)context->alist)[ context->firstu ]);
+ aep = (attrlist_ent_t *)&context->alist[context->firstu];
aep->a_valuelen = valuelen;
memcpy(aep->a_name, name, namelen);
- aep->a_name[ namelen ] = 0;
- context->alist->al_offset[ context->count++ ] = context->firstu;
- context->alist->al_count = context->count;
+ aep->a_name[namelen] = 0;
+ alist->al_offset[context->count++] = context->firstu;
+ alist->al_count = context->count;
xfs_attr_trace_l_c("add", context);
return 0;
}
-STATIC int
-xfs_attr_kern_list(xfs_attr_list_context_t *context, attrnames_t *namesp,
- char *name, int namelen,
- int valuelen, char *value)
-{
- char *offset;
- int arraytop;
-
- ASSERT(context->count >= 0);
-
- arraytop = context->count + namesp->attr_namelen + namelen + 1;
- if (arraytop > context->firstu) {
- context->count = -1; /* insufficient space */
- return 1;
- }
- offset = (char *)context->alist + context->count;
- strncpy(offset, namesp->attr_name, namesp->attr_namelen);
- offset += namesp->attr_namelen;
- strncpy(offset, name, namelen); /* real name */
- offset += namelen;
- *offset = '\0';
- context->count += namesp->attr_namelen + namelen + 1;
- return 0;
-}
-
-/*ARGSUSED*/
-STATIC int
-xfs_attr_kern_list_sizes(xfs_attr_list_context_t *context, attrnames_t *namesp,
- char *name, int namelen,
- int valuelen, char *value)
-{
- context->count += namesp->attr_namelen + namelen + 1;
- return 0;
-}
-
/*
* Generate a list of extended attribute names and optionally
* also value lengths. Positive return value follows the XFS
@@ -732,10 +711,9 @@ xfs_attr_list(
attrlist_cursor_kern_t *cursor)
{
xfs_attr_list_context_t context;
+ struct attrlist *alist;
int error;
- XFS_STATS_INC(xs_attr_list);
-
/*
* Validate the cursor.
*/
@@ -756,52 +734,23 @@ xfs_attr_list(
/*
* Initialize the output buffer.
*/
+ memset(&context, 0, sizeof(context));
context.dp = dp;
context.cursor = cursor;
- context.count = 0;
- context.dupcnt = 0;
context.resynch = 1;
context.flags = flags;
- context.seen_enough = 0;
- context.alist = (attrlist_t *)buffer;
- context.put_value = 0;
-
- if (flags & ATTR_KERNAMELS) {
- context.bufsize = bufsize;
- context.firstu = context.bufsize;
- if (flags & ATTR_KERNOVAL)
- context.put_listent = xfs_attr_kern_list_sizes;
- else
- context.put_listent = xfs_attr_kern_list;
- } else {
- context.bufsize = (bufsize & ~(sizeof(int)-1)); /* align */
- context.firstu = context.bufsize;
- context.alist->al_count = 0;
- context.alist->al_more = 0;
- context.alist->al_offset[0] = context.bufsize;
- context.put_listent = xfs_attr_put_listent;
- }
+ context.alist = buffer;
+ context.bufsize = (bufsize & ~(sizeof(int)-1)); /* align */
+ context.firstu = context.bufsize;
+ context.put_listent = xfs_attr_put_listent;
- if (XFS_FORCED_SHUTDOWN(dp->i_mount))
- return EIO;
-
- xfs_ilock(dp, XFS_ILOCK_SHARED);
- xfs_attr_trace_l_c("syscall start", &context);
+ alist = (struct attrlist *)context.alist;
+ alist->al_count = 0;
+ alist->al_more = 0;
+ alist->al_offset[0] = context.bufsize;
error = xfs_attr_list_int(&context);
-
- xfs_iunlock(dp, XFS_ILOCK_SHARED);
- xfs_attr_trace_l_c("syscall end", &context);
-
- if (context.flags & (ATTR_KERNOVAL|ATTR_KERNAMELS)) {
- /* must return negated buffer size or the error */
- if (context.count < 0)
- error = XFS_ERROR(ERANGE);
- else
- error = -context.count;
- } else
- ASSERT(error >= 0);
-
+ ASSERT(error >= 0);
return error;
}
@@ -816,12 +765,10 @@ xfs_attr_inactive(xfs_inode_t *dp)
ASSERT(! XFS_NOT_DQATTACHED(mp, dp));
xfs_ilock(dp, XFS_ILOCK_SHARED);
- if ((XFS_IFORK_Q(dp) == 0) ||
- (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) ||
- (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
- dp->i_d.di_anextents == 0)) {
+ if (!xfs_inode_hasattr(dp) ||
+ dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
xfs_iunlock(dp, XFS_ILOCK_SHARED);
- return(0);
+ return 0;
}
xfs_iunlock(dp, XFS_ILOCK_SHARED);
@@ -854,10 +801,8 @@ xfs_attr_inactive(xfs_inode_t *dp)
/*
* Decide on what work routines to call based on the inode size.
*/
- if ((XFS_IFORK_Q(dp) == 0) ||
- (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) ||
- (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
- dp->i_d.di_anextents == 0)) {
+ if (!xfs_inode_hasattr(dp) ||
+ dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
error = 0;
goto out;
}
@@ -974,7 +919,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
xfs_da_brelse(args->trans, bp);
return(retval);
}
- args->rename = 1; /* an atomic rename */
+ args->op_flags |= XFS_DA_OP_RENAME; /* an atomic rename */
args->blkno2 = args->blkno; /* set 2nd entry info*/
args->index2 = args->index;
args->rmtblkno2 = args->rmtblkno;
@@ -1054,7 +999,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
* so that one disappears and one appears atomically. Then we
* must remove the "old" attribute/value pair.
*/
- if (args->rename) {
+ if (args->op_flags & XFS_DA_OP_RENAME) {
/*
* In a separate transaction, set the incomplete flag on the
* "old" attr and clear the incomplete flag on the "new" attr.
@@ -1307,7 +1252,7 @@ restart:
} else if (retval == EEXIST) {
if (args->flags & ATTR_CREATE)
goto out;
- args->rename = 1; /* atomic rename op */
+ args->op_flags |= XFS_DA_OP_RENAME; /* atomic rename op */
args->blkno2 = args->blkno; /* set 2nd entry info*/
args->index2 = args->index;
args->rmtblkno2 = args->rmtblkno;
@@ -1425,7 +1370,7 @@ restart:
* so that one disappears and one appears atomically. Then we
* must remove the "old" attribute/value pair.
*/
- if (args->rename) {
+ if (args->op_flags & XFS_DA_OP_RENAME) {
/*
* In a separate transaction, set the incomplete flag on the
* "old" attr and clear the incomplete flag on the "new" attr.
@@ -2300,23 +2245,7 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
void
xfs_attr_trace_l_c(char *where, struct xfs_attr_list_context *context)
{
- xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_C, where,
- (__psunsigned_t)context->dp,
- (__psunsigned_t)context->cursor->hashval,
- (__psunsigned_t)context->cursor->blkno,
- (__psunsigned_t)context->cursor->offset,
- (__psunsigned_t)context->alist,
- (__psunsigned_t)context->bufsize,
- (__psunsigned_t)context->count,
- (__psunsigned_t)context->firstu,
- (__psunsigned_t)
- ((context->count > 0) &&
- !(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
- ? (ATTR_ENTRY(context->alist,
- context->count-1)->a_valuelen)
- : 0,
- (__psunsigned_t)context->dupcnt,
- (__psunsigned_t)context->flags,
+ xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_C, where, context,
(__psunsigned_t)NULL,
(__psunsigned_t)NULL,
(__psunsigned_t)NULL);
@@ -2329,23 +2258,7 @@ void
xfs_attr_trace_l_cn(char *where, struct xfs_attr_list_context *context,
struct xfs_da_intnode *node)
{
- xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CN, where,
- (__psunsigned_t)context->dp,
- (__psunsigned_t)context->cursor->hashval,
- (__psunsigned_t)context->cursor->blkno,
- (__psunsigned_t)context->cursor->offset,
- (__psunsigned_t)context->alist,
- (__psunsigned_t)context->bufsize,
- (__psunsigned_t)context->count,
- (__psunsigned_t)context->firstu,
- (__psunsigned_t)
- ((context->count > 0) &&
- !(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
- ? (ATTR_ENTRY(context->alist,
- context->count-1)->a_valuelen)
- : 0,
- (__psunsigned_t)context->dupcnt,
- (__psunsigned_t)context->flags,
+ xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CN, where, context,
(__psunsigned_t)be16_to_cpu(node->hdr.count),
(__psunsigned_t)be32_to_cpu(node->btree[0].hashval),
(__psunsigned_t)be32_to_cpu(node->btree[
@@ -2359,23 +2272,7 @@ void
xfs_attr_trace_l_cb(char *where, struct xfs_attr_list_context *context,
struct xfs_da_node_entry *btree)
{
- xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CB, where,
- (__psunsigned_t)context->dp,
- (__psunsigned_t)context->cursor->hashval,
- (__psunsigned_t)context->cursor->blkno,
- (__psunsigned_t)context->cursor->offset,
- (__psunsigned_t)context->alist,
- (__psunsigned_t)context->bufsize,
- (__psunsigned_t)context->count,
- (__psunsigned_t)context->firstu,
- (__psunsigned_t)
- ((context->count > 0) &&
- !(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
- ? (ATTR_ENTRY(context->alist,
- context->count-1)->a_valuelen)
- : 0,
- (__psunsigned_t)context->dupcnt,
- (__psunsigned_t)context->flags,
+ xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CB, where, context,
(__psunsigned_t)be32_to_cpu(btree->hashval),
(__psunsigned_t)be32_to_cpu(btree->before),
(__psunsigned_t)NULL);
@@ -2388,23 +2285,7 @@ void
xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context,
struct xfs_attr_leafblock *leaf)
{
- xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CL, where,
- (__psunsigned_t)context->dp,
- (__psunsigned_t)context->cursor->hashval,
- (__psunsigned_t)context->cursor->blkno,
- (__psunsigned_t)context->cursor->offset,
- (__psunsigned_t)context->alist,
- (__psunsigned_t)context->bufsize,
- (__psunsigned_t)context->count,
- (__psunsigned_t)context->firstu,
- (__psunsigned_t)
- ((context->count > 0) &&
- !(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
- ? (ATTR_ENTRY(context->alist,
- context->count-1)->a_valuelen)
- : 0,
- (__psunsigned_t)context->dupcnt,
- (__psunsigned_t)context->flags,
+ xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CL, where, context,
(__psunsigned_t)be16_to_cpu(leaf->hdr.count),
(__psunsigned_t)be32_to_cpu(leaf->entries[0].hashval),
(__psunsigned_t)be32_to_cpu(leaf->entries[
@@ -2417,329 +2298,24 @@ xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context,
*/
void
xfs_attr_trace_enter(int type, char *where,
- __psunsigned_t a2, __psunsigned_t a3,
- __psunsigned_t a4, __psunsigned_t a5,
- __psunsigned_t a6, __psunsigned_t a7,
- __psunsigned_t a8, __psunsigned_t a9,
- __psunsigned_t a10, __psunsigned_t a11,
- __psunsigned_t a12, __psunsigned_t a13,
- __psunsigned_t a14, __psunsigned_t a15)
+ struct xfs_attr_list_context *context,
+ __psunsigned_t a13, __psunsigned_t a14,
+ __psunsigned_t a15)
{
ASSERT(xfs_attr_trace_buf);
ktrace_enter(xfs_attr_trace_buf, (void *)((__psunsigned_t)type),
- (void *)where,
- (void *)a2, (void *)a3, (void *)a4,
- (void *)a5, (void *)a6, (void *)a7,
- (void *)a8, (void *)a9, (void *)a10,
- (void *)a11, (void *)a12, (void *)a13,
- (void *)a14, (void *)a15);
+ (void *)((__psunsigned_t)where),
+ (void *)((__psunsigned_t)context->dp),
+ (void *)((__psunsigned_t)context->cursor->hashval),
+ (void *)((__psunsigned_t)context->cursor->blkno),
+ (void *)((__psunsigned_t)context->cursor->offset),
+ (void *)((__psunsigned_t)context->alist),
+ (void *)((__psunsigned_t)context->bufsize),
+ (void *)((__psunsigned_t)context->count),
+ (void *)((__psunsigned_t)context->firstu),
+ NULL,
+ (void *)((__psunsigned_t)context->dupcnt),
+ (void *)((__psunsigned_t)context->flags),
+ (void *)a13, (void *)a14, (void *)a15);
}
#endif /* XFS_ATTR_TRACE */
-
-
-/*========================================================================
- * System (pseudo) namespace attribute interface routines.
- *========================================================================*/
-
-STATIC int
-posix_acl_access_set(
- bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
-{
- return xfs_acl_vset(vp, data, size, _ACL_TYPE_ACCESS);
-}
-
-STATIC int
-posix_acl_access_remove(
- bhv_vnode_t *vp, char *name, int xflags)
-{
- return xfs_acl_vremove(vp, _ACL_TYPE_ACCESS);
-}
-
-STATIC int
-posix_acl_access_get(
- bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
-{
- return xfs_acl_vget(vp, data, size, _ACL_TYPE_ACCESS);
-}
-
-STATIC int
-posix_acl_access_exists(
- bhv_vnode_t *vp)
-{
- return xfs_acl_vhasacl_access(vp);
-}
-
-STATIC int
-posix_acl_default_set(
- bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
-{
- return xfs_acl_vset(vp, data, size, _ACL_TYPE_DEFAULT);
-}
-
-STATIC int
-posix_acl_default_get(
- bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
-{
- return xfs_acl_vget(vp, data, size, _ACL_TYPE_DEFAULT);
-}
-
-STATIC int
-posix_acl_default_remove(
- bhv_vnode_t *vp, char *name, int xflags)
-{
- return xfs_acl_vremove(vp, _ACL_TYPE_DEFAULT);
-}
-
-STATIC int
-posix_acl_default_exists(
- bhv_vnode_t *vp)
-{
- return xfs_acl_vhasacl_default(vp);
-}
-
-static struct attrnames posix_acl_access = {
- .attr_name = "posix_acl_access",
- .attr_namelen = sizeof("posix_acl_access") - 1,
- .attr_get = posix_acl_access_get,
- .attr_set = posix_acl_access_set,
- .attr_remove = posix_acl_access_remove,
- .attr_exists = posix_acl_access_exists,
-};
-
-static struct attrnames posix_acl_default = {
- .attr_name = "posix_acl_default",
- .attr_namelen = sizeof("posix_acl_default") - 1,
- .attr_get = posix_acl_default_get,
- .attr_set = posix_acl_default_set,
- .attr_remove = posix_acl_default_remove,
- .attr_exists = posix_acl_default_exists,
-};
-
-static struct attrnames *attr_system_names[] =
- { &posix_acl_access, &posix_acl_default };
-
-
-/*========================================================================
- * Namespace-prefix-style attribute name interface routines.
- *========================================================================*/
-
-STATIC int
-attr_generic_set(
- bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
-{
- return -xfs_attr_set(xfs_vtoi(vp), name, data, size, xflags);
-}
-
-STATIC int
-attr_generic_get(
- bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
-{
- int error, asize = size;
-
- error = xfs_attr_get(xfs_vtoi(vp), name, data, &asize, xflags);
- if (!error)
- return asize;
- return -error;
-}
-
-STATIC int
-attr_generic_remove(
- bhv_vnode_t *vp, char *name, int xflags)
-{
- return -xfs_attr_remove(xfs_vtoi(vp), name, xflags);
-}
-
-STATIC int
-attr_generic_listadd(
- attrnames_t *prefix,
- attrnames_t *namesp,
- void *data,
- size_t size,
- ssize_t *result)
-{
- char *p = data + *result;
-
- *result += prefix->attr_namelen;
- *result += namesp->attr_namelen + 1;
- if (!size)
- return 0;
- if (*result > size)
- return -ERANGE;
- strcpy(p, prefix->attr_name);
- p += prefix->attr_namelen;
- strcpy(p, namesp->attr_name);
- p += namesp->attr_namelen + 1;
- return 0;
-}
-
-STATIC int
-attr_system_list(
- bhv_vnode_t *vp,
- void *data,
- size_t size,
- ssize_t *result)
-{
- attrnames_t *namesp;
- int i, error = 0;
-
- for (i = 0; i < ATTR_SYSCOUNT; i++) {
- namesp = attr_system_names[i];
- if (!namesp->attr_exists || !namesp->attr_exists(vp))
- continue;
- error = attr_generic_listadd(&attr_system, namesp,
- data, size, result);
- if (error)
- break;
- }
- return error;
-}
-
-int
-attr_generic_list(
- bhv_vnode_t *vp, void *data, size_t size, int xflags, ssize_t *result)
-{
- attrlist_cursor_kern_t cursor = { 0 };
- int error;
-
- error = xfs_attr_list(xfs_vtoi(vp), data, size, xflags, &cursor);
- if (error > 0)
- return -error;
- *result = -error;
- return attr_system_list(vp, data, size, result);
-}
-
-attrnames_t *
-attr_lookup_namespace(
- char *name,
- struct attrnames **names,
- int nnames)
-{
- int i;
-
- for (i = 0; i < nnames; i++)
- if (!strncmp(name, names[i]->attr_name, names[i]->attr_namelen))
- return names[i];
- return NULL;
-}
-
-/*
- * Some checks to prevent people abusing EAs to get over quota:
- * - Don't allow modifying user EAs on devices/symlinks;
- * - Don't allow modifying user EAs if sticky bit set;
- */
-STATIC int
-attr_user_capable(
- bhv_vnode_t *vp,
- cred_t *cred)
-{
- struct inode *inode = vn_to_inode(vp);
-
- if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
- return -EPERM;
- if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode) &&
- !capable(CAP_SYS_ADMIN))
- return -EPERM;
- if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) &&
- (current_fsuid(cred) != inode->i_uid) && !capable(CAP_FOWNER))
- return -EPERM;
- return 0;
-}
-
-STATIC int
-attr_trusted_capable(
- bhv_vnode_t *vp,
- cred_t *cred)
-{
- struct inode *inode = vn_to_inode(vp);
-
- if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
- return -EPERM;
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
- return 0;
-}
-
-STATIC int
-attr_system_set(
- bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
-{
- attrnames_t *namesp;
- int error;
-
- if (xflags & ATTR_CREATE)
- return -EINVAL;
-
- namesp = attr_lookup_namespace(name, attr_system_names, ATTR_SYSCOUNT);
- if (!namesp)
- return -EOPNOTSUPP;
- error = namesp->attr_set(vp, name, data, size, xflags);
- if (!error)
- error = vn_revalidate(vp);
- return error;
-}
-
-STATIC int
-attr_system_get(
- bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
-{
- attrnames_t *namesp;
-
- namesp = attr_lookup_namespace(name, attr_system_names, ATTR_SYSCOUNT);
- if (!namesp)
- return -EOPNOTSUPP;
- return namesp->attr_get(vp, name, data, size, xflags);
-}
-
-STATIC int
-attr_system_remove(
- bhv_vnode_t *vp, char *name, int xflags)
-{
- attrnames_t *namesp;
-
- namesp = attr_lookup_namespace(name, attr_system_names, ATTR_SYSCOUNT);
- if (!namesp)
- return -EOPNOTSUPP;
- return namesp->attr_remove(vp, name, xflags);
-}
-
-struct attrnames attr_system = {
- .attr_name = "system.",
- .attr_namelen = sizeof("system.") - 1,
- .attr_flag = ATTR_SYSTEM,
- .attr_get = attr_system_get,
- .attr_set = attr_system_set,
- .attr_remove = attr_system_remove,
- .attr_capable = (attrcapable_t)fs_noerr,
-};
-
-struct attrnames attr_trusted = {
- .attr_name = "trusted.",
- .attr_namelen = sizeof("trusted.") - 1,
- .attr_flag = ATTR_ROOT,
- .attr_get = attr_generic_get,
- .attr_set = attr_generic_set,
- .attr_remove = attr_generic_remove,
- .attr_capable = attr_trusted_capable,
-};
-
-struct attrnames attr_secure = {
- .attr_name = "security.",
- .attr_namelen = sizeof("security.") - 1,
- .attr_flag = ATTR_SECURE,
- .attr_get = attr_generic_get,
- .attr_set = attr_generic_set,
- .attr_remove = attr_generic_remove,
- .attr_capable = (attrcapable_t)fs_noerr,
-};
-
-struct attrnames attr_user = {
- .attr_name = "user.",
- .attr_namelen = sizeof("user.") - 1,
- .attr_get = attr_generic_get,
- .attr_set = attr_generic_set,
- .attr_remove = attr_generic_remove,
- .attr_capable = attr_user_capable,
-};
-
-struct attrnames *attr_namespaces[] =
- { &attr_system, &attr_trusted, &attr_secure, &attr_user };
diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h
index 6cfc9384fe3..8b2d31c19e4 100644
--- a/fs/xfs/xfs_attr.h
+++ b/fs/xfs/xfs_attr.h
@@ -18,9 +18,11 @@
#ifndef __XFS_ATTR_H__
#define __XFS_ATTR_H__
+struct xfs_inode;
+struct xfs_da_args;
+struct xfs_attr_list_context;
+
/*
- * xfs_attr.h
- *
* Large attribute lists are structured around Btrees where all the data
* elements are in the leaf nodes. Attribute names are hashed into an int,
* then that int is used as the index into the Btree. Since the hashval
@@ -35,35 +37,6 @@
* External interfaces
*========================================================================*/
-struct cred;
-struct xfs_attr_list_context;
-
-typedef int (*attrset_t)(bhv_vnode_t *, char *, void *, size_t, int);
-typedef int (*attrget_t)(bhv_vnode_t *, char *, void *, size_t, int);
-typedef int (*attrremove_t)(bhv_vnode_t *, char *, int);
-typedef int (*attrexists_t)(bhv_vnode_t *);
-typedef int (*attrcapable_t)(bhv_vnode_t *, struct cred *);
-
-typedef struct attrnames {
- char * attr_name;
- unsigned int attr_namelen;
- unsigned int attr_flag;
- attrget_t attr_get;
- attrset_t attr_set;
- attrremove_t attr_remove;
- attrexists_t attr_exists;
- attrcapable_t attr_capable;
-} attrnames_t;
-
-#define ATTR_NAMECOUNT 4
-extern struct attrnames attr_user;
-extern struct attrnames attr_secure;
-extern struct attrnames attr_system;
-extern struct attrnames attr_trusted;
-extern struct attrnames *attr_namespaces[ATTR_NAMECOUNT];
-
-extern attrnames_t *attr_lookup_namespace(char *, attrnames_t **, int);
-extern int attr_generic_list(bhv_vnode_t *, void *, size_t, int, ssize_t *);
#define ATTR_DONTFOLLOW 0x0001 /* -- unused, from IRIX -- */
#define ATTR_ROOT 0x0002 /* use attrs in root (trusted) namespace */
@@ -71,16 +44,9 @@ extern int attr_generic_list(bhv_vnode_t *, void *, size_t, int, ssize_t *);
#define ATTR_SECURE 0x0008 /* use attrs in security namespace */
#define ATTR_CREATE 0x0010 /* pure create: fail if attr already exists */
#define ATTR_REPLACE 0x0020 /* pure set: fail if attr does not exist */
-#define ATTR_SYSTEM 0x0100 /* use attrs in system (pseudo) namespace */
-#define ATTR_KERNACCESS 0x0400 /* [kernel] iaccess, inode held io-locked */
#define ATTR_KERNOTIME 0x1000 /* [kernel] don't update inode timestamps */
#define ATTR_KERNOVAL 0x2000 /* [kernel] get attr size only, not value */
-#define ATTR_KERNAMELS 0x4000 /* [kernel] list attr names (simple list) */
-
-#define ATTR_KERNORMALS 0x0800 /* [kernel] normal attr list: user+secure */
-#define ATTR_KERNROOTLS 0x8000 /* [kernel] include root in the attr list */
-#define ATTR_KERNFULLS (ATTR_KERNORMALS|ATTR_KERNROOTLS)
/*
* The maximum size (into the kernel or returned from the kernel) of an
@@ -119,22 +85,6 @@ typedef struct attrlist_ent { /* data from attr_list() */
&((char *)buffer)[ ((attrlist_t *)(buffer))->al_offset[index] ])
/*
- * Multi-attribute operation vector.
- */
-typedef struct attr_multiop {
- int am_opcode; /* operation to perform (ATTR_OP_GET, etc.) */
- int am_error; /* [out arg] result of this sub-op (an errno) */
- char *am_attrname; /* attribute name to work with */
- char *am_attrvalue; /* [in/out arg] attribute value (raw bytes) */
- int am_length; /* [in/out arg] length of value */
- int am_flags; /* bitwise OR of attr API flags defined above */
-} attr_multiop_t;
-
-#define ATTR_OP_GET 1 /* return the indicated attr's value */
-#define ATTR_OP_SET 2 /* set/create the indicated attr/value pair */
-#define ATTR_OP_REMOVE 3 /* remove the indicated attr */
-
-/*
* Kernel-internal version of the attrlist cursor.
*/
typedef struct attrlist_cursor_kern {
@@ -148,20 +98,40 @@ typedef struct attrlist_cursor_kern {
/*========================================================================
- * Function prototypes for the kernel.
+ * Structure used to pass context around among the routines.
*========================================================================*/
-struct xfs_inode;
-struct attrlist_cursor_kern;
-struct xfs_da_args;
+
+typedef int (*put_listent_func_t)(struct xfs_attr_list_context *, int,
+ char *, int, int, char *);
+
+typedef struct xfs_attr_list_context {
+ struct xfs_inode *dp; /* inode */
+ struct attrlist_cursor_kern *cursor; /* position in list */
+ char *alist; /* output buffer */
+ int seen_enough; /* T/F: seen enough of list? */
+ ssize_t count; /* num used entries */
+ int dupcnt; /* count dup hashvals seen */
+ int bufsize; /* total buffer size */
+ int firstu; /* first used byte in buffer */
+ int flags; /* from VOP call */
+ int resynch; /* T/F: resynch with cursor */
+ int put_value; /* T/F: need value for listent */
+ put_listent_func_t put_listent; /* list output fmt function */
+ int index; /* index into output buffer */
+} xfs_attr_list_context_t;
+
+
+/*========================================================================
+ * Function prototypes for the kernel.
+ *========================================================================*/
/*
* Overall external interface routines.
*/
int xfs_attr_inactive(struct xfs_inode *dp);
-
-int xfs_attr_shortform_getvalue(struct xfs_da_args *);
int xfs_attr_fetch(struct xfs_inode *, struct xfs_name *, char *, int *, int);
int xfs_attr_rmtval_get(struct xfs_da_args *args);
+int xfs_attr_list_int(struct xfs_attr_list_context *);
#endif /* __XFS_ATTR_H__ */
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 303d41e4217..23ef5d7c87e 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -94,13 +94,6 @@ STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index);
* Namespace helper routines
*========================================================================*/
-STATIC_INLINE attrnames_t *
-xfs_attr_flags_namesp(int flags)
-{
- return ((flags & XFS_ATTR_SECURE) ? &attr_secure:
- ((flags & XFS_ATTR_ROOT) ? &attr_trusted : &attr_user));
-}
-
/*
* If namespace bits don't match return 0.
* If all match then return 1.
@@ -111,25 +104,6 @@ xfs_attr_namesp_match(int arg_flags, int ondisk_flags)
return XFS_ATTR_NSP_ONDISK(ondisk_flags) == XFS_ATTR_NSP_ARGS_TO_ONDISK(arg_flags);
}
-/*
- * If namespace bits don't match and we don't have an override for it
- * then return 0.
- * If all match or are overridable then return 1.
- */
-STATIC_INLINE int
-xfs_attr_namesp_match_overrides(int arg_flags, int ondisk_flags)
-{
- if (((arg_flags & ATTR_SECURE) == 0) !=
- ((ondisk_flags & XFS_ATTR_SECURE) == 0) &&
- !(arg_flags & ATTR_KERNORMALS))
- return 0;
- if (((arg_flags & ATTR_ROOT) == 0) !=
- ((ondisk_flags & XFS_ATTR_ROOT) == 0) &&
- !(arg_flags & ATTR_KERNROOTLS))
- return 0;
- return 1;
-}
-
/*========================================================================
* External routines when attribute fork size < XFS_LITINO(mp).
@@ -369,9 +343,10 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
* Fix up the start offset of the attribute fork
*/
totsize -= size;
- if (totsize == sizeof(xfs_attr_sf_hdr_t) && !args->addname &&
- (mp->m_flags & XFS_MOUNT_ATTR2) &&
- (dp->i_d.di_format != XFS_DINODE_FMT_BTREE)) {
+ if (totsize == sizeof(xfs_attr_sf_hdr_t) &&
+ !(args->op_flags & XFS_DA_OP_ADDNAME) &&
+ (mp->m_flags & XFS_MOUNT_ATTR2) &&
+ (dp->i_d.di_format != XFS_DINODE_FMT_BTREE)) {
/*
* Last attribute now removed, revert to original
* inode format making all literal area available
@@ -389,9 +364,10 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
xfs_idata_realloc(dp, -size, XFS_ATTR_FORK);
dp->i_d.di_forkoff = xfs_attr_shortform_bytesfit(dp, totsize);
ASSERT(dp->i_d.di_forkoff);
- ASSERT(totsize > sizeof(xfs_attr_sf_hdr_t) || args->addname ||
- !(mp->m_flags & XFS_MOUNT_ATTR2) ||
- dp->i_d.di_format == XFS_DINODE_FMT_BTREE);
+ ASSERT(totsize > sizeof(xfs_attr_sf_hdr_t) ||
+ (args->op_flags & XFS_DA_OP_ADDNAME) ||
+ !(mp->m_flags & XFS_MOUNT_ATTR2) ||
+ dp->i_d.di_format == XFS_DINODE_FMT_BTREE);
dp->i_afp->if_ext_max =
XFS_IFORK_ASIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
dp->i_df.if_ext_max =
@@ -531,7 +507,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
nargs.total = args->total;
nargs.whichfork = XFS_ATTR_FORK;
nargs.trans = args->trans;
- nargs.oknoent = 1;
+ nargs.op_flags = XFS_DA_OP_OKNOENT;
sfe = &sf->list[0];
for (i = 0; i < sf->hdr.count; i++) {
@@ -555,7 +531,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
out:
if(bp)
xfs_da_buf_done(bp);
- kmem_free(tmpbuffer, size);
+ kmem_free(tmpbuffer);
return(error);
}
@@ -624,15 +600,8 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
(XFS_ISRESET_CURSOR(cursor) &&
(dp->i_afp->if_bytes + sf->hdr.count * 16) < context->bufsize)) {
for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) {
- attrnames_t *namesp;
-
- if (!xfs_attr_namesp_match_overrides(context->flags, sfe->flags)) {
- sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
- continue;
- }
- namesp = xfs_attr_flags_namesp(sfe->flags);
error = context->put_listent(context,
- namesp,
+ sfe->flags,
(char *)sfe->nameval,
(int)sfe->namelen,
(int)sfe->valuelen,
@@ -676,13 +645,10 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
XFS_ERRLEVEL_LOW,
context->dp->i_mount, sfe);
xfs_attr_trace_l_c("sf corrupted", context);
- kmem_free(sbuf, sbsize);
+ kmem_free(sbuf);
return XFS_ERROR(EFSCORRUPTED);
}
- if (!xfs_attr_namesp_match_overrides(context->flags, sfe->flags)) {
- sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
- continue;
- }
+
sbp->entno = i;
sbp->hash = xfs_da_hashname((char *)sfe->nameval, sfe->namelen);
sbp->name = (char *)sfe->nameval;
@@ -717,7 +683,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
}
}
if (i == nsbuf) {
- kmem_free(sbuf, sbsize);
+ kmem_free(sbuf);
xfs_attr_trace_l_c("blk end", context);
return(0);
}
@@ -726,16 +692,12 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
* Loop putting entries into the user buffer.
*/
for ( ; i < nsbuf; i++, sbp++) {
- attrnames_t *namesp;
-
- namesp = xfs_attr_flags_namesp(sbp->flags);
-
if (cursor->hashval != sbp->hash) {
cursor->hashval = sbp->hash;
cursor->offset = 0;
}
error = context->put_listent(context,
- namesp,
+ sbp->flags,
sbp->name,
sbp->namelen,
sbp->valuelen,
@@ -747,7 +709,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
cursor->offset++;
}
- kmem_free(sbuf, sbsize);
+ kmem_free(sbuf);
xfs_attr_trace_l_c("sf E-O-F", context);
return(0);
}
@@ -853,7 +815,7 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff)
nargs.total = args->total;
nargs.whichfork = XFS_ATTR_FORK;
nargs.trans = args->trans;
- nargs.oknoent = 1;
+ nargs.op_flags = XFS_DA_OP_OKNOENT;
entry = &leaf->entries[0];
for (i = 0; i < be16_to_cpu(leaf->hdr.count); entry++, i++) {
if (entry->flags & XFS_ATTR_INCOMPLETE)
@@ -873,7 +835,7 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff)
error = 0;
out:
- kmem_free(tmpbuffer, XFS_LBSIZE(dp->i_mount));
+ kmem_free(tmpbuffer);
return(error);
}
@@ -1155,7 +1117,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
entry->hashval = cpu_to_be32(args->hashval);
entry->flags = tmp ? XFS_ATTR_LOCAL : 0;
entry->flags |= XFS_ATTR_NSP_ARGS_TO_ONDISK(args->flags);
- if (args->rename) {
+ if (args->op_flags & XFS_DA_OP_RENAME) {
entry->flags |= XFS_ATTR_INCOMPLETE;
if ((args->blkno2 == args->blkno) &&
(args->index2 <= args->index)) {
@@ -1271,7 +1233,7 @@ xfs_attr_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *bp)
be16_to_cpu(hdr_s->count), mp);
xfs_da_log_buf(trans, bp, 0, XFS_LBSIZE(mp) - 1);
- kmem_free(tmpbuffer, XFS_LBSIZE(mp));
+ kmem_free(tmpbuffer);
}
/*
@@ -1921,7 +1883,7 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
be16_to_cpu(drop_hdr->count), mp);
}
memcpy((char *)save_leaf, (char *)tmp_leaf, state->blocksize);
- kmem_free(tmpbuffer, state->blocksize);
+ kmem_free(tmpbuffer);
}
xfs_da_log_buf(state->args->trans, save_blk->bp, 0,
@@ -2400,8 +2362,6 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
*/
retval = 0;
for ( ; (i < be16_to_cpu(leaf->hdr.count)); entry++, i++) {
- attrnames_t *namesp;
-
if (be32_to_cpu(entry->hashval) != cursor->hashval) {
cursor->hashval = be32_to_cpu(entry->hashval);
cursor->offset = 0;
@@ -2409,17 +2369,13 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
if (entry->flags & XFS_ATTR_INCOMPLETE)
continue; /* skip incomplete entries */
- if (!xfs_attr_namesp_match_overrides(context->flags, entry->flags))
- continue;
-
- namesp = xfs_attr_flags_namesp(entry->flags);
if (entry->flags & XFS_ATTR_LOCAL) {
xfs_attr_leaf_name_local_t *name_loc =
XFS_ATTR_LEAF_NAME_LOCAL(leaf, i);
retval = context->put_listent(context,
- namesp,
+ entry->flags,
(char *)name_loc->nameval,
(int)name_loc->namelen,
be16_to_cpu(name_loc->valuelen),
@@ -2446,16 +2402,15 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
if (retval)
return retval;
retval = context->put_listent(context,
- namesp,
+ entry->flags,
(char *)name_rmt->name,
(int)name_rmt->namelen,
valuelen,
(char*)args.value);
- kmem_free(args.value, valuelen);
- }
- else {
+ kmem_free(args.value);
+ } else {
retval = context->put_listent(context,
- namesp,
+ entry->flags,
(char *)name_rmt->name,
(int)name_rmt->namelen,
valuelen,
@@ -2954,7 +2909,7 @@ xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp)
error = tmp; /* save only the 1st errno */
}
- kmem_free((xfs_caddr_t)list, size);
+ kmem_free((xfs_caddr_t)list);
return(error);
}
diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h
index 040f732ce1e..5ecf437b782 100644
--- a/fs/xfs/xfs_attr_leaf.h
+++ b/fs/xfs/xfs_attr_leaf.h
@@ -30,7 +30,7 @@
struct attrlist;
struct attrlist_cursor_kern;
-struct attrnames;
+struct xfs_attr_list_context;
struct xfs_dabuf;
struct xfs_da_args;
struct xfs_da_state;
@@ -204,33 +204,6 @@ static inline int xfs_attr_leaf_entsize_local_max(int bsize)
return (((bsize) >> 1) + ((bsize) >> 2));
}
-
-/*========================================================================
- * Structure used to pass context around among the routines.
- *========================================================================*/
-
-
-struct xfs_attr_list_context;
-
-typedef int (*put_listent_func_t)(struct xfs_attr_list_context *, struct attrnames *,
- char *, int, int, char *);
-
-typedef struct xfs_attr_list_context {
- struct xfs_inode *dp; /* inode */
- struct attrlist_cursor_kern *cursor; /* position in list */
- struct attrlist *alist; /* output buffer */
- int seen_enough; /* T/F: seen enough of list? */
- int count; /* num used entries */
- int dupcnt; /* count dup hashvals seen */
- int bufsize; /* total buffer size */
- int firstu; /* first used byte in buffer */
- int flags; /* from VOP call */
- int resynch; /* T/F: resynch with cursor */
- int put_value; /* T/F: need value for listent */
- put_listent_func_t put_listent; /* list output fmt function */
- int index; /* index into output buffer */
-} xfs_attr_list_context_t;
-
/*
* Used to keep a list of "remote value" extents when unlinking an inode.
*/
diff --git a/fs/xfs/xfs_attr_sf.h b/fs/xfs/xfs_attr_sf.h
index f67f917803b..ea22839caed 100644
--- a/fs/xfs/xfs_attr_sf.h
+++ b/fs/xfs/xfs_attr_sf.h
@@ -97,13 +97,9 @@ void xfs_attr_trace_l_cb(char *where, struct xfs_attr_list_context *context,
void xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context,
struct xfs_attr_leafblock *leaf);
void xfs_attr_trace_enter(int type, char *where,
- __psunsigned_t a2, __psunsigned_t a3,
- __psunsigned_t a4, __psunsigned_t a5,
- __psunsigned_t a6, __psunsigned_t a7,
- __psunsigned_t a8, __psunsigned_t a9,
- __psunsigned_t a10, __psunsigned_t a11,
- __psunsigned_t a12, __psunsigned_t a13,
- __psunsigned_t a14, __psunsigned_t a15);
+ struct xfs_attr_list_context *context,
+ __psunsigned_t a13, __psunsigned_t a14,
+ __psunsigned_t a15);
#else
#define xfs_attr_trace_l_c(w,c)
#define xfs_attr_trace_l_cn(w,c,n)
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 53c259f5a5a..3c4beb3a432 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -428,7 +428,8 @@ xfs_bmap_add_attrfork_btree(
cur->bc_private.b.firstblock = *firstblock;
if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat)))
goto error0;
- ASSERT(stat == 1); /* must be at least one entry */
+ /* must be at least one entry */
+ XFS_WANT_CORRUPTED_GOTO(stat == 1, error0);
if ((error = xfs_bmbt_newroot(cur, flags, &stat)))
goto error0;
if (stat == 0) {
@@ -816,13 +817,13 @@ xfs_bmap_add_extent_delay_real(
RIGHT.br_startblock,
RIGHT.br_blockcount, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_delete(cur, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_decrement(cur, 0, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
LEFT.br_startblock,
LEFT.br_blockcount +
@@ -860,7 +861,7 @@ xfs_bmap_add_extent_delay_real(
LEFT.br_startblock, LEFT.br_blockcount,
&i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
LEFT.br_startblock,
LEFT.br_blockcount +
@@ -895,7 +896,7 @@ xfs_bmap_add_extent_delay_real(
RIGHT.br_startblock,
RIGHT.br_blockcount, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
new->br_startblock,
PREV.br_blockcount +
@@ -928,11 +929,11 @@ xfs_bmap_add_extent_delay_real(
new->br_startblock, new->br_blockcount,
&i)))
goto done;
- ASSERT(i == 0);
+ XFS_WANT_CORRUPTED_GOTO(i == 0, done);
cur->bc_rec.b.br_state = XFS_EXT_NORM;
if ((error = xfs_bmbt_insert(cur, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
}
*dnew = 0;
/* DELTA: The in-core extent described by new changed type. */
@@ -963,7 +964,7 @@ xfs_bmap_add_extent_delay_real(
LEFT.br_startblock, LEFT.br_blockcount,
&i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
LEFT.br_startblock,
LEFT.br_blockcount +
@@ -1004,11 +1005,11 @@ xfs_bmap_add_extent_delay_real(
new->br_startblock, new->br_blockcount,
&i)))
goto done;
- ASSERT(i == 0);
+ XFS_WANT_CORRUPTED_GOTO(i == 0, done);
cur->bc_rec.b.br_state = XFS_EXT_NORM;
if ((error = xfs_bmbt_insert(cur, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
}
if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
ip->i_d.di_nextents > ip->i_df.if_ext_max) {
@@ -1054,7 +1055,7 @@ xfs_bmap_add_extent_delay_real(
RIGHT.br_startblock,
RIGHT.br_blockcount, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_update(cur, new->br_startoff,
new->br_startblock,
new->br_blockcount +
@@ -1094,11 +1095,11 @@ xfs_bmap_add_extent_delay_real(
new->br_startblock, new->br_blockcount,
&i)))
goto done;
- ASSERT(i == 0);
+ XFS_WANT_CORRUPTED_GOTO(i == 0, done);
cur->bc_rec.b.br_state = XFS_EXT_NORM;
if ((error = xfs_bmbt_insert(cur, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
}
if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
ip->i_d.di_nextents > ip->i_df.if_ext_max) {
@@ -1149,11 +1150,11 @@ xfs_bmap_add_extent_delay_real(
new->br_startblock, new->br_blockcount,
&i)))
goto done;
- ASSERT(i == 0);
+ XFS_WANT_CORRUPTED_GOTO(i == 0, done);
cur->bc_rec.b.br_state = XFS_EXT_NORM;
if ((error = xfs_bmbt_insert(cur, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
}
if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
ip->i_d.di_nextents > ip->i_df.if_ext_max) {
@@ -1377,19 +1378,19 @@ xfs_bmap_add_extent_unwritten_real(
RIGHT.br_startblock,
RIGHT.br_blockcount, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_delete(cur, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_decrement(cur, 0, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_delete(cur, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_decrement(cur, 0, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
LEFT.br_startblock,
LEFT.br_blockcount + PREV.br_blockcount +
@@ -1426,13 +1427,13 @@ xfs_bmap_add_extent_unwritten_real(
PREV.br_startblock, PREV.br_blockcount,
&i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_delete(cur, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_decrement(cur, 0, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
LEFT.br_startblock,
LEFT.br_blockcount + PREV.br_blockcount,
@@ -1469,13 +1470,13 @@ xfs_bmap_add_extent_unwritten_real(
RIGHT.br_startblock,
RIGHT.br_blockcount, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_delete(cur, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_decrement(cur, 0, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_update(cur, new->br_startoff,
new->br_startblock,
new->br_blockcount + RIGHT.br_blockcount,
@@ -1508,7 +1509,7 @@ xfs_bmap_add_extent_unwritten_real(
new->br_startblock, new->br_blockcount,
&i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_update(cur, new->br_startoff,
new->br_startblock, new->br_blockcount,
newext)))
@@ -1549,7 +1550,7 @@ xfs_bmap_add_extent_unwritten_real(
PREV.br_startblock, PREV.br_blockcount,
&i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_update(cur,
PREV.br_startoff + new->br_blockcount,
PREV.br_startblock + new->br_blockcount,
@@ -1596,7 +1597,7 @@ xfs_bmap_add_extent_unwritten_real(
PREV.br_startblock, PREV.br_blockcount,
&i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_update(cur,
PREV.br_startoff + new->br_blockcount,
PREV.br_startblock + new->br_blockcount,
@@ -1606,7 +1607,7 @@ xfs_bmap_add_extent_unwritten_real(
cur->bc_rec.b = *new;
if ((error = xfs_bmbt_insert(cur, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
}
/* DELTA: One in-core extent is split in two. */
temp = PREV.br_startoff;
@@ -1640,7 +1641,7 @@ xfs_bmap_add_extent_unwritten_real(
PREV.br_startblock,
PREV.br_blockcount, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
PREV.br_startblock,
PREV.br_blockcount - new->br_blockcount,
@@ -1682,7 +1683,7 @@ xfs_bmap_add_extent_unwritten_real(
PREV.br_startblock, PREV.br_blockcount,
&i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
PREV.br_startblock,
PREV.br_blockcount - new->br_blockcount,
@@ -1692,11 +1693,11 @@ xfs_bmap_add_extent_unwritten_real(
new->br_startblock, new->br_blockcount,
&i)))
goto done;
- ASSERT(i == 0);
+ XFS_WANT_CORRUPTED_GOTO(i == 0, done);
cur->bc_rec.b.br_state = XFS_EXT_NORM;
if ((error = xfs_bmbt_insert(cur, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
}
/* DELTA: One in-core extent is split in two. */
temp = PREV.br_startoff;
@@ -1732,27 +1733,34 @@ xfs_bmap_add_extent_unwritten_real(
PREV.br_startblock, PREV.br_blockcount,
&i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
/* new right extent - oldext */
if ((error = xfs_bmbt_update(cur, r[1].br_startoff,
r[1].br_startblock, r[1].br_blockcount,
r[1].br_state)))
goto done;
/* new left extent - oldext */
- PREV.br_blockcount =
- new->br_startoff - PREV.br_startoff;
cur->bc_rec.b = PREV;
+ cur->bc_rec.b.br_blockcount =
+ new->br_startoff - PREV.br_startoff;
if ((error = xfs_bmbt_insert(cur, &i)))
goto done;
- ASSERT(i == 1);
- if ((error = xfs_bmbt_increment(cur, 0, &i)))
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+ /*
+ * Reset the cursor to the position of the new extent
+ * we are about to insert as we can't trust it after
+ * the previous insert.
+ */
+ if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
+ new->br_startblock, new->br_blockcount,
+ &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 0, done);
/* new middle extent - newext */
- cur->bc_rec.b = *new;
+ cur->bc_rec.b.br_state = new->br_state;
if ((error = xfs_bmbt_insert(cur, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
}
/* DELTA: One in-core extent is split in three. */
temp = PREV.br_startoff;
@@ -2097,13 +2105,13 @@ xfs_bmap_add_extent_hole_real(
right.br_startblock,
right.br_blockcount, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_delete(cur, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_decrement(cur, 0, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_update(cur, left.br_startoff,
left.br_startblock,
left.br_blockcount +
@@ -2139,7 +2147,7 @@ xfs_bmap_add_extent_hole_real(
left.br_startblock,
left.br_blockcount, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_update(cur, left.br_startoff,
left.br_startblock,
left.br_blockcount +
@@ -2174,7 +2182,7 @@ xfs_bmap_add_extent_hole_real(
right.br_startblock,
right.br_blockcount, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_update(cur, new->br_startoff,
new->br_startblock,
new->br_blockcount +
@@ -2208,11 +2216,11 @@ xfs_bmap_add_extent_hole_real(
new->br_startblock,
new->br_blockcount, &i)))
goto done;
- ASSERT(i == 0);
+ XFS_WANT_CORRUPTED_GOTO(i == 0, done);
cur->bc_rec.b.br_state = new->br_state;
if ((error = xfs_bmbt_insert(cur, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
}
/* DELTA: A new extent was added in a hole. */
temp = new->br_startoff;
@@ -3131,7 +3139,7 @@ xfs_bmap_del_extent(
got.br_startblock, got.br_blockcount,
&i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
}
da_old = da_new = 0;
} else {
@@ -3164,7 +3172,7 @@ xfs_bmap_del_extent(
}
if ((error = xfs_bmbt_delete(cur, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
break;
case 2:
@@ -3268,7 +3276,7 @@ xfs_bmap_del_extent(
got.br_startblock,
temp, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
/*
* Update the btree record back
* to the original value.
@@ -3289,7 +3297,7 @@ xfs_bmap_del_extent(
error = XFS_ERROR(ENOSPC);
goto done;
}
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
} else
flags |= XFS_ILOG_FEXT(whichfork);
XFS_IFORK_NEXT_SET(ip, whichfork,
@@ -5970,7 +5978,7 @@ unlock_and_return:
xfs_iunlock_map_shared(ip, lock);
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
- kmem_free(map, subnex * sizeof(*map));
+ kmem_free(map);
return error;
}
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 6ff70cda451..9f3e3a836d1 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -54,12 +54,23 @@ typedef struct xfs_bmap_free_item
/*
* Header for free extent list.
+ *
+ * xbf_low is used by the allocator to activate the lowspace algorithm -
+ * when free space is running low the extent allocator may choose to
+ * allocate an extent from an AG without leaving sufficient space for
+ * a btree split when inserting the new extent. In this case the allocator
+ * will enable the lowspace algorithm which is supposed to allow further
+ * allocations (such as btree splits and newroots) to allocate from
+ * sequential AGs. In order to avoid locking AGs out of order the lowspace
+ * algorithm will start searching for free space from AG 0. If the correct
+ * transaction reservations have been made then this algorithm will eventually
+ * find all the space it needs.
*/
typedef struct xfs_bmap_free
{
xfs_bmap_free_item_t *xbf_first; /* list of to-be-free extents */
int xbf_count; /* count of items on list */
- int xbf_low; /* kludge: alloc in low mode */
+ int xbf_low; /* alloc in low mode */
} xfs_bmap_free_t;
#define XFS_BMAP_MAX_NMAP 4
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 4f0e849d973..23efad29a5c 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -1493,12 +1493,27 @@ xfs_bmbt_split(
left = XFS_BUF_TO_BMBT_BLOCK(lbp);
args.fsbno = cur->bc_private.b.firstblock;
args.firstblock = args.fsbno;
+ args.minleft = 0;
if (args.fsbno == NULLFSBLOCK) {
args.fsbno = lbno;
args.type = XFS_ALLOCTYPE_START_BNO;
- } else
+ /*
+ * Make sure there is sufficient room left in the AG to
+ * complete a full tree split for an extent insert. If
+ * we are converting the middle part of an extent then
+ * we may need space for two tree splits.
+ *
+ * We are relying on the caller to make the correct block
+ * reservation for this operation to succeed. If the
+ * reservation amount is insufficient then we may fail a
+ * block allocation here and corrupt the filesystem.
+ */
+ args.minleft = xfs_trans_get_block_res(args.tp);
+ } else if (cur->bc_private.b.flist->xbf_low)
+ args.type = XFS_ALLOCTYPE_START_BNO;
+ else
args.type = XFS_ALLOCTYPE_NEAR_BNO;
- args.mod = args.minleft = args.alignment = args.total = args.isfl =
+ args.mod = args.alignment = args.total = args.isfl =
args.userdata = args.minalignslop = 0;
args.minlen = args.maxlen = args.prod = 1;
args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL;
@@ -1510,6 +1525,21 @@ xfs_bmbt_split(
XFS_BMBT_TRACE_CURSOR(cur, ERROR);
return error;
}
+ if (args.fsbno == NULLFSBLOCK && args.minleft) {
+ /*
+ * Could not find an AG with enough free space to satisfy
+ * a full btree split. Try again without minleft and if
+ * successful activate the lowspace algorithm.
+ */
+ args.fsbno = 0;
+ args.type = XFS_ALLOCTYPE_FIRST_AG;
+ args.minleft = 0;
+ if ((error = xfs_alloc_vextent(&args))) {
+ XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+ return error;
+ }
+ cur->bc_private.b.flist->xbf_low = 1;
+ }
if (args.fsbno == NULLFSBLOCK) {
XFS_BMBT_TRACE_CURSOR(cur, EXIT);
*stat = 0;
@@ -2029,22 +2059,8 @@ xfs_bmbt_increment(
* Insert the current record at the point referenced by cur.
*
* A multi-level split of the tree on insert will invalidate the original
- * cursor. It appears, however, that some callers assume that the cursor is
- * always valid. Hence if we do a multi-level split we need to revalidate the
- * cursor.
- *
- * When a split occurs, we will see a new cursor returned. Use that as a
- * trigger to determine if we need to revalidate the original cursor. If we get
- * a split, then use the original irec to lookup up the path of the record we
- * just inserted.
- *
- * Note that the fact that the btree root is in the inode means that we can
- * have the level of the tree change without a "split" occurring at the root
- * level. What happens is that the root is migrated to an allocated block and
- * the inode root is pointed to it. This means a single split can change the
- * level of the tree (level 2 -> level 3) and invalidate the old cursor. Hence
- * the level change should be accounted as a split so as to correctly trigger a
- * revalidation of the old cursor.
+ * cursor. All callers of this function should assume that the cursor is
+ * no longer valid and revalidate it.
*/
int /* error */
xfs_bmbt_insert(
@@ -2057,14 +2073,11 @@ xfs_bmbt_insert(
xfs_fsblock_t nbno;
xfs_btree_cur_t *ncur;
xfs_bmbt_rec_t nrec;
- xfs_bmbt_irec_t oirec; /* original irec */
xfs_btree_cur_t *pcur;
- int splits = 0;
XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
level = 0;
nbno = NULLFSBLOCK;
- oirec = cur->bc_rec.b;
xfs_bmbt_disk_set_all(&nrec, &cur->bc_rec.b);
ncur = NULL;
pcur = cur;
@@ -2073,13 +2086,11 @@ xfs_bmbt_insert(
&i))) {
if (pcur != cur)
xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
- goto error0;
+ XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+ return error;
}
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
if (pcur != cur && (ncur || nbno == NULLFSBLOCK)) {
- /* allocating a new root is effectively a split */
- if (cur->bc_nlevels != pcur->bc_nlevels)
- splits++;
cur->bc_nlevels = pcur->bc_nlevels;
cur->bc_private.b.allocated +=
pcur->bc_private.b.allocated;
@@ -2093,21 +2104,10 @@ xfs_bmbt_insert(
xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
}
if (ncur) {
- splits++;
pcur = ncur;
ncur = NULL;
}
} while (nbno != NULLFSBLOCK);
-
- if (splits > 1) {
- /* revalidate the old cursor as we had a multi-level split */
- error = xfs_bmbt_lookup_eq(cur, oirec.br_startoff,
- oirec.br_startblock, oirec.br_blockcount, &i);
- if (error)
- goto error0;
- ASSERT(i == 1);
- }
-
XFS_BMBT_TRACE_CURSOR(cur, EXIT);
*stat = i;
return 0;
@@ -2254,7 +2254,9 @@ xfs_bmbt_newroot(
#endif
args.fsbno = be64_to_cpu(*pp);
args.type = XFS_ALLOCTYPE_START_BNO;
- } else
+ } else if (cur->bc_private.b.flist->xbf_low)
+ args.type = XFS_ALLOCTYPE_START_BNO;
+ else
args.type = XFS_ALLOCTYPE_NEAR_BNO;
if ((error = xfs_alloc_vextent(&args))) {
XFS_BMBT_TRACE_CURSOR(cur, ERROR);
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 53a71c62025..d86ca2c03a7 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -889,9 +889,9 @@ xfs_buf_item_relse(
}
#ifdef XFS_TRANS_DEBUG
- kmem_free(bip->bli_orig, XFS_BUF_COUNT(bp));
+ kmem_free(bip->bli_orig);
bip->bli_orig = NULL;
- kmem_free(bip->bli_logged, XFS_BUF_COUNT(bp) / NBBY);
+ kmem_free(bip->bli_logged);
bip->bli_logged = NULL;
#endif /* XFS_TRANS_DEBUG */
@@ -1138,9 +1138,9 @@ xfs_buf_iodone(
xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip);
#ifdef XFS_TRANS_DEBUG
- kmem_free(bip->bli_orig, XFS_BUF_COUNT(bp));
+ kmem_free(bip->bli_orig);
bip->bli_orig = NULL;
- kmem_free(bip->bli_logged, XFS_BUF_COUNT(bp) / NBBY);
+ kmem_free(bip->bli_logged);
bip->bli_logged = NULL;
#endif /* XFS_TRANS_DEBUG */
diff --git a/fs/xfs/xfs_clnt.h b/fs/xfs/xfs_clnt.h
index d5d1e60ee22..d2ce5dd70d8 100644
--- a/fs/xfs/xfs_clnt.h
+++ b/fs/xfs/xfs_clnt.h
@@ -78,6 +78,7 @@ struct xfs_mount_args {
#define XFSMNT_IOSIZE 0x00002000 /* optimize for I/O size */
#define XFSMNT_OSYNCISOSYNC 0x00004000 /* o_sync is REALLY o_sync */
/* (osyncisdsync is default) */
+#define XFSMNT_NOATTR2 0x00008000 /* turn off ATTR2 EA format */
#define XFSMNT_32BITINODES 0x00200000 /* restrict inodes to 32
* bits of address space */
#define XFSMNT_GQUOTA 0x00400000 /* group quota accounting */
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 021a8f7e563..9e561a9cefc 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -1431,7 +1431,7 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
}
if (level < 0) {
*result = XFS_ERROR(ENOENT); /* we're out of our tree */
- ASSERT(args->oknoent);
+ ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
return(0);
}
@@ -1530,6 +1530,28 @@ xfs_da_hashname(const uchar_t *name, int namelen)
}
}
+enum xfs_dacmp
+xfs_da_compname(
+ struct xfs_da_args *args,
+ const char *name,
+ int len)
+{
+ return (args->namelen == len && memcmp(args->name, name, len) == 0) ?
+ XFS_CMP_EXACT : XFS_CMP_DIFFERENT;
+}
+
+static xfs_dahash_t
+xfs_default_hashname(
+ struct xfs_name *name)
+{
+ return xfs_da_hashname(name->name, name->len);
+}
+
+const struct xfs_nameops xfs_default_nameops = {
+ .hashname = xfs_default_hashname,
+ .compname = xfs_da_compname
+};
+
/*
* Add a block to the btree ahead of the file.
* Return the new block number to the caller.
@@ -1598,7 +1620,7 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
args->firstblock, args->total,
&mapp[mapi], &nmap, args->flist,
NULL))) {
- kmem_free(mapp, sizeof(*mapp) * count);
+ kmem_free(mapp);
return error;
}
if (nmap < 1)
@@ -1620,11 +1642,11 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
bno + count) {
if (mapp != &map)
- kmem_free(mapp, sizeof(*mapp) * count);
+ kmem_free(mapp);
return XFS_ERROR(ENOSPC);
}
if (mapp != &map)
- kmem_free(mapp, sizeof(*mapp) * count);
+ kmem_free(mapp);
*new_blkno = (xfs_dablk_t)bno;
return 0;
}
@@ -2090,10 +2112,10 @@ xfs_da_do_buf(
}
}
if (bplist) {
- kmem_free(bplist, sizeof(*bplist) * nmap);
+ kmem_free(bplist);
}
if (mapp != &map) {
- kmem_free(mapp, sizeof(*mapp) * nfsb);
+ kmem_free(mapp);
}
if (bpp)
*bpp = rbp;
@@ -2102,11 +2124,11 @@ exit1:
if (bplist) {
for (i = 0; i < nbplist; i++)
xfs_trans_brelse(trans, bplist[i]);
- kmem_free(bplist, sizeof(*bplist) * nmap);
+ kmem_free(bplist);
}
exit0:
if (mapp != &map)
- kmem_free(mapp, sizeof(*mapp) * nfsb);
+ kmem_free(mapp);
if (bpp)
*bpp = NULL;
return error;
@@ -2218,7 +2240,7 @@ xfs_da_state_free(xfs_da_state_t *state)
#ifdef XFS_DABUF_DEBUG
xfs_dabuf_t *xfs_dabuf_global_list;
-spinlock_t xfs_dabuf_global_lock;
+static DEFINE_SPINLOCK(xfs_dabuf_global_lock);
#endif
/*
@@ -2315,7 +2337,7 @@ xfs_da_buf_done(xfs_dabuf_t *dabuf)
if (dabuf->dirty)
xfs_da_buf_clean(dabuf);
if (dabuf->nbuf > 1)
- kmem_free(dabuf->data, BBTOB(dabuf->bbcount));
+ kmem_free(dabuf->data);
#ifdef XFS_DABUF_DEBUG
{
spin_lock(&xfs_dabuf_global_lock);
@@ -2332,7 +2354,7 @@ xfs_da_buf_done(xfs_dabuf_t *dabuf)
if (dabuf->nbuf == 1)
kmem_zone_free(xfs_dabuf_zone, dabuf);
else
- kmem_free(dabuf, XFS_DA_BUF_SIZE(dabuf->nbuf));
+ kmem_free(dabuf);
}
/*
@@ -2403,7 +2425,7 @@ xfs_da_brelse(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
for (i = 0; i < nbuf; i++)
xfs_trans_brelse(tp, bplist[i]);
if (bplist != &bp)
- kmem_free(bplist, nbuf * sizeof(*bplist));
+ kmem_free(bplist);
}
/*
@@ -2429,7 +2451,7 @@ xfs_da_binval(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
for (i = 0; i < nbuf; i++)
xfs_trans_binval(tp, bplist[i]);
if (bplist != &bp)
- kmem_free(bplist, nbuf * sizeof(*bplist));
+ kmem_free(bplist);
}
/*
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h
index 7facf86f74f..8be0b00ede9 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h
@@ -99,6 +99,15 @@ typedef struct xfs_da_node_entry xfs_da_node_entry_t;
*========================================================================*/
/*
+ * Search comparison results
+ */
+enum xfs_dacmp {
+ XFS_CMP_DIFFERENT, /* names are completely different */
+ XFS_CMP_EXACT, /* names are exactly the same */
+ XFS_CMP_CASE /* names are same but differ in case */
+};
+
+/*
* Structure to ease passing around component names.
*/
typedef struct xfs_da_args {
@@ -123,13 +132,20 @@ typedef struct xfs_da_args {
int index2; /* index of 2nd attr in blk */
xfs_dablk_t rmtblkno2; /* remote attr value starting blkno */
int rmtblkcnt2; /* remote attr value block count */
- unsigned char justcheck; /* T/F: check for ok with no space */
- unsigned char rename; /* T/F: this is an atomic rename op */
- unsigned char addname; /* T/F: this is an add operation */
- unsigned char oknoent; /* T/F: ok to return ENOENT, else die */
+ int op_flags; /* operation flags */
+ enum xfs_dacmp cmpresult; /* name compare result for lookups */
} xfs_da_args_t;
/*
+ * Operation flags:
+ */
+#define XFS_DA_OP_JUSTCHECK 0x0001 /* check for ok with no space */
+#define XFS_DA_OP_RENAME 0x0002 /* this is an atomic rename op */
+#define XFS_DA_OP_ADDNAME 0x0004 /* this is an add operation */
+#define XFS_DA_OP_OKNOENT 0x0008 /* lookup/add op, ENOENT ok, else die */
+#define XFS_DA_OP_CILOOKUP 0x0010 /* lookup to return CI name if found */
+
+/*
* Structure to describe buffer(s) for a block.
* This is needed in the directory version 2 format case, when
* multiple non-contiguous fsblocks might be needed to cover one
@@ -201,6 +217,14 @@ typedef struct xfs_da_state {
(uint)(XFS_DA_LOGOFF(BASE, ADDR)), \
(uint)(XFS_DA_LOGOFF(BASE, ADDR)+(SIZE)-1)
+/*
+ * Name ops for directory and/or attr name operations
+ */
+struct xfs_nameops {
+ xfs_dahash_t (*hashname)(struct xfs_name *);
+ enum xfs_dacmp (*compname)(struct xfs_da_args *, const char *, int);
+};
+
#ifdef __KERNEL__
/*========================================================================
@@ -249,6 +273,10 @@ int xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
xfs_dabuf_t *dead_buf);
uint xfs_da_hashname(const uchar_t *name_string, int name_length);
+enum xfs_dacmp xfs_da_compname(struct xfs_da_args *args,
+ const char *name, int len);
+
+
xfs_da_state_t *xfs_da_state_alloc(void);
void xfs_da_state_free(xfs_da_state_t *state);
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 5f3647cb988..2211e885ef2 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -116,7 +116,7 @@ xfs_swapext(
out_put_file:
fput(file);
out_free_sxp:
- kmem_free(sxp, sizeof(xfs_swapext_t));
+ kmem_free(sxp);
out:
return error;
}
@@ -381,6 +381,6 @@ xfs_swap_extents(
xfs_iunlock(tip, lock_flags);
}
if (tempifp != NULL)
- kmem_free(tempifp, sizeof(xfs_ifork_t));
+ kmem_free(tempifp);
return error;
}
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index 7cb26529766..80e0dc51361 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -46,6 +46,54 @@
struct xfs_name xfs_name_dotdot = {"..", 2};
+extern const struct xfs_nameops xfs_default_nameops;
+
+/*
+ * ASCII case-insensitive (ie. A-Z) support for directories that was
+ * used in IRIX.
+ */
+STATIC xfs_dahash_t
+xfs_ascii_ci_hashname(
+ struct xfs_name *name)
+{
+ xfs_dahash_t hash;
+ int i;
+
+ for (i = 0, hash = 0; i < name->len; i++)
+ hash = tolower(name->name[i]) ^ rol32(hash, 7);
+
+ return hash;
+}
+
+STATIC enum xfs_dacmp
+xfs_ascii_ci_compname(
+ struct xfs_da_args *args,
+ const char *name,
+ int len)
+{
+ enum xfs_dacmp result;
+ int i;
+
+ if (args->namelen != len)
+ return XFS_CMP_DIFFERENT;
+
+ result = XFS_CMP_EXACT;
+ for (i = 0; i < len; i++) {
+ if (args->name[i] == name[i])
+ continue;
+ if (tolower(args->name[i]) != tolower(name[i]))
+ return XFS_CMP_DIFFERENT;
+ result = XFS_CMP_CASE;
+ }
+
+ return result;
+}
+
+static struct xfs_nameops xfs_ascii_ci_nameops = {
+ .hashname = xfs_ascii_ci_hashname,
+ .compname = xfs_ascii_ci_compname,
+};
+
void
xfs_dir_mount(
xfs_mount_t *mp)
@@ -65,6 +113,10 @@ xfs_dir_mount(
(mp->m_dirblksize - (uint)sizeof(xfs_da_node_hdr_t)) /
(uint)sizeof(xfs_da_node_entry_t);
mp->m_dir_magicpct = (mp->m_dirblksize * 37) / 100;
+ if (xfs_sb_version_hasasciici(&mp->m_sb))
+ mp->m_dirnameops = &xfs_ascii_ci_nameops;
+ else
+ mp->m_dirnameops = &xfs_default_nameops;
}
/*
@@ -162,9 +214,10 @@ xfs_dir_createname(
return rval;
XFS_STATS_INC(xs_dir_create);
+ memset(&args, 0, sizeof(xfs_da_args_t));
args.name = name->name;
args.namelen = name->len;
- args.hashval = xfs_da_hashname(name->name, name->len);
+ args.hashval = dp->i_mount->m_dirnameops->hashname(name);
args.inumber = inum;
args.dp = dp;
args.firstblock = first;
@@ -172,8 +225,7 @@ xfs_dir_createname(
args.total = total;
args.whichfork = XFS_DATA_FORK;
args.trans = tp;
- args.justcheck = 0;
- args.addname = args.oknoent = 1;
+ args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
rval = xfs_dir2_sf_addname(&args);
@@ -191,14 +243,43 @@ xfs_dir_createname(
}
/*
+ * If doing a CI lookup and case-insensitive match, dup actual name into
+ * args.value. Return EEXIST for success (ie. name found) or an error.
+ */
+int
+xfs_dir_cilookup_result(
+ struct xfs_da_args *args,
+ const char *name,
+ int len)
+{
+ if (args->cmpresult == XFS_CMP_DIFFERENT)
+ return ENOENT;
+ if (args->cmpresult != XFS_CMP_CASE ||
+ !(args->op_flags & XFS_DA_OP_CILOOKUP))
+ return EEXIST;
+
+ args->value = kmem_alloc(len, KM_MAYFAIL);
+ if (!args->value)
+ return ENOMEM;
+
+ memcpy(args->value, name, len);
+ args->valuelen = len;
+ return EEXIST;
+}
+
+/*
* Lookup a name in a directory, give back the inode number.
+ * If ci_name is not NULL, returns the actual name in ci_name if it differs
+ * from name, or ci_name->name is set to NULL for an exact match.
*/
+
int
xfs_dir_lookup(
xfs_trans_t *tp,
xfs_inode_t *dp,
struct xfs_name *name,
- xfs_ino_t *inum) /* out: inode number */
+ xfs_ino_t *inum, /* out: inode number */
+ struct xfs_name *ci_name) /* out: actual name if CI match */
{
xfs_da_args_t args;
int rval;
@@ -206,15 +287,17 @@ xfs_dir_lookup(
ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
XFS_STATS_INC(xs_dir_lookup);
- memset(&args, 0, sizeof(xfs_da_args_t));
+ memset(&args, 0, sizeof(xfs_da_args_t));
args.name = name->name;
args.namelen = name->len;
- args.hashval = xfs_da_hashname(name->name, name->len);
+ args.hashval = dp->i_mount->m_dirnameops->hashname(name);
args.dp = dp;
args.whichfork = XFS_DATA_FORK;
args.trans = tp;
- args.oknoent = 1;
+ args.op_flags = XFS_DA_OP_OKNOENT;
+ if (ci_name)
+ args.op_flags |= XFS_DA_OP_CILOOKUP;
if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
rval = xfs_dir2_sf_lookup(&args);
@@ -230,8 +313,13 @@ xfs_dir_lookup(
rval = xfs_dir2_node_lookup(&args);
if (rval == EEXIST)
rval = 0;
- if (rval == 0)
+ if (!rval) {
*inum = args.inumber;
+ if (ci_name) {
+ ci_name->name = args.value;
+ ci_name->len = args.valuelen;
+ }
+ }
return rval;
}
@@ -255,9 +343,10 @@ xfs_dir_removename(
ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
XFS_STATS_INC(xs_dir_remove);
+ memset(&args, 0, sizeof(xfs_da_args_t));
args.name = name->name;
args.namelen = name->len;
- args.hashval = xfs_da_hashname(name->name, name->len);
+ args.hashval = dp->i_mount->m_dirnameops->hashname(name);
args.inumber = ino;
args.dp = dp;
args.firstblock = first;
@@ -265,7 +354,6 @@ xfs_dir_removename(
args.total = total;
args.whichfork = XFS_DATA_FORK;
args.trans = tp;
- args.justcheck = args.addname = args.oknoent = 0;
if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
rval = xfs_dir2_sf_removename(&args);
@@ -338,9 +426,10 @@ xfs_dir_replace(
if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum)))
return rval;
+ memset(&args, 0, sizeof(xfs_da_args_t));
args.name = name->name;
args.namelen = name->len;
- args.hashval = xfs_da_hashname(name->name, name->len);
+ args.hashval = dp->i_mount->m_dirnameops->hashname(name);
args.inumber = inum;
args.dp = dp;
args.firstblock = first;
@@ -348,7 +437,6 @@ xfs_dir_replace(
args.total = total;
args.whichfork = XFS_DATA_FORK;
args.trans = tp;
- args.justcheck = args.addname = args.oknoent = 0;
if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
rval = xfs_dir2_sf_replace(&args);
@@ -384,15 +472,16 @@ xfs_dir_canenter(
return 0;
ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
- memset(&args, 0, sizeof(xfs_da_args_t));
+ memset(&args, 0, sizeof(xfs_da_args_t));
args.name = name->name;
args.namelen = name->len;
- args.hashval = xfs_da_hashname(name->name, name->len);
+ args.hashval = dp->i_mount->m_dirnameops->hashname(name);
args.dp = dp;
args.whichfork = XFS_DATA_FORK;
args.trans = tp;
- args.justcheck = args.addname = args.oknoent = 1;
+ args.op_flags = XFS_DA_OP_JUSTCHECK | XFS_DA_OP_ADDNAME |
+ XFS_DA_OP_OKNOENT;
if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
rval = xfs_dir2_sf_addname(&args);
@@ -493,7 +582,7 @@ xfs_dir2_grow_inode(
args->firstblock, args->total,
&mapp[mapi], &nmap, args->flist,
NULL))) {
- kmem_free(mapp, sizeof(*mapp) * count);
+ kmem_free(mapp);
return error;
}
if (nmap < 1)
@@ -525,14 +614,14 @@ xfs_dir2_grow_inode(
mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
bno + count) {
if (mapp != &map)
- kmem_free(mapp, sizeof(*mapp) * count);
+ kmem_free(mapp);
return XFS_ERROR(ENOSPC);
}
/*
* Done with the temporary mapping table.
*/
if (mapp != &map)
- kmem_free(mapp, sizeof(*mapp) * count);
+ kmem_free(mapp);
*dbp = xfs_dir2_da_to_db(mp, (xfs_dablk_t)bno);
/*
* Update file's size if this is the data space and it grew.
diff --git a/fs/xfs/xfs_dir2.h b/fs/xfs/xfs_dir2.h
index 6392f939029..1d9ef96f33a 100644
--- a/fs/xfs/xfs_dir2.h
+++ b/fs/xfs/xfs_dir2.h
@@ -74,7 +74,8 @@ extern int xfs_dir_createname(struct xfs_trans *tp, struct xfs_inode *dp,
xfs_fsblock_t *first,
struct xfs_bmap_free *flist, xfs_extlen_t tot);
extern int xfs_dir_lookup(struct xfs_trans *tp, struct xfs_inode *dp,
- struct xfs_name *name, xfs_ino_t *inum);
+ struct xfs_name *name, xfs_ino_t *inum,
+ struct xfs_name *ci_name);
extern int xfs_dir_removename(struct xfs_trans *tp, struct xfs_inode *dp,
struct xfs_name *name, xfs_ino_t ino,
xfs_fsblock_t *first,
@@ -99,4 +100,7 @@ extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp,
extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db,
struct xfs_dabuf *bp);
+extern int xfs_dir_cilookup_result(struct xfs_da_args *args, const char *name,
+ int len);
+
#endif /* __XFS_DIR2_H__ */
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index fb5a556725b..e2fa0a1d8e9 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -215,7 +215,7 @@ xfs_dir2_block_addname(
/*
* If this isn't a real add, we're done with the buffer.
*/
- if (args->justcheck)
+ if (args->op_flags & XFS_DA_OP_JUSTCHECK)
xfs_da_brelse(tp, bp);
/*
* If we don't have space for the new entry & leaf ...
@@ -225,7 +225,7 @@ xfs_dir2_block_addname(
* Not trying to actually do anything, or don't have
* a space reservation: return no-space.
*/
- if (args->justcheck || args->total == 0)
+ if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0)
return XFS_ERROR(ENOSPC);
/*
* Convert to the next larger format.
@@ -240,7 +240,7 @@ xfs_dir2_block_addname(
/*
* Just checking, and it would work, so say so.
*/
- if (args->justcheck)
+ if (args->op_flags & XFS_DA_OP_JUSTCHECK)
return 0;
needlog = needscan = 0;
/*
@@ -610,14 +610,15 @@ xfs_dir2_block_lookup(
/*
* Get the offset from the leaf entry, to point to the data.
*/
- dep = (xfs_dir2_data_entry_t *)
- ((char *)block + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
+ dep = (xfs_dir2_data_entry_t *)((char *)block +
+ xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
/*
- * Fill in inode number, release the block.
+ * Fill in inode number, CI name if appropriate, release the block.
*/
args->inumber = be64_to_cpu(dep->inumber);
+ error = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
xfs_da_brelse(args->trans, bp);
- return XFS_ERROR(EEXIST);
+ return XFS_ERROR(error);
}
/*
@@ -643,6 +644,7 @@ xfs_dir2_block_lookup_int(
int mid; /* binary search current idx */
xfs_mount_t *mp; /* filesystem mount point */
xfs_trans_t *tp; /* transaction pointer */
+ enum xfs_dacmp cmp; /* comparison result */
dp = args->dp;
tp = args->trans;
@@ -673,7 +675,7 @@ xfs_dir2_block_lookup_int(
else
high = mid - 1;
if (low > high) {
- ASSERT(args->oknoent);
+ ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
xfs_da_brelse(tp, bp);
return XFS_ERROR(ENOENT);
}
@@ -697,20 +699,31 @@ xfs_dir2_block_lookup_int(
dep = (xfs_dir2_data_entry_t *)
((char *)block + xfs_dir2_dataptr_to_off(mp, addr));
/*
- * Compare, if it's right give back buffer & entry number.
+ * Compare name and if it's an exact match, return the index
+ * and buffer. If it's the first case-insensitive match, store
+ * the index and buffer and continue looking for an exact match.
*/
- if (dep->namelen == args->namelen &&
- dep->name[0] == args->name[0] &&
- memcmp(dep->name, args->name, args->namelen) == 0) {
+ cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen);
+ if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
+ args->cmpresult = cmp;
*bpp = bp;
*entno = mid;
- return 0;
+ if (cmp == XFS_CMP_EXACT)
+ return 0;
}
- } while (++mid < be32_to_cpu(btp->count) && be32_to_cpu(blp[mid].hashval) == hash);
+ } while (++mid < be32_to_cpu(btp->count) &&
+ be32_to_cpu(blp[mid].hashval) == hash);
+
+ ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
+ /*
+ * Here, we can only be doing a lookup (not a rename or replace).
+ * If a case-insensitive match was found earlier, return success.
+ */
+ if (args->cmpresult == XFS_CMP_CASE)
+ return 0;
/*
* No match, release the buffer and return ENOENT.
*/
- ASSERT(args->oknoent);
xfs_da_brelse(tp, bp);
return XFS_ERROR(ENOENT);
}
@@ -1033,6 +1046,7 @@ xfs_dir2_sf_to_block(
xfs_dir2_sf_t *sfp; /* shortform structure */
__be16 *tagp; /* end of data entry */
xfs_trans_t *tp; /* transaction pointer */
+ struct xfs_name name;
xfs_dir2_trace_args("sf_to_block", args);
dp = args->dp;
@@ -1071,7 +1085,7 @@ xfs_dir2_sf_to_block(
*/
error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, &blkno);
if (error) {
- kmem_free(buf, buf_len);
+ kmem_free(buf);
return error;
}
/*
@@ -1079,7 +1093,7 @@ xfs_dir2_sf_to_block(
*/
error = xfs_dir2_data_init(args, blkno, &bp);
if (error) {
- kmem_free(buf, buf_len);
+ kmem_free(buf);
return error;
}
block = bp->data;
@@ -1187,8 +1201,10 @@ xfs_dir2_sf_to_block(
tagp = xfs_dir2_data_entry_tag_p(dep);
*tagp = cpu_to_be16((char *)dep - (char *)block);
xfs_dir2_data_log_entry(tp, bp, dep);
- blp[2 + i].hashval = cpu_to_be32(xfs_da_hashname(
- (char *)sfep->name, sfep->namelen));
+ name.name = sfep->name;
+ name.len = sfep->namelen;
+ blp[2 + i].hashval = cpu_to_be32(mp->m_dirnameops->
+ hashname(&name));
blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
(char *)dep - (char *)block));
offset = (int)((char *)(tagp + 1) - (char *)block);
@@ -1198,7 +1214,7 @@ xfs_dir2_sf_to_block(
sfep = xfs_dir2_sf_nextentry(sfp, sfep);
}
/* Done with the temporary buffer */
- kmem_free(buf, buf_len);
+ kmem_free(buf);
/*
* Sort the leaf entries by hash value.
*/
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
index fb8c9e08b23..498f8d69433 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -65,6 +65,7 @@ xfs_dir2_data_check(
xfs_mount_t *mp; /* filesystem mount point */
char *p; /* current data position */
int stale; /* count of stale leaves */
+ struct xfs_name name;
mp = dp->i_mount;
d = bp->data;
@@ -140,7 +141,9 @@ xfs_dir2_data_check(
addr = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
(xfs_dir2_data_aoff_t)
((char *)dep - (char *)d));
- hash = xfs_da_hashname((char *)dep->name, dep->namelen);
+ name.name = dep->name;
+ name.len = dep->namelen;
+ hash = mp->m_dirnameops->hashname(&name);
for (i = 0; i < be32_to_cpu(btp->count); i++) {
if (be32_to_cpu(lep[i].address) == addr &&
be32_to_cpu(lep[i].hashval) == hash)
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index bc52b803d79..93535992cb6 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -263,20 +263,21 @@ xfs_dir2_leaf_addname(
* If we don't have enough free bytes but we can make enough
* by compacting out stale entries, we'll do that.
*/
- if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] < needbytes &&
- be16_to_cpu(leaf->hdr.stale) > 1) {
+ if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] <
+ needbytes && be16_to_cpu(leaf->hdr.stale) > 1) {
compact = 1;
}
/*
* Otherwise if we don't have enough free bytes we need to
* convert to node form.
*/
- else if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] <
- needbytes) {
+ else if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu(
+ leaf->hdr.count)] < needbytes) {
/*
* Just checking or no space reservation, give up.
*/
- if (args->justcheck || args->total == 0) {
+ if ((args->op_flags & XFS_DA_OP_JUSTCHECK) ||
+ args->total == 0) {
xfs_da_brelse(tp, lbp);
return XFS_ERROR(ENOSPC);
}
@@ -301,7 +302,7 @@ xfs_dir2_leaf_addname(
* If just checking, then it will fit unless we needed to allocate
* a new data block.
*/
- if (args->justcheck) {
+ if (args->op_flags & XFS_DA_OP_JUSTCHECK) {
xfs_da_brelse(tp, lbp);
return use_block == -1 ? XFS_ERROR(ENOSPC) : 0;
}
@@ -1110,7 +1111,7 @@ xfs_dir2_leaf_getdents(
*offset = XFS_DIR2_MAX_DATAPTR;
else
*offset = xfs_dir2_byte_to_dataptr(mp, curoff);
- kmem_free(map, map_size * sizeof(*map));
+ kmem_free(map);
if (bp)
xfs_da_brelse(NULL, bp);
return error;
@@ -1298,12 +1299,13 @@ xfs_dir2_leaf_lookup(
((char *)dbp->data +
xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address)));
/*
- * Return the found inode number.
+ * Return the found inode number & CI name if appropriate
*/
args->inumber = be64_to_cpu(dep->inumber);
+ error = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
xfs_da_brelse(tp, dbp);
xfs_da_brelse(tp, lbp);
- return XFS_ERROR(EEXIST);
+ return XFS_ERROR(error);
}
/*
@@ -1319,8 +1321,8 @@ xfs_dir2_leaf_lookup_int(
int *indexp, /* out: index in leaf block */
xfs_dabuf_t **dbpp) /* out: data buffer */
{
- xfs_dir2_db_t curdb; /* current data block number */
- xfs_dabuf_t *dbp; /* data buffer */
+ xfs_dir2_db_t curdb = -1; /* current data block number */
+ xfs_dabuf_t *dbp = NULL; /* data buffer */
xfs_dir2_data_entry_t *dep; /* data entry */
xfs_inode_t *dp; /* incore directory inode */
int error; /* error return code */
@@ -1331,6 +1333,8 @@ xfs_dir2_leaf_lookup_int(
xfs_mount_t *mp; /* filesystem mount point */
xfs_dir2_db_t newdb; /* new data block number */
xfs_trans_t *tp; /* transaction pointer */
+ xfs_dir2_db_t cidb = -1; /* case match data block no. */
+ enum xfs_dacmp cmp; /* name compare result */
dp = args->dp;
tp = args->trans;
@@ -1338,11 +1342,10 @@ xfs_dir2_leaf_lookup_int(
/*
* Read the leaf block into the buffer.
*/
- if ((error =
- xfs_da_read_buf(tp, dp, mp->m_dirleafblk, -1, &lbp,
- XFS_DATA_FORK))) {
+ error = xfs_da_read_buf(tp, dp, mp->m_dirleafblk, -1, &lbp,
+ XFS_DATA_FORK);
+ if (error)
return error;
- }
*lbpp = lbp;
leaf = lbp->data;
xfs_dir2_leaf_check(dp, lbp);
@@ -1354,9 +1357,9 @@ xfs_dir2_leaf_lookup_int(
* Loop over all the entries with the right hash value
* looking to match the name.
*/
- for (lep = &leaf->ents[index], dbp = NULL, curdb = -1;
- index < be16_to_cpu(leaf->hdr.count) && be32_to_cpu(lep->hashval) == args->hashval;
- lep++, index++) {
+ for (lep = &leaf->ents[index]; index < be16_to_cpu(leaf->hdr.count) &&
+ be32_to_cpu(lep->hashval) == args->hashval;
+ lep++, index++) {
/*
* Skip over stale leaf entries.
*/
@@ -1373,10 +1376,10 @@ xfs_dir2_leaf_lookup_int(
if (newdb != curdb) {
if (dbp)
xfs_da_brelse(tp, dbp);
- if ((error =
- xfs_da_read_buf(tp, dp,
- xfs_dir2_db_to_da(mp, newdb), -1, &dbp,
- XFS_DATA_FORK))) {
+ error = xfs_da_read_buf(tp, dp,
+ xfs_dir2_db_to_da(mp, newdb),
+ -1, &dbp, XFS_DATA_FORK);
+ if (error) {
xfs_da_brelse(tp, lbp);
return error;
}
@@ -1386,24 +1389,50 @@ xfs_dir2_leaf_lookup_int(
/*
* Point to the data entry.
*/
- dep = (xfs_dir2_data_entry_t *)
- ((char *)dbp->data +
- xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
+ dep = (xfs_dir2_data_entry_t *)((char *)dbp->data +
+ xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
/*
- * If it matches then return it.
+ * Compare name and if it's an exact match, return the index
+ * and buffer. If it's the first case-insensitive match, store
+ * the index and buffer and continue looking for an exact match.
*/
- if (dep->namelen == args->namelen &&
- dep->name[0] == args->name[0] &&
- memcmp(dep->name, args->name, args->namelen) == 0) {
- *dbpp = dbp;
+ cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen);
+ if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
+ args->cmpresult = cmp;
*indexp = index;
- return 0;
+ /* case exact match: return the current buffer. */
+ if (cmp == XFS_CMP_EXACT) {
+ *dbpp = dbp;
+ return 0;
+ }
+ cidb = curdb;
}
}
+ ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
+ /*
+ * Here, we can only be doing a lookup (not a rename or remove).
+ * If a case-insensitive match was found earlier, re-read the
+ * appropriate data block if required and return it.
+ */
+ if (args->cmpresult == XFS_CMP_CASE) {
+ ASSERT(cidb != -1);
+ if (cidb != curdb) {
+ xfs_da_brelse(tp, dbp);
+ error = xfs_da_read_buf(tp, dp,
+ xfs_dir2_db_to_da(mp, cidb),
+ -1, &dbp, XFS_DATA_FORK);
+ if (error) {
+ xfs_da_brelse(tp, lbp);
+ return error;
+ }
+ }
+ *dbpp = dbp;
+ return 0;
+ }
/*
* No match found, return ENOENT.
*/
- ASSERT(args->oknoent);
+ ASSERT(cidb == -1);
if (dbp)
xfs_da_brelse(tp, dbp);
xfs_da_brelse(tp, lbp);
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index 8dade711f09..fa6c3a5ddbc 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -226,7 +226,7 @@ xfs_dir2_leafn_add(
ASSERT(index == be16_to_cpu(leaf->hdr.count) ||
be32_to_cpu(leaf->ents[index].hashval) >= args->hashval);
- if (args->justcheck)
+ if (args->op_flags & XFS_DA_OP_JUSTCHECK)
return 0;
/*
@@ -387,28 +387,26 @@ xfs_dir2_leafn_lasthash(
}
/*
- * Look up a leaf entry in a node-format leaf block.
- * If this is an addname then the extrablk in state is a freespace block,
- * otherwise it's a data block.
+ * Look up a leaf entry for space to add a name in a node-format leaf block.
+ * The extrablk in state is a freespace block.
*/
-int
-xfs_dir2_leafn_lookup_int(
+STATIC int
+xfs_dir2_leafn_lookup_for_addname(
xfs_dabuf_t *bp, /* leaf buffer */
xfs_da_args_t *args, /* operation arguments */
int *indexp, /* out: leaf entry index */
xfs_da_state_t *state) /* state to fill in */
{
- xfs_dabuf_t *curbp; /* current data/free buffer */
- xfs_dir2_db_t curdb; /* current data block number */
- xfs_dir2_db_t curfdb; /* current free block number */
- xfs_dir2_data_entry_t *dep; /* data block entry */
+ xfs_dabuf_t *curbp = NULL; /* current data/free buffer */
+ xfs_dir2_db_t curdb = -1; /* current data block number */
+ xfs_dir2_db_t curfdb = -1; /* current free block number */
xfs_inode_t *dp; /* incore directory inode */
int error; /* error return value */
int fi; /* free entry index */
- xfs_dir2_free_t *free=NULL; /* free block structure */
+ xfs_dir2_free_t *free = NULL; /* free block structure */
int index; /* leaf entry index */
xfs_dir2_leaf_t *leaf; /* leaf structure */
- int length=0; /* length of new data entry */
+ int length; /* length of new data entry */
xfs_dir2_leaf_entry_t *lep; /* leaf entry */
xfs_mount_t *mp; /* filesystem mount point */
xfs_dir2_db_t newdb; /* new data block number */
@@ -431,33 +429,20 @@ xfs_dir2_leafn_lookup_int(
/*
* Do we have a buffer coming in?
*/
- if (state->extravalid)
+ if (state->extravalid) {
+ /* If so, it's a free block buffer, get the block number. */
curbp = state->extrablk.bp;
- else
- curbp = NULL;
- /*
- * For addname, it's a free block buffer, get the block number.
- */
- if (args->addname) {
- curfdb = curbp ? state->extrablk.blkno : -1;
- curdb = -1;
- length = xfs_dir2_data_entsize(args->namelen);
- if ((free = (curbp ? curbp->data : NULL)))
- ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
- }
- /*
- * For others, it's a data block buffer, get the block number.
- */
- else {
- curfdb = -1;
- curdb = curbp ? state->extrablk.blkno : -1;
+ curfdb = state->extrablk.blkno;
+ free = curbp->data;
+ ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
}
+ length = xfs_dir2_data_entsize(args->namelen);
/*
* Loop over leaf entries with the right hash value.
*/
- for (lep = &leaf->ents[index];
- index < be16_to_cpu(leaf->hdr.count) && be32_to_cpu(lep->hashval) == args->hashval;
- lep++, index++) {
+ for (lep = &leaf->ents[index]; index < be16_to_cpu(leaf->hdr.count) &&
+ be32_to_cpu(lep->hashval) == args->hashval;
+ lep++, index++) {
/*
* Skip stale leaf entries.
*/
@@ -471,161 +456,244 @@ xfs_dir2_leafn_lookup_int(
* For addname, we're looking for a place to put the new entry.
* We want to use a data block with an entry of equal
* hash value to ours if there is one with room.
+ *
+ * If this block isn't the data block we already have
+ * in hand, take a look at it.
*/
- if (args->addname) {
+ if (newdb != curdb) {
+ curdb = newdb;
/*
- * If this block isn't the data block we already have
- * in hand, take a look at it.
+ * Convert the data block to the free block
+ * holding its freespace information.
*/
- if (newdb != curdb) {
- curdb = newdb;
- /*
- * Convert the data block to the free block
- * holding its freespace information.
- */
- newfdb = xfs_dir2_db_to_fdb(mp, newdb);
- /*
- * If it's not the one we have in hand,
- * read it in.
- */
- if (newfdb != curfdb) {
- /*
- * If we had one before, drop it.
- */
- if (curbp)
- xfs_da_brelse(tp, curbp);
- /*
- * Read the free block.
- */
- if ((error = xfs_da_read_buf(tp, dp,
- xfs_dir2_db_to_da(mp,
- newfdb),
- -1, &curbp,
- XFS_DATA_FORK))) {
- return error;
- }
- free = curbp->data;
- ASSERT(be32_to_cpu(free->hdr.magic) ==
- XFS_DIR2_FREE_MAGIC);
- ASSERT((be32_to_cpu(free->hdr.firstdb) %
- XFS_DIR2_MAX_FREE_BESTS(mp)) ==
- 0);
- ASSERT(be32_to_cpu(free->hdr.firstdb) <= curdb);
- ASSERT(curdb <
- be32_to_cpu(free->hdr.firstdb) +
- be32_to_cpu(free->hdr.nvalid));
- }
- /*
- * Get the index for our entry.
- */
- fi = xfs_dir2_db_to_fdindex(mp, curdb);
- /*
- * If it has room, return it.
- */
- if (unlikely(be16_to_cpu(free->bests[fi]) == NULLDATAOFF)) {
- XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int",
- XFS_ERRLEVEL_LOW, mp);
- if (curfdb != newfdb)
- xfs_da_brelse(tp, curbp);
- return XFS_ERROR(EFSCORRUPTED);
- }
- curfdb = newfdb;
- if (be16_to_cpu(free->bests[fi]) >= length) {
- *indexp = index;
- state->extravalid = 1;
- state->extrablk.bp = curbp;
- state->extrablk.blkno = curfdb;
- state->extrablk.index = fi;
- state->extrablk.magic =
- XFS_DIR2_FREE_MAGIC;
- ASSERT(args->oknoent);
- return XFS_ERROR(ENOENT);
- }
- }
- }
- /*
- * Not adding a new entry, so we really want to find
- * the name given to us.
- */
- else {
+ newfdb = xfs_dir2_db_to_fdb(mp, newdb);
/*
- * If it's a different data block, go get it.
+ * If it's not the one we have in hand, read it in.
*/
- if (newdb != curdb) {
+ if (newfdb != curfdb) {
/*
- * If we had a block before, drop it.
+ * If we had one before, drop it.
*/
if (curbp)
xfs_da_brelse(tp, curbp);
/*
- * Read the data block.
+ * Read the free block.
*/
- if ((error =
- xfs_da_read_buf(tp, dp,
- xfs_dir2_db_to_da(mp, newdb), -1,
- &curbp, XFS_DATA_FORK))) {
+ error = xfs_da_read_buf(tp, dp,
+ xfs_dir2_db_to_da(mp, newfdb),
+ -1, &curbp, XFS_DATA_FORK);
+ if (error)
return error;
- }
- xfs_dir2_data_check(dp, curbp);
- curdb = newdb;
+ free = curbp->data;
+ ASSERT(be32_to_cpu(free->hdr.magic) ==
+ XFS_DIR2_FREE_MAGIC);
+ ASSERT((be32_to_cpu(free->hdr.firstdb) %
+ XFS_DIR2_MAX_FREE_BESTS(mp)) == 0);
+ ASSERT(be32_to_cpu(free->hdr.firstdb) <= curdb);
+ ASSERT(curdb < be32_to_cpu(free->hdr.firstdb) +
+ be32_to_cpu(free->hdr.nvalid));
}
/*
- * Point to the data entry.
+ * Get the index for our entry.
*/
- dep = (xfs_dir2_data_entry_t *)
- ((char *)curbp->data +
- xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
+ fi = xfs_dir2_db_to_fdindex(mp, curdb);
/*
- * Compare the entry, return it if it matches.
+ * If it has room, return it.
*/
- if (dep->namelen == args->namelen &&
- dep->name[0] == args->name[0] &&
- memcmp(dep->name, args->name, args->namelen) == 0) {
- args->inumber = be64_to_cpu(dep->inumber);
- *indexp = index;
- state->extravalid = 1;
- state->extrablk.bp = curbp;
- state->extrablk.blkno = curdb;
- state->extrablk.index =
- (int)((char *)dep -
- (char *)curbp->data);
- state->extrablk.magic = XFS_DIR2_DATA_MAGIC;
- return XFS_ERROR(EEXIST);
+ if (unlikely(be16_to_cpu(free->bests[fi]) == NULLDATAOFF)) {
+ XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int",
+ XFS_ERRLEVEL_LOW, mp);
+ if (curfdb != newfdb)
+ xfs_da_brelse(tp, curbp);
+ return XFS_ERROR(EFSCORRUPTED);
}
+ curfdb = newfdb;
+ if (be16_to_cpu(free->bests[fi]) >= length)
+ goto out;
}
}
+ /* Didn't find any space */
+ fi = -1;
+out:
+ ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
+ if (curbp) {
+ /* Giving back a free block. */
+ state->extravalid = 1;
+ state->extrablk.bp = curbp;
+ state->extrablk.index = fi;
+ state->extrablk.blkno = curfdb;
+ state->extrablk.magic = XFS_DIR2_FREE_MAGIC;
+ } else {
+ state->extravalid = 0;
+ }
/*
- * Didn't find a match.
- * If we are holding a buffer, give it back in case our caller
- * finds it useful.
+ * Return the index, that will be the insertion point.
*/
- if ((state->extravalid = (curbp != NULL))) {
- state->extrablk.bp = curbp;
- state->extrablk.index = -1;
+ *indexp = index;
+ return XFS_ERROR(ENOENT);
+}
+
+/*
+ * Look up a leaf entry in a node-format leaf block.
+ * The extrablk in state is a data block.
+ */
+STATIC int
+xfs_dir2_leafn_lookup_for_entry(
+ xfs_dabuf_t *bp, /* leaf buffer */
+ xfs_da_args_t *args, /* operation arguments */
+ int *indexp, /* out: leaf entry index */
+ xfs_da_state_t *state) /* state to fill in */
+{
+ xfs_dabuf_t *curbp = NULL; /* current data/free buffer */
+ xfs_dir2_db_t curdb = -1; /* current data block number */
+ xfs_dir2_data_entry_t *dep; /* data block entry */
+ xfs_inode_t *dp; /* incore directory inode */
+ int error; /* error return value */
+ int index; /* leaf entry index */
+ xfs_dir2_leaf_t *leaf; /* leaf structure */
+ xfs_dir2_leaf_entry_t *lep; /* leaf entry */
+ xfs_mount_t *mp; /* filesystem mount point */
+ xfs_dir2_db_t newdb; /* new data block number */
+ xfs_trans_t *tp; /* transaction pointer */
+ enum xfs_dacmp cmp; /* comparison result */
+
+ dp = args->dp;
+ tp = args->trans;
+ mp = dp->i_mount;
+ leaf = bp->data;
+ ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
+#ifdef __KERNEL__
+ ASSERT(be16_to_cpu(leaf->hdr.count) > 0);
+#endif
+ xfs_dir2_leafn_check(dp, bp);
+ /*
+ * Look up the hash value in the leaf entries.
+ */
+ index = xfs_dir2_leaf_search_hash(args, bp);
+ /*
+ * Do we have a buffer coming in?
+ */
+ if (state->extravalid) {
+ curbp = state->extrablk.bp;
+ curdb = state->extrablk.blkno;
+ }
+ /*
+ * Loop over leaf entries with the right hash value.
+ */
+ for (lep = &leaf->ents[index]; index < be16_to_cpu(leaf->hdr.count) &&
+ be32_to_cpu(lep->hashval) == args->hashval;
+ lep++, index++) {
/*
- * For addname, giving back a free block.
+ * Skip stale leaf entries.
*/
- if (args->addname) {
- state->extrablk.blkno = curfdb;
- state->extrablk.magic = XFS_DIR2_FREE_MAGIC;
+ if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR)
+ continue;
+ /*
+ * Pull the data block number from the entry.
+ */
+ newdb = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
+ /*
+ * Not adding a new entry, so we really want to find
+ * the name given to us.
+ *
+ * If it's a different data block, go get it.
+ */
+ if (newdb != curdb) {
+ /*
+ * If we had a block before that we aren't saving
+ * for a CI name, drop it
+ */
+ if (curbp && (args->cmpresult == XFS_CMP_DIFFERENT ||
+ curdb != state->extrablk.blkno))
+ xfs_da_brelse(tp, curbp);
+ /*
+ * If needing the block that is saved with a CI match,
+ * use it; otherwise read in the new data block.
+ */
+ if (args->cmpresult != XFS_CMP_DIFFERENT &&
+ newdb == state->extrablk.blkno) {
+ ASSERT(state->extravalid);
+ curbp = state->extrablk.bp;
+ } else {
+ error = xfs_da_read_buf(tp, dp,
+ xfs_dir2_db_to_da(mp, newdb),
+ -1, &curbp, XFS_DATA_FORK);
+ if (error)
+ return error;
+ }
+ xfs_dir2_data_check(dp, curbp);
+ curdb = newdb;
}
/*
- * For other callers, giving back a data block.
+ * Point to the data entry.
*/
- else {
+ dep = (xfs_dir2_data_entry_t *)((char *)curbp->data +
+ xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
+ /*
+ * Compare the entry and if it's an exact match, return
+ * EEXIST immediately. If it's the first case-insensitive
+ * match, store the block & inode number and continue looking.
+ */
+ cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen);
+ if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
+ /* If there is a CI match block, drop it */
+ if (args->cmpresult != XFS_CMP_DIFFERENT &&
+ curdb != state->extrablk.blkno)
+ xfs_da_brelse(tp, state->extrablk.bp);
+ args->cmpresult = cmp;
+ args->inumber = be64_to_cpu(dep->inumber);
+ *indexp = index;
+ state->extravalid = 1;
+ state->extrablk.bp = curbp;
state->extrablk.blkno = curdb;
+ state->extrablk.index = (int)((char *)dep -
+ (char *)curbp->data);
state->extrablk.magic = XFS_DIR2_DATA_MAGIC;
+ if (cmp == XFS_CMP_EXACT)
+ return XFS_ERROR(EEXIST);
}
}
- /*
- * Return the final index, that will be the insertion point.
- */
+ ASSERT(index == be16_to_cpu(leaf->hdr.count) ||
+ (args->op_flags & XFS_DA_OP_OKNOENT));
+ if (curbp) {
+ if (args->cmpresult == XFS_CMP_DIFFERENT) {
+ /* Giving back last used data block. */
+ state->extravalid = 1;
+ state->extrablk.bp = curbp;
+ state->extrablk.index = -1;
+ state->extrablk.blkno = curdb;
+ state->extrablk.magic = XFS_DIR2_DATA_MAGIC;
+ } else {
+ /* If the curbp is not the CI match block, drop it */
+ if (state->extrablk.bp != curbp)
+ xfs_da_brelse(tp, curbp);
+ }
+ } else {
+ state->extravalid = 0;
+ }
*indexp = index;
- ASSERT(index == be16_to_cpu(leaf->hdr.count) || args->oknoent);
return XFS_ERROR(ENOENT);
}
/*
+ * Look up a leaf entry in a node-format leaf block.
+ * If this is an addname then the extrablk in state is a freespace block,
+ * otherwise it's a data block.
+ */
+int
+xfs_dir2_leafn_lookup_int(
+ xfs_dabuf_t *bp, /* leaf buffer */
+ xfs_da_args_t *args, /* operation arguments */
+ int *indexp, /* out: leaf entry index */
+ xfs_da_state_t *state) /* state to fill in */
+{
+ if (args->op_flags & XFS_DA_OP_ADDNAME)
+ return xfs_dir2_leafn_lookup_for_addname(bp, args, indexp,
+ state);
+ return xfs_dir2_leafn_lookup_for_entry(bp, args, indexp, state);
+}
+
+/*
* Move count leaf entries from source to destination leaf.
* Log entries and headers. Stale entries are preserved.
*/
@@ -823,9 +891,10 @@ xfs_dir2_leafn_rebalance(
*/
if (!state->inleaf)
blk2->index = blk1->index - be16_to_cpu(leaf1->hdr.count);
-
- /*
- * Finally sanity check just to make sure we are not returning a negative index
+
+ /*
+ * Finally sanity check just to make sure we are not returning a
+ * negative index
*/
if(blk2->index < 0) {
state->inleaf = 1;
@@ -1332,7 +1401,7 @@ xfs_dir2_node_addname(
/*
* It worked, fix the hash values up the btree.
*/
- if (!args->justcheck)
+ if (!(args->op_flags & XFS_DA_OP_JUSTCHECK))
xfs_da_fixhashpath(state, &state->path);
} else {
/*
@@ -1515,7 +1584,8 @@ xfs_dir2_node_addname_int(
/*
* Not allowed to allocate, return failure.
*/
- if (args->justcheck || args->total == 0) {
+ if ((args->op_flags & XFS_DA_OP_JUSTCHECK) ||
+ args->total == 0) {
/*
* Drop the freespace buffer unless it came from our
* caller.
@@ -1661,7 +1731,7 @@ xfs_dir2_node_addname_int(
/*
* If just checking, we succeeded.
*/
- if (args->justcheck) {
+ if (args->op_flags & XFS_DA_OP_JUSTCHECK) {
if ((fblk == NULL || fblk->bp == NULL) && fbp != NULL)
xfs_da_buf_done(fbp);
return 0;
@@ -1767,6 +1837,14 @@ xfs_dir2_node_lookup(
error = xfs_da_node_lookup_int(state, &rval);
if (error)
rval = error;
+ else if (rval == ENOENT && args->cmpresult == XFS_CMP_CASE) {
+ /* If a CI match, dup the actual name and return EEXIST */
+ xfs_dir2_data_entry_t *dep;
+
+ dep = (xfs_dir2_data_entry_t *)((char *)state->extrablk.bp->
+ data + state->extrablk.index);
+ rval = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
+ }
/*
* Release the btree blocks and leaf block.
*/
@@ -1810,9 +1888,8 @@ xfs_dir2_node_removename(
* Look up the entry we're deleting, set up the cursor.
*/
error = xfs_da_node_lookup_int(state, &rval);
- if (error) {
+ if (error)
rval = error;
- }
/*
* Didn't find it, upper layer screwed up.
*/
@@ -1829,9 +1906,8 @@ xfs_dir2_node_removename(
*/
error = xfs_dir2_leafn_remove(args, blk->bp, blk->index,
&state->extrablk, &rval);
- if (error) {
+ if (error)
return error;
- }
/*
* Fix the hash values up the btree.
*/
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index 919d275a1ce..b46af0013ec 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -255,7 +255,7 @@ xfs_dir2_block_to_sf(
xfs_dir2_sf_check(args);
out:
xfs_trans_log_inode(args->trans, dp, logflags);
- kmem_free(block, mp->m_dirblksize);
+ kmem_free(block);
return error;
}
@@ -332,7 +332,7 @@ xfs_dir2_sf_addname(
/*
* Just checking or no space reservation, it doesn't fit.
*/
- if (args->justcheck || args->total == 0)
+ if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0)
return XFS_ERROR(ENOSPC);
/*
* Convert to block form then add the name.
@@ -345,7 +345,7 @@ xfs_dir2_sf_addname(
/*
* Just checking, it fits.
*/
- if (args->justcheck)
+ if (args->op_flags & XFS_DA_OP_JUSTCHECK)
return 0;
/*
* Do it the easy way - just add it at the end.
@@ -512,7 +512,7 @@ xfs_dir2_sf_addname_hard(
sfep = xfs_dir2_sf_nextentry(sfp, sfep);
memcpy(sfep, oldsfep, old_isize - nbytes);
}
- kmem_free(buf, old_isize);
+ kmem_free(buf);
dp->i_d.di_size = new_isize;
xfs_dir2_sf_check(args);
}
@@ -812,8 +812,11 @@ xfs_dir2_sf_lookup(
{
xfs_inode_t *dp; /* incore directory inode */
int i; /* entry index */
+ int error;
xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */
xfs_dir2_sf_t *sfp; /* shortform structure */
+ enum xfs_dacmp cmp; /* comparison result */
+ xfs_dir2_sf_entry_t *ci_sfep; /* case-insens. entry */
xfs_dir2_trace_args("sf_lookup", args);
xfs_dir2_sf_check(args);
@@ -836,6 +839,7 @@ xfs_dir2_sf_lookup(
*/
if (args->namelen == 1 && args->name[0] == '.') {
args->inumber = dp->i_ino;
+ args->cmpresult = XFS_CMP_EXACT;
return XFS_ERROR(EEXIST);
}
/*
@@ -844,28 +848,41 @@ xfs_dir2_sf_lookup(
if (args->namelen == 2 &&
args->name[0] == '.' && args->name[1] == '.') {
args->inumber = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent);
+ args->cmpresult = XFS_CMP_EXACT;
return XFS_ERROR(EEXIST);
}
/*
* Loop over all the entries trying to match ours.
*/
- for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
- i < sfp->hdr.count;
- i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
- if (sfep->namelen == args->namelen &&
- sfep->name[0] == args->name[0] &&
- memcmp(args->name, sfep->name, args->namelen) == 0) {
- args->inumber =
- xfs_dir2_sf_get_inumber(sfp,
- xfs_dir2_sf_inumberp(sfep));
- return XFS_ERROR(EEXIST);
+ ci_sfep = NULL;
+ for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->hdr.count;
+ i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
+ /*
+ * Compare name and if it's an exact match, return the inode
+ * number. If it's the first case-insensitive match, store the
+ * inode number and continue looking for an exact match.
+ */
+ cmp = dp->i_mount->m_dirnameops->compname(args, sfep->name,
+ sfep->namelen);
+ if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
+ args->cmpresult = cmp;
+ args->inumber = xfs_dir2_sf_get_inumber(sfp,
+ xfs_dir2_sf_inumberp(sfep));
+ if (cmp == XFS_CMP_EXACT)
+ return XFS_ERROR(EEXIST);
+ ci_sfep = sfep;
}
}
+ ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
/*
- * Didn't find it.
+ * Here, we can only be doing a lookup (not a rename or replace).
+ * If a case-insensitive match was not found, return ENOENT.
*/
- ASSERT(args->oknoent);
- return XFS_ERROR(ENOENT);
+ if (!ci_sfep)
+ return XFS_ERROR(ENOENT);
+ /* otherwise process the CI match as required by the caller */
+ error = xfs_dir_cilookup_result(args, ci_sfep->name, ci_sfep->namelen);
+ return XFS_ERROR(error);
}
/*
@@ -904,24 +921,21 @@ xfs_dir2_sf_removename(
* Loop over the old directory entries.
* Find the one we're deleting.
*/
- for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
- i < sfp->hdr.count;
- i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
- if (sfep->namelen == args->namelen &&
- sfep->name[0] == args->name[0] &&
- memcmp(sfep->name, args->name, args->namelen) == 0) {
+ for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->hdr.count;
+ i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
+ if (xfs_da_compname(args, sfep->name, sfep->namelen) ==
+ XFS_CMP_EXACT) {
ASSERT(xfs_dir2_sf_get_inumber(sfp,
- xfs_dir2_sf_inumberp(sfep)) ==
- args->inumber);
+ xfs_dir2_sf_inumberp(sfep)) ==
+ args->inumber);
break;
}
}
/*
* Didn't find it.
*/
- if (i == sfp->hdr.count) {
+ if (i == sfp->hdr.count)
return XFS_ERROR(ENOENT);
- }
/*
* Calculate sizes.
*/
@@ -1042,11 +1056,10 @@ xfs_dir2_sf_replace(
*/
else {
for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
- i < sfp->hdr.count;
- i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
- if (sfep->namelen == args->namelen &&
- sfep->name[0] == args->name[0] &&
- memcmp(args->name, sfep->name, args->namelen) == 0) {
+ i < sfp->hdr.count;
+ i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
+ if (xfs_da_compname(args, sfep->name, sfep->namelen) ==
+ XFS_CMP_EXACT) {
#if XFS_BIG_INUMS || defined(DEBUG)
ino = xfs_dir2_sf_get_inumber(sfp,
xfs_dir2_sf_inumberp(sfep));
@@ -1061,7 +1074,7 @@ xfs_dir2_sf_replace(
* Didn't find it.
*/
if (i == sfp->hdr.count) {
- ASSERT(args->oknoent);
+ ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
#if XFS_BIG_INUMS
if (i8elevated)
xfs_dir2_sf_toino4(args);
@@ -1174,7 +1187,7 @@ xfs_dir2_sf_toino4(
/*
* Clean up the inode.
*/
- kmem_free(buf, oldsize);
+ kmem_free(buf);
dp->i_d.di_size = newsize;
xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
}
@@ -1251,7 +1264,7 @@ xfs_dir2_sf_toino8(
/*
* Clean up the inode.
*/
- kmem_free(buf, oldsize);
+ kmem_free(buf);
dp->i_d.di_size = newsize;
xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
}
diff --git a/fs/xfs/xfs_dir2_sf.h b/fs/xfs/xfs_dir2_sf.h
index 005629d702d..deecc9d238f 100644
--- a/fs/xfs/xfs_dir2_sf.h
+++ b/fs/xfs/xfs_dir2_sf.h
@@ -62,7 +62,7 @@ typedef union {
* Normalized offset (in a data block) of the entry, really xfs_dir2_data_off_t.
* Only need 16 bits, this is the byte offset into the single block form.
*/
-typedef struct { __uint8_t i[2]; } xfs_dir2_sf_off_t;
+typedef struct { __uint8_t i[2]; } __arch_pack xfs_dir2_sf_off_t;
/*
* The parent directory has a dedicated field, and the self-pointer must
@@ -76,14 +76,14 @@ typedef struct xfs_dir2_sf_hdr {
__uint8_t count; /* count of entries */
__uint8_t i8count; /* count of 8-byte inode #s */
xfs_dir2_inou_t parent; /* parent dir inode number */
-} xfs_dir2_sf_hdr_t;
+} __arch_pack xfs_dir2_sf_hdr_t;
typedef struct xfs_dir2_sf_entry {
__uint8_t namelen; /* actual name length */
xfs_dir2_sf_off_t offset; /* saved offset */
__uint8_t name[1]; /* name, variable size */
xfs_dir2_inou_t inumber; /* inode number, var. offset */
-} xfs_dir2_sf_entry_t;
+} __arch_pack xfs_dir2_sf_entry_t;
typedef struct xfs_dir2_sf {
xfs_dir2_sf_hdr_t hdr; /* shortform header */
diff --git a/fs/xfs/xfs_dir2_trace.c b/fs/xfs/xfs_dir2_trace.c
index f3fb2ffd6f5..6cc7c0c681a 100644
--- a/fs/xfs/xfs_dir2_trace.c
+++ b/fs/xfs/xfs_dir2_trace.c
@@ -85,7 +85,8 @@ xfs_dir2_trace_args(
(void *)((unsigned long)(args->inumber >> 32)),
(void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
(void *)args->dp, (void *)args->trans,
- (void *)(unsigned long)args->justcheck, NULL, NULL);
+ (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
+ NULL, NULL);
}
void
@@ -100,7 +101,7 @@ xfs_dir2_trace_args_b(
(void *)((unsigned long)(args->inumber >> 32)),
(void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
(void *)args->dp, (void *)args->trans,
- (void *)(unsigned long)args->justcheck,
+ (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
(void *)(bp ? bp->bps[0] : NULL), NULL);
}
@@ -117,7 +118,7 @@ xfs_dir2_trace_args_bb(
(void *)((unsigned long)(args->inumber >> 32)),
(void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
(void *)args->dp, (void *)args->trans,
- (void *)(unsigned long)args->justcheck,
+ (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
(void *)(lbp ? lbp->bps[0] : NULL),
(void *)(dbp ? dbp->bps[0] : NULL));
}
@@ -157,8 +158,8 @@ xfs_dir2_trace_args_db(
(void *)((unsigned long)(args->inumber >> 32)),
(void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
(void *)args->dp, (void *)args->trans,
- (void *)(unsigned long)args->justcheck, (void *)(long)db,
- (void *)dbp);
+ (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
+ (void *)(long)db, (void *)dbp);
}
void
@@ -173,7 +174,7 @@ xfs_dir2_trace_args_i(
(void *)((unsigned long)(args->inumber >> 32)),
(void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
(void *)args->dp, (void *)args->trans,
- (void *)(unsigned long)args->justcheck,
+ (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
(void *)((unsigned long)(i >> 32)),
(void *)((unsigned long)(i & 0xFFFFFFFF)));
}
@@ -190,7 +191,8 @@ xfs_dir2_trace_args_s(
(void *)((unsigned long)(args->inumber >> 32)),
(void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
(void *)args->dp, (void *)args->trans,
- (void *)(unsigned long)args->justcheck, (void *)(long)s, NULL);
+ (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
+ (void *)(long)s, NULL);
}
void
@@ -208,7 +210,7 @@ xfs_dir2_trace_args_sb(
(void *)((unsigned long)(args->inumber >> 32)),
(void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
(void *)args->dp, (void *)args->trans,
- (void *)(unsigned long)args->justcheck, (void *)(long)s,
- (void *)dbp);
+ (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
+ (void *)(long)s, (void *)dbp);
}
#endif /* XFS_DIR2_TRACE */
diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h
index f71784ab6a6..cdc2d3464a1 100644
--- a/fs/xfs/xfs_dmapi.h
+++ b/fs/xfs/xfs_dmapi.h
@@ -166,6 +166,6 @@ typedef enum {
#define FILP_DELAY_FLAG(filp) ((filp->f_flags&(O_NDELAY|O_NONBLOCK)) ? \
DM_FLAGS_NDELAY : 0)
-#define AT_DELAY_FLAG(f) ((f&ATTR_NONBLOCK) ? DM_FLAGS_NDELAY : 0)
+#define AT_DELAY_FLAG(f) ((f & XFS_ATTR_NONBLOCK) ? DM_FLAGS_NDELAY : 0)
#endif /* __XFS_DMAPI_H__ */
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 05e5365d3c3..f66756cfb5e 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -66,14 +66,6 @@ int xfs_etest[XFS_NUM_INJECT_ERROR];
int64_t xfs_etest_fsid[XFS_NUM_INJECT_ERROR];
char * xfs_etest_fsname[XFS_NUM_INJECT_ERROR];
-void
-xfs_error_test_init(void)
-{
- memset(xfs_etest, 0, sizeof(xfs_etest));
- memset(xfs_etest_fsid, 0, sizeof(xfs_etest_fsid));
- memset(xfs_etest_fsname, 0, sizeof(xfs_etest_fsname));
-}
-
int
xfs_error_test(int error_tag, int *fsidp, char *expression,
int line, char *file, unsigned long randfactor)
@@ -150,8 +142,7 @@ xfs_errortag_clearall(xfs_mount_t *mp, int loud)
xfs_etest[i]);
xfs_etest[i] = 0;
xfs_etest_fsid[i] = 0LL;
- kmem_free(xfs_etest_fsname[i],
- strlen(xfs_etest_fsname[i]) + 1);
+ kmem_free(xfs_etest_fsname[i]);
xfs_etest_fsname[i] = NULL;
}
}
@@ -175,7 +166,7 @@ xfs_fs_vcmn_err(int level, xfs_mount_t *mp, char *fmt, va_list ap)
newfmt = kmem_alloc(len, KM_SLEEP);
sprintf(newfmt, "Filesystem \"%s\": %s", mp->m_fsname, fmt);
icmn_err(level, newfmt, ap);
- kmem_free(newfmt, len);
+ kmem_free(newfmt);
} else {
icmn_err(level, fmt, ap);
}
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 6490d2a9f8e..d8559d132ef 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -127,7 +127,6 @@ extern void xfs_corruption_error(char *tag, int level, struct xfs_mount *mp,
#if (defined(DEBUG) || defined(INDUCE_IO_ERROR))
extern int xfs_error_test(int, int *, char *, int, char *, unsigned long);
-extern void xfs_error_test_init(void);
#define XFS_NUM_INJECT_ERROR 10
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 132bd07b9bb..8aa28f751b2 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -41,8 +41,7 @@ xfs_efi_item_free(xfs_efi_log_item_t *efip)
int nexts = efip->efi_format.efi_nextents;
if (nexts > XFS_EFI_MAX_FAST_EXTENTS) {
- kmem_free(efip, sizeof(xfs_efi_log_item_t) +
- (nexts - 1) * sizeof(xfs_extent_t));
+ kmem_free(efip);
} else {
kmem_zone_free(xfs_efi_zone, efip);
}
@@ -374,8 +373,7 @@ xfs_efd_item_free(xfs_efd_log_item_t *efdp)
int nexts = efdp->efd_format.efd_nextents;
if (nexts > XFS_EFD_MAX_FAST_EXTENTS) {
- kmem_free(efdp, sizeof(xfs_efd_log_item_t) +
- (nexts - 1) * sizeof(xfs_extent_t));
+ kmem_free(efdp);
} else {
kmem_zone_free(xfs_efd_zone, efdp);
}
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 3f3785b1080..c38fd14fca2 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -397,10 +397,12 @@ int
xfs_filestream_init(void)
{
item_zone = kmem_zone_init(sizeof(fstrm_item_t), "fstrm_item");
+ if (!item_zone)
+ return -ENOMEM;
#ifdef XFS_FILESTREAMS_TRACE
xfs_filestreams_trace_buf = ktrace_alloc(XFS_FSTRM_KTRACE_SIZE, KM_SLEEP);
#endif
- return item_zone ? 0 : -ENOMEM;
+ return 0;
}
/*
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index 3bed6433d05..01c0cc88d3f 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -239,6 +239,7 @@ typedef struct xfs_fsop_resblks {
#define XFS_FSOP_GEOM_FLAGS_LOGV2 0x0100 /* log format version 2 */
#define XFS_FSOP_GEOM_FLAGS_SECTOR 0x0200 /* sector sizes >1BB */
#define XFS_FSOP_GEOM_FLAGS_ATTR2 0x0400 /* inline attributes rework */
+#define XFS_FSOP_GEOM_FLAGS_DIRV2CI 0x1000 /* ASCII only CI names */
#define XFS_FSOP_GEOM_FLAGS_LAZYSB 0x4000 /* lazy superblock counters */
@@ -371,6 +372,9 @@ typedef struct xfs_fsop_attrlist_handlereq {
typedef struct xfs_attr_multiop {
__u32 am_opcode;
+#define ATTR_OP_GET 1 /* return the indicated attr's value */
+#define ATTR_OP_SET 2 /* set/create the indicated attr/value pair */
+#define ATTR_OP_REMOVE 3 /* remove the indicated attr */
__s32 am_error;
void __user *am_attrname;
void __user *am_attrvalue;
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 381ebda4f7b..84583cf73db 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -95,6 +95,8 @@ xfs_fs_geometry(
XFS_FSOP_GEOM_FLAGS_DIRV2 : 0) |
(xfs_sb_version_hassector(&mp->m_sb) ?
XFS_FSOP_GEOM_FLAGS_SECTOR : 0) |
+ (xfs_sb_version_hasasciici(&mp->m_sb) ?
+ XFS_FSOP_GEOM_FLAGS_DIRV2CI : 0) |
(xfs_sb_version_haslazysbcount(&mp->m_sb) ?
XFS_FSOP_GEOM_FLAGS_LAZYSB : 0) |
(xfs_sb_version_hasattr2(&mp->m_sb) ?
@@ -625,7 +627,7 @@ xfs_fs_goingdown(
xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT);
thaw_bdev(sb->s_bdev, sb);
}
-
+
break;
}
case XFS_FSOP_GOING_FLAGS_LOGFLUSH:
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index e569bf5d6cf..bedc6616317 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1763,67 +1763,6 @@ xfs_itruncate_finish(
return 0;
}
-
-/*
- * xfs_igrow_start
- *
- * Do the first part of growing a file: zero any data in the last
- * block that is beyond the old EOF. We need to do this before
- * the inode is joined to the transaction to modify the i_size.
- * That way we can drop the inode lock and call into the buffer
- * cache to get the buffer mapping the EOF.
- */
-int
-xfs_igrow_start(
- xfs_inode_t *ip,
- xfs_fsize_t new_size,
- cred_t *credp)
-{
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
- ASSERT(new_size > ip->i_size);
-
- /*
- * Zero any pages that may have been created by
- * xfs_write_file() beyond the end of the file
- * and any blocks between the old and new file sizes.
- */
- return xfs_zero_eof(ip, new_size, ip->i_size);
-}
-
-/*
- * xfs_igrow_finish
- *
- * This routine is called to extend the size of a file.
- * The inode must have both the iolock and the ilock locked
- * for update and it must be a part of the current transaction.
- * The xfs_igrow_start() function must have been called previously.
- * If the change_flag is not zero, the inode change timestamp will
- * be updated.
- */
-void
-xfs_igrow_finish(
- xfs_trans_t *tp,
- xfs_inode_t *ip,
- xfs_fsize_t new_size,
- int change_flag)
-{
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
- ASSERT(ip->i_transp == tp);
- ASSERT(new_size > ip->i_size);
-
- /*
- * Update the file size. Update the inode change timestamp
- * if change_flag set.
- */
- ip->i_d.di_size = new_size;
- ip->i_size = new_size;
- if (change_flag)
- xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
- xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-
-}
-
-
/*
* This is called when the inode's link count goes to 0.
* We place the on-disk inode on a list in the AGI. It
@@ -2258,7 +2197,7 @@ xfs_ifree_cluster(
xfs_trans_binval(tp, bp);
}
- kmem_free(ip_found, ninodes * sizeof(xfs_inode_t *));
+ kmem_free(ip_found);
xfs_put_perag(mp, pag);
}
@@ -2470,7 +2409,7 @@ xfs_iroot_realloc(
(int)new_size);
memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t));
}
- kmem_free(ifp->if_broot, ifp->if_broot_bytes);
+ kmem_free(ifp->if_broot);
ifp->if_broot = new_broot;
ifp->if_broot_bytes = (int)new_size;
ASSERT(ifp->if_broot_bytes <=
@@ -2514,7 +2453,7 @@ xfs_idata_realloc(
if (new_size == 0) {
if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
- kmem_free(ifp->if_u1.if_data, ifp->if_real_bytes);
+ kmem_free(ifp->if_u1.if_data);
}
ifp->if_u1.if_data = NULL;
real_size = 0;
@@ -2529,7 +2468,7 @@ xfs_idata_realloc(
ASSERT(ifp->if_real_bytes != 0);
memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data,
new_size);
- kmem_free(ifp->if_u1.if_data, ifp->if_real_bytes);
+ kmem_free(ifp->if_u1.if_data);
ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
}
real_size = 0;
@@ -2636,7 +2575,7 @@ xfs_idestroy_fork(
ifp = XFS_IFORK_PTR(ip, whichfork);
if (ifp->if_broot != NULL) {
- kmem_free(ifp->if_broot, ifp->if_broot_bytes);
+ kmem_free(ifp->if_broot);
ifp->if_broot = NULL;
}
@@ -2650,7 +2589,7 @@ xfs_idestroy_fork(
if ((ifp->if_u1.if_data != ifp->if_u2.if_inline_data) &&
(ifp->if_u1.if_data != NULL)) {
ASSERT(ifp->if_real_bytes != 0);
- kmem_free(ifp->if_u1.if_data, ifp->if_real_bytes);
+ kmem_free(ifp->if_u1.if_data);
ifp->if_u1.if_data = NULL;
ifp->if_real_bytes = 0;
}
@@ -3058,7 +2997,7 @@ xfs_iflush_cluster(
out_free:
read_unlock(&pag->pag_ici_lock);
- kmem_free(ilist, ilist_size);
+ kmem_free(ilist);
return 0;
@@ -3102,7 +3041,7 @@ cluster_corrupt_out:
* Unlocks the flush lock
*/
xfs_iflush_abort(iq);
- kmem_free(ilist, ilist_size);
+ kmem_free(ilist);
return XFS_ERROR(EFSCORRUPTED);
}
@@ -3143,8 +3082,6 @@ xfs_iflush(
* flush lock and do nothing.
*/
if (xfs_inode_clean(ip)) {
- ASSERT((iip != NULL) ?
- !(iip->ili_item.li_flags & XFS_LI_IN_AIL) : 1);
xfs_ifunlock(ip);
return 0;
}
@@ -3836,7 +3773,7 @@ xfs_iext_add_indirect_multi(
erp = xfs_iext_irec_new(ifp, erp_idx);
}
memmove(&erp->er_extbuf[i], nex2_ep, byte_diff);
- kmem_free(nex2_ep, byte_diff);
+ kmem_free(nex2_ep);
erp->er_extcount += nex2;
xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2);
}
@@ -4112,7 +4049,7 @@ xfs_iext_direct_to_inline(
*/
memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents,
nextents * sizeof(xfs_bmbt_rec_t));
- kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes);
+ kmem_free(ifp->if_u1.if_extents);
ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
ifp->if_real_bytes = 0;
}
@@ -4186,7 +4123,7 @@ xfs_iext_indirect_to_direct(
ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ);
ep = ifp->if_u1.if_ext_irec->er_extbuf;
- kmem_free(ifp->if_u1.if_ext_irec, sizeof(xfs_ext_irec_t));
+ kmem_free(ifp->if_u1.if_ext_irec);
ifp->if_flags &= ~XFS_IFEXTIREC;
ifp->if_u1.if_extents = ep;
ifp->if_bytes = size;
@@ -4212,7 +4149,7 @@ xfs_iext_destroy(
}
ifp->if_flags &= ~XFS_IFEXTIREC;
} else if (ifp->if_real_bytes) {
- kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes);
+ kmem_free(ifp->if_u1.if_extents);
} else if (ifp->if_bytes) {
memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
sizeof(xfs_bmbt_rec_t));
@@ -4483,7 +4420,7 @@ xfs_iext_irec_remove(
if (erp->er_extbuf) {
xfs_iext_irec_update_extoffs(ifp, erp_idx + 1,
-erp->er_extcount);
- kmem_free(erp->er_extbuf, XFS_IEXT_BUFSZ);
+ kmem_free(erp->er_extbuf);
}
/* Compact extent records */
erp = ifp->if_u1.if_ext_irec;
@@ -4501,8 +4438,7 @@ xfs_iext_irec_remove(
xfs_iext_realloc_indirect(ifp,
nlists * sizeof(xfs_ext_irec_t));
} else {
- kmem_free(ifp->if_u1.if_ext_irec,
- sizeof(xfs_ext_irec_t));
+ kmem_free(ifp->if_u1.if_ext_irec);
}
ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
}
@@ -4571,7 +4507,7 @@ xfs_iext_irec_compact_pages(
* so er_extoffs don't get modified in
* xfs_iext_irec_remove.
*/
- kmem_free(erp_next->er_extbuf, XFS_IEXT_BUFSZ);
+ kmem_free(erp_next->er_extbuf);
erp_next->er_extbuf = NULL;
xfs_iext_irec_remove(ifp, erp_idx + 1);
nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
@@ -4596,40 +4532,63 @@ xfs_iext_irec_compact_full(
int nlists; /* number of irec's (ex lists) */
ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+
nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
erp = ifp->if_u1.if_ext_irec;
ep = &erp->er_extbuf[erp->er_extcount];
erp_next = erp + 1;
ep_next = erp_next->er_extbuf;
+
while (erp_idx < nlists - 1) {
+ /*
+ * Check how many extent records are available in this irec.
+ * If there are none, skip the whole exercise.
+ */
ext_avail = XFS_LINEAR_EXTS - erp->er_extcount;
- ext_diff = MIN(ext_avail, erp_next->er_extcount);
- memcpy(ep, ep_next, ext_diff * sizeof(xfs_bmbt_rec_t));
- erp->er_extcount += ext_diff;
- erp_next->er_extcount -= ext_diff;
- /* Remove next page */
- if (erp_next->er_extcount == 0) {
+ if (ext_avail) {
+
/*
- * Free page before removing extent record
- * so er_extoffs don't get modified in
- * xfs_iext_irec_remove.
+ * Copy over as many extent records as possible into
+ * the previous page.
*/
- kmem_free(erp_next->er_extbuf,
- erp_next->er_extcount * sizeof(xfs_bmbt_rec_t));
- erp_next->er_extbuf = NULL;
- xfs_iext_irec_remove(ifp, erp_idx + 1);
- erp = &ifp->if_u1.if_ext_irec[erp_idx];
- nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
- /* Update next page */
- } else {
- /* Move rest of page up to become next new page */
- memmove(erp_next->er_extbuf, ep_next,
- erp_next->er_extcount * sizeof(xfs_bmbt_rec_t));
- ep_next = erp_next->er_extbuf;
- memset(&ep_next[erp_next->er_extcount], 0,
- (XFS_LINEAR_EXTS - erp_next->er_extcount) *
- sizeof(xfs_bmbt_rec_t));
+ ext_diff = MIN(ext_avail, erp_next->er_extcount);
+ memcpy(ep, ep_next, ext_diff * sizeof(xfs_bmbt_rec_t));
+ erp->er_extcount += ext_diff;
+ erp_next->er_extcount -= ext_diff;
+
+ /*
+ * If the next irec is empty now we can simply
+ * remove it.
+ */
+ if (erp_next->er_extcount == 0) {
+ /*
+ * Free page before removing extent record
+ * so er_extoffs don't get modified in
+ * xfs_iext_irec_remove.
+ */
+ kmem_free(erp_next->er_extbuf);
+ erp_next->er_extbuf = NULL;
+ xfs_iext_irec_remove(ifp, erp_idx + 1);
+ erp = &ifp->if_u1.if_ext_irec[erp_idx];
+ nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
+
+ /*
+ * If the next irec is not empty move up the content
+ * that has not been copied to the previous page to
+ * the beginning of this one.
+ */
+ } else {
+ memmove(erp_next->er_extbuf, &ep_next[ext_diff],
+ erp_next->er_extcount *
+ sizeof(xfs_bmbt_rec_t));
+ ep_next = erp_next->er_extbuf;
+ memset(&ep_next[erp_next->er_extcount], 0,
+ (XFS_LINEAR_EXTS -
+ erp_next->er_extcount) *
+ sizeof(xfs_bmbt_rec_t));
+ }
}
+
if (erp->er_extcount == XFS_LINEAR_EXTS) {
erp_idx++;
if (erp_idx < nlists)
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 0a999fee4f0..17a04b6321e 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -507,9 +507,6 @@ int xfs_itruncate_start(xfs_inode_t *, uint, xfs_fsize_t);
int xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *,
xfs_fsize_t, int, int);
int xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
-int xfs_igrow_start(xfs_inode_t *, xfs_fsize_t, struct cred *);
-void xfs_igrow_finish(struct xfs_trans *, xfs_inode_t *,
- xfs_fsize_t, int);
void xfs_idestroy_fork(xfs_inode_t *, int);
void xfs_idestroy(xfs_inode_t *);
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 167b33f1577..0eee08a32c2 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -686,7 +686,7 @@ xfs_inode_item_unlock(
ASSERT(ip->i_d.di_nextents > 0);
ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_DEXT);
ASSERT(ip->i_df.if_bytes > 0);
- kmem_free(iip->ili_extents_buf, ip->i_df.if_bytes);
+ kmem_free(iip->ili_extents_buf);
iip->ili_extents_buf = NULL;
}
if (iip->ili_aextents_buf != NULL) {
@@ -694,7 +694,7 @@ xfs_inode_item_unlock(
ASSERT(ip->i_d.di_anextents > 0);
ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_AEXT);
ASSERT(ip->i_afp->if_bytes > 0);
- kmem_free(iip->ili_aextents_buf, ip->i_afp->if_bytes);
+ kmem_free(iip->ili_aextents_buf);
iip->ili_aextents_buf = NULL;
}
@@ -957,8 +957,7 @@ xfs_inode_item_destroy(
{
#ifdef XFS_TRANS_DEBUG
if (ip->i_itemp->ili_root_size != 0) {
- kmem_free(ip->i_itemp->ili_orig_root,
- ip->i_itemp->ili_root_size);
+ kmem_free(ip->i_itemp->ili_orig_root);
}
#endif
kmem_zone_free(xfs_ili_zone, ip->i_itemp);
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 7edcde691d1..67f22b2b44b 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -889,6 +889,16 @@ xfs_iomap_write_unwritten(
count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
count_fsb = (xfs_filblks_t)(count_fsb - offset_fsb);
+ /*
+ * Reserve enough blocks in this transaction for two complete extent
+ * btree splits. We may be converting the middle part of an unwritten
+ * extent and in this case we will insert two new extents in the btree
+ * each of which could cause a full split.
+ *
+ * This reservation amount will be used in the first call to
+ * xfs_bmbt_split() to select an AG with enough space to satisfy the
+ * rest of the operation.
+ */
resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;
do {
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 419de15aeb4..9a3ef9dcaeb 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -257,7 +257,7 @@ xfs_bulkstat_one(
*ubused = error;
out_free:
- kmem_free(buf, sizeof(*buf));
+ kmem_free(buf);
return error;
}
@@ -708,7 +708,7 @@ xfs_bulkstat(
/*
* Done, we're either out of filesystem or space to put the data.
*/
- kmem_free(irbuf, irbsize);
+ kmem_free(irbuf);
*ubcountp = ubelem;
/*
* Found some inodes, return them now and return the error next time.
@@ -914,7 +914,7 @@ xfs_inumbers(
}
*lastino = XFS_AGINO_TO_INO(mp, agno, agino);
}
- kmem_free(buffer, bcount * sizeof(*buffer));
+ kmem_free(buffer);
if (cur)
xfs_btree_del_cursor(cur, (error ? XFS_BTREE_ERROR :
XFS_BTREE_NOERROR));
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index ad3d26ddfe3..91b00a5686c 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -226,20 +226,24 @@ xlog_grant_sub_space(struct log *log, int bytes)
static void
xlog_grant_add_space_write(struct log *log, int bytes)
{
- log->l_grant_write_bytes += bytes;
- if (log->l_grant_write_bytes > log->l_logsize) {
- log->l_grant_write_bytes -= log->l_logsize;
+ int tmp = log->l_logsize - log->l_grant_write_bytes;
+ if (tmp > bytes)
+ log->l_grant_write_bytes += bytes;
+ else {
log->l_grant_write_cycle++;
+ log->l_grant_write_bytes = bytes - tmp;
}
}
static void
xlog_grant_add_space_reserve(struct log *log, int bytes)
{
- log->l_grant_reserve_bytes += bytes;
- if (log->l_grant_reserve_bytes > log->l_logsize) {
- log->l_grant_reserve_bytes -= log->l_logsize;
+ int tmp = log->l_logsize - log->l_grant_reserve_bytes;
+ if (tmp > bytes)
+ log->l_grant_reserve_bytes += bytes;
+ else {
log->l_grant_reserve_cycle++;
+ log->l_grant_reserve_bytes = bytes - tmp;
}
}
@@ -1228,7 +1232,7 @@ xlog_alloc_log(xfs_mount_t *mp,
spin_lock_init(&log->l_icloglock);
spin_lock_init(&log->l_grant_lock);
- initnsema(&log->l_flushsema, 0, "ic-flush");
+ sv_init(&log->l_flush_wait, 0, "flush_wait");
/* log record size must be multiple of BBSIZE; see xlog_rec_header_t */
ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0);
@@ -1570,10 +1574,9 @@ xlog_dealloc_log(xlog_t *log)
}
#endif
next_iclog = iclog->ic_next;
- kmem_free(iclog, sizeof(xlog_in_core_t));
+ kmem_free(iclog);
iclog = next_iclog;
}
- freesema(&log->l_flushsema);
spinlock_destroy(&log->l_icloglock);
spinlock_destroy(&log->l_grant_lock);
@@ -1587,7 +1590,7 @@ xlog_dealloc_log(xlog_t *log)
}
#endif
log->l_mp->m_log = NULL;
- kmem_free(log, sizeof(xlog_t));
+ kmem_free(log);
} /* xlog_dealloc_log */
/*
@@ -2097,6 +2100,7 @@ xlog_state_do_callback(
int funcdidcallbacks; /* flag: function did callbacks */
int repeats; /* for issuing console warnings if
* looping too many times */
+ int wake = 0;
spin_lock(&log->l_icloglock);
first_iclog = iclog = log->l_iclog;
@@ -2278,15 +2282,13 @@ xlog_state_do_callback(
}
#endif
- flushcnt = 0;
- if (log->l_iclog->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_IOERROR)) {
- flushcnt = log->l_flushcnt;
- log->l_flushcnt = 0;
- }
+ if (log->l_iclog->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_IOERROR))
+ wake = 1;
spin_unlock(&log->l_icloglock);
- while (flushcnt--)
- vsema(&log->l_flushsema);
-} /* xlog_state_do_callback */
+
+ if (wake)
+ sv_broadcast(&log->l_flush_wait);
+}
/*
@@ -2384,16 +2386,15 @@ restart:
}
iclog = log->l_iclog;
- if (! (iclog->ic_state == XLOG_STATE_ACTIVE)) {
- log->l_flushcnt++;
- spin_unlock(&log->l_icloglock);
+ if (iclog->ic_state != XLOG_STATE_ACTIVE) {
xlog_trace_iclog(iclog, XLOG_TRACE_SLEEP_FLUSH);
XFS_STATS_INC(xs_log_noiclogs);
- /* Ensure that log writes happen */
- psema(&log->l_flushsema, PINOD);
+
+ /* Wait for log writes to have flushed */
+ sv_wait(&log->l_flush_wait, 0, &log->l_icloglock, 0);
goto restart;
}
- ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
+
head = &iclog->ic_header;
atomic_inc(&iclog->ic_refcnt); /* prevents sync */
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 8952a392b5f..6245913196b 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -423,10 +423,8 @@ typedef struct log {
int l_logBBsize; /* size of log in BB chunks */
/* The following block of fields are changed while holding icloglock */
- sema_t l_flushsema ____cacheline_aligned_in_smp;
- /* iclog flushing semaphore */
- int l_flushcnt; /* # of procs waiting on this
- * sema */
+ sv_t l_flush_wait ____cacheline_aligned_in_smp;
+ /* waiting for iclog flush */
int l_covered_state;/* state of "covering disk
* log entries" */
xlog_in_core_t *l_iclog; /* head log queue */
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index e65ab4af095..9eb722ec744 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1715,8 +1715,7 @@ xlog_check_buffer_cancelled(
} else {
prevp->bc_next = bcp->bc_next;
}
- kmem_free(bcp,
- sizeof(xfs_buf_cancel_t));
+ kmem_free(bcp);
}
}
return 1;
@@ -2519,7 +2518,7 @@ write_inode_buffer:
error:
if (need_free)
- kmem_free(in_f, sizeof(*in_f));
+ kmem_free(in_f);
return XFS_ERROR(error);
}
@@ -2830,16 +2829,14 @@ xlog_recover_free_trans(
item = item->ri_next;
/* Free the regions in the item. */
for (i = 0; i < free_item->ri_cnt; i++) {
- kmem_free(free_item->ri_buf[i].i_addr,
- free_item->ri_buf[i].i_len);
+ kmem_free(free_item->ri_buf[i].i_addr);
}
/* Free the item itself */
- kmem_free(free_item->ri_buf,
- (free_item->ri_total * sizeof(xfs_log_iovec_t)));
- kmem_free(free_item, sizeof(xlog_recover_item_t));
+ kmem_free(free_item->ri_buf);
+ kmem_free(free_item);
} while (first_item != item);
/* Free the transaction recover structure */
- kmem_free(trans, sizeof(xlog_recover_t));
+ kmem_free(trans);
}
STATIC int
@@ -3786,8 +3783,7 @@ xlog_do_log_recovery(
error = xlog_do_recovery_pass(log, head_blk, tail_blk,
XLOG_RECOVER_PASS1);
if (error != 0) {
- kmem_free(log->l_buf_cancel_table,
- XLOG_BC_TABLE_SIZE * sizeof(xfs_buf_cancel_t*));
+ kmem_free(log->l_buf_cancel_table);
log->l_buf_cancel_table = NULL;
return error;
}
@@ -3806,8 +3802,7 @@ xlog_do_log_recovery(
}
#endif /* DEBUG */
- kmem_free(log->l_buf_cancel_table,
- XLOG_BC_TABLE_SIZE * sizeof(xfs_buf_cancel_t*));
+ kmem_free(log->l_buf_cancel_table);
log->l_buf_cancel_table = NULL;
return error;
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index da3988453b7..6c5d1325e7f 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -47,12 +47,10 @@
STATIC int xfs_mount_log_sb(xfs_mount_t *, __int64_t);
STATIC int xfs_uuid_mount(xfs_mount_t *);
-STATIC void xfs_uuid_unmount(xfs_mount_t *mp);
STATIC void xfs_unmountfs_wait(xfs_mount_t *);
#ifdef HAVE_PERCPU_SB
-STATIC void xfs_icsb_destroy_counters(xfs_mount_t *);
STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t,
int);
STATIC void xfs_icsb_balance_counter_locked(xfs_mount_t *, xfs_sb_field_t,
@@ -63,7 +61,6 @@ STATIC void xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);
#else
-#define xfs_icsb_destroy_counters(mp) do { } while (0)
#define xfs_icsb_balance_counter(mp, a, b) do { } while (0)
#define xfs_icsb_balance_counter_locked(mp, a, b) do { } while (0)
#define xfs_icsb_modify_counters(mp, a, b, c) do { } while (0)
@@ -126,33 +123,11 @@ static const struct {
};
/*
- * Return a pointer to an initialized xfs_mount structure.
- */
-xfs_mount_t *
-xfs_mount_init(void)
-{
- xfs_mount_t *mp;
-
- mp = kmem_zalloc(sizeof(xfs_mount_t), KM_SLEEP);
-
- if (xfs_icsb_init_counters(mp)) {
- mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB;
- }
-
- spin_lock_init(&mp->m_sb_lock);
- mutex_init(&mp->m_ilock);
- mutex_init(&mp->m_growlock);
- atomic_set(&mp->m_active_trans, 0);
-
- return mp;
-}
-
-/*
* Free up the resources associated with a mount structure. Assume that
* the structure was initially zeroed, so we can tell which fields got
* initialized.
*/
-void
+STATIC void
xfs_mount_free(
xfs_mount_t *mp)
{
@@ -161,11 +136,8 @@ xfs_mount_free(
for (agno = 0; agno < mp->m_maxagi; agno++)
if (mp->m_perag[agno].pagb_list)
- kmem_free(mp->m_perag[agno].pagb_list,
- sizeof(xfs_perag_busy_t) *
- XFS_PAGB_NUM_SLOTS);
- kmem_free(mp->m_perag,
- sizeof(xfs_perag_t) * mp->m_sb.sb_agcount);
+ kmem_free(mp->m_perag[agno].pagb_list);
+ kmem_free(mp->m_perag);
}
spinlock_destroy(&mp->m_ail_lock);
@@ -176,13 +148,11 @@ xfs_mount_free(
XFS_QM_DONE(mp);
if (mp->m_fsname != NULL)
- kmem_free(mp->m_fsname, mp->m_fsname_len);
+ kmem_free(mp->m_fsname);
if (mp->m_rtname != NULL)
- kmem_free(mp->m_rtname, strlen(mp->m_rtname) + 1);
+ kmem_free(mp->m_rtname);
if (mp->m_logname != NULL)
- kmem_free(mp->m_logname, strlen(mp->m_logname) + 1);
-
- xfs_icsb_destroy_counters(mp);
+ kmem_free(mp->m_logname);
}
/*
@@ -288,6 +258,19 @@ xfs_mount_validate_sb(
return XFS_ERROR(EFSCORRUPTED);
}
+ /*
+ * Until this is fixed only page-sized or smaller data blocks work.
+ */
+ if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) {
+ xfs_fs_mount_cmn_err(flags,
+ "file system with blocksize %d bytes",
+ sbp->sb_blocksize);
+ xfs_fs_mount_cmn_err(flags,
+ "only pagesize (%ld) or less will currently work.",
+ PAGE_SIZE);
+ return XFS_ERROR(ENOSYS);
+ }
+
if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) ||
xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) {
xfs_fs_mount_cmn_err(flags,
@@ -309,19 +292,6 @@ xfs_mount_validate_sb(
return XFS_ERROR(ENOSYS);
}
- /*
- * Until this is fixed only page-sized or smaller data blocks work.
- */
- if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) {
- xfs_fs_mount_cmn_err(flags,
- "file system with blocksize %d bytes",
- sbp->sb_blocksize);
- xfs_fs_mount_cmn_err(flags,
- "only pagesize (%ld) or less will currently work.",
- PAGE_SIZE);
- return XFS_ERROR(ENOSYS);
- }
-
return 0;
}
@@ -994,9 +964,19 @@ xfs_mountfs(
* Re-check for ATTR2 in case it was found in bad_features2
* slot.
*/
- if (xfs_sb_version_hasattr2(&mp->m_sb))
+ if (xfs_sb_version_hasattr2(&mp->m_sb) &&
+ !(mp->m_flags & XFS_MOUNT_NOATTR2))
mp->m_flags |= XFS_MOUNT_ATTR2;
+ }
+
+ if (xfs_sb_version_hasattr2(&mp->m_sb) &&
+ (mp->m_flags & XFS_MOUNT_NOATTR2)) {
+ xfs_sb_version_removeattr2(&mp->m_sb);
+ update_flags |= XFS_SB_FEATURES2;
+ /* update sb_versionnum for the clearing of the morebits */
+ if (!sbp->sb_features2)
+ update_flags |= XFS_SB_VERSIONNUM;
}
/*
@@ -1255,15 +1235,13 @@ xfs_mountfs(
error2:
for (agno = 0; agno < sbp->sb_agcount; agno++)
if (mp->m_perag[agno].pagb_list)
- kmem_free(mp->m_perag[agno].pagb_list,
- sizeof(xfs_perag_busy_t) * XFS_PAGB_NUM_SLOTS);
- kmem_free(mp->m_perag, sbp->sb_agcount * sizeof(xfs_perag_t));
+ kmem_free(mp->m_perag[agno].pagb_list);
+ kmem_free(mp->m_perag);
mp->m_perag = NULL;
/* FALLTHROUGH */
error1:
if (uuid_mounted)
- xfs_uuid_unmount(mp);
- xfs_freesb(mp);
+ uuid_table_remove(&mp->m_sb.sb_uuid);
return error;
}
@@ -1274,7 +1252,7 @@ xfs_mountfs(
* log and makes sure that incore structures are freed.
*/
int
-xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
+xfs_unmountfs(xfs_mount_t *mp)
{
__uint64_t resblks;
int error = 0;
@@ -1341,9 +1319,8 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
*/
ASSERT(mp->m_inodes == NULL);
- xfs_unmountfs_close(mp, cr);
if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0)
- xfs_uuid_unmount(mp);
+ uuid_table_remove(&mp->m_sb.sb_uuid);
#if defined(DEBUG) || defined(INDUCE_IO_ERROR)
xfs_errortag_clearall(mp, 0);
@@ -1352,16 +1329,6 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
return 0;
}
-void
-xfs_unmountfs_close(xfs_mount_t *mp, struct cred *cr)
-{
- if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
- xfs_free_buftarg(mp->m_logdev_targp, 1);
- if (mp->m_rtdev_targp)
- xfs_free_buftarg(mp->m_rtdev_targp, 1);
- xfs_free_buftarg(mp->m_ddev_targp, 0);
-}
-
STATIC void
xfs_unmountfs_wait(xfs_mount_t *mp)
{
@@ -1905,16 +1872,6 @@ xfs_uuid_mount(
}
/*
- * Remove filesystem from the UUID table.
- */
-STATIC void
-xfs_uuid_unmount(
- xfs_mount_t *mp)
-{
- uuid_table_remove(&mp->m_sb.sb_uuid);
-}
-
-/*
* Used to log changes to the superblock unit and width fields which could
* be altered by the mount options, as well as any potential sb_features2
* fixup. Only the first superblock is updated.
@@ -1928,7 +1885,8 @@ xfs_mount_log_sb(
int error;
ASSERT(fields & (XFS_SB_UNIT | XFS_SB_WIDTH | XFS_SB_UUID |
- XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2));
+ XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2 |
+ XFS_SB_VERSIONNUM));
tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT);
error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
@@ -2109,7 +2067,7 @@ xfs_icsb_reinit_counters(
xfs_icsb_unlock(mp);
}
-STATIC void
+void
xfs_icsb_destroy_counters(
xfs_mount_t *mp)
{
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 63e0693a358..5269bd6e3df 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -61,6 +61,7 @@ struct xfs_bmap_free;
struct xfs_extdelta;
struct xfs_swapext;
struct xfs_mru_cache;
+struct xfs_nameops;
/*
* Prototypes and functions for the Data Migration subsystem.
@@ -210,12 +211,14 @@ typedef struct xfs_icsb_cnts {
extern int xfs_icsb_init_counters(struct xfs_mount *);
extern void xfs_icsb_reinit_counters(struct xfs_mount *);
+extern void xfs_icsb_destroy_counters(struct xfs_mount *);
extern void xfs_icsb_sync_counters(struct xfs_mount *, int);
extern void xfs_icsb_sync_counters_locked(struct xfs_mount *, int);
#else
-#define xfs_icsb_init_counters(mp) (0)
-#define xfs_icsb_reinit_counters(mp) do { } while (0)
+#define xfs_icsb_init_counters(mp) (0)
+#define xfs_icsb_destroy_counters(mp) do { } while (0)
+#define xfs_icsb_reinit_counters(mp) do { } while (0)
#define xfs_icsb_sync_counters(mp, flags) do { } while (0)
#define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0)
#endif
@@ -313,6 +316,7 @@ typedef struct xfs_mount {
__uint8_t m_inode_quiesce;/* call quiesce on new inodes.
field governed by m_ilock */
__uint8_t m_sectbb_log; /* sectlog - BBSHIFT */
+ const struct xfs_nameops *m_dirnameops; /* vector of dir name ops */
int m_dirblksize; /* directory block sz--bytes */
int m_dirblkfsbs; /* directory block sz--fsbs */
xfs_dablk_t m_dirdatablk; /* blockno of dir data v2 */
@@ -378,6 +382,7 @@ typedef struct xfs_mount {
counters */
#define XFS_MOUNT_FILESTREAMS (1ULL << 24) /* enable the filestreams
allocator */
+#define XFS_MOUNT_NOATTR2 (1ULL << 25) /* disable use of attr2 format */
/*
@@ -510,15 +515,12 @@ typedef struct xfs_mod_sb {
#define XFS_MOUNT_ILOCK(mp) mutex_lock(&((mp)->m_ilock))
#define XFS_MOUNT_IUNLOCK(mp) mutex_unlock(&((mp)->m_ilock))
-extern xfs_mount_t *xfs_mount_init(void);
extern void xfs_mod_sb(xfs_trans_t *, __int64_t);
extern int xfs_log_sbcount(xfs_mount_t *, uint);
-extern void xfs_mount_free(xfs_mount_t *mp);
extern int xfs_mountfs(xfs_mount_t *mp, int);
extern void xfs_mountfs_check_barriers(xfs_mount_t *mp);
-extern int xfs_unmountfs(xfs_mount_t *, struct cred *);
-extern void xfs_unmountfs_close(xfs_mount_t *, struct cred *);
+extern int xfs_unmountfs(xfs_mount_t *);
extern int xfs_unmountfs_writesb(xfs_mount_t *);
extern int xfs_unmount_flush(xfs_mount_t *, int);
extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int);
@@ -544,9 +546,6 @@ extern void xfs_qmops_put(struct xfs_mount *);
extern struct xfs_dmops xfs_dmcore_xfs;
-extern int xfs_init(void);
-extern void xfs_cleanup(void);
-
#endif /* __KERNEL__ */
#endif /* __XFS_MOUNT_H__ */
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index a0b2c0a2589..afee7eb2432 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -307,15 +307,18 @@ xfs_mru_cache_init(void)
xfs_mru_elem_zone = kmem_zone_init(sizeof(xfs_mru_cache_elem_t),
"xfs_mru_cache_elem");
if (!xfs_mru_elem_zone)
- return ENOMEM;
+ goto out;
xfs_mru_reap_wq = create_singlethread_workqueue("xfs_mru_cache");
- if (!xfs_mru_reap_wq) {
- kmem_zone_destroy(xfs_mru_elem_zone);
- return ENOMEM;
- }
+ if (!xfs_mru_reap_wq)
+ goto out_destroy_mru_elem_zone;
return 0;
+
+ out_destroy_mru_elem_zone:
+ kmem_zone_destroy(xfs_mru_elem_zone);
+ out:
+ return -ENOMEM;
}
void
@@ -382,9 +385,9 @@ xfs_mru_cache_create(
exit:
if (err && mru && mru->lists)
- kmem_free(mru->lists, mru->grp_count * sizeof(*mru->lists));
+ kmem_free(mru->lists);
if (err && mru)
- kmem_free(mru, sizeof(*mru));
+ kmem_free(mru);
return err;
}
@@ -424,8 +427,8 @@ xfs_mru_cache_destroy(
xfs_mru_cache_flush(mru);
- kmem_free(mru->lists, mru->grp_count * sizeof(*mru->lists));
- kmem_free(mru, sizeof(*mru));
+ kmem_free(mru->lists);
+ kmem_free(mru);
}
/*
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index d8063e1ad29..d700dacdb10 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -336,22 +336,18 @@ xfs_rename(
ASSERT(error != EEXIST);
if (error)
goto abort_return;
- xfs_ichgtime(src_ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
-
- } else {
- /*
- * We always want to hit the ctime on the source inode.
- * We do it in the if clause above for the 'new_parent &&
- * src_is_directory' case, and here we get all the other
- * cases. This isn't strictly required by the standards
- * since the source inode isn't really being changed,
- * but old unix file systems did it and some incremental
- * backup programs won't work without it.
- */
- xfs_ichgtime(src_ip, XFS_ICHGTIME_CHG);
}
/*
+ * We always want to hit the ctime on the source inode.
+ *
+ * This isn't strictly required by the standards since the source
+ * inode isn't really being changed, but old unix file systems did
+ * it and some incremental backup programs won't work without it.
+ */
+ xfs_ichgtime(src_ip, XFS_ICHGTIME_CHG);
+
+ /*
* Adjust the link count on src_dp. This is necessary when
* renaming a directory, either within one parent when
* the target existed, or across two parent directories.
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index a0dc6e5bc5b..bf87a591350 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -2062,7 +2062,7 @@ xfs_growfs_rt(
/*
* Free the fake mp structure.
*/
- kmem_free(nmp, sizeof(*nmp));
+ kmem_free(nmp);
return error;
}
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h
index d904efe7f87..3f8cf1587f4 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -46,10 +46,12 @@ struct xfs_mount;
#define XFS_SB_VERSION_SECTORBIT 0x0800
#define XFS_SB_VERSION_EXTFLGBIT 0x1000
#define XFS_SB_VERSION_DIRV2BIT 0x2000
+#define XFS_SB_VERSION_BORGBIT 0x4000 /* ASCII only case-insens. */
#define XFS_SB_VERSION_MOREBITSBIT 0x8000
#define XFS_SB_VERSION_OKSASHFBITS \
(XFS_SB_VERSION_EXTFLGBIT | \
- XFS_SB_VERSION_DIRV2BIT)
+ XFS_SB_VERSION_DIRV2BIT | \
+ XFS_SB_VERSION_BORGBIT)
#define XFS_SB_VERSION_OKREALFBITS \
(XFS_SB_VERSION_ATTRBIT | \
XFS_SB_VERSION_NLINKBIT | \
@@ -437,6 +439,12 @@ static inline int xfs_sb_version_hassector(xfs_sb_t *sbp)
((sbp)->sb_versionnum & XFS_SB_VERSION_SECTORBIT);
}
+static inline int xfs_sb_version_hasasciici(xfs_sb_t *sbp)
+{
+ return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
+ (sbp->sb_versionnum & XFS_SB_VERSION_BORGBIT);
+}
+
static inline int xfs_sb_version_hasmorebits(xfs_sb_t *sbp)
{
return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
@@ -473,6 +481,13 @@ static inline void xfs_sb_version_addattr2(xfs_sb_t *sbp)
((sbp)->sb_features2 | XFS_SB_VERSION2_ATTR2BIT)));
}
+static inline void xfs_sb_version_removeattr2(xfs_sb_t *sbp)
+{
+ sbp->sb_features2 &= ~XFS_SB_VERSION2_ATTR2BIT;
+ if (!sbp->sb_features2)
+ sbp->sb_versionnum &= ~XFS_SB_VERSION_MOREBITSBIT;
+}
+
/*
* end of superblock version macros
*/
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 140386434aa..e4ebddd3c50 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -889,7 +889,7 @@ shut_us_down:
tp->t_commit_lsn = commit_lsn;
if (nvec > XFS_TRANS_LOGVEC_COUNT) {
- kmem_free(log_vector, nvec * sizeof(xfs_log_iovec_t));
+ kmem_free(log_vector);
}
/*
@@ -1265,7 +1265,7 @@ xfs_trans_committed(
ASSERT(!XFS_LIC_ARE_ALL_FREE(licp));
xfs_trans_chunk_committed(licp, tp->t_lsn, abortflag);
next_licp = licp->lic_next;
- kmem_free(licp, sizeof(xfs_log_item_chunk_t));
+ kmem_free(licp);
licp = next_licp;
}
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index 4c70bf5e998..2a1c0f071f9 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -291,7 +291,7 @@ xfs_trans_inode_broot_debug(
iip = ip->i_itemp;
if (iip->ili_root_size != 0) {
ASSERT(iip->ili_orig_root != NULL);
- kmem_free(iip->ili_orig_root, iip->ili_root_size);
+ kmem_free(iip->ili_orig_root);
iip->ili_root_size = 0;
iip->ili_orig_root = NULL;
}
diff --git a/fs/xfs/xfs_trans_item.c b/fs/xfs/xfs_trans_item.c
index 66a09f0d894..db5c8359552 100644
--- a/fs/xfs/xfs_trans_item.c
+++ b/fs/xfs/xfs_trans_item.c
@@ -161,7 +161,7 @@ xfs_trans_free_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp)
licpp = &((*licpp)->lic_next);
}
*licpp = licp->lic_next;
- kmem_free(licp, sizeof(xfs_log_item_chunk_t));
+ kmem_free(licp);
tp->t_items_free -= XFS_LIC_NUM_SLOTS;
}
}
@@ -314,7 +314,7 @@ xfs_trans_free_items(
ASSERT(!XFS_LIC_ARE_ALL_FREE(licp));
(void) xfs_trans_unlock_chunk(licp, 1, abort, NULLCOMMITLSN);
next_licp = licp->lic_next;
- kmem_free(licp, sizeof(xfs_log_item_chunk_t));
+ kmem_free(licp);
licp = next_licp;
}
@@ -363,7 +363,7 @@ xfs_trans_unlock_items(xfs_trans_t *tp, xfs_lsn_t commit_lsn)
next_licp = licp->lic_next;
if (XFS_LIC_ARE_ALL_FREE(licp)) {
*licpp = next_licp;
- kmem_free(licp, sizeof(xfs_log_item_chunk_t));
+ kmem_free(licp);
freed -= XFS_LIC_NUM_SLOTS;
} else {
licpp = &(licp->lic_next);
@@ -530,7 +530,7 @@ xfs_trans_free_busy(xfs_trans_t *tp)
lbcp = tp->t_busy.lbc_next;
while (lbcp != NULL) {
lbcq = lbcp->lbc_next;
- kmem_free(lbcp, sizeof(xfs_log_busy_chunk_t));
+ kmem_free(lbcp);
lbcp = lbcq;
}
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 30bacd8bb0e..4a9a43315a8 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -58,586 +58,6 @@
#include "xfs_utils.h"
-int __init
-xfs_init(void)
-{
-#ifdef XFS_DABUF_DEBUG
- extern spinlock_t xfs_dabuf_global_lock;
- spin_lock_init(&xfs_dabuf_global_lock);
-#endif
-
- /*
- * Initialize all of the zone allocators we use.
- */
- xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
- "xfs_log_ticket");
- xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t),
- "xfs_bmap_free_item");
- xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t),
- "xfs_btree_cur");
- xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t),
- "xfs_da_state");
- xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf");
- xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork");
- xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans");
- xfs_acl_zone_init(xfs_acl_zone, "xfs_acl");
- xfs_mru_cache_init();
- xfs_filestream_init();
-
- /*
- * The size of the zone allocated buf log item is the maximum
- * size possible under XFS. This wastes a little bit of memory,
- * but it is much faster.
- */
- xfs_buf_item_zone =
- kmem_zone_init((sizeof(xfs_buf_log_item_t) +
- (((XFS_MAX_BLOCKSIZE / XFS_BLI_CHUNK) /
- NBWORD) * sizeof(int))),
- "xfs_buf_item");
- xfs_efd_zone =
- kmem_zone_init((sizeof(xfs_efd_log_item_t) +
- ((XFS_EFD_MAX_FAST_EXTENTS - 1) *
- sizeof(xfs_extent_t))),
- "xfs_efd_item");
- xfs_efi_zone =
- kmem_zone_init((sizeof(xfs_efi_log_item_t) +
- ((XFS_EFI_MAX_FAST_EXTENTS - 1) *
- sizeof(xfs_extent_t))),
- "xfs_efi_item");
-
- /*
- * These zones warrant special memory allocator hints
- */
- xfs_inode_zone =
- kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode",
- KM_ZONE_HWALIGN | KM_ZONE_RECLAIM |
- KM_ZONE_SPREAD, NULL);
- xfs_ili_zone =
- kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili",
- KM_ZONE_SPREAD, NULL);
-
- /*
- * Allocate global trace buffers.
- */
-#ifdef XFS_ALLOC_TRACE
- xfs_alloc_trace_buf = ktrace_alloc(XFS_ALLOC_TRACE_SIZE, KM_SLEEP);
-#endif
-#ifdef XFS_BMAP_TRACE
- xfs_bmap_trace_buf = ktrace_alloc(XFS_BMAP_TRACE_SIZE, KM_SLEEP);
-#endif
-#ifdef XFS_BMBT_TRACE
- xfs_bmbt_trace_buf = ktrace_alloc(XFS_BMBT_TRACE_SIZE, KM_SLEEP);
-#endif
-#ifdef XFS_ATTR_TRACE
- xfs_attr_trace_buf = ktrace_alloc(XFS_ATTR_TRACE_SIZE, KM_SLEEP);
-#endif
-#ifdef XFS_DIR2_TRACE
- xfs_dir2_trace_buf = ktrace_alloc(XFS_DIR2_GTRACE_SIZE, KM_SLEEP);
-#endif
-
- xfs_dir_startup();
-
-#if (defined(DEBUG) || defined(INDUCE_IO_ERROR))
- xfs_error_test_init();
-#endif /* DEBUG || INDUCE_IO_ERROR */
-
- xfs_init_procfs();
- xfs_sysctl_register();
- return 0;
-}
-
-void __exit
-xfs_cleanup(void)
-{
- extern kmem_zone_t *xfs_inode_zone;
- extern kmem_zone_t *xfs_efd_zone;
- extern kmem_zone_t *xfs_efi_zone;
-
- xfs_cleanup_procfs();
- xfs_sysctl_unregister();
- xfs_filestream_uninit();
- xfs_mru_cache_uninit();
- xfs_acl_zone_destroy(xfs_acl_zone);
-
-#ifdef XFS_DIR2_TRACE
- ktrace_free(xfs_dir2_trace_buf);
-#endif
-#ifdef XFS_ATTR_TRACE
- ktrace_free(xfs_attr_trace_buf);
-#endif
-#ifdef XFS_BMBT_TRACE
- ktrace_free(xfs_bmbt_trace_buf);
-#endif
-#ifdef XFS_BMAP_TRACE
- ktrace_free(xfs_bmap_trace_buf);
-#endif
-#ifdef XFS_ALLOC_TRACE
- ktrace_free(xfs_alloc_trace_buf);
-#endif
-
- kmem_zone_destroy(xfs_bmap_free_item_zone);
- kmem_zone_destroy(xfs_btree_cur_zone);
- kmem_zone_destroy(xfs_inode_zone);
- kmem_zone_destroy(xfs_trans_zone);
- kmem_zone_destroy(xfs_da_state_zone);
- kmem_zone_destroy(xfs_dabuf_zone);
- kmem_zone_destroy(xfs_buf_item_zone);
- kmem_zone_destroy(xfs_efd_zone);
- kmem_zone_destroy(xfs_efi_zone);
- kmem_zone_destroy(xfs_ifork_zone);
- kmem_zone_destroy(xfs_ili_zone);
- kmem_zone_destroy(xfs_log_ticket_zone);
-}
-
-/*
- * xfs_start_flags
- *
- * This function fills in xfs_mount_t fields based on mount args.
- * Note: the superblock has _not_ yet been read in.
- */
-STATIC int
-xfs_start_flags(
- struct xfs_mount_args *ap,
- struct xfs_mount *mp)
-{
- /* Values are in BBs */
- if ((ap->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) {
- /*
- * At this point the superblock has not been read
- * in, therefore we do not know the block size.
- * Before the mount call ends we will convert
- * these to FSBs.
- */
- mp->m_dalign = ap->sunit;
- mp->m_swidth = ap->swidth;
- }
-
- if (ap->logbufs != -1 &&
- ap->logbufs != 0 &&
- (ap->logbufs < XLOG_MIN_ICLOGS ||
- ap->logbufs > XLOG_MAX_ICLOGS)) {
- cmn_err(CE_WARN,
- "XFS: invalid logbufs value: %d [not %d-%d]",
- ap->logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
- return XFS_ERROR(EINVAL);
- }
- mp->m_logbufs = ap->logbufs;
- if (ap->logbufsize != -1 &&
- ap->logbufsize != 0 &&
- (ap->logbufsize < XLOG_MIN_RECORD_BSIZE ||
- ap->logbufsize > XLOG_MAX_RECORD_BSIZE ||
- !is_power_of_2(ap->logbufsize))) {
- cmn_err(CE_WARN,
- "XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
- ap->logbufsize);
- return XFS_ERROR(EINVAL);
- }
- mp->m_logbsize = ap->logbufsize;
- mp->m_fsname_len = strlen(ap->fsname) + 1;
- mp->m_fsname = kmem_alloc(mp->m_fsname_len, KM_SLEEP);
- strcpy(mp->m_fsname, ap->fsname);
- if (ap->rtname[0]) {
- mp->m_rtname = kmem_alloc(strlen(ap->rtname) + 1, KM_SLEEP);
- strcpy(mp->m_rtname, ap->rtname);
- }
- if (ap->logname[0]) {
- mp->m_logname = kmem_alloc(strlen(ap->logname) + 1, KM_SLEEP);
- strcpy(mp->m_logname, ap->logname);
- }
-
- if (ap->flags & XFSMNT_WSYNC)
- mp->m_flags |= XFS_MOUNT_WSYNC;
-#if XFS_BIG_INUMS
- if (ap->flags & XFSMNT_INO64) {
- mp->m_flags |= XFS_MOUNT_INO64;
- mp->m_inoadd = XFS_INO64_OFFSET;
- }
-#endif
- if (ap->flags & XFSMNT_RETERR)
- mp->m_flags |= XFS_MOUNT_RETERR;
- if (ap->flags & XFSMNT_NOALIGN)
- mp->m_flags |= XFS_MOUNT_NOALIGN;
- if (ap->flags & XFSMNT_SWALLOC)
- mp->m_flags |= XFS_MOUNT_SWALLOC;
- if (ap->flags & XFSMNT_OSYNCISOSYNC)
- mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC;
- if (ap->flags & XFSMNT_32BITINODES)
- mp->m_flags |= XFS_MOUNT_32BITINODES;
-
- if (ap->flags & XFSMNT_IOSIZE) {
- if (ap->iosizelog > XFS_MAX_IO_LOG ||
- ap->iosizelog < XFS_MIN_IO_LOG) {
- cmn_err(CE_WARN,
- "XFS: invalid log iosize: %d [not %d-%d]",
- ap->iosizelog, XFS_MIN_IO_LOG,
- XFS_MAX_IO_LOG);
- return XFS_ERROR(EINVAL);
- }
-
- mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
- mp->m_readio_log = mp->m_writeio_log = ap->iosizelog;
- }
-
- if (ap->flags & XFSMNT_IKEEP)
- mp->m_flags |= XFS_MOUNT_IKEEP;
- if (ap->flags & XFSMNT_DIRSYNC)
- mp->m_flags |= XFS_MOUNT_DIRSYNC;
- if (ap->flags & XFSMNT_ATTR2)
- mp->m_flags |= XFS_MOUNT_ATTR2;
-
- if (ap->flags2 & XFSMNT2_COMPAT_IOSIZE)
- mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
-
- /*
- * no recovery flag requires a read-only mount
- */
- if (ap->flags & XFSMNT_NORECOVERY) {
- if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
- cmn_err(CE_WARN,
- "XFS: tried to mount a FS read-write without recovery!");
- return XFS_ERROR(EINVAL);
- }
- mp->m_flags |= XFS_MOUNT_NORECOVERY;
- }
-
- if (ap->flags & XFSMNT_NOUUID)
- mp->m_flags |= XFS_MOUNT_NOUUID;
- if (ap->flags & XFSMNT_BARRIER)
- mp->m_flags |= XFS_MOUNT_BARRIER;
- else
- mp->m_flags &= ~XFS_MOUNT_BARRIER;
-
- if (ap->flags2 & XFSMNT2_FILESTREAMS)
- mp->m_flags |= XFS_MOUNT_FILESTREAMS;
-
- if (ap->flags & XFSMNT_DMAPI)
- mp->m_flags |= XFS_MOUNT_DMAPI;
- return 0;
-}
-
-/*
- * This function fills in xfs_mount_t fields based on mount args.
- * Note: the superblock _has_ now been read in.
- */
-STATIC int
-xfs_finish_flags(
- struct xfs_mount_args *ap,
- struct xfs_mount *mp)
-{
- int ronly = (mp->m_flags & XFS_MOUNT_RDONLY);
-
- /* Fail a mount where the logbuf is smaller then the log stripe */
- if (xfs_sb_version_haslogv2(&mp->m_sb)) {
- if ((ap->logbufsize <= 0) &&
- (mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE)) {
- mp->m_logbsize = mp->m_sb.sb_logsunit;
- } else if (ap->logbufsize > 0 &&
- ap->logbufsize < mp->m_sb.sb_logsunit) {
- cmn_err(CE_WARN,
- "XFS: logbuf size must be greater than or equal to log stripe size");
- return XFS_ERROR(EINVAL);
- }
- } else {
- /* Fail a mount if the logbuf is larger than 32K */
- if (ap->logbufsize > XLOG_BIG_RECORD_BSIZE) {
- cmn_err(CE_WARN,
- "XFS: logbuf size for version 1 logs must be 16K or 32K");
- return XFS_ERROR(EINVAL);
- }
- }
-
- if (xfs_sb_version_hasattr2(&mp->m_sb))
- mp->m_flags |= XFS_MOUNT_ATTR2;
-
- /*
- * prohibit r/w mounts of read-only filesystems
- */
- if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) {
- cmn_err(CE_WARN,
- "XFS: cannot mount a read-only filesystem as read-write");
- return XFS_ERROR(EROFS);
- }
-
- /*
- * check for shared mount.
- */
- if (ap->flags & XFSMNT_SHARED) {
- if (!xfs_sb_version_hasshared(&mp->m_sb))
- return XFS_ERROR(EINVAL);
-
- /*
- * For IRIX 6.5, shared mounts must have the shared
- * version bit set, have the persistent readonly
- * field set, must be version 0 and can only be mounted
- * read-only.
- */
- if (!ronly || !(mp->m_sb.sb_flags & XFS_SBF_READONLY) ||
- (mp->m_sb.sb_shared_vn != 0))
- return XFS_ERROR(EINVAL);
-
- mp->m_flags |= XFS_MOUNT_SHARED;
-
- /*
- * Shared XFS V0 can't deal with DMI. Return EINVAL.
- */
- if (mp->m_sb.sb_shared_vn == 0 && (ap->flags & XFSMNT_DMAPI))
- return XFS_ERROR(EINVAL);
- }
-
- if (ap->flags & XFSMNT_UQUOTA) {
- mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
- if (ap->flags & XFSMNT_UQUOTAENF)
- mp->m_qflags |= XFS_UQUOTA_ENFD;
- }
-
- if (ap->flags & XFSMNT_GQUOTA) {
- mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
- if (ap->flags & XFSMNT_GQUOTAENF)
- mp->m_qflags |= XFS_OQUOTA_ENFD;
- } else if (ap->flags & XFSMNT_PQUOTA) {
- mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
- if (ap->flags & XFSMNT_PQUOTAENF)
- mp->m_qflags |= XFS_OQUOTA_ENFD;
- }
-
- return 0;
-}
-
-/*
- * xfs_mount
- *
- * The file system configurations are:
- * (1) device (partition) with data and internal log
- * (2) logical volume with data and log subvolumes.
- * (3) logical volume with data, log, and realtime subvolumes.
- *
- * We only have to handle opening the log and realtime volumes here if
- * they are present. The data subvolume has already been opened by
- * get_sb_bdev() and is stored in vfsp->vfs_super->s_bdev.
- */
-int
-xfs_mount(
- struct xfs_mount *mp,
- struct xfs_mount_args *args,
- cred_t *credp)
-{
- struct block_device *ddev, *logdev, *rtdev;
- int flags = 0, error;
-
- ddev = mp->m_super->s_bdev;
- logdev = rtdev = NULL;
-
- error = xfs_dmops_get(mp, args);
- if (error)
- return error;
- error = xfs_qmops_get(mp, args);
- if (error)
- return error;
-
- if (args->flags & XFSMNT_QUIET)
- flags |= XFS_MFSI_QUIET;
-
- /*
- * Open real time and log devices - order is important.
- */
- if (args->logname[0]) {
- error = xfs_blkdev_get(mp, args->logname, &logdev);
- if (error)
- return error;
- }
- if (args->rtname[0]) {
- error = xfs_blkdev_get(mp, args->rtname, &rtdev);
- if (error) {
- xfs_blkdev_put(logdev);
- return error;
- }
-
- if (rtdev == ddev || rtdev == logdev) {
- cmn_err(CE_WARN,
- "XFS: Cannot mount filesystem with identical rtdev and ddev/logdev.");
- xfs_blkdev_put(logdev);
- xfs_blkdev_put(rtdev);
- return EINVAL;
- }
- }
-
- /*
- * Setup xfs_mount buffer target pointers
- */
- error = ENOMEM;
- mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0);
- if (!mp->m_ddev_targp) {
- xfs_blkdev_put(logdev);
- xfs_blkdev_put(rtdev);
- return error;
- }
- if (rtdev) {
- mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1);
- if (!mp->m_rtdev_targp) {
- xfs_blkdev_put(logdev);
- xfs_blkdev_put(rtdev);
- goto error0;
- }
- }
- mp->m_logdev_targp = (logdev && logdev != ddev) ?
- xfs_alloc_buftarg(logdev, 1) : mp->m_ddev_targp;
- if (!mp->m_logdev_targp) {
- xfs_blkdev_put(logdev);
- xfs_blkdev_put(rtdev);
- goto error0;
- }
-
- /*
- * Setup flags based on mount(2) options and then the superblock
- */
- error = xfs_start_flags(args, mp);
- if (error)
- goto error1;
- error = xfs_readsb(mp, flags);
- if (error)
- goto error1;
- error = xfs_finish_flags(args, mp);
- if (error)
- goto error2;
-
- /*
- * Setup xfs_mount buffer target pointers based on superblock
- */
- error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_blocksize,
- mp->m_sb.sb_sectsize);
- if (!error && logdev && logdev != ddev) {
- unsigned int log_sector_size = BBSIZE;
-
- if (xfs_sb_version_hassector(&mp->m_sb))
- log_sector_size = mp->m_sb.sb_logsectsize;
- error = xfs_setsize_buftarg(mp->m_logdev_targp,
- mp->m_sb.sb_blocksize,
- log_sector_size);
- }
- if (!error && rtdev)
- error = xfs_setsize_buftarg(mp->m_rtdev_targp,
- mp->m_sb.sb_blocksize,
- mp->m_sb.sb_sectsize);
- if (error)
- goto error2;
-
- if (mp->m_flags & XFS_MOUNT_BARRIER)
- xfs_mountfs_check_barriers(mp);
-
- if ((error = xfs_filestream_mount(mp)))
- goto error2;
-
- error = xfs_mountfs(mp, flags);
- if (error)
- goto error2;
-
- XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, args->mtpt, args->fsname);
-
- return 0;
-
-error2:
- if (mp->m_sb_bp)
- xfs_freesb(mp);
-error1:
- xfs_binval(mp->m_ddev_targp);
- if (logdev && logdev != ddev)
- xfs_binval(mp->m_logdev_targp);
- if (rtdev)
- xfs_binval(mp->m_rtdev_targp);
-error0:
- xfs_unmountfs_close(mp, credp);
- xfs_qmops_put(mp);
- xfs_dmops_put(mp);
- return error;
-}
-
-int
-xfs_unmount(
- xfs_mount_t *mp,
- int flags,
- cred_t *credp)
-{
- xfs_inode_t *rip;
- bhv_vnode_t *rvp;
- int unmount_event_wanted = 0;
- int unmount_event_flags = 0;
- int xfs_unmountfs_needed = 0;
- int error;
-
- rip = mp->m_rootip;
- rvp = XFS_ITOV(rip);
-
-#ifdef HAVE_DMAPI
- if (mp->m_flags & XFS_MOUNT_DMAPI) {
- error = XFS_SEND_PREUNMOUNT(mp,
- rip, DM_RIGHT_NULL, rip, DM_RIGHT_NULL,
- NULL, NULL, 0, 0,
- (mp->m_dmevmask & (1<<DM_EVENT_PREUNMOUNT))?
- 0:DM_FLAGS_UNWANTED);
- if (error)
- return XFS_ERROR(error);
- unmount_event_wanted = 1;
- unmount_event_flags = (mp->m_dmevmask & (1<<DM_EVENT_UNMOUNT))?
- 0 : DM_FLAGS_UNWANTED;
- }
-#endif
-
- /*
- * Blow away any referenced inode in the filestreams cache.
- * This can and will cause log traffic as inodes go inactive
- * here.
- */
- xfs_filestream_unmount(mp);
-
- XFS_bflush(mp->m_ddev_targp);
- error = xfs_unmount_flush(mp, 0);
- if (error)
- goto out;
-
- ASSERT(vn_count(rvp) == 1);
-
- /*
- * Drop the reference count
- */
- IRELE(rip);
-
- /*
- * If we're forcing a shutdown, typically because of a media error,
- * we want to make sure we invalidate dirty pages that belong to
- * referenced vnodes as well.
- */
- if (XFS_FORCED_SHUTDOWN(mp)) {
- error = xfs_sync(mp, SYNC_WAIT | SYNC_CLOSE);
- ASSERT(error != EFSCORRUPTED);
- }
- xfs_unmountfs_needed = 1;
-
-out:
- /* Send DMAPI event, if required.
- * Then do xfs_unmountfs() if needed.
- * Then return error (or zero).
- */
- if (unmount_event_wanted) {
- /* Note: mp structure must still exist for
- * XFS_SEND_UNMOUNT() call.
- */
- XFS_SEND_UNMOUNT(mp, error == 0 ? rip : NULL,
- DM_RIGHT_NULL, 0, error, unmount_event_flags);
- }
- if (xfs_unmountfs_needed) {
- /*
- * Call common unmount function to flush to disk
- * and free the super block buffer & mount structures.
- */
- xfs_unmountfs(mp, credp);
- xfs_qmops_put(mp);
- xfs_dmops_put(mp);
- kmem_free(mp, sizeof(xfs_mount_t));
- }
-
- return XFS_ERROR(error);
-}
-
STATIC void
xfs_quiesce_fs(
xfs_mount_t *mp)
@@ -694,30 +114,6 @@ xfs_attr_quiesce(
xfs_unmountfs_writesb(mp);
}
-int
-xfs_mntupdate(
- struct xfs_mount *mp,
- int *flags,
- struct xfs_mount_args *args)
-{
- if (!(*flags & MS_RDONLY)) { /* rw/ro -> rw */
- if (mp->m_flags & XFS_MOUNT_RDONLY)
- mp->m_flags &= ~XFS_MOUNT_RDONLY;
- if (args->flags & XFSMNT_BARRIER) {
- mp->m_flags |= XFS_MOUNT_BARRIER;
- xfs_mountfs_check_barriers(mp);
- } else {
- mp->m_flags &= ~XFS_MOUNT_BARRIER;
- }
- } else if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { /* rw -> ro */
- xfs_filestream_flush(mp);
- xfs_sync(mp, SYNC_DATA_QUIESCE);
- xfs_attr_quiesce(mp);
- mp->m_flags |= XFS_MOUNT_RDONLY;
- }
- return 0;
-}
-
/*
* xfs_unmount_flush implements a set of flush operation on special
* inodes, which are needed as a separate set of operations so that
@@ -1048,7 +444,7 @@ xfs_sync_inodes(
if (XFS_FORCED_SHUTDOWN(mp) && !(flags & SYNC_CLOSE)) {
XFS_MOUNT_IUNLOCK(mp);
- kmem_free(ipointer, sizeof(xfs_iptr_t));
+ kmem_free(ipointer);
return 0;
}
@@ -1194,7 +590,7 @@ xfs_sync_inodes(
}
XFS_MOUNT_IUNLOCK(mp);
ASSERT(ipointer_in == B_FALSE);
- kmem_free(ipointer, sizeof(xfs_iptr_t));
+ kmem_free(ipointer);
return XFS_ERROR(error);
}
@@ -1224,7 +620,7 @@ xfs_sync_inodes(
ASSERT(ipointer_in == B_FALSE);
- kmem_free(ipointer, sizeof(xfs_iptr_t));
+ kmem_free(ipointer);
return XFS_ERROR(last_error);
}
diff --git a/fs/xfs/xfs_vfsops.h b/fs/xfs/xfs_vfsops.h
index 1688817c55e..a74b05087da 100644
--- a/fs/xfs/xfs_vfsops.h
+++ b/fs/xfs/xfs_vfsops.h
@@ -8,11 +8,6 @@ struct kstatfs;
struct xfs_mount;
struct xfs_mount_args;
-int xfs_mount(struct xfs_mount *mp, struct xfs_mount_args *args,
- struct cred *credp);
-int xfs_unmount(struct xfs_mount *mp, int flags, struct cred *credp);
-int xfs_mntupdate(struct xfs_mount *mp, int *flags,
- struct xfs_mount_args *args);
int xfs_sync(struct xfs_mount *mp, int flags);
void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname,
int lnnum);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index e475e3717eb..76a1166af82 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -75,26 +75,23 @@ xfs_open(
return 0;
}
-/*
- * xfs_setattr
- */
int
xfs_setattr(
- xfs_inode_t *ip,
- bhv_vattr_t *vap,
+ struct xfs_inode *ip,
+ struct iattr *iattr,
int flags,
cred_t *credp)
{
xfs_mount_t *mp = ip->i_mount;
+ struct inode *inode = XFS_ITOV(ip);
+ int mask = iattr->ia_valid;
xfs_trans_t *tp;
- int mask;
int code;
uint lock_flags;
uint commit_flags=0;
uid_t uid=0, iuid=0;
gid_t gid=0, igid=0;
int timeflags = 0;
- xfs_prid_t projid=0, iprojid=0;
struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2;
int file_owner;
int need_iolock = 1;
@@ -104,30 +101,9 @@ xfs_setattr(
if (mp->m_flags & XFS_MOUNT_RDONLY)
return XFS_ERROR(EROFS);
- /*
- * Cannot set certain attributes.
- */
- mask = vap->va_mask;
- if (mask & XFS_AT_NOSET) {
- return XFS_ERROR(EINVAL);
- }
-
if (XFS_FORCED_SHUTDOWN(mp))
return XFS_ERROR(EIO);
- /*
- * Timestamps do not need to be logged and hence do not
- * need to be done within a transaction.
- */
- if (mask & XFS_AT_UPDTIMES) {
- ASSERT((mask & ~XFS_AT_UPDTIMES) == 0);
- timeflags = ((mask & XFS_AT_UPDATIME) ? XFS_ICHGTIME_ACC : 0) |
- ((mask & XFS_AT_UPDCTIME) ? XFS_ICHGTIME_CHG : 0) |
- ((mask & XFS_AT_UPDMTIME) ? XFS_ICHGTIME_MOD : 0);
- xfs_ichgtime(ip, timeflags);
- return 0;
- }
-
olddquot1 = olddquot2 = NULL;
udqp = gdqp = NULL;
@@ -139,28 +115,22 @@ xfs_setattr(
* If the IDs do change before we take the ilock, we're covered
* because the i_*dquot fields will get updated anyway.
*/
- if (XFS_IS_QUOTA_ON(mp) &&
- (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID))) {
+ if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) {
uint qflags = 0;
- if ((mask & XFS_AT_UID) && XFS_IS_UQUOTA_ON(mp)) {
- uid = vap->va_uid;
+ if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) {
+ uid = iattr->ia_uid;
qflags |= XFS_QMOPT_UQUOTA;
} else {
uid = ip->i_d.di_uid;
}
- if ((mask & XFS_AT_GID) && XFS_IS_GQUOTA_ON(mp)) {
- gid = vap->va_gid;
+ if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) {
+ gid = iattr->ia_gid;
qflags |= XFS_QMOPT_GQUOTA;
} else {
gid = ip->i_d.di_gid;
}
- if ((mask & XFS_AT_PROJID) && XFS_IS_PQUOTA_ON(mp)) {
- projid = vap->va_projid;
- qflags |= XFS_QMOPT_PQUOTA;
- } else {
- projid = ip->i_d.di_projid;
- }
+
/*
* We take a reference when we initialize udqp and gdqp,
* so it is important that we never blindly double trip on
@@ -168,8 +138,8 @@ xfs_setattr(
*/
ASSERT(udqp == NULL);
ASSERT(gdqp == NULL);
- code = XFS_QM_DQVOPALLOC(mp, ip, uid, gid, projid, qflags,
- &udqp, &gdqp);
+ code = XFS_QM_DQVOPALLOC(mp, ip, uid, gid, ip->i_d.di_projid,
+ qflags, &udqp, &gdqp);
if (code)
return code;
}
@@ -180,10 +150,10 @@ xfs_setattr(
*/
tp = NULL;
lock_flags = XFS_ILOCK_EXCL;
- if (flags & ATTR_NOLOCK)
+ if (flags & XFS_ATTR_NOLOCK)
need_iolock = 0;
- if (!(mask & XFS_AT_SIZE)) {
- if ((mask != (XFS_AT_CTIME|XFS_AT_ATIME|XFS_AT_MTIME)) ||
+ if (!(mask & ATTR_SIZE)) {
+ if ((mask != (ATTR_CTIME|ATTR_ATIME|ATTR_MTIME)) ||
(mp->m_flags & XFS_MOUNT_WSYNC)) {
tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
commit_flags = 0;
@@ -196,10 +166,10 @@ xfs_setattr(
}
} else {
if (DM_EVENT_ENABLED(ip, DM_EVENT_TRUNCATE) &&
- !(flags & ATTR_DMI)) {
+ !(flags & XFS_ATTR_DMI)) {
int dmflags = AT_DELAY_FLAG(flags) | DM_SEM_FLAG_WR;
code = XFS_SEND_DATA(mp, DM_EVENT_TRUNCATE, ip,
- vap->va_size, 0, dmflags, NULL);
+ iattr->ia_size, 0, dmflags, NULL);
if (code) {
lock_flags = 0;
goto error_return;
@@ -219,9 +189,7 @@ xfs_setattr(
* Only the owner or users with CAP_FOWNER
* capability may do these things.
*/
- if (mask &
- (XFS_AT_MODE|XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_UID|
- XFS_AT_GID|XFS_AT_PROJID)) {
+ if (mask & (ATTR_MODE|ATTR_UID|ATTR_GID)) {
/*
* CAP_FOWNER overrides the following restrictions:
*
@@ -245,21 +213,21 @@ xfs_setattr(
* IDs of the calling process shall match the group owner of
* the file when setting the set-group-ID bit on that file
*/
- if (mask & XFS_AT_MODE) {
+ if (mask & ATTR_MODE) {
mode_t m = 0;
- if ((vap->va_mode & S_ISUID) && !file_owner)
+ if ((iattr->ia_mode & S_ISUID) && !file_owner)
m |= S_ISUID;
- if ((vap->va_mode & S_ISGID) &&
+ if ((iattr->ia_mode & S_ISGID) &&
!in_group_p((gid_t)ip->i_d.di_gid))
m |= S_ISGID;
#if 0
/* Linux allows this, Irix doesn't. */
- if ((vap->va_mode & S_ISVTX) && !S_ISDIR(ip->i_d.di_mode))
+ if ((iattr->ia_mode & S_ISVTX) && !S_ISDIR(ip->i_d.di_mode))
m |= S_ISVTX;
#endif
if (m && !capable(CAP_FSETID))
- vap->va_mode &= ~m;
+ iattr->ia_mode &= ~m;
}
}
@@ -270,7 +238,7 @@ xfs_setattr(
* and can change the group id only to a group of which he
* or she is a member.
*/
- if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) {
+ if (mask & (ATTR_UID|ATTR_GID)) {
/*
* These IDs could have changed since we last looked at them.
* But, we're assured that if the ownership did change
@@ -278,12 +246,9 @@ xfs_setattr(
* would have changed also.
*/
iuid = ip->i_d.di_uid;
- iprojid = ip->i_d.di_projid;
igid = ip->i_d.di_gid;
- gid = (mask & XFS_AT_GID) ? vap->va_gid : igid;
- uid = (mask & XFS_AT_UID) ? vap->va_uid : iuid;
- projid = (mask & XFS_AT_PROJID) ? (xfs_prid_t)vap->va_projid :
- iprojid;
+ gid = (mask & ATTR_GID) ? iattr->ia_gid : igid;
+ uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid;
/*
* CAP_CHOWN overrides the following restrictions:
@@ -303,11 +268,10 @@ xfs_setattr(
goto error_return;
}
/*
- * Do a quota reservation only if uid/projid/gid is actually
+ * Do a quota reservation only if uid/gid is actually
* going to change.
*/
if ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
- (XFS_IS_PQUOTA_ON(mp) && iprojid != projid) ||
(XFS_IS_GQUOTA_ON(mp) && igid != gid)) {
ASSERT(tp);
code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp,
@@ -321,13 +285,13 @@ xfs_setattr(
/*
* Truncate file. Must have write permission and not be a directory.
*/
- if (mask & XFS_AT_SIZE) {
+ if (mask & ATTR_SIZE) {
/* Short circuit the truncate case for zero length files */
- if ((vap->va_size == 0) &&
- (ip->i_size == 0) && (ip->i_d.di_nextents == 0)) {
+ if (iattr->ia_size == 0 &&
+ ip->i_size == 0 && ip->i_d.di_nextents == 0) {
xfs_iunlock(ip, XFS_ILOCK_EXCL);
lock_flags &= ~XFS_ILOCK_EXCL;
- if (mask & XFS_AT_CTIME)
+ if (mask & ATTR_CTIME)
xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
code = 0;
goto error_return;
@@ -350,9 +314,9 @@ xfs_setattr(
/*
* Change file access or modified times.
*/
- if (mask & (XFS_AT_ATIME|XFS_AT_MTIME)) {
+ if (mask & (ATTR_ATIME|ATTR_MTIME)) {
if (!file_owner) {
- if ((flags & ATTR_UTIME) &&
+ if ((mask & (ATTR_MTIME_SET|ATTR_ATIME_SET)) &&
!capable(CAP_FOWNER)) {
code = XFS_ERROR(EPERM);
goto error_return;
@@ -361,90 +325,23 @@ xfs_setattr(
}
/*
- * Change extent size or realtime flag.
- */
- if (mask & (XFS_AT_EXTSIZE|XFS_AT_XFLAGS)) {
- /*
- * Can't change extent size if any extents are allocated.
- */
- if (ip->i_d.di_nextents && (mask & XFS_AT_EXTSIZE) &&
- ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) !=
- vap->va_extsize) ) {
- code = XFS_ERROR(EINVAL); /* EFBIG? */
- goto error_return;
- }
-
- /*
- * Can't change realtime flag if any extents are allocated.
- */
- if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
- (mask & XFS_AT_XFLAGS) &&
- (XFS_IS_REALTIME_INODE(ip)) !=
- (vap->va_xflags & XFS_XFLAG_REALTIME)) {
- code = XFS_ERROR(EINVAL); /* EFBIG? */
- goto error_return;
- }
- /*
- * Extent size must be a multiple of the appropriate block
- * size, if set at all.
- */
- if ((mask & XFS_AT_EXTSIZE) && vap->va_extsize != 0) {
- xfs_extlen_t size;
-
- if (XFS_IS_REALTIME_INODE(ip) ||
- ((mask & XFS_AT_XFLAGS) &&
- (vap->va_xflags & XFS_XFLAG_REALTIME))) {
- size = mp->m_sb.sb_rextsize <<
- mp->m_sb.sb_blocklog;
- } else {
- size = mp->m_sb.sb_blocksize;
- }
- if (vap->va_extsize % size) {
- code = XFS_ERROR(EINVAL);
- goto error_return;
- }
- }
- /*
- * If realtime flag is set then must have realtime data.
- */
- if ((mask & XFS_AT_XFLAGS) &&
- (vap->va_xflags & XFS_XFLAG_REALTIME)) {
- if ((mp->m_sb.sb_rblocks == 0) ||
- (mp->m_sb.sb_rextsize == 0) ||
- (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) {
- code = XFS_ERROR(EINVAL);
- goto error_return;
- }
- }
-
- /*
- * Can't modify an immutable/append-only file unless
- * we have appropriate permission.
- */
- if ((mask & XFS_AT_XFLAGS) &&
- (ip->i_d.di_flags &
- (XFS_DIFLAG_IMMUTABLE|XFS_DIFLAG_APPEND) ||
- (vap->va_xflags &
- (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) &&
- !capable(CAP_LINUX_IMMUTABLE)) {
- code = XFS_ERROR(EPERM);
- goto error_return;
- }
- }
-
- /*
* Now we can make the changes. Before we join the inode
- * to the transaction, if XFS_AT_SIZE is set then take care of
+ * to the transaction, if ATTR_SIZE is set then take care of
* the part of the truncation that must be done without the
* inode lock. This needs to be done before joining the inode
* to the transaction, because the inode cannot be unlocked
* once it is a part of the transaction.
*/
- if (mask & XFS_AT_SIZE) {
+ if (mask & ATTR_SIZE) {
code = 0;
- if ((vap->va_size > ip->i_size) &&
- (flags & ATTR_NOSIZETOK) == 0) {
- code = xfs_igrow_start(ip, vap->va_size, credp);
+ if (iattr->ia_size > ip->i_size) {
+ /*
+ * Do the first part of growing a file: zero any data
+ * in the last block that is beyond the old EOF. We
+ * need to do this before the inode is joined to the
+ * transaction to modify the i_size.
+ */
+ code = xfs_zero_eof(ip, iattr->ia_size, ip->i_size);
}
xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -461,10 +358,10 @@ xfs_setattr(
* not within the range we care about here.
*/
if (!code &&
- (ip->i_size != ip->i_d.di_size) &&
- (vap->va_size > ip->i_d.di_size)) {
+ ip->i_size != ip->i_d.di_size &&
+ iattr->ia_size > ip->i_d.di_size) {
code = xfs_flush_pages(ip,
- ip->i_d.di_size, vap->va_size,
+ ip->i_d.di_size, iattr->ia_size,
XFS_B_ASYNC, FI_NONE);
}
@@ -472,7 +369,7 @@ xfs_setattr(
vn_iowait(ip);
if (!code)
- code = xfs_itruncate_data(ip, vap->va_size);
+ code = xfs_itruncate_data(ip, iattr->ia_size);
if (code) {
ASSERT(tp == NULL);
lock_flags &= ~XFS_ILOCK_EXCL;
@@ -501,28 +398,30 @@ xfs_setattr(
/*
* Truncate file. Must have write permission and not be a directory.
*/
- if (mask & XFS_AT_SIZE) {
+ if (mask & ATTR_SIZE) {
/*
* Only change the c/mtime if we are changing the size
* or we are explicitly asked to change it. This handles
* the semantic difference between truncate() and ftruncate()
* as implemented in the VFS.
*/
- if (vap->va_size != ip->i_size || (mask & XFS_AT_CTIME))
+ if (iattr->ia_size != ip->i_size || (mask & ATTR_CTIME))
timeflags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
- if (vap->va_size > ip->i_size) {
- xfs_igrow_finish(tp, ip, vap->va_size,
- !(flags & ATTR_DMI));
- } else if ((vap->va_size <= ip->i_size) ||
- ((vap->va_size == 0) && ip->i_d.di_nextents)) {
+ if (iattr->ia_size > ip->i_size) {
+ ip->i_d.di_size = iattr->ia_size;
+ ip->i_size = iattr->ia_size;
+ if (!(flags & XFS_ATTR_DMI))
+ xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+ } else if (iattr->ia_size <= ip->i_size ||
+ (iattr->ia_size == 0 && ip->i_d.di_nextents)) {
/*
* signal a sync transaction unless
* we're truncating an already unlinked
* file on a wsync filesystem
*/
- code = xfs_itruncate_finish(&tp, ip,
- (xfs_fsize_t)vap->va_size,
+ code = xfs_itruncate_finish(&tp, ip, iattr->ia_size,
XFS_DATA_FORK,
((ip->i_d.di_nlink != 0 ||
!(mp->m_flags & XFS_MOUNT_WSYNC))
@@ -544,9 +443,12 @@ xfs_setattr(
/*
* Change file access modes.
*/
- if (mask & XFS_AT_MODE) {
+ if (mask & ATTR_MODE) {
ip->i_d.di_mode &= S_IFMT;
- ip->i_d.di_mode |= vap->va_mode & ~S_IFMT;
+ ip->i_d.di_mode |= iattr->ia_mode & ~S_IFMT;
+
+ inode->i_mode &= S_IFMT;
+ inode->i_mode |= iattr->ia_mode & ~S_IFMT;
xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
timeflags |= XFS_ICHGTIME_CHG;
@@ -559,7 +461,7 @@ xfs_setattr(
* and can change the group id only to a group of which he
* or she is a member.
*/
- if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) {
+ if (mask & (ATTR_UID|ATTR_GID)) {
/*
* CAP_FSETID overrides the following restrictions:
*
@@ -577,39 +479,24 @@ xfs_setattr(
*/
if (iuid != uid) {
if (XFS_IS_UQUOTA_ON(mp)) {
- ASSERT(mask & XFS_AT_UID);
+ ASSERT(mask & ATTR_UID);
ASSERT(udqp);
olddquot1 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
&ip->i_udquot, udqp);
}
ip->i_d.di_uid = uid;
+ inode->i_uid = uid;
}
if (igid != gid) {
if (XFS_IS_GQUOTA_ON(mp)) {
ASSERT(!XFS_IS_PQUOTA_ON(mp));
- ASSERT(mask & XFS_AT_GID);
+ ASSERT(mask & ATTR_GID);
ASSERT(gdqp);
olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
&ip->i_gdquot, gdqp);
}
ip->i_d.di_gid = gid;
- }
- if (iprojid != projid) {
- if (XFS_IS_PQUOTA_ON(mp)) {
- ASSERT(!XFS_IS_GQUOTA_ON(mp));
- ASSERT(mask & XFS_AT_PROJID);
- ASSERT(gdqp);
- olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
- &ip->i_gdquot, gdqp);
- }
- ip->i_d.di_projid = projid;
- /*
- * We may have to rev the inode as well as
- * the superblock version number since projids didn't
- * exist before DINODE_VERSION_2 and SB_VERSION_NLINK.
- */
- if (ip->i_d.di_version == XFS_DINODE_VERSION_1)
- xfs_bump_ino_vers2(tp, ip);
+ inode->i_gid = gid;
}
xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
@@ -620,82 +507,34 @@ xfs_setattr(
/*
* Change file access or modified times.
*/
- if (mask & (XFS_AT_ATIME|XFS_AT_MTIME)) {
- if (mask & XFS_AT_ATIME) {
- ip->i_d.di_atime.t_sec = vap->va_atime.tv_sec;
- ip->i_d.di_atime.t_nsec = vap->va_atime.tv_nsec;
+ if (mask & (ATTR_ATIME|ATTR_MTIME)) {
+ if (mask & ATTR_ATIME) {
+ inode->i_atime = iattr->ia_atime;
+ ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
+ ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
ip->i_update_core = 1;
timeflags &= ~XFS_ICHGTIME_ACC;
}
- if (mask & XFS_AT_MTIME) {
- ip->i_d.di_mtime.t_sec = vap->va_mtime.tv_sec;
- ip->i_d.di_mtime.t_nsec = vap->va_mtime.tv_nsec;
+ if (mask & ATTR_MTIME) {
+ inode->i_mtime = iattr->ia_mtime;
+ ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
+ ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
timeflags &= ~XFS_ICHGTIME_MOD;
timeflags |= XFS_ICHGTIME_CHG;
}
- if (tp && (flags & ATTR_UTIME))
+ if (tp && (mask & (ATTR_MTIME_SET|ATTR_ATIME_SET)))
xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
}
/*
- * Change XFS-added attributes.
- */
- if (mask & (XFS_AT_EXTSIZE|XFS_AT_XFLAGS)) {
- if (mask & XFS_AT_EXTSIZE) {
- /*
- * Converting bytes to fs blocks.
- */
- ip->i_d.di_extsize = vap->va_extsize >>
- mp->m_sb.sb_blocklog;
- }
- if (mask & XFS_AT_XFLAGS) {
- uint di_flags;
-
- /* can't set PREALLOC this way, just preserve it */
- di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
- if (vap->va_xflags & XFS_XFLAG_IMMUTABLE)
- di_flags |= XFS_DIFLAG_IMMUTABLE;
- if (vap->va_xflags & XFS_XFLAG_APPEND)
- di_flags |= XFS_DIFLAG_APPEND;
- if (vap->va_xflags & XFS_XFLAG_SYNC)
- di_flags |= XFS_DIFLAG_SYNC;
- if (vap->va_xflags & XFS_XFLAG_NOATIME)
- di_flags |= XFS_DIFLAG_NOATIME;
- if (vap->va_xflags & XFS_XFLAG_NODUMP)
- di_flags |= XFS_DIFLAG_NODUMP;
- if (vap->va_xflags & XFS_XFLAG_PROJINHERIT)
- di_flags |= XFS_DIFLAG_PROJINHERIT;
- if (vap->va_xflags & XFS_XFLAG_NODEFRAG)
- di_flags |= XFS_DIFLAG_NODEFRAG;
- if (vap->va_xflags & XFS_XFLAG_FILESTREAM)
- di_flags |= XFS_DIFLAG_FILESTREAM;
- if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
- if (vap->va_xflags & XFS_XFLAG_RTINHERIT)
- di_flags |= XFS_DIFLAG_RTINHERIT;
- if (vap->va_xflags & XFS_XFLAG_NOSYMLINKS)
- di_flags |= XFS_DIFLAG_NOSYMLINKS;
- if (vap->va_xflags & XFS_XFLAG_EXTSZINHERIT)
- di_flags |= XFS_DIFLAG_EXTSZINHERIT;
- } else if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) {
- if (vap->va_xflags & XFS_XFLAG_REALTIME)
- di_flags |= XFS_DIFLAG_REALTIME;
- if (vap->va_xflags & XFS_XFLAG_EXTSIZE)
- di_flags |= XFS_DIFLAG_EXTSIZE;
- }
- ip->i_d.di_flags = di_flags;
- }
- xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- timeflags |= XFS_ICHGTIME_CHG;
- }
-
- /*
- * Change file inode change time only if XFS_AT_CTIME set
+ * Change file inode change time only if ATTR_CTIME set
* AND we have been called by a DMI function.
*/
- if ( (flags & ATTR_DMI) && (mask & XFS_AT_CTIME) ) {
- ip->i_d.di_ctime.t_sec = vap->va_ctime.tv_sec;
- ip->i_d.di_ctime.t_nsec = vap->va_ctime.tv_nsec;
+ if ((flags & XFS_ATTR_DMI) && (mask & ATTR_CTIME)) {
+ inode->i_ctime = iattr->ia_ctime;
+ ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
+ ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
ip->i_update_core = 1;
timeflags &= ~XFS_ICHGTIME_CHG;
}
@@ -704,7 +543,7 @@ xfs_setattr(
* Send out timestamp changes that need to be set to the
* current time. Not done when called by a DMI function.
*/
- if (timeflags && !(flags & ATTR_DMI))
+ if (timeflags && !(flags & XFS_ATTR_DMI))
xfs_ichgtime(ip, timeflags);
XFS_STATS_INC(xs_ig_attrchg);
@@ -742,7 +581,7 @@ xfs_setattr(
}
if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE) &&
- !(flags & ATTR_DMI)) {
+ !(flags & XFS_ATTR_DMI)) {
(void) XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, ip, DM_RIGHT_NULL,
NULL, DM_RIGHT_NULL, NULL, NULL,
0, 0, AT_DELAY_FLAG(flags));
@@ -1601,12 +1440,18 @@ xfs_inactive(
return VN_INACTIVE_CACHE;
}
-
+/*
+ * Looks up an inode from "name". If ci_name is not NULL, then a CI match
+ * is allowed, otherwise it has to be an exact match. If a CI match is found,
+ * ci_name->name will point to the actual name (caller must free) or
+ * will be set to NULL if an exact match is found.
+ */
int
xfs_lookup(
xfs_inode_t *dp,
struct xfs_name *name,
- xfs_inode_t **ipp)
+ xfs_inode_t **ipp,
+ struct xfs_name *ci_name)
{
xfs_ino_t inum;
int error;
@@ -1618,7 +1463,7 @@ xfs_lookup(
return XFS_ERROR(EIO);
lock_mode = xfs_ilock_map_shared(dp);
- error = xfs_dir_lookup(NULL, dp, name, &inum);
+ error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name);
xfs_iunlock_map_shared(dp, lock_mode);
if (error)
@@ -1626,12 +1471,15 @@ xfs_lookup(
error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp, 0);
if (error)
- goto out;
+ goto out_free_name;
xfs_itrace_ref(*ipp);
return 0;
- out:
+out_free_name:
+ if (ci_name)
+ kmem_free(ci_name->name);
+out:
*ipp = NULL;
return error;
}
@@ -2098,13 +1946,6 @@ again:
#endif
}
-#ifdef DEBUG
-#define REMOVE_DEBUG_TRACE(x) {remove_which_error_return = (x);}
-int remove_which_error_return = 0;
-#else /* ! DEBUG */
-#define REMOVE_DEBUG_TRACE(x)
-#endif /* ! DEBUG */
-
int
xfs_remove(
xfs_inode_t *dp,
@@ -2113,6 +1954,7 @@ xfs_remove(
{
xfs_mount_t *mp = dp->i_mount;
xfs_trans_t *tp = NULL;
+ int is_dir = S_ISDIR(ip->i_d.di_mode);
int error = 0;
xfs_bmap_free_t free_list;
xfs_fsblock_t first_block;
@@ -2120,8 +1962,10 @@ xfs_remove(
int committed;
int link_zero;
uint resblks;
+ uint log_count;
xfs_itrace_entry(dp);
+ xfs_itrace_entry(ip);
if (XFS_FORCED_SHUTDOWN(mp))
return XFS_ERROR(EIO);
@@ -2134,19 +1978,23 @@ xfs_remove(
return error;
}
- xfs_itrace_entry(ip);
- xfs_itrace_ref(ip);
-
error = XFS_QM_DQATTACH(mp, dp, 0);
- if (!error)
- error = XFS_QM_DQATTACH(mp, ip, 0);
- if (error) {
- REMOVE_DEBUG_TRACE(__LINE__);
+ if (error)
+ goto std_return;
+
+ error = XFS_QM_DQATTACH(mp, ip, 0);
+ if (error)
goto std_return;
- }
- tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE);
+ if (is_dir) {
+ tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR);
+ log_count = XFS_DEFAULT_LOG_COUNT;
+ } else {
+ tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE);
+ log_count = XFS_REMOVE_LOG_COUNT;
+ }
cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
+
/*
* We try to get the real space reservation first,
* allowing for directory btree deletion(s) implying
@@ -2158,25 +2006,21 @@ xfs_remove(
*/
resblks = XFS_REMOVE_SPACE_RES(mp);
error = xfs_trans_reserve(tp, resblks, XFS_REMOVE_LOG_RES(mp), 0,
- XFS_TRANS_PERM_LOG_RES, XFS_REMOVE_LOG_COUNT);
+ XFS_TRANS_PERM_LOG_RES, log_count);
if (error == ENOSPC) {
resblks = 0;
error = xfs_trans_reserve(tp, 0, XFS_REMOVE_LOG_RES(mp), 0,
- XFS_TRANS_PERM_LOG_RES, XFS_REMOVE_LOG_COUNT);
+ XFS_TRANS_PERM_LOG_RES, log_count);
}
if (error) {
ASSERT(error != ENOSPC);
- REMOVE_DEBUG_TRACE(__LINE__);
- xfs_trans_cancel(tp, 0);
- return error;
+ cancel_flags = 0;
+ goto out_trans_cancel;
}
error = xfs_lock_dir_and_entry(dp, ip);
- if (error) {
- REMOVE_DEBUG_TRACE(__LINE__);
- xfs_trans_cancel(tp, cancel_flags);
- goto std_return;
- }
+ if (error)
+ goto out_trans_cancel;
/*
* At this point, we've gotten both the directory and the entry
@@ -2189,6 +2033,21 @@ xfs_remove(
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
/*
+ * If we're removing a directory perform some additional validation.
+ */
+ if (is_dir) {
+ ASSERT(ip->i_d.di_nlink >= 2);
+ if (ip->i_d.di_nlink != 2) {
+ error = XFS_ERROR(ENOTEMPTY);
+ goto out_trans_cancel;
+ }
+ if (!xfs_dir_isempty(ip)) {
+ error = XFS_ERROR(ENOTEMPTY);
+ goto out_trans_cancel;
+ }
+ }
+
+ /*
* Entry must exist since we did a lookup in xfs_lock_dir_and_entry.
*/
XFS_BMAP_INIT(&free_list, &first_block);
@@ -2196,39 +2055,64 @@ xfs_remove(
&first_block, &free_list, resblks);
if (error) {
ASSERT(error != ENOENT);
- REMOVE_DEBUG_TRACE(__LINE__);
- goto error1;
+ goto out_bmap_cancel;
}
xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+ /*
+ * Bump the in memory generation count on the parent
+ * directory so that others can know that it has changed.
+ */
dp->i_gen++;
xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
- error = xfs_droplink(tp, ip);
- if (error) {
- REMOVE_DEBUG_TRACE(__LINE__);
- goto error1;
+ if (is_dir) {
+ /*
+ * Drop the link from ip's "..".
+ */
+ error = xfs_droplink(tp, dp);
+ if (error)
+ goto out_bmap_cancel;
+
+ /*
+ * Drop the link from dp to ip.
+ */
+ error = xfs_droplink(tp, ip);
+ if (error)
+ goto out_bmap_cancel;
+ } else {
+ /*
+ * When removing a non-directory we need to log the parent
+ * inode here for the i_gen update. For a directory this is
+ * done implicitly by the xfs_droplink call for the ".." entry.
+ */
+ xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
}
- /* Determine if this is the last link while
+ /*
+ * Drop the "." link from ip to self.
+ */
+ error = xfs_droplink(tp, ip);
+ if (error)
+ goto out_bmap_cancel;
+
+ /*
+ * Determine if this is the last link while
* we are in the transaction.
*/
- link_zero = (ip)->i_d.di_nlink==0;
+ link_zero = (ip->i_d.di_nlink == 0);
/*
* If this is a synchronous mount, make sure that the
* remove transaction goes to disk before returning to
* the user.
*/
- if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
+ if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
xfs_trans_set_sync(tp);
- }
error = xfs_bmap_finish(&tp, &free_list, &committed);
- if (error) {
- REMOVE_DEBUG_TRACE(__LINE__);
- goto error_rele;
- }
+ if (error)
+ goto out_bmap_cancel;
error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
if (error)
@@ -2240,38 +2124,26 @@ xfs_remove(
* will get killed on last close in xfs_close() so we don't
* have to worry about that.
*/
- if (link_zero && xfs_inode_is_filestream(ip))
+ if (!is_dir && link_zero && xfs_inode_is_filestream(ip))
xfs_filestream_deassociate(ip);
xfs_itrace_exit(ip);
+ xfs_itrace_exit(dp);
-/* Fall through to std_return with error = 0 */
std_return:
if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) {
- (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE,
- dp, DM_RIGHT_NULL,
- NULL, DM_RIGHT_NULL,
- name->name, NULL, ip->i_d.di_mode, error, 0);
+ XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, dp, DM_RIGHT_NULL,
+ NULL, DM_RIGHT_NULL, name->name, NULL,
+ ip->i_d.di_mode, error, 0);
}
- return error;
- error1:
- xfs_bmap_cancel(&free_list);
- cancel_flags |= XFS_TRANS_ABORT;
- xfs_trans_cancel(tp, cancel_flags);
- goto std_return;
+ return error;
- error_rele:
- /*
- * In this case make sure to not release the inode until after
- * the current transaction is aborted. Releasing it beforehand
- * can cause us to go to xfs_inactive and start a recursive
- * transaction which can easily deadlock with the current one.
- */
+ out_bmap_cancel:
xfs_bmap_cancel(&free_list);
cancel_flags |= XFS_TRANS_ABORT;
+ out_trans_cancel:
xfs_trans_cancel(tp, cancel_flags);
-
goto std_return;
}
@@ -2638,186 +2510,6 @@ std_return:
}
int
-xfs_rmdir(
- xfs_inode_t *dp,
- struct xfs_name *name,
- xfs_inode_t *cdp)
-{
- xfs_mount_t *mp = dp->i_mount;
- xfs_trans_t *tp;
- int error;
- xfs_bmap_free_t free_list;
- xfs_fsblock_t first_block;
- int cancel_flags;
- int committed;
- int last_cdp_link;
- uint resblks;
-
- xfs_itrace_entry(dp);
-
- if (XFS_FORCED_SHUTDOWN(mp))
- return XFS_ERROR(EIO);
-
- if (DM_EVENT_ENABLED(dp, DM_EVENT_REMOVE)) {
- error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE,
- dp, DM_RIGHT_NULL,
- NULL, DM_RIGHT_NULL, name->name,
- NULL, cdp->i_d.di_mode, 0, 0);
- if (error)
- return XFS_ERROR(error);
- }
-
- /*
- * Get the dquots for the inodes.
- */
- error = XFS_QM_DQATTACH(mp, dp, 0);
- if (!error)
- error = XFS_QM_DQATTACH(mp, cdp, 0);
- if (error) {
- REMOVE_DEBUG_TRACE(__LINE__);
- goto std_return;
- }
-
- tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR);
- cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
- /*
- * We try to get the real space reservation first,
- * allowing for directory btree deletion(s) implying
- * possible bmap insert(s). If we can't get the space
- * reservation then we use 0 instead, and avoid the bmap
- * btree insert(s) in the directory code by, if the bmap
- * insert tries to happen, instead trimming the LAST
- * block from the directory.
- */
- resblks = XFS_REMOVE_SPACE_RES(mp);
- error = xfs_trans_reserve(tp, resblks, XFS_REMOVE_LOG_RES(mp), 0,
- XFS_TRANS_PERM_LOG_RES, XFS_DEFAULT_LOG_COUNT);
- if (error == ENOSPC) {
- resblks = 0;
- error = xfs_trans_reserve(tp, 0, XFS_REMOVE_LOG_RES(mp), 0,
- XFS_TRANS_PERM_LOG_RES, XFS_DEFAULT_LOG_COUNT);
- }
- if (error) {
- ASSERT(error != ENOSPC);
- cancel_flags = 0;
- goto error_return;
- }
- XFS_BMAP_INIT(&free_list, &first_block);
-
- /*
- * Now lock the child directory inode and the parent directory
- * inode in the proper order. This will take care of validating
- * that the directory entry for the child directory inode has
- * not changed while we were obtaining a log reservation.
- */
- error = xfs_lock_dir_and_entry(dp, cdp);
- if (error) {
- xfs_trans_cancel(tp, cancel_flags);
- goto std_return;
- }
-
- IHOLD(dp);
- xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
-
- IHOLD(cdp);
- xfs_trans_ijoin(tp, cdp, XFS_ILOCK_EXCL);
-
- ASSERT(cdp->i_d.di_nlink >= 2);
- if (cdp->i_d.di_nlink != 2) {
- error = XFS_ERROR(ENOTEMPTY);
- goto error_return;
- }
- if (!xfs_dir_isempty(cdp)) {
- error = XFS_ERROR(ENOTEMPTY);
- goto error_return;
- }
-
- error = xfs_dir_removename(tp, dp, name, cdp->i_ino,
- &first_block, &free_list, resblks);
- if (error)
- goto error1;
-
- xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
-
- /*
- * Bump the in memory generation count on the parent
- * directory so that other can know that it has changed.
- */
- dp->i_gen++;
-
- /*
- * Drop the link from cdp's "..".
- */
- error = xfs_droplink(tp, dp);
- if (error) {
- goto error1;
- }
-
- /*
- * Drop the link from dp to cdp.
- */
- error = xfs_droplink(tp, cdp);
- if (error) {
- goto error1;
- }
-
- /*
- * Drop the "." link from cdp to self.
- */
- error = xfs_droplink(tp, cdp);
- if (error) {
- goto error1;
- }
-
- /* Determine these before committing transaction */
- last_cdp_link = (cdp)->i_d.di_nlink==0;
-
- /*
- * If this is a synchronous mount, make sure that the
- * rmdir transaction goes to disk before returning to
- * the user.
- */
- if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
- xfs_trans_set_sync(tp);
- }
-
- error = xfs_bmap_finish (&tp, &free_list, &committed);
- if (error) {
- xfs_bmap_cancel(&free_list);
- xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES |
- XFS_TRANS_ABORT));
- goto std_return;
- }
-
- error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
- if (error) {
- goto std_return;
- }
-
-
- /* Fall through to std_return with error = 0 or the errno
- * from xfs_trans_commit. */
- std_return:
- if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) {
- (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE,
- dp, DM_RIGHT_NULL,
- NULL, DM_RIGHT_NULL,
- name->name, NULL, cdp->i_d.di_mode,
- error, 0);
- }
- return error;
-
- error1:
- xfs_bmap_cancel(&free_list);
- cancel_flags |= XFS_TRANS_ABORT;
- /* FALLTHROUGH */
-
- error_return:
- xfs_trans_cancel(tp, cancel_flags);
- goto std_return;
-}
-
-int
xfs_symlink(
xfs_inode_t *dp,
struct xfs_name *link_name,
@@ -3242,7 +2934,6 @@ xfs_finish_reclaim(
{
xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino);
bhv_vnode_t *vp = XFS_ITOV_NULL(ip);
- int error;
if (vp && VN_BAD(vp))
goto reclaim;
@@ -3285,29 +2976,16 @@ xfs_finish_reclaim(
xfs_iflock(ip);
}
- if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
- if (ip->i_update_core ||
- ((ip->i_itemp != NULL) &&
- (ip->i_itemp->ili_format.ilf_fields != 0))) {
- error = xfs_iflush(ip, sync_mode);
- /*
- * If we hit an error, typically because of filesystem
- * shutdown, we don't need to let vn_reclaim to know
- * because we're gonna reclaim the inode anyway.
- */
- if (error) {
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- goto reclaim;
- }
- xfs_iflock(ip); /* synchronize with xfs_iflush_done */
- }
-
- ASSERT(ip->i_update_core == 0);
- ASSERT(ip->i_itemp == NULL ||
- ip->i_itemp->ili_format.ilf_fields == 0);
+ /*
+ * In the case of a forced shutdown we rely on xfs_iflush() to
+ * wait for the inode to be unpinned before returning an error.
+ */
+ if (xfs_iflush(ip, sync_mode) == 0) {
+ /* synchronize with xfs_iflush_done */
+ xfs_iflock(ip);
+ xfs_ifunlock(ip);
}
- xfs_ifunlock(ip);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
reclaim:
@@ -3418,7 +3096,7 @@ xfs_alloc_file_space(
/* Generate a DMAPI event if needed. */
if (alloc_type != 0 && offset < ip->i_size &&
- (attr_flags&ATTR_DMI) == 0 &&
+ (attr_flags & XFS_ATTR_DMI) == 0 &&
DM_EVENT_ENABLED(ip, DM_EVENT_WRITE)) {
xfs_off_t end_dmi_offset;
@@ -3532,7 +3210,7 @@ retry:
allocatesize_fsb -= allocated_fsb;
}
dmapi_enospc_check:
- if (error == ENOSPC && (attr_flags & ATTR_DMI) == 0 &&
+ if (error == ENOSPC && (attr_flags & XFS_ATTR_DMI) == 0 &&
DM_EVENT_ENABLED(ip, DM_EVENT_NOSPACE)) {
error = XFS_SEND_NAMESP(mp, DM_EVENT_NOSPACE,
ip, DM_RIGHT_NULL,
@@ -3679,7 +3357,7 @@ xfs_free_file_space(
end_dmi_offset = offset + len;
endoffset_fsb = XFS_B_TO_FSBT(mp, end_dmi_offset);
- if (offset < ip->i_size && (attr_flags & ATTR_DMI) == 0 &&
+ if (offset < ip->i_size && (attr_flags & XFS_ATTR_DMI) == 0 &&
DM_EVENT_ENABLED(ip, DM_EVENT_WRITE)) {
if (end_dmi_offset > ip->i_size)
end_dmi_offset = ip->i_size;
@@ -3690,7 +3368,7 @@ xfs_free_file_space(
return error;
}
- if (attr_flags & ATTR_NOLOCK)
+ if (attr_flags & XFS_ATTR_NOLOCK)
need_iolock = 0;
if (need_iolock) {
xfs_ilock(ip, XFS_IOLOCK_EXCL);
@@ -3867,7 +3545,7 @@ xfs_change_file_space(
xfs_off_t startoffset;
xfs_off_t llen;
xfs_trans_t *tp;
- bhv_vattr_t va;
+ struct iattr iattr;
xfs_itrace_entry(ip);
@@ -3941,10 +3619,10 @@ xfs_change_file_space(
break;
}
- va.va_mask = XFS_AT_SIZE;
- va.va_size = startoffset;
+ iattr.ia_valid = ATTR_SIZE;
+ iattr.ia_size = startoffset;
- error = xfs_setattr(ip, &va, attr_flags, credp);
+ error = xfs_setattr(ip, &iattr, attr_flags, credp);
if (error)
return error;
@@ -3974,7 +3652,7 @@ xfs_change_file_space(
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
xfs_trans_ihold(tp, ip);
- if ((attr_flags & ATTR_DMI) == 0) {
+ if ((attr_flags & XFS_ATTR_DMI) == 0) {
ip->i_d.di_mode &= ~S_ISUID;
/*
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index 57335ba4ce5..e932a96bec5 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -2,9 +2,9 @@
#define _XFS_VNODEOPS_H 1
struct attrlist_cursor_kern;
-struct bhv_vattr;
struct cred;
struct file;
+struct iattr;
struct inode;
struct iovec;
struct kiocb;
@@ -15,14 +15,18 @@ struct xfs_iomap;
int xfs_open(struct xfs_inode *ip);
-int xfs_setattr(struct xfs_inode *ip, struct bhv_vattr *vap, int flags,
+int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags,
struct cred *credp);
+#define XFS_ATTR_DMI 0x01 /* invocation from a DMI function */
+#define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */
+#define XFS_ATTR_NOLOCK 0x04 /* Don't grab any conflicting locks */
+
int xfs_readlink(struct xfs_inode *ip, char *link);
int xfs_fsync(struct xfs_inode *ip);
int xfs_release(struct xfs_inode *ip);
int xfs_inactive(struct xfs_inode *ip);
int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
- struct xfs_inode **ipp);
+ struct xfs_inode **ipp, struct xfs_name *ci_name);
int xfs_create(struct xfs_inode *dp, struct xfs_name *name, mode_t mode,
xfs_dev_t rdev, struct xfs_inode **ipp, struct cred *credp);
int xfs_remove(struct xfs_inode *dp, struct xfs_name *name,
@@ -31,8 +35,6 @@ int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
struct xfs_name *target_name);
int xfs_mkdir(struct xfs_inode *dp, struct xfs_name *dir_name,
mode_t mode, struct xfs_inode **ipp, struct cred *credp);
-int xfs_rmdir(struct xfs_inode *dp, struct xfs_name *name,
- struct xfs_inode *cdp);
int xfs_readdir(struct xfs_inode *dp, void *dirent, size_t bufsize,
xfs_off_t *offset, filldir_t filldir);
int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name,