12 files changed, 381 insertions, 218 deletions
diff --git a/Documentation/filesystems/9p.txt b/Documentation/filesystems/9p.txt
index bf8080640eb..57e0b80a527 100644
--- a/Documentation/filesystems/9p.txt
+++ b/Documentation/filesystems/9p.txt
@@ -18,11 +18,11 @@ the 9p client is available in the form of a USENIX paper:
 
 Other applications are described in the following papers:
 	* XCPU & Clustering
-		http://www.xcpu.org/xcpu-talk.pdf
+		http://xcpu.org/papers/xcpu-talk.pdf
 	* KVMFS: control file system for KVM
-		http://www.xcpu.org/kvmfs.pdf
-	* CellFS: A New ProgrammingModel for the Cell BE
-		http://www.xcpu.org/cellfs-talk.pdf
+		http://xcpu.org/papers/kvmfs.pdf
+	* CellFS: A New Programming Model for the Cell BE
+		http://xcpu.org/papers/cellfs-talk.pdf
 	* PROSE I/O: Using 9p to enable Application Partitions
 		http://plan9.escet.urjc.es/iwp9/cready/PROSE_iwp9_2006.pdf
 
@@ -48,6 +48,7 @@ OPTIONS
                                 (see rfdno and wfdno)
 			virtio	- connect to the next virtio channel available
 				(from lguest or KVM with trans_virtio module)
+			rdma	- connect to a specified RDMA channel
 
   uname=name	user name to attempt mount as on the remote server.  The
   		server may override or ignore this value.  Certain user
@@ -59,16 +60,22 @@ OPTIONS
   cache=mode	specifies a caching policy.  By default, no caches are used.
 			loose = no attempts are made at consistency,
                                 intended for exclusive, read-only mounts
+			fscache = use FS-Cache for a persistent, read-only
+				cache backend.
 
   debug=n	specifies debug level.  The debug level is a bitmask.
-  			0x01 = display verbose error messages
-			0x02 = developer debug (DEBUG_CURRENT)
-			0x04 = display 9p trace
-			0x08 = display VFS trace
-			0x10 = display Marshalling debug
-			0x20 = display RPC debug
-			0x40 = display transport debug
-			0x80 = display allocation debug
+			0x01  = display verbose error messages
+			0x02  = developer debug (DEBUG_CURRENT)
+			0x04  = display 9p trace
+			0x08  = display VFS trace
+			0x10  = display Marshalling debug
+			0x20  = display RPC debug
+			0x40  = display transport debug
+			0x80  = display allocation debug
+			0x100 = display protocol message debug
+			0x200 = display Fid debug
+			0x400 = display packet debug
+			0x800 = display fscache tracing debug
 
   rfdno=n	the file descriptor for reading with trans=fd
 
@@ -100,6 +107,10 @@ OPTIONS
 			any   = v9fs does single attach and performs all
 				operations as one user
 
+  cachetag	cache tag to use the specified persistent cache.
+		cache tags for existing cache sessions can be listed at
+		/sys/fs/9p/caches. (applies only to cache=fscache)
+
 RESOURCES
 =========
 
@@ -118,12 +129,16 @@ and export.
 A Linux version of the 9p server is now maintained under the npfs project
 on sourceforge (http://sourceforge.net/projects/npfs).  The currently
 maintained version is the single-threaded version of the server (named spfs)
-available from the same CVS repository.
+available from the same SVN repository.
 
 There are user and developer mailing lists available through the v9fs project
 on sourceforge (http://sourceforge.net/projects/v9fs).
 
-News and other information is maintained on SWiK (http://swik.net/v9fs).
+A stand-alone version of the module (which should build for any 2.6 kernel)
+is available via (http://github.com/ericvh/9p-sac/tree/master)
+
+News and other information is maintained on SWiK (http://swik.net/v9fs)
+and the Wiki (http://sf.net/apps/mediawiki/v9fs/index.php).
 
 Bug reports may be issued through the kernel.org bugzilla 
 (http://bugzilla.kernel.org)
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt
index 7be02ac5fa3..bf4f4b7e11b 100644
--- a/Documentation/filesystems/ext4.txt
+++ b/Documentation/filesystems/ext4.txt
@@ -134,15 +134,9 @@ ro                   	Mount filesystem read only. Note that ext4 will
                      	mount options "ro,noload" can be used to prevent
 		     	writes to the filesystem.
 
-journal_checksum	Enable checksumming of the journal transactions.
-			This will allow the recovery code in e2fsck and the
-			kernel to detect corruption in the kernel.  It is a
-			compatible change and will be ignored by older kernels.
-
 journal_async_commit	Commit block can be written to disk without waiting
 			for descriptor blocks. If enabled older kernels cannot
-			mount the device. This will enable 'journal_checksum'
-			internally.
+			mount the device.
 
 journal=update		Update the ext4 file system's journal to the current
 			format.
@@ -263,10 +257,18 @@ resuid=n		The user ID which may use the reserved blocks.
 
 sb=n			Use alternate superblock at this location.
 
-quota
-noquota
-grpquota
-usrquota
+quota			These options are ignored by the filesystem. They
+noquota			are used only by quota tools to recognize volumes
+grpquota		where quota should be turned on. See documentation
+usrquota		in the quota-tools package for more details
+			(http://sourceforge.net/projects/linuxquota).
+
+jqfmt=<quota type>	These options tell the filesystem details about quota
+usrjquota=<file>	so that quota information can be properly updated
+grpjquota=<file>	during journal replay. They replace the above
+			quota options. See documentation in the quota-tools
+			package for more details
+			(http://sourceforge.net/projects/linuxquota).
 
 bh		(*)	ext4 associates buffer heads to data pages to
 nobh			(a) cache disk block mapping information
@@ -280,9 +282,16 @@ stripe=n		Number of filesystem blocks that mballoc will try
 			to use for allocation size and alignment. For RAID5/6
 			systems this should be the number of data
 			disks *  RAID chunk size in file system blocks.
-delalloc	(*)	Deferring block allocation until write-out time.
-nodelalloc		Disable delayed allocation. Blocks are allocation
-			when data is copied from user to page cache.
+
+delalloc	(*)	Defer block allocation until just before ext4
+			writes out the block(s) in question.  This
+			allows ext4 to make better allocation decisions
+			more efficiently.
+nodelalloc		Disable delayed allocation.  Blocks are allocated
+			when the data is copied from userspace to the
+			page cache, either via the write(2) system call
+			or when an mmap'ed page which was previously
+			unallocated is written for the first time.
 
 max_batch_time=usec	Maximum amount of time ext4 should wait for
 			additional filesystem operations to be batch
diff --git a/Documentation/filesystems/gfs2-uevents.txt b/Documentation/filesystems/gfs2-uevents.txt
new file mode 100644
index 00000000000..fd966dc9979
--- /dev/null
+++ b/Documentation/filesystems/gfs2-uevents.txt
@@ -0,0 +1,100 @@
+                              uevents and GFS2
+                             ==================
+
+During the lifetime of a GFS2 mount, a number of uevents are generated.
+This document explains what the events are and what they are used
+for (by gfs_controld in gfs2-utils).
+
+A list of GFS2 uevents
+-----------------------
+
+1. ADD
+
+The ADD event occurs at mount time. It will always be the first
+uevent generated by the newly created filesystem. If the mount
+is successful, an ONLINE uevent will follow.  If it is not successful
+then a REMOVE uevent will follow.
+
+The ADD uevent has two environment variables: SPECTATOR=[0|1]
+and RDONLY=[0|1] that specify the spectator status (a read-only mount
+with no journal assigned), and read-only (with journal assigned) status
+of the filesystem respectively.
+
+2. ONLINE
+
+The ONLINE uevent is generated after a successful mount or remount. It
+has the same environment variables as the ADD uevent. The ONLINE
+uevent, along with the two environment variables for spectator and
+RDONLY are a relatively recent addition (2.6.32-rc+) and will not
+be generated by older kernels.
+
+3. CHANGE
+
+The CHANGE uevent is used in two places. One is when reporting the
+successful mount of the filesystem by the first node (FIRSTMOUNT=Done).
+This is used as a signal by gfs_controld that it is then ok for other
+nodes in the cluster to mount the filesystem.
+
+The other CHANGE uevent is used to inform of the completion
+of journal recovery for one of the filesystem's journals. It has
+two environment variables, JID= which specifies the journal id which
+has just been recovered, and RECOVERY=[Done|Failed] to indicate the
+success (or otherwise) of the operation. These uevents are generated
+for every journal recovered, whether it is during the initial mount
+process or as the result of gfs_controld requesting a specific journal
+recovery via the /sys/fs/gfs2/<fsname>/lock_module/recovery file.
+
+Because the CHANGE uevent was used (in early versions of gfs_controld)
+without checking the environment variables to discover the state, we
+cannot add any more functions to it without running the risk of
+someone using an older version of the user tools and breaking their
+cluster. For this reason the ONLINE uevent was used when adding a new
+uevent for a successful mount or remount.
+
+4. OFFLINE
+
+The OFFLINE uevent is only generated due to filesystem errors and is used
+as part of the "withdraw" mechanism. Currently this doesn't give any
+information about what the error is, which is something that needs to
+be fixed.
+
+5. REMOVE
+
+The REMOVE uevent is generated at the end of an unsuccessful mount
+or at the end of a umount of the filesystem. Every REMOVE uevent will
+have been preceded by at least an ADD uevent for the same filesystem
+and, unlike the other uevents, is generated automatically by the kernel's
+kobject subsystem.
+
+
+Information common to all GFS2 uevents (uevent environment variables)
+----------------------------------------------------------------------
+
+1. LOCKTABLE=
+
+The LOCKTABLE is a string, as supplied on the mount command
+line (locktable=) or via fstab. It is used as a filesystem label
+as well as providing the information for a lock_dlm mount to be
+able to join the cluster.
+
+2. LOCKPROTO=
+
+The LOCKPROTO is a string, and its value depends on what is set
+on the mount command line, or via fstab. It will be either
+lock_nolock or lock_dlm. In the future other lock managers
+may be supported.
+
+3. JOURNALID=
+
+If a journal is in use by the filesystem (journals are not
+assigned for spectator mounts) then this will give the
+numeric journal id in all GFS2 uevents.
+
+4. UUID=
+
+With recent versions of gfs2-utils, mkfs.gfs2 writes a UUID
+into the filesystem superblock. If it exists, this will
+be included in every uevent relating to the filesystem.
+
+
+
diff --git a/Documentation/filesystems/ncpfs.txt b/Documentation/filesystems/ncpfs.txt
index f12c30c93f2..5af164f4b37 100644
--- a/Documentation/filesystems/ncpfs.txt
+++ b/Documentation/filesystems/ncpfs.txt
@@ -7,6 +7,6 @@ ftp.gwdg.de/pub/linux/misc/ncpfs, but sunsite and its many mirrors
 will have it as well.
 
 Related products are linware and mars_nwe, which will give Linux partial
-NetWare server functionality.  Linware's home site is
-klokan.sh.cvut.cz/pub/linux/linware; mars_nwe can be found on
-ftp.gwdg.de/pub/linux/misc/ncpfs.
+NetWare server functionality.
+
+mars_nwe can be found on ftp.gwdg.de/pub/linux/misc/ncpfs.
diff --git a/Documentation/filesystems/nfs.txt b/Documentation/filesystems/nfs.txt
new file mode 100644
index 00000000000..f50f26ce6cd
--- /dev/null
+++ b/Documentation/filesystems/nfs.txt
@@ -0,0 +1,98 @@
+
+The NFS client
+==============
+
+The NFS version 2 protocol was first documented in RFC1094 (March 1989).
+Since then two more major releases of NFS have been published, with NFSv3
+being documented in RFC1813 (June 1995), and NFSv4 in RFC3530 (April
+2003).
+
+The Linux NFS client currently supports all the above published versions,
+and work is in progress on adding support for minor version 1 of the NFSv4
+protocol.
+
+The purpose of this document is to provide information on some of the
+upcall interfaces that are used in order to provide the NFS client with
+some of the information that it requires in order to fully comply with
+the NFS spec.
+
+The DNS resolver
+================
+
+NFSv4 allows for one server to refer the NFS client to data that has been
+migrated onto another server by means of the special "fs_locations"
+attribute. See
+	http://tools.ietf.org/html/rfc3530#section-6
+and
+	http://tools.ietf.org/html/draft-ietf-nfsv4-referrals-00
+
+The fs_locations information can take the form of either an ip address and
+a path, or a DNS hostname and a path. The latter requires the NFS client to
+do a DNS lookup in order to mount the new volume, and hence the need for an
+upcall to allow userland to provide this service.
+
+Assuming that the user has the 'rpc_pipefs' filesystem mounted in the usual
+/var/lib/nfs/rpc_pipefs, the upcall consists of the following steps:
+
+   (1) The process checks the dns_resolve cache to see if it contains a
+       valid entry. If so, it returns that entry and exits.
+
+   (2) If no valid entry exists, the helper script '/sbin/nfs_cache_getent'
+       (may be changed using the 'nfs.cache_getent' kernel boot parameter)
+       is run, with two arguments:
+		- the cache name, "dns_resolve"
+		- the hostname to resolve
+
+   (3) After looking up the corresponding ip address, the helper script
+       writes the result into the rpc_pipefs pseudo-file
+       '/var/lib/nfs/rpc_pipefs/cache/dns_resolve/channel'
+       in the following (text) format:
+
+		"<ip address> <hostname> <ttl>\n"
+
+       Where <ip address> is in the usual IPv4 (123.456.78.90) or IPv6
+       (ffee:ddcc:bbaa:9988:7766:5544:3322:1100, ffee::1100, ...) format.
+       <hostname> is identical to the second argument of the helper
+       script, and <ttl> is the 'time to live' of this cache entry (in
+       units of seconds).
+
+       Note: If <ip address> is invalid, say the string "0", then a negative
+       entry is created, which will cause the kernel to treat the hostname
+       as having no valid DNS translation.
+
+
+
+
+A basic sample /sbin/nfs_cache_getent
+=====================================
+
+#!/bin/bash
+#
+ttl=600
+#
+cut=/usr/bin/cut
+getent=/usr/bin/getent
+rpc_pipefs=/var/lib/nfs/rpc_pipefs
+#
+die()
+{
+	echo "Usage: $0 cache_name entry_name"
+	exit 1
+}
+
+[ $# -lt 2 ] && die
+cachename="$1"
+cache_path=${rpc_pipefs}/cache/${cachename}/channel
+
+case "${cachename}" in
+	dns_resolve)
+		name="$2"
+		result="$(${getent} hosts ${name} | ${cut} -f1 -d\ )"
+		[ -z "${result}" ] && result="0"
+		;;
+	*)
+		die
+		;;
+esac
+echo "${result} ${name} ${ttl}" >${cache_path}
+
diff --git a/Documentation/filesystems/nfs41-server.txt b/Documentation/filesystems/nfs41-server.txt
index 05d81cbcb2e..5920fe26e6f 100644
--- a/Documentation/filesystems/nfs41-server.txt
+++ b/Documentation/filesystems/nfs41-server.txt
@@ -11,6 +11,11 @@ the /proc/fs/nfsd/versions control file.  Note that to write this
 control file, the nfsd service must be taken down.  Use your user-mode
 nfs-utils to set this up; see rpc.nfsd(8)
 
+(Warning: older servers will interpret "+4.1" and "-4.1" as "+4" and
+"-4", respectively.  Therefore, code meant to work on both new and old
+kernels must turn 4.1 on or off *before* turning support for version 4
+on or off; rpc.nfsd does this correctly.)
+
 The NFSv4 minorversion 1 (NFSv4.1) implementation in nfsd is based
 on the latest NFSv4.1 Internet Draft:
 http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion1-29
@@ -25,6 +30,49 @@ are still under development out of tree.
 See http://wiki.linux-nfs.org/wiki/index.php/PNFS_prototype_design
 for more information.
 
+The current implementation is intended for developers only: while it
+does support ordinary file operations on clients we have tested against
+(including the linux client), it is incomplete in ways which may limit
+features unexpectedly, cause known bugs in rare cases, or cause
+interoperability problems with future clients.  Known issues:
+
+	- gss support is questionable: currently mounts with kerberos
+	  from a linux client are possible, but we aren't really
+	  conformant with the spec (for example, we don't use kerberos
+	  on the backchannel correctly).
+	- no trunking support: no clients currently take advantage of
+	  trunking, but this is a mandatory feature, and its use is
+	  recommended to clients in a number of places.  (E.g. to ensure
+	  timely renewal in case an existing connection's retry timeouts
+	  have gotten too long; see section 8.3 of the draft.)
+	  Therefore, lack of this feature may cause future clients to
+	  fail.
+	- Incomplete backchannel support: incomplete backchannel gss
+	  support and no support for BACKCHANNEL_CTL mean that
+	  callbacks (hence delegations and layouts) may not be
+	  available and clients confused by the incomplete
+	  implementation may fail.
+	- Server reboot recovery is unsupported; if the server reboots,
+	  clients may fail.
+	- We do not support SSV, which provides security for shared
+	  client-server state (thus preventing unauthorized tampering
+	  with locks and opens, for example).  It is mandatory for
+	  servers to support this, though no clients use it yet.
+	- Mandatory operations which we do not support, such as
+	  DESTROY_CLIENTID, FREE_STATEID, SECINFO_NO_NAME, and
+	  TEST_STATEID, are not currently used by clients, but will be
+	  (and the spec recommends their uses in common cases), and
+	  clients should not be expected to know how to recover from the
+	  case where they are not supported.  This will eventually cause
+	  interoperability failures.
+
+In addition, some limitations are inherited from the current NFSv4
+implementation:
+
+	- Incomplete delegation enforcement: if a file is renamed or
+	  unlinked, a client holding a delegation may continue to
+	  indefinitely allow opens of the file under the old name.
+
 The table below, taken from the NFSv4.1 document, lists
 the operations that are mandatory to implement (REQ), optional
 (OPT), and NFSv4.0 operations that are required not to implement (MNI)
@@ -142,6 +190,12 @@ NS*| CB_WANTS_CANCELLED      | OPT       | FDELG,      | Section 20.10 |
 
 Implementation notes:
 
+DELEGPURGE:
+* mandatory only for servers that support CLAIM_DELEGATE_PREV and/or
+  CLAIM_DELEG_PREV_FH (which allows clients to keep delegations that
+  persist across client reboots).  Thus we need not implement this for
+  now.
+
 EXCHANGE_ID:
 * only SP4_NONE state protection supported
 * implementation ids are ignored
diff --git a/Documentation/filesystems/nfsroot.txt b/Documentation/filesystems/nfsroot.txt
index 68baddf3c3e..3ba0b945aaf 100644
--- a/Documentation/filesystems/nfsroot.txt
+++ b/Documentation/filesystems/nfsroot.txt
@@ -105,7 +105,7 @@ ip=<client-ip>:<server-ip>:<gw-ip>:<netmask>:<hostname>:<device>:<autoconf>
 		the client address and this parameter is NOT empty only
 		replies from the specified server are accepted.
 
-		Only required for for NFS root. That is autoconfiguration
+		Only required for NFS root. That is autoconfiguration
 		will not be triggered if it is missing and NFS root is not
 		in operation.
 
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index ffead13f944..2c48f945546 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -176,6 +176,7 @@ read the file /proc/PID/status:
   CapBnd: ffffffffffffffff
   voluntary_ctxt_switches:        0
   nonvoluntary_ctxt_switches:     1
+  Stack usage:    12 kB
 
 This shows you nearly the same information you would get if you viewed it with
 the ps  command.  In  fact,  ps  uses  the  proc  file  system  to  obtain its
@@ -229,6 +230,7 @@ Table 1-2: Contents of the statm files (as of 2.6.30-rc7)
  Mems_allowed_list           Same as previous, but in "list format"
  voluntary_ctxt_switches     number of voluntary context switches
  nonvoluntary_ctxt_switches  number of non voluntary context switches
+ Stack usage:                stack usage high water mark (rounded up to page size)
 ..............................................................................
 
 Table 1-3: Contents of the statm files (as of 2.6.8-rc3)
@@ -307,7 +309,7 @@ address           perms offset  dev   inode      pathname
 08049000-0804a000 rw-p 00001000 03:00 8312       /opt/test
 0804a000-0806b000 rw-p 00000000 00:00 0          [heap]
 a7cb1000-a7cb2000 ---p 00000000 00:00 0
-a7cb2000-a7eb2000 rw-p 00000000 00:00 0
+a7cb2000-a7eb2000 rw-p 00000000 00:00 0          [threadstack:001ff4b4]
 a7eb2000-a7eb3000 ---p 00000000 00:00 0
 a7eb3000-a7ed5000 rw-p 00000000 00:00 0
 a7ed5000-a8008000 r-xp 00000000 03:00 4222       /lib/libc.so.6
@@ -343,6 +345,7 @@ is not associated with a file:
  [stack]                  = the stack of the main process
  [vdso]                   = the "virtual dynamic shared object",
                             the kernel system call handler
+ [threadstack:xxxxxxxx]   = the stack of the thread, xxxxxxxx is the stack size
 
  or if empty, the mapping is anonymous.
 
@@ -375,6 +378,19 @@ of memory currently marked as referenced or accessed.
 This file is only present if the CONFIG_MMU kernel configuration option is
 enabled.
 
+The /proc/PID/clear_refs is used to reset the PG_Referenced and ACCESSED/YOUNG
+bits on both physical and virtual pages associated with a process.
+To clear the bits for all the pages associated with the process
+    > echo 1 > /proc/PID/clear_refs
+
+To clear the bits for the anonymous pages associated with the process
+    > echo 2 > /proc/PID/clear_refs
+
+To clear the bits for the file mapped pages associated with the process
+    > echo 3 > /proc/PID/clear_refs
+Any other value written to /proc/PID/clear_refs will have no effect.
+
+
 1.2 Kernel data
 ---------------
 
@@ -1032,9 +1048,9 @@ Various pieces   of  information about  kernel activity  are  available in the
 since the system first booted.  For a quick look, simply cat the file:
 
   > cat /proc/stat
-  cpu  2255 34 2290 22625563 6290 127 456 0
-  cpu0 1132 34 1441 11311718 3675 127 438 0
-  cpu1 1123 0 849 11313845 2614 0 18 0
+  cpu  2255 34 2290 22625563 6290 127 456 0 0
+  cpu0 1132 34 1441 11311718 3675 127 438 0 0
+  cpu1 1123 0 849 11313845 2614 0 18 0 0
   intr 114930548 113199788 3 0 5 263 0 4 [... lots more numbers ...]
   ctxt 1990473
   btime 1062191376
@@ -1056,6 +1072,7 @@ second).  The meanings of the columns are as follows, from left to right:
 - irq: servicing interrupts
 - softirq: servicing softirqs
 - steal: involuntary wait
+- guest: running a guest
 
 The "intr" line gives counts of interrupts  serviced since boot time, for each
 of the  possible system interrupts.   The first  column  is the  total of  all
@@ -1096,7 +1113,6 @@ Table 1-12: Files in /proc/fs/ext4/<devname>
 ..............................................................................
  File            Content                                        
  mb_groups       details of multiblock allocator buddy cache of free blocks
- mb_history      multiblock allocation history
 ..............................................................................
 
 
@@ -1191,7 +1207,7 @@ The following heuristics are then applied:
  * if the task was reniced, its score doubles
  * superuser or direct hardware access tasks (CAP_SYS_ADMIN, CAP_SYS_RESOURCE
  	or CAP_SYS_RAWIO) have their score divided by 4
- * if oom condition happened in one cpuset and checked task does not belong
+ * if oom condition happened in one cpuset and checked process does not belong
  	to it, its score is divided by 8
  * the resulting score is multiplied by two to the power of oom_adj, i.e.
 	points <<= oom_adj when it is positive and
diff --git a/Documentation/filesystems/seq_file.txt b/Documentation/filesystems/seq_file.txt
index b843743aa0b..0d15ebccf5b 100644
--- a/Documentation/filesystems/seq_file.txt
+++ b/Documentation/filesystems/seq_file.txt
@@ -46,7 +46,7 @@ better to do. The file is seekable, in that one can do something like the
 following:
 
     dd if=/proc/sequence of=out1 count=1
-    dd if=/proc/sequence skip=1 out=out2 count=1
+    dd if=/proc/sequence skip=1 of=out2 count=1
 
 Then concatenate the output files out1 and out2 and get the right
 result. Yes, it is a thoroughly useless module, but the point is to show
diff --git a/Documentation/filesystems/sharedsubtree.txt b/Documentation/filesystems/sharedsubtree.txt
index 736540045dc..23a181074f9 100644
--- a/Documentation/filesystems/sharedsubtree.txt
+++ b/Documentation/filesystems/sharedsubtree.txt
@@ -4,7 +4,7 @@ Shared Subtrees
 Contents:
 	1) Overview
 	2) Features
-	3) smount command
+	3) Setting mount states
 	4) Use-case
 	5) Detailed semantics
 	6) Quiz
@@ -41,14 +41,14 @@ replicas continue to be exactly same.
 
 	Here is an example:
 
-	Lets say /mnt has a mount that is shared.
+	Let's say /mnt has a mount that is shared.
 	mount --make-shared /mnt
 
-	note: mount command does not yet support the --make-shared flag.
-	I have included a small C program which does the same by executing
-	'smount /mnt shared'
+	Note: mount(8) command now supports the --make-shared flag,
+	so the sample 'smount' program is no longer needed and has been
+	removed.
 
-	#mount --bind /mnt /tmp
+	# mount --bind /mnt /tmp
 	The above command replicates the mount at /mnt to the mountpoint /tmp
 	and the contents of both the mounts remain identical.
 
@@ -58,8 +58,8 @@ replicas continue to be exactly same.
 	#ls /tmp
 	a b c
 
-	Now lets say we mount a device at /tmp/a
-	#mount /dev/sd0  /tmp/a
+	Now let's say we mount a device at /tmp/a
+	# mount /dev/sd0  /tmp/a
 
 	#ls /tmp/a
 	t1 t2 t2
@@ -80,21 +80,20 @@ replicas continue to be exactly same.
 
 	Here is an example:
 
-	Lets say /mnt has a mount which is shared.
-	#mount --make-shared /mnt
+	Let's say /mnt has a mount which is shared.
+	# mount --make-shared /mnt
 
-	Lets bind mount /mnt to /tmp
-	#mount --bind /mnt /tmp
+	Let's bind mount /mnt to /tmp
+	# mount --bind /mnt /tmp
 
 	the new mount at /tmp becomes a shared mount and it is a replica of
 	the mount at /mnt.
 
-	Now lets make the mount at /tmp; a slave of /mnt
-	#mount --make-slave /tmp
-	[or smount /tmp slave]
+	Now let's make the mount at /tmp a slave of /mnt
+	# mount --make-slave /tmp
 
-	lets mount /dev/sd0 on /mnt/a
-	#mount /dev/sd0 /mnt/a
+	let's mount /dev/sd0 on /mnt/a
+	# mount /dev/sd0 /mnt/a
 
 	#ls /mnt/a
 	t1 t2 t3
@@ -104,9 +103,9 @@ replicas continue to be exactly same.
 
 	Note the mount event has propagated to the mount at /tmp
 
-	However lets see what happens if we mount something on the mount at /tmp
+	However let's see what happens if we mount something on the mount at /tmp
 
-	#mount /dev/sd1 /tmp/b
+	# mount /dev/sd1 /tmp/b
 
 	#ls /tmp/b
 	s1 s2 s3
@@ -124,12 +123,11 @@ replicas continue to be exactly same.
 
 2d) A unbindable mount is a unbindable private mount
 
-	lets say we have a mount at /mnt and we make is unbindable
+	let's say we have a mount at /mnt and we make it unbindable
 
-	#mount --make-unbindable /mnt
-	 [ smount /mnt  unbindable ]
+	# mount --make-unbindable /mnt
 
-	 Lets try to bind mount this mount somewhere else.
+	 Let's try to bind mount this mount somewhere else.
 	 # mount --bind /mnt /tmp
 	 mount: wrong fs type, bad option, bad superblock on /mnt,
 	        or too many mounted file systems
@@ -137,149 +135,15 @@ replicas continue to be exactly same.
 	Binding a unbindable mount is a invalid operation.
 
 
-3) smount command
+3) Setting mount states
 
-	Currently the mount command is not aware of shared subtree features.
-	Work is in progress to add the support in mount ( util-linux package ).
-	Till then use the following program.
+	The mount command (util-linux package) can be used to set mount
+	states:
 
-	------------------------------------------------------------------------
-	//
-	//this code was developed my Miklos Szeredi <miklos@szeredi.hu>
-	//and modified by Ram Pai <linuxram@us.ibm.com>
-	// sample usage:
-	//              smount /tmp shared
-	//
-	#include <stdio.h>
-	#include <stdlib.h>
-	#include <unistd.h>
-	#include <string.h>
-	#include <sys/mount.h>
-	#include <sys/fsuid.h>
-
-	#ifndef MS_REC
-	#define MS_REC		0x4000	/* 16384: Recursive loopback */
-	#endif
-
-	#ifndef MS_SHARED
-	#define MS_SHARED		1<<20	/* Shared */
-	#endif
-
-	#ifndef MS_PRIVATE
-	#define MS_PRIVATE		1<<18	/* Private */
-	#endif
-
-	#ifndef MS_SLAVE
-	#define MS_SLAVE		1<<19	/* Slave */
-	#endif
-
-	#ifndef MS_UNBINDABLE
-	#define MS_UNBINDABLE		1<<17	/* Unbindable */
-	#endif
-
-	int main(int argc, char *argv[])
-	{
-		int type;
-		if(argc != 3) {
-			fprintf(stderr, "usage: %s dir "
-			"<rshared|rslave|rprivate|runbindable|shared|slave"
-			"|private|unbindable>\n" , argv[0]);
-			return 1;
-		}
-
-		fprintf(stdout, "%s %s %s\n", argv[0], argv[1], argv[2]);
-
-		if (strcmp(argv[2],"rshared")==0)
-			type=(MS_SHARED|MS_REC);
-		else if (strcmp(argv[2],"rslave")==0)
-			type=(MS_SLAVE|MS_REC);
-		else if (strcmp(argv[2],"rprivate")==0)
-			type=(MS_PRIVATE|MS_REC);
-		else if (strcmp(argv[2],"runbindable")==0)
-			type=(MS_UNBINDABLE|MS_REC);
-		else if (strcmp(argv[2],"shared")==0)
-			type=MS_SHARED;
-		else if (strcmp(argv[2],"slave")==0)
-			type=MS_SLAVE;
-		else if (strcmp(argv[2],"private")==0)
-			type=MS_PRIVATE;
-		else if (strcmp(argv[2],"unbindable")==0)
-			type=MS_UNBINDABLE;
-		else {
-			fprintf(stderr, "invalid operation: %s\n", argv[2]);
-			return 1;
-		}
-		setfsuid(getuid());
-
-		if(mount("", argv[1], "dontcare", type, "") == -1) {
-			perror("mount");
-			return 1;
-		}
-		return 0;
-	}
-	-----------------------------------------------------------------------
-
-	Copy the above code snippet into smount.c
-	gcc -o smount smount.c
-
-
-	(i) To mark all the mounts under /mnt as shared execute the following
-	command:
-
-	 	smount /mnt rshared
-		the corresponding syntax planned for mount command is
-		mount --make-rshared /mnt
-
-	    just to mark a mount /mnt as shared, execute the following
-	    command:
-	 	smount /mnt shared
-		the corresponding syntax planned for mount command is
-		mount --make-shared /mnt
-
-	(ii) To mark all the shared mounts under /mnt as slave execute the
-	following
-
-	     command:
-		smount /mnt rslave
-		the corresponding syntax planned for mount command is
-		mount --make-rslave /mnt
-
-	    just to mark a mount /mnt as slave, execute the following
-	    command:
-	 	smount /mnt slave
-		the corresponding syntax planned for mount command is
-		mount --make-slave /mnt
-
-	(iii) To mark all the mounts under /mnt as private execute the
-	following command:
-
-		smount /mnt rprivate
-		the corresponding syntax planned for mount command is
-		mount --make-rprivate /mnt
-
-	    just to mark a mount /mnt as private, execute the following
-	    command:
-	 	smount /mnt private
-		the corresponding syntax planned for mount command is
-		mount --make-private /mnt
-
-	      NOTE: by default all the mounts are created as private. But if
-	      you want to change some shared/slave/unbindable  mount as
-	      private at a later point in time, this command can help.
-
-	(iv) To mark all the mounts under /mnt as unbindable execute the
-	following
-
-	     command:
-		smount /mnt runbindable
-		the corresponding syntax planned for mount command is
-		mount --make-runbindable /mnt
-
-	    just to mark a mount /mnt as unbindable, execute the following
-	    command:
-	 	smount /mnt unbindable
-		the corresponding syntax planned for mount command is
-		mount --make-unbindable /mnt
+	mount --make-shared mountpoint
+	mount --make-slave mountpoint
+	mount --make-private mountpoint
+	mount --make-unbindable mountpoint
 
 
 4) Use cases
@@ -350,7 +214,7 @@ replicas continue to be exactly same.
 		mount --rbind / /view/v3
 		mount --rbind / /view/v4
 
-		and if /usr has a versioning filesystem mounted, than that
+		and if /usr has a versioning filesystem mounted, then that
 		mount appears at /view/v1/usr, /view/v2/usr, /view/v3/usr and
 		/view/v4/usr too
 
@@ -390,7 +254,7 @@ replicas continue to be exactly same.
 
 		For example:
 			mount --make-shared /mnt
-			mount --bin /mnt /tmp
+			mount --bind /mnt /tmp
 
 		The mount at /mnt and that at /tmp are both shared and belong
 		to the same peer group. Anything mounted or unmounted under
@@ -558,7 +422,7 @@ replicas continue to be exactly same.
 	then the subtree under the unbindable mount is pruned in the new
 	location.
 
-	eg: lets say we have the following mount tree.
+	eg: let's say we have the following mount tree.
 
 		A
 	      /   \
@@ -566,7 +430,7 @@ replicas continue to be exactly same.
 	     / \ / \
 	     D E F G
 
-	     Lets say all the mount except the mount C in the tree are
+	     Let's say all the mounts except the mount C in the tree are
 	     of a type other than unbindable.
 
 	     If this tree is rbound to say Z
@@ -683,13 +547,13 @@ replicas continue to be exactly same.
 	'b' on mounts that receive propagation from mount 'B' and does not have
 	sub-mounts within them are unmounted.
 
-	Example: Lets say 'B1', 'B2', 'B3' are shared mounts that propagate to
+	Example: Let's say 'B1', 'B2', 'B3' are shared mounts that propagate to
 	each other.
 
-	lets say 'A1', 'A2', 'A3' are first mounted at dentry 'b' on mount
+	let's say 'A1', 'A2', 'A3' are first mounted at dentry 'b' on mount
 	'B1', 'B2' and 'B3' respectively.
 
-	lets say 'C1', 'C2', 'C3' are next mounted at the same dentry 'b' on
+	let's say 'C1', 'C2', 'C3' are next mounted at the same dentry 'b' on
 	mount 'B1', 'B2' and 'B3' respectively.
 
 	if 'C1' is unmounted, all the mounts that are most-recently-mounted on
@@ -710,7 +574,7 @@ replicas continue to be exactly same.
 	A cloned namespace contains all the mounts as that of the parent
 	namespace.
 
-	Lets say 'A' and 'B' are the corresponding mounts in the parent and the
+	Let's say 'A' and 'B' are the corresponding mounts in the parent and the
 	child namespace.
 
 	If 'A' is shared, then 'B' is also shared and 'A' and 'B' propagate to
@@ -759,11 +623,11 @@ replicas continue to be exactly same.
 		mount --make-slave /mnt
 
 		At this point we have the first mount at /tmp and
-		its root dentry is 1. Lets call this mount 'A'
+		its root dentry is 1. Let's call this mount 'A'
 		And then we have a second mount at /tmp1 with root
-		dentry 2. Lets call this mount 'B'
+		dentry 2. Let's call this mount 'B'
 		Next we have a third mount at /mnt with root dentry
-		mnt. Lets call this mount 'C'
+		mnt. Let's call this mount 'C'
 
 		'B' is the slave of 'A' and 'C' is a slave of 'B'
 		A -> B -> C
@@ -794,7 +658,7 @@ replicas continue to be exactly same.
 
 	Q3 Why is unbindable mount needed?
 
-		Lets say we want to replicate the mount tree at multiple
+		Let's say we want to replicate the mount tree at multiple
 		locations within the same subtree.
 
 		if one rbind mounts a tree within the same subtree 'n' times
@@ -803,7 +667,7 @@ replicas continue to be exactly same.
 		mounts. Here is a example.
 
 		step 1:
-		   lets say the root tree has just two directories with
+		   let's say the root tree has just two directories with
 		   one vfsmount.
 				    root
 				   /    \
@@ -875,7 +739,7 @@ replicas continue to be exactly same.
 		Unclonable mounts come in handy here.
 
 		step 1:
-		   lets say the root tree has just two directories with
+		   let's say the root tree has just two directories with
 		   one vfsmount.
 				    root
 				   /    \
diff --git a/Documentation/filesystems/vfat.txt b/Documentation/filesystems/vfat.txt
index b58b84b50fa..eed520fd0c8 100644
--- a/Documentation/filesystems/vfat.txt
+++ b/Documentation/filesystems/vfat.txt
@@ -102,7 +102,7 @@ shortname=lower|win95|winnt|mixed
 		 winnt: emulate the Windows NT rule for display/create.
 		 mixed: emulate the Windows NT rule for display,
 			emulate the Windows 95 rule for create.
-		 Default setting is `lower'.
+		 Default setting is `mixed'.
 
 tz=UTC        -- Interpret timestamps as UTC rather than local time.
                  This option disables the conversion of timestamps
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index f49eecf2e57..623f094c9d8 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -536,6 +536,7 @@ struct address_space_operations {
 	/* migrate the contents of a page to the specified target */
 	int (*migratepage) (struct page *, struct page *);
 	int (*launder_page) (struct page *);
+	int (*error_remove_page) (struct address_space *mapping, struct page *page);
 };
 
   writepage: called by the VM to write a dirty page to backing store.
@@ -694,6 +695,12 @@ struct address_space_operations {
   	prevent redirtying the page, it is kept locked during the whole
 	operation.
 
+  error_remove_page: normally set to generic_error_remove_page if truncation
+	is ok for this address space. Used for memory failure handling.
+	Setting this implies that you can deal with pages going away under you,
+	unless you hold them locked or have raised their reference counts.
+
+
 The File Object
 ===============