From b5934c531849ff4a51ce0f290141efe564290e40 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:39:25 -0800 Subject: mm: add_active_or_unevictable into rmap lru_cache_add_active_or_unevictable() and page_add_new_anon_rmap() always appear together. Save some symbol table space and some jumping around by removing lru_cache_add_active_or_unevictable(), folding its code into page_add_new_anon_rmap(): like how we add file pages to lru just after adding them to page cache. Remove the nearby "TODO: is this safe?" comments (yes, it is safe), and change page_add_new_anon_rmap()'s address BUG_ON to VM_BUG_ON as originally intended. Signed-off-by: Hugh Dickins Acked-by: Rik van Riel Cc: Lee Schermerhorn Cc: Nick Piggin Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux/swap.h') diff --git a/include/linux/swap.h b/include/linux/swap.h index a3af95b2cb6..48f309dc5a0 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -174,8 +174,6 @@ extern unsigned int nr_free_pagecache_pages(void); /* linux/mm/swap.c */ extern void __lru_cache_add(struct page *, enum lru_list lru); extern void lru_cache_add_lru(struct page *, enum lru_list lru); -extern void lru_cache_add_active_or_unevictable(struct page *, - struct vm_area_struct *); extern void activate_page(struct page *); extern void mark_page_accessed(struct page *); extern void lru_add_drain(void); -- cgit From 7b1fe59793e61f826bef053107b57b23954833bb Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:39:34 -0800 Subject: mm: reuse_swap_page replaces can_share_swap_page A good place to free up old swap is where do_wp_page(), or do_swap_page(), is about to redirty the page: the data on disk is then stale and won't be read again; and if we do decide to write the page out later, using the previous swap location makes an unnecessary disk seek very likely. So give can_share_swap_page() the side-effect of delete_from_swap_cache() when it safely can. And can_share_swap_page() was always a misleading name, the more so if it has a side-effect: rename it reuse_swap_page(). Irrelevant cleanup nearby: remove swap_token_default_timeout definition from swap.h: it's used nowhere. Signed-off-by: Hugh Dickins Cc: Lee Schermerhorn Acked-by: Rik van Riel Cc: Nick Piggin Cc: KAMEZAWA Hiroyuki Cc: Robin Holt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux/swap.h') diff --git a/include/linux/swap.h b/include/linux/swap.h index 48f309dc5a0..366556c5b14 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -304,7 +304,7 @@ extern unsigned int count_swap_pages(int, int); extern sector_t map_swap_page(struct swap_info_struct *, pgoff_t); extern sector_t swapdev_block(int, pgoff_t); extern struct swap_info_struct *get_swap_info_struct(unsigned); -extern int can_share_swap_page(struct page *); +extern int reuse_swap_page(struct page *); extern int remove_exclusive_swap_page(struct page *); extern int remove_exclusive_swap_page_ref(struct page *); struct backing_dev_info; @@ -372,8 +372,6 @@ static inline struct page *lookup_swap_cache(swp_entry_t swp) return NULL; } -#define can_share_swap_page(p) (page_mapcount(p) == 1) - static inline int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask) { @@ -388,7 +386,7 @@ static inline void delete_from_swap_cache(struct page *page) { } -#define swap_token_default_timeout 0 +#define reuse_swap_page(page) (page_mapcount(page) == 1) static inline int remove_exclusive_swap_page(struct page *p) { -- cgit From a2c43eed8334e878702fca713b212ae2a11d84b9 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:39:36 -0800 Subject: mm: try_to_free_swap replaces remove_exclusive_swap_page remove_exclusive_swap_page(): its problem is in living up to its name. It doesn't matter if someone else has a reference to the page (raised page_count); it doesn't matter if the page is mapped into userspace (raised page_mapcount - though that hints it may be worth keeping the swap): all that matters is that there be no more references to the swap (and no writeback in progress). swapoff (try_to_unuse) has been removing pages from swapcache for years, with no concern for page count or page mapcount, and we used to have a comment in lookup_swap_cache() recognizing that: if you go for a page of swapcache, you'll get the right page, but it could have been removed from swapcache by the time you get page lock. So, give up asking for exclusivity: get rid of remove_exclusive_swap_page(), and remove_exclusive_swap_page_ref() and remove_exclusive_swap_page_count() which were spawned for the recent LRU work: replace them by the simpler try_to_free_swap() which just checks page_swapcount(). Similarly, remove the page_count limitation from free_swap_and_count(), but assume that it's worth holding on to the swap if page is mapped and swap nowhere near full. Add a vm_swap_full() test in free_swap_cache()? It would be consistent, but I think we probably have enough for now. Signed-off-by: Hugh Dickins Cc: Lee Schermerhorn Cc: Rik van Riel Cc: Nick Piggin Cc: KAMEZAWA Hiroyuki Cc: Robin Holt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'include/linux/swap.h') diff --git a/include/linux/swap.h b/include/linux/swap.h index 366556c5b14..c3ecd478840 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -305,8 +305,7 @@ extern sector_t map_swap_page(struct swap_info_struct *, pgoff_t); extern sector_t swapdev_block(int, pgoff_t); extern struct swap_info_struct *get_swap_info_struct(unsigned); extern int reuse_swap_page(struct page *); -extern int remove_exclusive_swap_page(struct page *); -extern int remove_exclusive_swap_page_ref(struct page *); +extern int try_to_free_swap(struct page *); struct backing_dev_info; /* linux/mm/thrash.c */ @@ -388,12 +387,7 @@ static inline void delete_from_swap_cache(struct page *page) #define reuse_swap_page(page) (page_mapcount(page) == 1) -static inline int remove_exclusive_swap_page(struct page *p) -{ - return 0; -} - -static inline int remove_exclusive_swap_page_ref(struct page *page) +static inline int try_to_free_swap(struct page *page) { return 0; } -- cgit From ac47b003d03c2a4f28aef1d505b66d24ad191c4f Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:39:39 -0800 Subject: mm: remove gfp_mask from add_to_swap Remove gfp_mask argument from add_to_swap(): it's misleading because its only caller, shrink_page_list(), is not atomic at that point; and in due course (implementing discard) we'll sometimes want to allocate some memory with GFP_NOIO (as is used in swap_writepage) when allocating swap. No change to the gfp_mask passed down to add_to_swap_cache(): still use __GFP_HIGH without __GFP_WAIT (with nomemalloc and nowarn as before): though it's not obvious if that's the best combination to ask for here. Signed-off-by: Hugh Dickins Cc: Lee Schermerhorn Cc: Rik van Riel Cc: Nick Piggin Cc: KAMEZAWA Hiroyuki Cc: Robin Holt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/swap.h') diff --git a/include/linux/swap.h b/include/linux/swap.h index c3ecd478840..c38bd157695 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -278,7 +278,7 @@ extern void end_swap_bio_read(struct bio *bio, int err); extern struct address_space swapper_space; #define total_swapcache_pages swapper_space.nrpages extern void show_swap_cache_info(void); -extern int add_to_swap(struct page *, gfp_t); +extern int add_to_swap(struct page *); extern int add_to_swap_cache(struct page *, swp_entry_t, gfp_t); extern void __delete_from_swap_cache(struct page *); extern void delete_from_swap_cache(struct page *); -- cgit From 60371d971a3d01afd102f0bbf2681f32ecc31d78 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:39:40 -0800 Subject: mm: add add_to_swap stub If we add a failing stub for add_to_swap(), then we can remove the #ifdef CONFIG_SWAP from mm/vmscan.c. This was intended as a source cleanup, but looking more closely, it turns out that the !CONFIG_SWAP case was going to keep_locked for an anonymous page, whereas now it goes to the more suitable activate_locked, like the CONFIG_SWAP nr_swap_pages 0 case. Signed-off-by: Hugh Dickins Cc: Lee Schermerhorn Acked-by: Rik van Riel Cc: Nick Piggin Cc: KAMEZAWA Hiroyuki Cc: Robin Holt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux/swap.h') diff --git a/include/linux/swap.h b/include/linux/swap.h index c38bd157695..c0d23ac710d 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -371,6 +371,11 @@ static inline struct page *lookup_swap_cache(swp_entry_t swp) return NULL; } +static inline int add_to_swap(struct page *page) +{ + return 0; +} + static inline int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask) { -- cgit From b962716b459505a8d83aea313fea0abe76749f42 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:39:41 -0800 Subject: mm: optimize get_scan_ratio for no swap Rik suggests a simplified get_scan_ratio() for !CONFIG_SWAP. Yes, the gcc optimizer gives us that, when nr_swap_pages is #defined as 0L. Move usual declaration to swapfile.c: it never belonged in page_alloc.c. Signed-off-by: Hugh Dickins Cc: Lee Schermerhorn Acked-by: Rik van Riel Cc: Nick Piggin Cc: KAMEZAWA Hiroyuki Cc: Robin Holt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux/swap.h') diff --git a/include/linux/swap.h b/include/linux/swap.h index c0d23ac710d..3a31cc25bd2 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -163,7 +163,6 @@ struct swap_list_t { /* linux/mm/page_alloc.c */ extern unsigned long totalram_pages; extern unsigned long totalreserve_pages; -extern long nr_swap_pages; extern unsigned int nr_free_buffer_pages(void); extern unsigned int nr_free_pagecache_pages(void); @@ -291,6 +290,7 @@ extern struct page *swapin_readahead(swp_entry_t, gfp_t, struct vm_area_struct *vma, unsigned long addr); /* linux/mm/swapfile.c */ +extern long nr_swap_pages; extern long total_swap_pages; extern void si_swapinfo(struct sysinfo *); extern swp_entry_t get_swap_page(void); @@ -331,7 +331,8 @@ static inline void disable_swap_token(void) #else /* CONFIG_SWAP */ -#define total_swap_pages 0 +#define nr_swap_pages 0L +#define total_swap_pages 0L #define total_swapcache_pages 0UL #define si_swapinfo(val) \ -- cgit From 22c6f8fdb31993cf49bdd4a47b64a7002391e1c7 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:39:48 -0800 Subject: swapfile: remove SWP_ACTIVE mask Remove the SWP_ACTIVE mask: it just obscures the SWP_WRITEOK flag. Signed-off-by: Hugh Dickins Cc: KAMEZAWA Hiroyuki Cc: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/swap.h') diff --git a/include/linux/swap.h b/include/linux/swap.h index 3a31cc25bd2..410c8e47372 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -120,7 +120,6 @@ struct swap_extent { enum { SWP_USED = (1 << 0), /* is slot in swap_info[] used? */ SWP_WRITEOK = (1 << 1), /* ok to write to this swap? */ - SWP_ACTIVE = (SWP_USED | SWP_WRITEOK), /* add others here before... */ SWP_SCANNING = (1 << 8), /* refcount in scan_swap_map */ }; -- cgit From ebebbbe904634b0ca1c674457b399f68db5e05b1 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:39:50 -0800 Subject: swapfile: rearrange scan and swap_info Before making functional changes, rearrange scan_swap_map() to simplify subsequent diffs. Actually, there is one functional change in there: leave cluster_nr negative while scanning for a new cluster - resetting it early increased the likelihood that when we have difficulty finding a free cluster, another task may come in and try doing exactly the same - just a waste of cpu. Before making functional changes, rearrange struct swap_info_struct slightly: flags will be needed as an unsigned long (for wait_on_bit), next is a good int to pair with prio, old_block_size is uninteresting so shift it to the end. Signed-off-by: Hugh Dickins Cc: KAMEZAWA Hiroyuki Cc: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux/swap.h') diff --git a/include/linux/swap.h b/include/linux/swap.h index 410c8e47372..9cabb8b21ab 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -133,14 +133,14 @@ enum { * The in-memory structure used to track swap areas. */ struct swap_info_struct { - unsigned int flags; + unsigned long flags; int prio; /* swap priority */ + int next; /* next entry on swap list */ struct file *swap_file; struct block_device *bdev; struct list_head extent_list; struct swap_extent *curr_swap_extent; - unsigned old_block_size; - unsigned short * swap_map; + unsigned short *swap_map; unsigned int lowest_bit; unsigned int highest_bit; unsigned int cluster_next; @@ -148,7 +148,7 @@ struct swap_info_struct { unsigned int pages; unsigned int max; unsigned int inuse_pages; - int next; /* next entry on swap list */ + unsigned int old_block_size; }; struct swap_list_t { -- cgit From 6a6ba83175c029c7820765bae44692266b29e67a Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:39:51 -0800 Subject: swapfile: swapon use discard (trim) When adding swap, all the old data on swap can be forgotten: sys_swapon() discard all but the header page of the swap partition (or every extent but the header of the swap file), to give a solidstate swap device the opportunity to optimize its wear-levelling. If that succeeds, note SWP_DISCARDABLE for later use, and report it with a "D" at the right end of the kernel's "Adding ... swap" message. Perhaps something should be shown in /proc/swaps (swapon -s), but we have to be more cautious before making any addition to that format. Signed-off-by: Hugh Dickins Cc: KAMEZAWA Hiroyuki Cc: Nick Piggin Cc: David Woodhouse Cc: Jens Axboe Cc: Matthew Wilcox Cc: Joern Engel Cc: James Bottomley Cc: Donjun Shin Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/swap.h') diff --git a/include/linux/swap.h b/include/linux/swap.h index 9cabb8b21ab..0b9210ea96c 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -120,6 +120,7 @@ struct swap_extent { enum { SWP_USED = (1 << 0), /* is slot in swap_info[] used? */ SWP_WRITEOK = (1 << 1), /* ok to write to this swap? */ + SWP_DISCARDABLE = (1 << 2), /* blkdev supports discard */ /* add others here before... */ SWP_SCANNING = (1 << 8), /* refcount in scan_swap_map */ }; -- cgit From 7992fde72ce06c73280a1939b7a1e903bc95ef85 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:39:53 -0800 Subject: swapfile: swap allocation use discard When scan_swap_map() finds a free cluster of swap pages to allocate, discard the old contents of the cluster if the device supports discard. But don't bother when swap is so fragmented that we allocate single pages. Be careful about racing allocations made while we're scanning for a cluster; and hold up allocations made while we're discarding. Signed-off-by: Hugh Dickins Cc: KAMEZAWA Hiroyuki Cc: Nick Piggin Cc: David Woodhouse Cc: Jens Axboe Cc: Matthew Wilcox Cc: Joern Engel Cc: James Bottomley Cc: Donjun Shin Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux/swap.h') diff --git a/include/linux/swap.h b/include/linux/swap.h index 0b9210ea96c..fe79f44c858 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -121,6 +121,7 @@ enum { SWP_USED = (1 << 0), /* is slot in swap_info[] used? */ SWP_WRITEOK = (1 << 1), /* ok to write to this swap? */ SWP_DISCARDABLE = (1 << 2), /* blkdev supports discard */ + SWP_DISCARDING = (1 << 3), /* now discarding a free cluster */ /* add others here before... */ SWP_SCANNING = (1 << 8), /* refcount in scan_swap_map */ }; @@ -144,6 +145,8 @@ struct swap_info_struct { unsigned short *swap_map; unsigned int lowest_bit; unsigned int highest_bit; + unsigned int lowest_alloc; /* while preparing discard cluster */ + unsigned int highest_alloc; /* while preparing discard cluster */ unsigned int cluster_next; unsigned int cluster_nr; unsigned int pages; -- cgit From 20137a490f397d9c01fc9fadd83a8d198bda4477 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:39:54 -0800 Subject: swapfile: swapon randomize if nonrot Swap allocation has always started from the beginning of the swap area; but if we're dealing with a solidstate swap device which can only remap blocks within limited zones, that would sooner wear out the first zone. Therefore sys_swapon() test whether blk_queue is non-rotational, and if so randomize the cluster_next starting position for allocation. If blk_queue is nonrot, note SWP_SOLIDSTATE for later use, and report it with an "SS" at the right end of the kernel's "Adding ... swap" message (so that if it's both nonrot and discardable, "SSD" will be shown there). Perhaps something should be shown in /proc/swaps (swapon -s), but we have to be more cautious before making any addition to that format. Signed-off-by: Hugh Dickins Cc: KAMEZAWA Hiroyuki Cc: Nick Piggin Cc: David Woodhouse Cc: Jens Axboe Cc: Matthew Wilcox Cc: Joern Engel Cc: James Bottomley Cc: Donjun Shin Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/swap.h') diff --git a/include/linux/swap.h b/include/linux/swap.h index fe79f44c858..cbf7fbed3df 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -122,6 +122,7 @@ enum { SWP_WRITEOK = (1 << 1), /* ok to write to this swap? */ SWP_DISCARDABLE = (1 << 2), /* blkdev supports discard */ SWP_DISCARDING = (1 << 3), /* now discarding a free cluster */ + SWP_SOLIDSTATE = (1 << 4), /* blkdev seeks are cheap */ /* add others here before... */ SWP_SCANNING = (1 << 8), /* refcount in scan_swap_map */ }; -- cgit From 2509ef26db4699a5d9fa876e90ddfc107afcab84 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:40:10 -0800 Subject: badpage: zap print_bad_pte on swap and file Complete zap_pte_range()'s coverage of bad pagetable entries by calling print_bad_pte() on a pte_file in a linear vma and on a bad swap entry. That needs free_swap_and_cache() to tell it, which will also have shown one of those "swap_free" errors (but with much less information). Similar checks in fork's copy_one_pte()? No, that would be more noisy than helpful: we'll see them when parent and child exec or exit. Where do_nonlinear_fault() calls print_bad_pte(): omit !VM_CAN_NONLINEAR case, that could only be a bug in sys_remap_file_pages(), not a bad pte. VM_FAULT_OOM rather than VM_FAULT_SIGBUS? Well, okay, that is consistent with what happens if do_swap_page() operates a bad swap entry; but don't we have patches to be more careful about killing when VM_FAULT_OOM? Signed-off-by: Hugh Dickins Cc: Nick Piggin Cc: Christoph Lameter Cc: Mel Gorman Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'include/linux/swap.h') diff --git a/include/linux/swap.h b/include/linux/swap.h index cbf7fbed3df..91dee50fe26 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -302,7 +302,7 @@ extern swp_entry_t get_swap_page_of_type(int); extern int swap_duplicate(swp_entry_t); extern int valid_swaphandles(swp_entry_t, unsigned long *); extern void swap_free(swp_entry_t); -extern void free_swap_and_cache(swp_entry_t); +extern int free_swap_and_cache(swp_entry_t); extern int swap_type_of(dev_t, sector_t, struct block_device **); extern unsigned int count_swap_pages(int, int); extern sector_t map_swap_page(struct swap_info_struct *, pgoff_t); @@ -352,14 +352,8 @@ static inline void show_swap_cache_info(void) { } -static inline void free_swap_and_cache(swp_entry_t swp) -{ -} - -static inline int swap_duplicate(swp_entry_t swp) -{ - return 0; -} +#define free_swap_and_cache(swp) is_migration_entry(swp) +#define swap_duplicate(swp) is_migration_entry(swp) static inline void swap_free(swp_entry_t swp) { -- cgit