From df981d03eeff7971ac7e6ff37000bfa702327ef1 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 17 Aug 2012 09:48:17 -0400 Subject: ext4: add max_dir_size_kb mount option Very large directories can cause significant performance problems, or perhaps even invoke the OOM killer, if the process is running in a highly constrained memory environment (whether it is VM's with a small amount of memory or in a small memory cgroup). So it is useful, in cloud server/data center environments, to be able to set a filesystem-wide cap on the maximum size of a directory, to ensure that directories never get larger than a sane size. We do this via a new mount option, max_dir_size_kb. If there is an attempt to grow the directory larger than max_dir_size_kb, the system call will return ENOSPC instead. Google-Bug-Id: 6863013 Signed-off-by: "Theodore Ts'o" --- Documentation/filesystems/ext4.txt | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'Documentation/filesystems') diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt index 1b7f9acbcbbe..104322bf378c 100644 --- a/Documentation/filesystems/ext4.txt +++ b/Documentation/filesystems/ext4.txt @@ -375,6 +375,16 @@ dioread_nolock locking. If the dioread_nolock option is specified Because of the restrictions this options comprises it is off by default (e.g. dioread_lock). +max_dir_size_kb=n This limits the size of directories so that any + attempt to expand them beyond the specified + limit in kilobytes will cause an ENOSPC error. + This is useful in memory constrained + environments, where a very large directory can + cause severe performance problems or even + provoke the Out Of Memory killer. (For example, + if there is only 512mb memory available, a 176mb + directory may seriously cramp the system's style.) + i_version Enable 64-bit inode version support. This option is off by default. 
-- cgit v1.2.3 From d65226e2bfe9dd96f193d61892e20fcda9524d22 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Tue, 21 Aug 2012 16:15:46 -0700 Subject: Documentation: update mount option in filesystem/vfat.txt Update two mount options(discard, nfs) in vfat.txt. Signed-off-by: Namjae Jeon Acked-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/vfat.txt | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'Documentation/filesystems') diff --git a/Documentation/filesystems/vfat.txt b/Documentation/filesystems/vfat.txt index ead764b2728f..de1e6c4dccff 100644 --- a/Documentation/filesystems/vfat.txt +++ b/Documentation/filesystems/vfat.txt @@ -137,6 +137,17 @@ errors=panic|continue|remount-ro without doing anything or remount the partition in read-only mode (default behavior). +discard -- If set, issues discard/TRIM commands to the block + device when blocks are freed. This is useful for SSD devices + and sparse/thinly-provisioned LUNs. + +nfs -- This option maintains an index (cache) of directory + inodes by i_logstart which is used by the nfs-related code to + improve look-ups. + + Enable this only if you want to export the FAT filesystem + over NFS. + <bool>: 0,1,yes,no,true,false TODO -- cgit v1.2.3 From 82aceae4f0d42f03d9ad7d1e90389e731153898f Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 27 Aug 2012 13:32:15 -0700 Subject: debugfs: more tightly restrict default mount mode Since the debugfs is mostly only used by root, make the default mount mode 0700. Most system owners do not need a more permissive value, but they can choose to weaken the restrictions via their fstab. 
Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- Documentation/filesystems/debugfs.txt | 4 ++-- fs/debugfs/inode.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'Documentation/filesystems') diff --git a/Documentation/filesystems/debugfs.txt b/Documentation/filesystems/debugfs.txt index 7a34f827989c..3a863f692728 100644 --- a/Documentation/filesystems/debugfs.txt +++ b/Documentation/filesystems/debugfs.txt @@ -15,8 +15,8 @@ Debugfs is typically mounted with a command like: mount -t debugfs none /sys/kernel/debug (Or an equivalent /etc/fstab line). -The debugfs root directory is accessible by anyone by default. To -restrict access to the tree the "uid", "gid" and "mode" mount +The debugfs root directory is accessible only to the root user by +default. To change access to the tree the "uid", "gid" and "mode" mount options can be used. Note that the debugfs API is exported GPL-only to modules. diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 2c9fafbe8425..6393fd61d5c4 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -28,7 +28,7 @@ #include #include -#define DEBUGFS_DEFAULT_MODE 0755 +#define DEBUGFS_DEFAULT_MODE 0700 static struct vfsmount *debugfs_mount; static int debugfs_mount_count; -- cgit v1.2.3 From b40c2e665cd552eae5fbdbb878bc29a34357668e Mon Sep 17 00:00:00 2001 From: Tino Reichardt Date: Mon, 17 Sep 2012 11:58:19 -0500 Subject: fs/jfs: TRIM support for JFS Filesystem This patch adds support for the two linux interfaces of the discard/TRIM command for SSD devices and sparse/thinly-provisioned LUNs. JFS will support batched discard via FITRIM ioctl and online discard with the discard mount option. 
Signed-off-by: Tino Reichardt Signed-off-by: Dave Kleikamp --- Documentation/filesystems/jfs.txt | 19 +++++- fs/jfs/Makefile | 2 +- fs/jfs/ioctl.c | 43 ++++++++++++- fs/jfs/jfs_discard.c | 117 +++++++++++++++++++++++++++++++++++ fs/jfs/jfs_discard.h | 26 ++++++++ fs/jfs/jfs_dmap.c | 125 ++++++++++++++++++++++++++++++++++++-- fs/jfs/jfs_dmap.h | 2 + fs/jfs/jfs_filsys.h | 3 + fs/jfs/jfs_incore.h | 1 + fs/jfs/super.c | 71 +++++++++++++++++----- 10 files changed, 385 insertions(+), 24 deletions(-) create mode 100644 fs/jfs/jfs_discard.c create mode 100644 fs/jfs/jfs_discard.h (limited to 'Documentation/filesystems') diff --git a/Documentation/filesystems/jfs.txt b/Documentation/filesystems/jfs.txt index 26ebde77e821..2f94f9ca1794 100644 --- a/Documentation/filesystems/jfs.txt +++ b/Documentation/filesystems/jfs.txt @@ -3,6 +3,7 @@ IBM's Journaled File System (JFS) for Linux JFS Homepage: http://jfs.sourceforge.net/ The following mount options are supported: +(*) == default iocharset=name Character set to use for converting from Unicode to ASCII. The default is to do no conversion. Use @@ -21,12 +22,12 @@ nointegrity Do not write to the journal. The primary use of this option from backup media. The integrity of the volume is not guaranteed if the system abnormally abends. -integrity Default. Commit metadata changes to the journal. Use this - option to remount a volume where the nointegrity option was +integrity(*) Commit metadata changes to the journal. Use this option to + remount a volume where the nointegrity option was previously specified in order to restore normal behavior. errors=continue Keep going on a filesystem error. -errors=remount-ro Default. Remount the filesystem read-only on an error. +errors=remount-ro(*) Remount the filesystem read-only on an error. errors=panic Panic and halt the machine if an error occurs. uid=value Override on-disk uid with specified value @@ -35,6 +36,18 @@ umask=value Override on-disk umask with specified octal value. 
For directories, the execute bit will be set if the corresponding read bit is set. +discard=minlen This enables/disables the use of discard/TRIM commands. +discard The discard/TRIM commands are sent to the underlying +nodiscard(*) block device when blocks are freed. This is useful for SSD + devices and sparse/thinly-provisioned LUNs. The FITRIM ioctl + command is also available together with the nodiscard option. + The value of minlen specifies the minimum block count for which + a TRIM command to the block device is considered useful. + When no value is given to the discard option, it defaults to + 64 blocks, which means 256KiB in JFS. + The minlen value of discard overrides the minlen value given + on an FITRIM ioctl(). + Please send bugs, comments, cards and letters to shaggy@linux.vnet.ibm.com. The JFS mailing list can be subscribed to by using the link labeled diff --git a/fs/jfs/Makefile b/fs/jfs/Makefile index a58fa72d7e59..d20d4737b3ef 100644 --- a/fs/jfs/Makefile +++ b/fs/jfs/Makefile @@ -6,7 +6,7 @@ obj-$(CONFIG_JFS_FS) += jfs.o jfs-y := super.o file.o inode.o namei.o jfs_mount.o jfs_umount.o \ jfs_xtree.o jfs_imap.o jfs_debug.o jfs_dmap.o \ - jfs_unicode.o jfs_dtree.o jfs_inode.o \ + jfs_unicode.o jfs_dtree.o jfs_inode.o jfs_discard.o \ jfs_extent.o symlink.o jfs_metapage.o \ jfs_logmgr.o jfs_txnmgr.o jfs_uniupr.o \ resize.o xattr.o ioctl.o diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c index f19d1e04a374..bc555ff417e9 100644 --- a/fs/jfs/ioctl.c +++ b/fs/jfs/ioctl.c @@ -11,13 +11,17 @@ #include #include #include +#include #include #include +#include "jfs_filsys.h" +#include "jfs_debug.h" #include "jfs_incore.h" #include "jfs_dinode.h" #include "jfs_inode.h" - +#include "jfs_dmap.h" +#include "jfs_discard.h" static struct { long jfs_flag; @@ -123,6 +127,40 @@ setflags_out: mnt_drop_write_file(filp); return err; } + + case FITRIM: + { + struct super_block *sb = inode->i_sb; + struct request_queue *q = bdev_get_queue(sb->s_bdev); + struct fstrim_range range; + 
s64 ret = 0; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (!blk_queue_discard(q)) { + jfs_warn("FITRIM not supported on device"); + return -EOPNOTSUPP; + } + + if (copy_from_user(&range, (struct fstrim_range __user *)arg, + sizeof(range))) + return -EFAULT; + + range.minlen = max_t(unsigned int, range.minlen, + q->limits.discard_granularity); + + ret = jfs_ioc_trim(inode, &range); + if (ret < 0) + return ret; + + if (copy_to_user((struct fstrim_range __user *)arg, &range, + sizeof(range))) + return -EFAULT; + + return 0; + } + default: return -ENOTTY; } @@ -142,6 +180,9 @@ long jfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) case JFS_IOC_SETFLAGS32: cmd = JFS_IOC_SETFLAGS; break; + case FITRIM: + cmd = FITRIM; + break; } return jfs_ioctl(filp, cmd, arg); } diff --git a/fs/jfs/jfs_discard.c b/fs/jfs/jfs_discard.c new file mode 100644 index 000000000000..9947563e4175 --- /dev/null +++ b/fs/jfs/jfs_discard.c @@ -0,0 +1,117 @@ +/* + * Copyright (C) Tino Reichardt, 2012 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include + +#include "jfs_incore.h" +#include "jfs_superblock.h" +#include "jfs_discard.h" +#include "jfs_dmap.h" +#include "jfs_debug.h" + + +/* + * NAME: jfs_issue_discard() + * + * FUNCTION: TRIM the specified block range on device, if supported + * + * PARAMETERS: + * ip - pointer to in-core inode + * blkno - starting block number to be trimmed (0..N) + * nblocks - number of blocks to be trimmed + * + * RETURN VALUES: + * none + * + * serialization: IREAD_LOCK(ipbmap) held on entry/exit; + */ +void jfs_issue_discard(struct inode *ip, u64 blkno, u64 nblocks) +{ + struct super_block *sb = ip->i_sb; + int r = 0; + + r = sb_issue_discard(sb, blkno, nblocks, GFP_NOFS, 0); + if (unlikely(r != 0)) { + jfs_err("JFS: sb_issue_discard" \ + "(%p, %llu, %llu, GFP_NOFS, 0) = %d => failed!\n", + sb, (unsigned long long)blkno, + (unsigned long long)nblocks, r); + } + + jfs_info("JFS: sb_issue_discard" \ + "(%p, %llu, %llu, GFP_NOFS, 0) = %d\n", + sb, (unsigned long long)blkno, + (unsigned long long)nblocks, r); + + return; +} + +/* + * NAME: jfs_ioc_trim() + * + * FUNCTION: attempt to discard (TRIM) all free blocks from the + * filesystem. 
+ * + * PARAMETERS: + * ip - pointer to in-core inode; + * range - the range, given by user space + * + * RETURN VALUES: + * 0 - success + * -EIO - i/o error + */ +int jfs_ioc_trim(struct inode *ip, struct fstrim_range *range) +{ + struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; + struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap; + struct super_block *sb = ipbmap->i_sb; + int agno, agno_end; + s64 start, end, minlen; + u64 trimmed = 0; + + /** + * convert byte values to block size of filesystem: + * start: First Byte to trim + * len: number of Bytes to trim from start + * minlen: minimum extent length in Bytes + */ + start = range->start >> sb->s_blocksize_bits; + if (start < 0) + start = 0; + end = start + (range->len >> sb->s_blocksize_bits) - 1; + if (end >= bmp->db_mapsize) + end = bmp->db_mapsize - 1; + minlen = range->minlen >> sb->s_blocksize_bits; + if (minlen <= 0) + minlen = 1; + + /** + * we trim all ag's within the range + */ + agno = BLKTOAG(start, JFS_SBI(ip->i_sb)); + agno_end = BLKTOAG(end, JFS_SBI(ip->i_sb)); + while (agno <= agno_end) { + trimmed += dbDiscardAG(ip, agno, minlen); + agno++; + } + range->len = trimmed << sb->s_blocksize_bits; + + return 0; +} diff --git a/fs/jfs/jfs_discard.h b/fs/jfs/jfs_discard.h new file mode 100644 index 000000000000..40d1ee6081a0 --- /dev/null +++ b/fs/jfs/jfs_discard.h @@ -0,0 +1,26 @@ +/* + * Copyright (C) Tino Reichardt, 2012 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifndef _H_JFS_DISCARD +#define _H_JFS_DISCARD + +struct fstrim_range; + +extern void jfs_issue_discard(struct inode *ip, u64 blkno, u64 nblocks); +extern int jfs_ioc_trim(struct inode *ip, struct fstrim_range *range); + +#endif /* _H_JFS_DISCARD */ diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index 9cbd11a3f804..174feb6a73c1 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -1,5 +1,6 @@ /* * Copyright (C) International Business Machines Corp., 2000-2004 + * Portions Copyright (C) Tino Reichardt, 2012 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -25,6 +26,7 @@ #include "jfs_lock.h" #include "jfs_metapage.h" #include "jfs_debug.h" +#include "jfs_discard.h" /* * SERIALIZATION of the Block Allocation Map. 
@@ -104,7 +106,6 @@ static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno, static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno, int nblocks); static int dbMaxBud(u8 * cp); -s64 dbMapFileSizeToMapSize(struct inode *ipbmap); static int blkstol2(s64 nb); static int cntlz(u32 value); @@ -145,7 +146,6 @@ static const s8 budtab[256] = { 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, -1 }; - /* * NAME: dbMount() * @@ -310,7 +310,6 @@ int dbSync(struct inode *ipbmap) return (0); } - /* * NAME: dbFree() * @@ -337,6 +336,7 @@ int dbFree(struct inode *ip, s64 blkno, s64 nblocks) s64 lblkno, rem; struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap; + struct super_block *sb = ipbmap->i_sb; IREAD_LOCK(ipbmap, RDWRLOCK_DMAP); @@ -351,6 +351,13 @@ int dbFree(struct inode *ip, s64 blkno, s64 nblocks) return -EIO; } + /** + * TRIM the blocks, when mounted with discard option + */ + if (JFS_SBI(sb)->flag & JFS_DISCARD) + if (JFS_SBI(sb)->minblks_trim <= nblocks) + jfs_issue_discard(ipbmap, blkno, nblocks); + /* * free the blocks a dmap at a time. 
*/ @@ -1095,7 +1102,6 @@ static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks) /* we were not successful */ release_metapage(mp); - return (rc); } @@ -1589,6 +1595,117 @@ static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results) } +/* + * NAME: dbDiscardAG() + * + * FUNCTION: attempt to discard (TRIM) all free blocks of specific AG + * + * algorithm: + * 1) allocate blocks, as large as possible and save them + * while holding IWRITE_LOCK on ipbmap + * 2) trim all these saved block/length values + * 3) mark the blocks free again + * + * benefit: + * - we work only on one ag at some time, minimizing how long we + * need to lock ipbmap + * - reading / writing the fs is possible most time, even on + * trimming + * + * downside: + * - we write two times to the dmapctl and dmap pages + * - but for me, this seems the best way, better ideas? + * /TR 2012 + * + * PARAMETERS: + * ip - pointer to in-core inode + * agno - ag to trim + * minlen - minimum value of contiguous blocks + * + * RETURN VALUES: + * s64 - actual number of blocks trimmed + */ +s64 dbDiscardAG(struct inode *ip, int agno, s64 minlen) +{ + struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; + struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap; + s64 nblocks, blkno; + u64 trimmed = 0; + int rc, l2nb; + struct super_block *sb = ipbmap->i_sb; + + struct range2trim { + u64 blkno; + u64 nblocks; + } *totrim, *tt; + + /* max blkno / nblocks pairs to trim */ + int count = 0, range_cnt; + + /* prevent others from writing new stuff here, while trimming */ + IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP); + + nblocks = bmp->db_agfree[agno]; + range_cnt = nblocks; + do_div(range_cnt, (int)minlen); + range_cnt = min(range_cnt + 1, 32 * 1024); + totrim = kmalloc(sizeof(struct range2trim) * range_cnt, GFP_NOFS); + if (totrim == NULL) { + jfs_error(bmp->db_ipbmap->i_sb, + "dbDiscardAG: no memory for trim array"); + IWRITE_UNLOCK(ipbmap); + return 0; + } + + tt = totrim; + while (nblocks >= minlen) { 
+ l2nb = BLKSTOL2(nblocks); + + /* 0 = okay, -EIO = fatal, -ENOSPC -> try smaller block */ + rc = dbAllocAG(bmp, agno, nblocks, l2nb, &blkno); + if (rc == 0) { + tt->blkno = blkno; + tt->nblocks = nblocks; + tt++; count++; + + /* the whole ag is free, trim now */ + if (bmp->db_agfree[agno] == 0) + break; + + /* give a hint for the next while */ + nblocks = bmp->db_agfree[agno]; + continue; + } else if (rc == -ENOSPC) { + /* search for next smaller log2 block */ + l2nb = BLKSTOL2(nblocks) - 1; + nblocks = 1 << l2nb; + } else { + /* Trim any already allocated blocks */ + jfs_error(bmp->db_ipbmap->i_sb, + "dbDiscardAG: -EIO"); + break; + } + + /* check, if our trim array is full */ + if (unlikely(count >= range_cnt - 1)) + break; + } + IWRITE_UNLOCK(ipbmap); + + tt->nblocks = 0; /* mark the current end */ + for (tt = totrim; tt->nblocks != 0; tt++) { + /* when mounted with online discard, dbFree() will + * call jfs_issue_discard() itself */ + if (!(JFS_SBI(sb)->flag & JFS_DISCARD)) + jfs_issue_discard(ip, tt->blkno, tt->nblocks); + dbFree(ip, tt->blkno, tt->nblocks); + trimmed += tt->nblocks; + } + kfree(totrim); + + return trimmed; +} + /* * NAME: dbFindCtl() * diff --git a/fs/jfs/jfs_dmap.h b/fs/jfs/jfs_dmap.h index 6dcb906c55d8..562b9a7e4311 100644 --- a/fs/jfs/jfs_dmap.h +++ b/fs/jfs/jfs_dmap.h @@ -311,4 +311,6 @@ extern int dbAllocBottomUp(struct inode *ip, s64 blkno, s64 nblocks); extern int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks); extern void dbFinalizeBmap(struct inode *ipbmap); extern s64 dbMapFileSizeToMapSize(struct inode *ipbmap); +extern s64 dbDiscardAG(struct inode *ip, int agno, s64 minlen); + #endif /* _H_JFS_DMAP */ diff --git a/fs/jfs/jfs_filsys.h b/fs/jfs/jfs_filsys.h index b3f5463fbe52..b67d64671bb4 100644 --- a/fs/jfs/jfs_filsys.h +++ b/fs/jfs/jfs_filsys.h @@ -45,6 +45,9 @@ /* mount time flag to disable journaling to disk */ #define JFS_NOINTEGRITY 0x00000040 +/* mount time flag to enable TRIM to ssd disks */ +#define JFS_DISCARD 
0x00000080 + /* commit option */ #define JFS_COMMIT 0x00000f00 /* commit option mask */ #define JFS_GROUPCOMMIT 0x00000100 /* group (of 1) commit */ diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h index 584a4a1a6e81..4fa958ae1986 100644 --- a/fs/jfs/jfs_incore.h +++ b/fs/jfs/jfs_incore.h @@ -195,6 +195,7 @@ struct jfs_sb_info { uint uid; /* uid to override on-disk uid */ uint gid; /* gid to override on-disk gid */ uint umask; /* umask to override on-disk umask */ + uint minblks_trim; /* minimum blocks, for online trim */ }; /* jfs_sb_info commit_state */ diff --git a/fs/jfs/super.c b/fs/jfs/super.c index c55c7452d285..6f4ac1c070f0 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "jfs_incore.h" #include "jfs_filsys.h" @@ -100,7 +101,7 @@ void jfs_error(struct super_block *sb, const char * function, ...) vsnprintf(error_buf, sizeof(error_buf), function, args); va_end(args); - printk(KERN_ERR "ERROR: (device %s): %s\n", sb->s_id, error_buf); + pr_err("ERROR: (device %s): %s\n", sb->s_id, error_buf); jfs_handle_error(sb); } @@ -197,7 +198,8 @@ static void jfs_put_super(struct super_block *sb) enum { Opt_integrity, Opt_nointegrity, Opt_iocharset, Opt_resize, Opt_resize_nosize, Opt_errors, Opt_ignore, Opt_err, Opt_quota, - Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask + Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask, + Opt_discard, Opt_nodiscard, Opt_discard_minblk }; static const match_table_t tokens = { @@ -214,6 +216,9 @@ static const match_table_t tokens = { {Opt_uid, "uid=%u"}, {Opt_gid, "gid=%u"}, {Opt_umask, "umask=%u"}, + {Opt_discard, "discard"}, + {Opt_nodiscard, "nodiscard"}, + {Opt_discard_minblk, "discard=%u"}, {Opt_err, NULL} }; @@ -255,8 +260,7 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize, else { nls_map = load_nls(args[0].from); if (!nls_map) { - printk(KERN_ERR - "JFS: charset not found\n"); + pr_err("JFS: charset not found\n"); 
goto cleanup; } } @@ -272,8 +276,7 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize, *newLVSize = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits; if (*newLVSize == 0) - printk(KERN_ERR - "JFS: Cannot determine volume size\n"); + pr_err("JFS: Cannot determine volume size\n"); break; } case Opt_errors: @@ -294,8 +297,7 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize, *flag &= ~JFS_ERR_REMOUNT_RO; *flag |= JFS_ERR_PANIC; } else { - printk(KERN_ERR - "JFS: %s is an invalid error handler\n", + pr_err("JFS: %s is an invalid error handler\n", errors); goto cleanup; } @@ -314,8 +316,7 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize, case Opt_usrquota: case Opt_grpquota: case Opt_quota: - printk(KERN_ERR - "JFS: quota operations not supported\n"); + pr_err("JFS: quota operations not supported\n"); break; #endif case Opt_uid: @@ -324,23 +325,61 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize, sbi->uid = simple_strtoul(uid, &uid, 0); break; } + case Opt_gid: { char *gid = args[0].from; sbi->gid = simple_strtoul(gid, &gid, 0); break; } + case Opt_umask: { char *umask = args[0].from; sbi->umask = simple_strtoul(umask, &umask, 8); if (sbi->umask & ~0777) { - printk(KERN_ERR - "JFS: Invalid value of umask\n"); + pr_err("JFS: Invalid value of umask\n"); goto cleanup; } break; } + + case Opt_discard: + { + struct request_queue *q = bdev_get_queue(sb->s_bdev); + /* if set to 1, even copying files will cause + * trimming :O + * -> user has more control over the online trimming + */ + sbi->minblks_trim = 64; + if (blk_queue_discard(q)) { + *flag |= JFS_DISCARD; + } else { + pr_err("JFS: discard option " \ + "not supported on device\n"); + } + break; + } + + case Opt_nodiscard: + *flag &= ~JFS_DISCARD; + break; + + case Opt_discard_minblk: + { + struct request_queue *q = bdev_get_queue(sb->s_bdev); + char *minblks_trim = args[0].from; + if 
(blk_queue_discard(q)) { + *flag |= JFS_DISCARD; + sbi->minblks_trim = simple_strtoull( + minblks_trim, &minblks_trim, 0); + } else { + pr_err("JFS: discard option " \ + "not supported on device\n"); + } + break; + } + default: printk("jfs: Unrecognized mount option \"%s\" " " or missing value\n", p); @@ -374,8 +413,8 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data) if (newLVSize) { if (sb->s_flags & MS_RDONLY) { - printk(KERN_ERR - "JFS: resize requires volume to be mounted read-write\n"); + pr_err("JFS: resize requires volume" \ + " to be mounted read-write\n"); return -EROFS; } rc = jfs_extendfs(sb, newLVSize, 0); @@ -457,7 +496,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) #endif if (newLVSize) { - printk(KERN_ERR "resize option for remount only\n"); + pr_err("resize option for remount only\n"); goto out_kfree; } @@ -625,6 +664,8 @@ static int jfs_show_options(struct seq_file *seq, struct dentry *root) seq_printf(seq, ",umask=%03o", sbi->umask); if (sbi->flag & JFS_NOINTEGRITY) seq_puts(seq, ",nointegrity"); + if (sbi->flag & JFS_DISCARD) + seq_printf(seq, ",discard=%u", sbi->minblks_trim); if (sbi->nls_tab) seq_printf(seq, ",iocharset=%s", sbi->nls_tab->charset); if (sbi->flag & JFS_ERR_CONTINUE) -- cgit v1.2.3 From 16221d9071d48841b23256a2ad643a845d5aaf80 Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Fri, 14 Sep 2012 15:48:35 -0500 Subject: jfs: Remove obsolete email address The MAINTAINERS file suffices. Signed-off-by: Dave Kleikamp --- Documentation/filesystems/jfs.txt | 2 -- 1 file changed, 2 deletions(-) (limited to 'Documentation/filesystems') diff --git a/Documentation/filesystems/jfs.txt b/Documentation/filesystems/jfs.txt index 2f94f9ca1794..f7433355394a 100644 --- a/Documentation/filesystems/jfs.txt +++ b/Documentation/filesystems/jfs.txt @@ -48,7 +48,5 @@ nodiscard(*) block device when blocks are freed. 
This is useful for SSD The minlen value of discard overrides the minlen value given on an FITRIM ioctl(). -Please send bugs, comments, cards and letters to shaggy@linux.vnet.ibm.com. - The JFS mailing list can be subscribed to by using the link labeled "Mail list Subscribe" at our web page http://jfs.sourceforge.net/ -- cgit v1.2.3 From 5e953778a2aab04929a5e7b69f53dc26e39b079e Mon Sep 17 00:00:00 2001 From: Christoph Fritz Date: Fri, 21 Sep 2012 08:31:19 +0000 Subject: ipconfig: add nameserver IPs to kernel-parameter ip= On small systems (e.g. embedded ones) IP addresses are often configured by bootloaders and get assigned to kernel via parameter "ip=". If set to "ip=dhcp", even nameserver entries from DHCP daemons are handled. These entries exported in /proc/net/pnp are commonly linked by /etc/resolv.conf. To configure nameservers for networks without DHCP, this patch adds option <dns0-ip> and <dns1-ip> to kernel-parameter 'ip='. Signed-off-by: Christoph Fritz Tested-by: Jan Weitzel Signed-off-by: David S. Miller --- Documentation/filesystems/nfs/nfsroot.txt | 10 +++++++- net/ipv4/ipconfig.c | 39 ++++++++++++++++++++++++++++--- 2 files changed, 45 insertions(+), 4 deletions(-) (limited to 'Documentation/filesystems') diff --git a/Documentation/filesystems/nfs/nfsroot.txt b/Documentation/filesystems/nfs/nfsroot.txt index ffdd9d866ad7..2d66ed688125 100644 --- a/Documentation/filesystems/nfs/nfsroot.txt +++ b/Documentation/filesystems/nfs/nfsroot.txt @@ -78,7 +78,8 @@ nfsroot=[<server-ip>:]<root-dir>[,<nfs-options>] flags = hard, nointr, noposix, cto, ac -ip=<client-ip>:<server-ip>:<gw-ip>:<netmask>:<hostname>:<device>:<autoconf> +ip=<client-ip>:<server-ip>:<gw-ip>:<netmask>:<hostname>:<device>:<autoconf>: + <dns0-ip>:<dns1-ip> This parameter tells the kernel how to configure IP addresses of devices and also how to set up the IP routing table. It was originally called @@ -158,6 +159,13 @@ ip=<client-ip>:<server-ip>:<gw-ip>:<netmask>:<hostname>:<device>:<autoconf> Default: any + <dns0-ip> IP address of first nameserver. + Value gets exported by /proc/net/pnp which is often linked + on embedded systems by /etc/resolv.conf. + + <dns1-ip> IP address of second nameserver. + Same as above. 
+ nfsrootdebug diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 67e8a6b086ea..1c0e7e051044 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -743,14 +743,22 @@ static void __init ic_bootp_init_ext(u8 *e) /* - * Initialize the DHCP/BOOTP mechanism. + * Predefine Nameservers */ -static inline void __init ic_bootp_init(void) +static inline void __init ic_nameservers_predef(void) { int i; for (i = 0; i < CONF_NAMESERVERS_MAX; i++) ic_nameservers[i] = NONE; +} + +/* + * Initialize the DHCP/BOOTP mechanism. + */ +static inline void __init ic_bootp_init(void) +{ + ic_nameservers_predef(); dev_add_pack(&bootp_packet_type); } @@ -1379,6 +1387,7 @@ static int __init ip_auto_config(void) int retries = CONF_OPEN_RETRIES; #endif int err; + unsigned int i; #ifdef CONFIG_PROC_FS proc_net_fops_create(&init_net, "pnp", S_IRUGO, &pnp_seq_fops); @@ -1499,7 +1508,15 @@ static int __init ip_auto_config(void) &ic_servaddr, &root_server_addr, root_server_path); if (ic_dev_mtu) pr_cont(", mtu=%d", ic_dev_mtu); - pr_cont("\n"); + for (i = 0; i < CONF_NAMESERVERS_MAX; i++) + if (ic_nameservers[i] != NONE) { + pr_info(" nameserver%u=%pI4", + i, &ic_nameservers[i]); + break; + } + for (i++; i < CONF_NAMESERVERS_MAX; i++) + if (ic_nameservers[i] != NONE) + pr_cont(", nameserver%u=%pI4\n", i, &ic_nameservers[i]); #endif /* !SILENT */ return 0; @@ -1570,6 +1587,8 @@ static int __init ip_auto_config_setup(char *addrs) return 1; } + ic_nameservers_predef(); + /* Parse string for static IP assignment. 
*/ ip = addrs; while (ip && *ip) { @@ -1613,6 +1632,20 @@ static int __init ip_auto_config_setup(char *addrs) ic_enable = 0; } break; + case 7: + if (CONF_NAMESERVERS_MAX >= 1) { + ic_nameservers[0] = in_aton(ip); + if (ic_nameservers[0] == ANY) + ic_nameservers[0] = NONE; + } + break; + case 8: + if (CONF_NAMESERVERS_MAX >= 2) { + ic_nameservers[1] = in_aton(ip); + if (ic_nameservers[1] == ANY) + ic_nameservers[1] = NONE; + } + break; } } ip = cp; -- cgit v1.2.3 From 01dc52ebdf472f77cca623ca693ca24cfc0f1bbe Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 8 Oct 2012 16:29:30 -0700 Subject: oom: remove deprecated oom_adj The deprecated /proc//oom_adj is scheduled for removal this month. Signed-off-by: Davidlohr Bueso Acked-by: David Rientjes Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/ABI/obsolete/proc-pid-oom_adj | 22 ------ Documentation/filesystems/proc.txt | 22 +----- fs/proc/base.c | 117 +--------------------------- include/linux/oom.h | 11 --- include/linux/sched.h | 1 - kernel/fork.c | 1 - mm/oom_kill.c | 4 +- 7 files changed, 7 insertions(+), 171 deletions(-) delete mode 100644 Documentation/ABI/obsolete/proc-pid-oom_adj (limited to 'Documentation/filesystems') diff --git a/Documentation/ABI/obsolete/proc-pid-oom_adj b/Documentation/ABI/obsolete/proc-pid-oom_adj deleted file mode 100644 index 9a3cb88ade47..000000000000 --- a/Documentation/ABI/obsolete/proc-pid-oom_adj +++ /dev/null @@ -1,22 +0,0 @@ -What: /proc//oom_adj -When: August 2012 -Why: /proc//oom_adj allows userspace to influence the oom killer's - badness heuristic used to determine which task to kill when the kernel - is out of memory. - - The badness heuristic has since been rewritten since the introduction of - this tunable such that its meaning is deprecated. The value was - implemented as a bitshift on a score generated by the badness() - function that did not have any precise units of measure. 
With the - rewrite, the score is given as a proportion of available memory to the - task allocating pages, so using a bitshift which grows the score - exponentially is, thus, impossible to tune with fine granularity. - - A much more powerful interface, /proc//oom_score_adj, was - introduced with the oom killer rewrite that allows users to increase or - decrease the badness score linearly. This interface will replace - /proc//oom_adj. - - A warning will be emitted to the kernel log if an application uses this - deprecated interface. After it is printed once, future warnings will be - suppressed until the kernel is rebooted. diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index fb0a6aeb936c..a1793d670cd0 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -33,7 +33,7 @@ Table of Contents 2 Modifying System Parameters 3 Per-Process Parameters - 3.1 /proc//oom_adj & /proc//oom_score_adj - Adjust the oom-killer + 3.1 /proc//oom_score_adj - Adjust the oom-killer score 3.2 /proc//oom_score - Display current oom-killer score 3.3 /proc//io - Display the IO accounting fields @@ -1320,10 +1320,10 @@ of the kernel. CHAPTER 3: PER-PROCESS PARAMETERS ------------------------------------------------------------------------------ -3.1 /proc//oom_adj & /proc//oom_score_adj- Adjust the oom-killer score +3.1 /proc//oom_score_adj- Adjust the oom-killer score -------------------------------------------------------------------------------- -These file can be used to adjust the badness heuristic used to select which +This file can be used to adjust the badness heuristic used to select which process gets killed in out of memory conditions. 
The badness heuristic assigns a value to each candidate task ranging from 0 @@ -1361,22 +1361,10 @@ same system, cpuset, mempolicy, or memory controller resources to use at least equivalent to discounting 50% of the task's allowed memory from being considered as scoring against the task. -For backwards compatibility with previous kernels, /proc/<pid>/oom_adj may also -be used to tune the badness score. Its acceptable values range from -16 -(OOM_ADJUST_MIN) to +15 (OOM_ADJUST_MAX) and a special value of -17 -(OOM_DISABLE) to disable oom killing entirely for that task. Its value is -scaled linearly with /proc/<pid>/oom_score_adj. - -Writing to /proc/<pid>/oom_score_adj or /proc/<pid>/oom_adj will change the -other with its scaled value. - The value of /proc/<pid>/oom_score_adj may be reduced no lower than the last value set by a CAP_SYS_RESOURCE process. To reduce the value any lower requires CAP_SYS_RESOURCE. -NOTICE: /proc/<pid>/oom_adj is deprecated and will be removed, please see -Documentation/feature-removal-schedule.txt. - Caveat: when a parent task is selected, the oom killer will sacrifice any first generation children with separate address spaces instead, if possible. This avoids servers and important system daemons from being killed and loses the @@ -1387,9 +1375,7 @@ minimal amount of work. ------------------------------------------------------------- This file can be used to check the current score used by the oom-killer is for -any given <pid>. Use it together with /proc/<pid>/oom_adj to tune which -process should be killed in an out-of-memory situation. - +any given <pid>. 
3.3 /proc//io - Display the IO accounting fields ------------------------------------------------------- diff --git a/fs/proc/base.c b/fs/proc/base.c index d295af993677..ef5c84be66f9 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -873,111 +873,6 @@ static const struct file_operations proc_environ_operations = { .release = mem_release, }; -static ssize_t oom_adjust_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) -{ - struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); - char buffer[PROC_NUMBUF]; - size_t len; - int oom_adjust = OOM_DISABLE; - unsigned long flags; - - if (!task) - return -ESRCH; - - if (lock_task_sighand(task, &flags)) { - oom_adjust = task->signal->oom_adj; - unlock_task_sighand(task, &flags); - } - - put_task_struct(task); - - len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust); - - return simple_read_from_buffer(buf, count, ppos, buffer, len); -} - -static ssize_t oom_adjust_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct task_struct *task; - char buffer[PROC_NUMBUF]; - int oom_adjust; - unsigned long flags; - int err; - - memset(buffer, 0, sizeof(buffer)); - if (count > sizeof(buffer) - 1) - count = sizeof(buffer) - 1; - if (copy_from_user(buffer, buf, count)) { - err = -EFAULT; - goto out; - } - - err = kstrtoint(strstrip(buffer), 0, &oom_adjust); - if (err) - goto out; - if ((oom_adjust < OOM_ADJUST_MIN || oom_adjust > OOM_ADJUST_MAX) && - oom_adjust != OOM_DISABLE) { - err = -EINVAL; - goto out; - } - - task = get_proc_task(file->f_path.dentry->d_inode); - if (!task) { - err = -ESRCH; - goto out; - } - - task_lock(task); - if (!task->mm) { - err = -EINVAL; - goto err_task_lock; - } - - if (!lock_task_sighand(task, &flags)) { - err = -ESRCH; - goto err_task_lock; - } - - if (oom_adjust < task->signal->oom_adj && !capable(CAP_SYS_RESOURCE)) { - err = -EACCES; - goto err_sighand; - } - - /* - * Warn that /proc/pid/oom_adj is deprecated, see - * 
Documentation/feature-removal-schedule.txt. - */ - printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n", - current->comm, task_pid_nr(current), task_pid_nr(task), - task_pid_nr(task)); - task->signal->oom_adj = oom_adjust; - /* - * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum - * value is always attainable. - */ - if (task->signal->oom_adj == OOM_ADJUST_MAX) - task->signal->oom_score_adj = OOM_SCORE_ADJ_MAX; - else - task->signal->oom_score_adj = (oom_adjust * OOM_SCORE_ADJ_MAX) / - -OOM_DISABLE; - trace_oom_score_adj_update(task); -err_sighand: - unlock_task_sighand(task, &flags); -err_task_lock: - task_unlock(task); - put_task_struct(task); -out: - return err < 0 ? err : count; -} - -static const struct file_operations proc_oom_adjust_operations = { - .read = oom_adjust_read, - .write = oom_adjust_write, - .llseek = generic_file_llseek, -}; - static ssize_t oom_score_adj_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { @@ -1051,15 +946,7 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf, if (has_capability_noaudit(current, CAP_SYS_RESOURCE)) task->signal->oom_score_adj_min = oom_score_adj; trace_oom_score_adj_update(task); - /* - * Scale /proc/pid/oom_adj appropriately ensuring that OOM_DISABLE is - * always attainable. 
- */ - if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) - task->signal->oom_adj = OOM_DISABLE; - else - task->signal->oom_adj = (oom_score_adj * OOM_ADJUST_MAX) / - OOM_SCORE_ADJ_MAX; + err_sighand: unlock_task_sighand(task, &flags); err_task_lock: @@ -2710,7 +2597,6 @@ static const struct pid_entry tgid_base_stuff[] = { REG("cgroup", S_IRUGO, proc_cgroup_operations), #endif INF("oom_score", S_IRUGO, proc_oom_score), - REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adjust_operations), REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), #ifdef CONFIG_AUDITSYSCALL REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), @@ -3077,7 +2963,6 @@ static const struct pid_entry tid_base_stuff[] = { REG("cgroup", S_IRUGO, proc_cgroup_operations), #endif INF("oom_score", S_IRUGO, proc_oom_score), - REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adjust_operations), REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), #ifdef CONFIG_AUDITSYSCALL REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), diff --git a/include/linux/oom.h b/include/linux/oom.h index 49a3031fda50..d36a8221f58b 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -1,17 +1,6 @@ #ifndef __INCLUDE_LINUX_OOM_H #define __INCLUDE_LINUX_OOM_H -/* - * /proc//oom_adj is deprecated, see - * Documentation/feature-removal-schedule.txt. - * - * /proc//oom_adj set to -17 protects from the oom-killer - */ -#define OOM_DISABLE (-17) -/* inclusive */ -#define OOM_ADJUST_MIN (-16) -#define OOM_ADJUST_MAX 15 - /* * /proc//oom_score_adj set to OOM_SCORE_ADJ_MIN disables oom killing for * pid. 
diff --git a/include/linux/sched.h b/include/linux/sched.h index 9c5612f0374b..c2070e92a9d6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -671,7 +671,6 @@ struct signal_struct { struct rw_semaphore group_rwsem; #endif - int oom_adj; /* OOM kill score adjustment (bit shift) */ int oom_score_adj; /* OOM kill score adjustment */ int oom_score_adj_min; /* OOM kill score adjustment minimum value. * Only settable by CAP_SYS_RESOURCE. */ diff --git a/kernel/fork.c b/kernel/fork.c index ec667f797af3..972762e01024 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1056,7 +1056,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) init_rwsem(&sig->group_rwsem); #endif - sig->oom_adj = current->signal->oom_adj; sig->oom_score_adj = current->signal->oom_score_adj; sig->oom_score_adj_min = current->signal->oom_score_adj_min; diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 198600861638..79e0f3e24831 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -428,8 +428,8 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order, { task_lock(current); pr_warning("%s invoked oom-killer: gfp_mask=0x%x, order=%d, " - "oom_adj=%d, oom_score_adj=%d\n", - current->comm, gfp_mask, order, current->signal->oom_adj, + "oom_score_adj=%d\n", + current->comm, gfp_mask, order, current->signal->oom_score_adj); cpuset_print_task_mems_allowed(current); task_unlock(current); -- cgit v1.2.3