diff options
Diffstat (limited to 'fs/xfs/libxfs')
26 files changed, 770 insertions, 189 deletions
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index 8cf73fe4338e..9331f3516afa 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -333,6 +333,11 @@ xfs_agiblock_init( } for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO); + if (xfs_sb_version_hasinobtcounts(&mp->m_sb)) { + agi->agi_iblocks = cpu_to_be32(1); + if (xfs_sb_version_hasfinobt(&mp->m_sb)) + agi->agi_fblocks = cpu_to_be32(1); + } } typedef void (*aghdr_init_work_f)(struct xfs_mount *mp, struct xfs_buf *bp, diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 2e055c079f39..fd8e6418a0d3 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -428,7 +428,7 @@ xfs_attr_set( */ if (XFS_IFORK_Q(dp) == 0) { int sf_size = sizeof(struct xfs_attr_sf_hdr) + - XFS_ATTR_SF_ENTSIZE_BYNAME(args->namelen, + xfs_attr_sf_entsize_byname(args->namelen, args->valuelen); error = xfs_bmap_add_attrfork(dp, sf_size, rsvd); @@ -523,6 +523,14 @@ out_trans_cancel: * External routines when attribute list is inside the inode *========================================================================*/ +static inline int xfs_attr_sf_totsize(struct xfs_inode *dp) +{ + struct xfs_attr_shortform *sf; + + sf = (struct xfs_attr_shortform *)dp->i_afp->if_u1.if_data; + return be16_to_cpu(sf->hdr.totsize); +} + /* * Add a name to the shortform attribute list structure * This is the external routine. @@ -555,8 +563,8 @@ xfs_attr_shortform_addname(xfs_da_args_t *args) args->valuelen >= XFS_ATTR_SF_ENTSIZE_MAX) return -ENOSPC; - newsize = XFS_ATTR_SF_TOTSIZE(args->dp); - newsize += XFS_ATTR_SF_ENTSIZE_BYNAME(args->namelen, args->valuelen); + newsize = xfs_attr_sf_totsize(args->dp); + newsize += xfs_attr_sf_entsize_byname(args->namelen, args->valuelen); forkoff = xfs_attr_shortform_bytesfit(args->dp, newsize); if (!forkoff) diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 305d4bc07337..bb128db220ac 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -684,9 +684,9 @@ xfs_attr_sf_findname( sf = (struct xfs_attr_shortform *)args->dp->i_afp->if_u1.if_data; sfe = &sf->list[0]; end = sf->hdr.count; - for (i = 0; i < end; sfe = XFS_ATTR_SF_NEXTENTRY(sfe), + for (i = 0; i < end; sfe = xfs_attr_sf_nextentry(sfe), base += size, i++) { - size = XFS_ATTR_SF_ENTSIZE(sfe); + size = xfs_attr_sf_entsize(sfe); if (!xfs_attr_match(args, sfe->namelen, sfe->nameval, sfe->flags)) continue; @@ -728,15 +728,15 @@ xfs_attr_shortform_add( ifp = dp->i_afp; ASSERT(ifp->if_flags & XFS_IFINLINE); - sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data; + sf = (struct xfs_attr_shortform *)ifp->if_u1.if_data; if (xfs_attr_sf_findname(args, &sfe, NULL) == -EEXIST) ASSERT(0); offset = (char *)sfe - (char *)sf; - size = XFS_ATTR_SF_ENTSIZE_BYNAME(args->namelen, args->valuelen); + size = xfs_attr_sf_entsize_byname(args->namelen, args->valuelen); xfs_idata_realloc(dp, size, XFS_ATTR_FORK); - sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data; - sfe = (xfs_attr_sf_entry_t *)((char *)sf + offset); + sf = (struct xfs_attr_shortform *)ifp->if_u1.if_data; + sfe = (struct xfs_attr_sf_entry *)((char *)sf + offset); sfe->namelen = args->namelen; sfe->valuelen = args->valuelen; @@ -787,12 +787,12 @@ xfs_attr_shortform_remove( dp = args->dp; mp = dp->i_mount; - sf = (xfs_attr_shortform_t *)dp->i_afp->if_u1.if_data; + sf = (struct xfs_attr_shortform *)dp->i_afp->if_u1.if_data; error = xfs_attr_sf_findname(args, &sfe, &base); if (error != -EEXIST) return error; - size = XFS_ATTR_SF_ENTSIZE(sfe); + size = xfs_attr_sf_entsize(sfe); /* * Fix up the attribute fork data, covering the hole @@ -837,8 +837,8 @@ xfs_attr_shortform_remove( int xfs_attr_shortform_lookup(xfs_da_args_t *args) { - xfs_attr_shortform_t *sf; - xfs_attr_sf_entry_t *sfe; + struct xfs_attr_shortform *sf; + struct xfs_attr_sf_entry *sfe; int i; struct xfs_ifork *ifp; @@ -846,10 +846,10 @@ xfs_attr_shortform_lookup(xfs_da_args_t *args) ifp = args->dp->i_afp; ASSERT(ifp->if_flags & XFS_IFINLINE); - sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data; + sf = (struct xfs_attr_shortform *)ifp->if_u1.if_data; sfe = &sf->list[0]; for (i = 0; i < sf->hdr.count; - sfe = XFS_ATTR_SF_NEXTENTRY(sfe), i++) { + sfe = xfs_attr_sf_nextentry(sfe), i++) { if (xfs_attr_match(args, sfe->namelen, sfe->nameval, sfe->flags)) return -EEXIST; @@ -873,10 +873,10 @@ xfs_attr_shortform_getvalue( int i; ASSERT(args->dp->i_afp->if_flags == XFS_IFINLINE); - sf = (xfs_attr_shortform_t *)args->dp->i_afp->if_u1.if_data; + sf = (struct xfs_attr_shortform *)args->dp->i_afp->if_u1.if_data; sfe = &sf->list[0]; for (i = 0; i < sf->hdr.count; - sfe = XFS_ATTR_SF_NEXTENTRY(sfe), i++) { + sfe = xfs_attr_sf_nextentry(sfe), i++) { if (xfs_attr_match(args, sfe->namelen, sfe->nameval, sfe->flags)) return xfs_attr_copy_value(args, @@ -908,12 +908,12 @@ xfs_attr_shortform_to_leaf( dp = args->dp; ifp = dp->i_afp; - sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data; + sf = (struct xfs_attr_shortform *)ifp->if_u1.if_data; size = be16_to_cpu(sf->hdr.totsize); tmpbuffer = kmem_alloc(size, 0); ASSERT(tmpbuffer != NULL); memcpy(tmpbuffer, ifp->if_u1.if_data, size); - sf = (xfs_attr_shortform_t *)tmpbuffer; + sf = (struct xfs_attr_shortform *)tmpbuffer; xfs_idata_realloc(dp, -size, XFS_ATTR_FORK); xfs_bmap_local_to_extents_empty(args->trans, dp, XFS_ATTR_FORK); @@ -951,7 +951,7 @@ xfs_attr_shortform_to_leaf( ASSERT(error != -ENOSPC); if (error) goto out; - sfe = XFS_ATTR_SF_NEXTENTRY(sfe); + sfe = xfs_attr_sf_nextentry(sfe); } error = 0; *leaf_bp = bp; @@ -992,9 +992,8 @@ xfs_attr_shortform_allfit( return 0; if (be16_to_cpu(name_loc->valuelen) >= XFS_ATTR_SF_ENTSIZE_MAX) return 0; - bytes += sizeof(struct xfs_attr_sf_entry) - 1 - + name_loc->namelen - + be16_to_cpu(name_loc->valuelen); + bytes += xfs_attr_sf_entsize_byname(name_loc->namelen, + be16_to_cpu(name_loc->valuelen)); } if ((dp->i_mount->m_flags & XFS_MOUNT_ATTR2) && (dp->i_df.if_format != XFS_DINODE_FMT_BTREE) && @@ -1039,7 +1038,7 @@ xfs_attr_shortform_verify( * xfs_attr_sf_entry is defined with a 1-byte variable * array at the end, so we must subtract that off. */ - if (((char *)sfep + sizeof(*sfep) - 1) >= endp) + if (((char *)sfep + sizeof(*sfep)) >= endp) return __this_address; /* Don't allow names with known bad length. */ @@ -1051,7 +1050,7 @@ xfs_attr_shortform_verify( * within the data buffer. The next entry starts after the * name component, so nextentry is an acceptable test. */ - next_sfep = XFS_ATTR_SF_NEXTENTRY(sfep); + next_sfep = xfs_attr_sf_nextentry(sfep); if ((char *)next_sfep > endp) return __this_address; diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index 3f80cede7406..48d8e9caf86f 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -96,8 +96,6 @@ xfs_attr3_rmt_verify( { struct xfs_attr3_rmt_hdr *rmt = ptr; - if (!xfs_sb_version_hascrc(&mp->m_sb)) - return __this_address; if (!xfs_verify_magic(bp, rmt->rm_magic)) return __this_address; if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_meta_uuid)) diff --git a/fs/xfs/libxfs/xfs_attr_sf.h b/fs/xfs/libxfs/xfs_attr_sf.h index bb004fb7944a..37578b369d9b 100644 --- a/fs/xfs/libxfs/xfs_attr_sf.h +++ b/fs/xfs/libxfs/xfs_attr_sf.h @@ -13,7 +13,6 @@ * to fit into the literal area of the inode. */ typedef struct xfs_attr_sf_hdr xfs_attr_sf_hdr_t; -typedef struct xfs_attr_sf_entry xfs_attr_sf_entry_t; /* * We generate this then sort it, attr_list() must return things in hash-order. @@ -27,16 +26,26 @@ typedef struct xfs_attr_sf_sort { unsigned char *name; /* name value, pointer into buffer */ } xfs_attr_sf_sort_t; -#define XFS_ATTR_SF_ENTSIZE_BYNAME(nlen,vlen) /* space name/value uses */ \ - (((int)sizeof(xfs_attr_sf_entry_t)-1 + (nlen)+(vlen))) #define XFS_ATTR_SF_ENTSIZE_MAX /* max space for name&value */ \ ((1 << (NBBY*(int)sizeof(uint8_t))) - 1) -#define XFS_ATTR_SF_ENTSIZE(sfep) /* space an entry uses */ \ - ((int)sizeof(xfs_attr_sf_entry_t)-1 + (sfep)->namelen+(sfep)->valuelen) -#define XFS_ATTR_SF_NEXTENTRY(sfep) /* next entry in struct */ \ - ((xfs_attr_sf_entry_t *)((char *)(sfep) + XFS_ATTR_SF_ENTSIZE(sfep))) -#define XFS_ATTR_SF_TOTSIZE(dp) /* total space in use */ \ - (be16_to_cpu(((xfs_attr_shortform_t *) \ - ((dp)->i_afp->if_u1.if_data))->hdr.totsize)) + +/* space name/value uses */ +static inline int xfs_attr_sf_entsize_byname(uint8_t nlen, uint8_t vlen) +{ + return sizeof(struct xfs_attr_sf_entry) + nlen + vlen; +} + +/* space an entry uses */ +static inline int xfs_attr_sf_entsize(struct xfs_attr_sf_entry *sfep) +{ + return struct_size(sfep, nameval, sfep->namelen + sfep->valuelen); +} + +/* next entry in struct */ +static inline struct xfs_attr_sf_entry * +xfs_attr_sf_nextentry(struct xfs_attr_sf_entry *sfep) +{ + return (void *)sfep + xfs_attr_sf_entsize(sfep); +} #endif /* __XFS_ATTR_SF_H__ */ diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 1b0a01b06a05..d9a692484eae 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -5046,20 +5046,25 @@ xfs_bmap_del_extent_real( flags = XFS_ILOG_CORE; if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) { - xfs_fsblock_t bno; xfs_filblks_t len; xfs_extlen_t mod; - bno = div_u64_rem(del->br_startblock, mp->m_sb.sb_rextsize, - &mod); - ASSERT(mod == 0); len = div_u64_rem(del->br_blockcount, mp->m_sb.sb_rextsize, &mod); ASSERT(mod == 0); - error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len); - if (error) - goto done; + if (!(bflags & XFS_BMAPI_REMAP)) { + xfs_fsblock_t bno; + + bno = div_u64_rem(del->br_startblock, + mp->m_sb.sb_rextsize, &mod); + ASSERT(mod == 0); + + error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len); + if (error) + goto done; + } + do_fx = 0; nblks = len * mp->m_sb.sb_rextsize; qfield = XFS_TRANS_DQ_RTBCOUNT; diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h index 059ac108b1b3..b876b44c0204 100644 --- a/fs/xfs/libxfs/xfs_da_format.h +++ b/fs/xfs/libxfs/xfs_da_format.h @@ -15,8 +15,8 @@ */ #define XFS_DA_NODE_MAGIC 0xfebe /* magic number: non-leaf blocks */ #define XFS_ATTR_LEAF_MAGIC 0xfbee /* magic number: attribute leaf blks */ -#define XFS_DIR2_LEAF1_MAGIC 0xd2f1 /* magic number: v2 dirlf single blks */ -#define XFS_DIR2_LEAFN_MAGIC 0xd2ff /* magic number: v2 dirlf multi blks */ +#define XFS_DIR2_LEAF1_MAGIC 0xd2f1 /* magic number: v2 dirlf single blks */ +#define XFS_DIR2_LEAFN_MAGIC 0xd2ff /* magic number: v2 dirlf multi blks */ typedef struct xfs_da_blkinfo { __be32 forw; /* previous block in list */ @@ -35,8 +35,8 @@ typedef struct xfs_da_blkinfo { */ #define XFS_DA3_NODE_MAGIC 0x3ebe /* magic number: non-leaf blocks */ #define XFS_ATTR3_LEAF_MAGIC 0x3bee /* magic number: attribute leaf blks */ -#define XFS_DIR3_LEAF1_MAGIC 0x3df1 /* magic number: v2 dirlf single blks */ -#define XFS_DIR3_LEAFN_MAGIC 0x3dff /* magic number: v2 dirlf multi blks */ +#define XFS_DIR3_LEAF1_MAGIC 0x3df1 /* magic number: v3 dirlf single blks */ +#define XFS_DIR3_LEAFN_MAGIC 0x3dff /* magic number: v3 dirlf multi blks */ struct xfs_da3_blkinfo { /* @@ -61,7 +61,7 @@ struct xfs_da3_blkinfo { * Since we have duplicate keys, use a binary search but always follow * all match in the block, not just the first match found. */ -#define XFS_DA_NODE_MAXDEPTH 5 /* max depth of Btree */ +#define XFS_DA_NODE_MAXDEPTH 5 /* max depth of Btree */ typedef struct xfs_da_node_hdr { struct xfs_da_blkinfo info; /* block type, links, etc. */ @@ -579,7 +579,7 @@ xfs_dir2_block_leaf_p(struct xfs_dir2_block_tail *btp) /* * Entries are packed toward the top as tight as possible. */ -typedef struct xfs_attr_shortform { +struct xfs_attr_shortform { struct xfs_attr_sf_hdr { /* constant-structure header block */ __be16 totsize; /* total bytes in shortform list */ __u8 count; /* count of active entries */ @@ -589,9 +589,9 @@ typedef struct xfs_attr_shortform { uint8_t namelen; /* actual length of name (no NULL) */ uint8_t valuelen; /* actual length of value (no NULL) */ uint8_t flags; /* flags bits (see xfs_attr_leaf.h) */ - uint8_t nameval[1]; /* name & value bytes concatenated */ + uint8_t nameval[]; /* name & value bytes concatenated */ } list[1]; /* variable sized array */ -} xfs_attr_shortform_t; +}; typedef struct xfs_attr_leaf_map { /* RLE map of free bytes */ __be16 base; /* base of free region */ @@ -746,14 +746,14 @@ xfs_attr3_leaf_name_local(xfs_attr_leafblock_t *leafp, int idx) */ static inline int xfs_attr_leaf_entsize_remote(int nlen) { - return ((uint)sizeof(xfs_attr_leaf_name_remote_t) - 1 + (nlen) + \ - XFS_ATTR_LEAF_NAME_ALIGN - 1) & ~(XFS_ATTR_LEAF_NAME_ALIGN - 1); + return round_up(sizeof(struct xfs_attr_leaf_name_remote) - 1 + + nlen, XFS_ATTR_LEAF_NAME_ALIGN); } static inline int xfs_attr_leaf_entsize_local(int nlen, int vlen) { - return ((uint)sizeof(xfs_attr_leaf_name_local_t) - 1 + (nlen) + (vlen) + - XFS_ATTR_LEAF_NAME_ALIGN - 1) & ~(XFS_ATTR_LEAF_NAME_ALIGN - 1); + return round_up(sizeof(struct xfs_attr_leaf_name_local) - 1 + + nlen + vlen, XFS_ATTR_LEAF_NAME_ALIGN); } static inline int xfs_attr_leaf_entsize_local_max(int bsize) diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c index d8f586256add..eff4a127188e 100644 --- a/fs/xfs/libxfs/xfs_defer.c +++ b/fs/xfs/libxfs/xfs_defer.c @@ -16,6 +16,8 @@ #include "xfs_inode.h" #include "xfs_inode_item.h" #include "xfs_trace.h" +#include "xfs_icache.h" +#include "xfs_log.h" /* * Deferred Operations in XFS @@ -186,8 +188,9 @@ xfs_defer_create_intent( { const struct xfs_defer_op_type *ops = defer_op_types[dfp->dfp_type]; - dfp->dfp_intent = ops->create_intent(tp, &dfp->dfp_work, - dfp->dfp_count, sort); + if (!dfp->dfp_intent) + dfp->dfp_intent = ops->create_intent(tp, &dfp->dfp_work, + dfp->dfp_count, sort); } /* @@ -312,22 +315,6 @@ xfs_defer_trans_roll( } /* - * Reset an already used dfops after finish. - */ -static void -xfs_defer_reset( - struct xfs_trans *tp) -{ - ASSERT(list_empty(&tp->t_dfops)); - - /* - * Low mode state transfers across transaction rolls to mirror dfops - * lifetime. Clear it now that dfops is reset. - */ - tp->t_flags &= ~XFS_TRANS_LOWMODE; -} - -/* * Free up any items left in the list. */ static void @@ -360,6 +347,58 @@ xfs_defer_cancel_list( } /* + * Prevent a log intent item from pinning the tail of the log by logging a + * done item to release the intent item; and then log a new intent item. + * The caller should provide a fresh transaction and roll it after we're done. + */ +static int +xfs_defer_relog( + struct xfs_trans **tpp, + struct list_head *dfops) +{ + struct xlog *log = (*tpp)->t_mountp->m_log; + struct xfs_defer_pending *dfp; + xfs_lsn_t threshold_lsn = NULLCOMMITLSN; + + + ASSERT((*tpp)->t_flags & XFS_TRANS_PERM_LOG_RES); + + list_for_each_entry(dfp, dfops, dfp_list) { + /* + * If the log intent item for this deferred op is not a part of + * the current log checkpoint, relog the intent item to keep + * the log tail moving forward. We're ok with this being racy + * because an incorrect decision means we'll be a little slower + * at pushing the tail. + */ + if (dfp->dfp_intent == NULL || + xfs_log_item_in_current_chkpt(dfp->dfp_intent)) + continue; + + /* + * Figure out where we need the tail to be in order to maintain + * the minimum required free space in the log. Only sample + * the log threshold once per call. + */ + if (threshold_lsn == NULLCOMMITLSN) { + threshold_lsn = xlog_grant_push_threshold(log, 0); + if (threshold_lsn == NULLCOMMITLSN) + break; + } + if (XFS_LSN_CMP(dfp->dfp_intent->li_lsn, threshold_lsn) >= 0) + continue; + + trace_xfs_defer_relog_intent((*tpp)->t_mountp, dfp); + XFS_STATS_INC((*tpp)->t_mountp, defer_relog); + dfp->dfp_intent = xfs_trans_item_relog(dfp->dfp_intent, *tpp); + } + + if ((*tpp)->t_flags & XFS_TRANS_DIRTY) + return xfs_defer_trans_roll(tpp); + return 0; +} + +/* * Log an intent-done item for the first pending intent, and finish the work * items. */ @@ -390,6 +429,7 @@ xfs_defer_finish_one( list_add(li, &dfp->dfp_work); dfp->dfp_count++; dfp->dfp_done = NULL; + dfp->dfp_intent = NULL; xfs_defer_create_intent(tp, dfp, false); } @@ -428,13 +468,27 @@ xfs_defer_finish_noroll( /* Until we run out of pending work to finish... */ while (!list_empty(&dop_pending) || !list_empty(&(*tp)->t_dfops)) { + /* + * Deferred items that are created in the process of finishing + * other deferred work items should be queued at the head of + * the pending list, which puts them ahead of the deferred work + * that was created by the caller. This keeps the number of + * pending work items to a minimum, which decreases the amount + * of time that any one intent item can stick around in memory, + * pinning the log tail. + */ xfs_defer_create_intents(*tp); - list_splice_tail_init(&(*tp)->t_dfops, &dop_pending); + list_splice_init(&(*tp)->t_dfops, &dop_pending); error = xfs_defer_trans_roll(tp); if (error) goto out_shutdown; + /* Possibly relog intent items to keep the log moving. */ + error = xfs_defer_relog(tp, &dop_pending); + if (error) + goto out_shutdown; + dfp = list_first_entry(&dop_pending, struct xfs_defer_pending, dfp_list); error = xfs_defer_finish_one(*tp, dfp); @@ -475,7 +529,10 @@ xfs_defer_finish( return error; } } - xfs_defer_reset(*tp); + + /* Reset LOWMODE now that we've finished all the dfops. */ + ASSERT(list_empty(&(*tp)->t_dfops)); + (*tp)->t_flags &= ~XFS_TRANS_LOWMODE; return 0; } @@ -549,6 +606,139 @@ xfs_defer_move( * that behavior. */ dtp->t_flags |= (stp->t_flags & XFS_TRANS_LOWMODE); + stp->t_flags &= ~XFS_TRANS_LOWMODE; +} + +/* + * Prepare a chain of fresh deferred ops work items to be completed later. Log + * recovery requires the ability to put off until later the actual finishing + * work so that it can process unfinished items recovered from the log in + * correct order. + * + * Create and log intent items for all the work that we're capturing so that we + * can be assured that the items will get replayed if the system goes down + * before log recovery gets a chance to finish the work it put off. The entire + * deferred ops state is transferred to the capture structure and the + * transaction is then ready for the caller to commit it. If there are no + * intent items to capture, this function returns NULL. + * + * If capture_ip is not NULL, the capture structure will obtain an extra + * reference to the inode. + */ +static struct xfs_defer_capture * +xfs_defer_ops_capture( + struct xfs_trans *tp, + struct xfs_inode *capture_ip) +{ + struct xfs_defer_capture *dfc; + + if (list_empty(&tp->t_dfops)) + return NULL; + + /* Create an object to capture the defer ops. */ + dfc = kmem_zalloc(sizeof(*dfc), KM_NOFS); + INIT_LIST_HEAD(&dfc->dfc_list); + INIT_LIST_HEAD(&dfc->dfc_dfops); + + xfs_defer_create_intents(tp); + + /* Move the dfops chain and transaction state to the capture struct. */ + list_splice_init(&tp->t_dfops, &dfc->dfc_dfops); + dfc->dfc_tpflags = tp->t_flags & XFS_TRANS_LOWMODE; + tp->t_flags &= ~XFS_TRANS_LOWMODE; + + /* Capture the remaining block reservations along with the dfops. */ + dfc->dfc_blkres = tp->t_blk_res - tp->t_blk_res_used; + dfc->dfc_rtxres = tp->t_rtx_res - tp->t_rtx_res_used; + + /* Preserve the log reservation size. */ + dfc->dfc_logres = tp->t_log_res; + + /* + * Grab an extra reference to this inode and attach it to the capture + * structure. + */ + if (capture_ip) { + ihold(VFS_I(capture_ip)); + dfc->dfc_capture_ip = capture_ip; + } + + return dfc; +} + +/* Release all resources that we used to capture deferred ops. */ +void +xfs_defer_ops_release( + struct xfs_mount *mp, + struct xfs_defer_capture *dfc) +{ + xfs_defer_cancel_list(mp, &dfc->dfc_dfops); + if (dfc->dfc_capture_ip) + xfs_irele(dfc->dfc_capture_ip); + kmem_free(dfc); +} + +/* + * Capture any deferred ops and commit the transaction. This is the last step + * needed to finish a log intent item that we recovered from the log. If any + * of the deferred ops operate on an inode, the caller must pass in that inode + * so that the reference can be transferred to the capture structure. The + * caller must hold ILOCK_EXCL on the inode, and must unlock it before calling + * xfs_defer_ops_continue. + */ +int +xfs_defer_ops_capture_and_commit( + struct xfs_trans *tp, + struct xfs_inode *capture_ip, + struct list_head *capture_list) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_defer_capture *dfc; + int error; + + ASSERT(!capture_ip || xfs_isilocked(capture_ip, XFS_ILOCK_EXCL)); + + /* If we don't capture anything, commit transaction and exit. */ + dfc = xfs_defer_ops_capture(tp, capture_ip); + if (!dfc) + return xfs_trans_commit(tp); + + /* Commit the transaction and add the capture structure to the list. */ + error = xfs_trans_commit(tp); + if (error) { + xfs_defer_ops_release(mp, dfc); + return error; + } + + list_add_tail(&dfc->dfc_list, capture_list); + return 0; +} + +/* + * Attach a chain of captured deferred ops to a new transaction and free the + * capture structure. If an inode was captured, it will be passed back to the + * caller with ILOCK_EXCL held and joined to the transaction with lockflags==0. + * The caller now owns the inode reference. + */ +void +xfs_defer_ops_continue( + struct xfs_defer_capture *dfc, + struct xfs_trans *tp, + struct xfs_inode **captured_ipp) +{ + ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); + ASSERT(!(tp->t_flags & XFS_TRANS_DIRTY)); + + /* Lock and join the captured inode to the new transaction. */ + if (dfc->dfc_capture_ip) { + xfs_ilock(dfc->dfc_capture_ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, dfc->dfc_capture_ip, 0); + } + *captured_ipp = dfc->dfc_capture_ip; + + /* Move captured dfops chain and state to the transaction. */ + list_splice_init(&dfc->dfc_dfops, &tp->t_dfops); + tp->t_flags |= dfc->dfc_tpflags; - xfs_defer_reset(stp); + kmem_free(dfc); } diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h index 6b2ca580f2b0..05472f71fffe 100644 --- a/fs/xfs/libxfs/xfs_defer.h +++ b/fs/xfs/libxfs/xfs_defer.h @@ -8,6 +8,7 @@ struct xfs_btree_cur; struct xfs_defer_op_type; +struct xfs_defer_capture; /* * Header for deferred operation list. @@ -63,4 +64,40 @@ extern const struct xfs_defer_op_type xfs_rmap_update_defer_type; extern const struct xfs_defer_op_type xfs_extent_free_defer_type; extern const struct xfs_defer_op_type xfs_agfl_free_defer_type; +/* + * This structure enables a dfops user to detach the chain of deferred + * operations from a transaction so that they can be continued later. + */ +struct xfs_defer_capture { + /* List of other capture structures. */ + struct list_head dfc_list; + + /* Deferred ops state saved from the transaction. */ + struct list_head dfc_dfops; + unsigned int dfc_tpflags; + + /* Block reservations for the data and rt devices. */ + unsigned int dfc_blkres; + unsigned int dfc_rtxres; + + /* Log reservation saved from the transaction. */ + unsigned int dfc_logres; + + /* + * An inode reference that must be maintained to complete the deferred + * work. + */ + struct xfs_inode *dfc_capture_ip; +}; + +/* + * Functions to capture a chain of deferred operations and continue them later. + * This doesn't normally happen except log recovery. + */ +int xfs_defer_ops_capture_and_commit(struct xfs_trans *tp, + struct xfs_inode *capture_ip, struct list_head *capture_list); +void xfs_defer_ops_continue(struct xfs_defer_capture *d, struct xfs_trans *tp, + struct xfs_inode **captured_ipp); +void xfs_defer_ops_release(struct xfs_mount *mp, struct xfs_defer_capture *d); + #endif /* __XFS_DEFER_H__ */ diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c index 5a2db00b9d5f..6766417d5ba4 100644 --- a/fs/xfs/libxfs/xfs_dquot_buf.c +++ b/fs/xfs/libxfs/xfs_dquot_buf.c @@ -69,6 +69,13 @@ xfs_dquot_verify( ddq_type != XFS_DQTYPE_GROUP) return __this_address; + if ((ddq->d_type & XFS_DQTYPE_BIGTIME) && + !xfs_sb_version_hasbigtime(&mp->m_sb)) + return __this_address; + + if ((ddq->d_type & XFS_DQTYPE_BIGTIME) && !ddq->d_id) + return __this_address; + if (id != -1 && id != be32_to_cpu(ddq->d_id)) return __this_address; @@ -288,3 +295,31 @@ const struct xfs_buf_ops xfs_dquot_buf_ra_ops = { .verify_read = xfs_dquot_buf_readahead_verify, .verify_write = xfs_dquot_buf_write_verify, }; + +/* Convert an on-disk timer value into an incore timer value. */ +time64_t +xfs_dquot_from_disk_ts( + struct xfs_disk_dquot *ddq, + __be32 dtimer) +{ + uint32_t t = be32_to_cpu(dtimer); + + if (t != 0 && (ddq->d_type & XFS_DQTYPE_BIGTIME)) + return xfs_dq_bigtime_to_unix(t); + + return t; +} + +/* Convert an incore timer value into an on-disk timer value. */ +__be32 +xfs_dquot_to_disk_ts( + struct xfs_dquot *dqp, + time64_t timer) +{ + uint32_t t = timer; + + if (timer != 0 && (dqp->q_type & XFS_DQTYPE_BIGTIME)) + t = xfs_dq_unix_to_bigtime(timer); + + return cpu_to_be32(t); +} diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 31b7ece985bb..dd764da08f6f 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -449,10 +449,12 @@ xfs_sb_has_compat_feature( #define XFS_SB_FEAT_RO_COMPAT_FINOBT (1 << 0) /* free inode btree */ #define XFS_SB_FEAT_RO_COMPAT_RMAPBT (1 << 1) /* reverse map btree */ #define XFS_SB_FEAT_RO_COMPAT_REFLINK (1 << 2) /* reflinked files */ +#define XFS_SB_FEAT_RO_COMPAT_INOBTCNT (1 << 3) /* inobt block counts */ #define XFS_SB_FEAT_RO_COMPAT_ALL \ (XFS_SB_FEAT_RO_COMPAT_FINOBT | \ XFS_SB_FEAT_RO_COMPAT_RMAPBT | \ - XFS_SB_FEAT_RO_COMPAT_REFLINK) + XFS_SB_FEAT_RO_COMPAT_REFLINK| \ + XFS_SB_FEAT_RO_COMPAT_INOBTCNT) #define XFS_SB_FEAT_RO_COMPAT_UNKNOWN ~XFS_SB_FEAT_RO_COMPAT_ALL static inline bool xfs_sb_has_ro_compat_feature( @@ -465,10 +467,12 @@ xfs_sb_has_ro_compat_feature( #define XFS_SB_FEAT_INCOMPAT_FTYPE (1 << 0) /* filetype in dirent */ #define XFS_SB_FEAT_INCOMPAT_SPINODES (1 << 1) /* sparse inode chunks */ #define XFS_SB_FEAT_INCOMPAT_META_UUID (1 << 2) /* metadata UUID */ +#define XFS_SB_FEAT_INCOMPAT_BIGTIME (1 << 3) /* large timestamps */ #define XFS_SB_FEAT_INCOMPAT_ALL \ (XFS_SB_FEAT_INCOMPAT_FTYPE| \ XFS_SB_FEAT_INCOMPAT_SPINODES| \ - XFS_SB_FEAT_INCOMPAT_META_UUID) + XFS_SB_FEAT_INCOMPAT_META_UUID| \ + XFS_SB_FEAT_INCOMPAT_BIGTIME) #define XFS_SB_FEAT_INCOMPAT_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_ALL static inline bool @@ -563,6 +567,23 @@ static inline bool xfs_sb_version_hasreflink(struct xfs_sb *sbp) (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_REFLINK); } +static inline bool xfs_sb_version_hasbigtime(struct xfs_sb *sbp) +{ + return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 && + (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_BIGTIME); +} + +/* + * Inode btree block counter. We record the number of inobt and finobt blocks + * in the AGI header so that we can skip the finobt walk at mount time when + * setting up per-AG reservations. + */ +static inline bool xfs_sb_version_hasinobtcounts(struct xfs_sb *sbp) +{ + return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 && + (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_INOBTCNT); +} + /* * end of superblock version macros */ @@ -765,6 +786,9 @@ typedef struct xfs_agi { __be32 agi_free_root; /* root of the free inode btree */ __be32 agi_free_level;/* levels in free inode btree */ + __be32 agi_iblocks; /* inobt blocks used */ + __be32 agi_fblocks; /* finobt blocks used */ + /* structure must be padded to 64 bit alignment */ } xfs_agi_t; @@ -785,7 +809,8 @@ typedef struct xfs_agi { #define XFS_AGI_ALL_BITS_R1 ((1 << XFS_AGI_NUM_BITS_R1) - 1) #define XFS_AGI_FREE_ROOT (1 << 11) #define XFS_AGI_FREE_LEVEL (1 << 12) -#define XFS_AGI_NUM_BITS_R2 13 +#define XFS_AGI_IBLOCKS (1 << 13) /* both inobt/finobt block counters */ +#define XFS_AGI_NUM_BITS_R2 14 /* disk block (xfs_daddr_t) in the AG */ #define XFS_AGI_DADDR(mp) ((xfs_daddr_t)(2 << (mp)->m_sectbb_log)) @@ -831,10 +856,87 @@ struct xfs_agfl { ASSERT(xfs_daddr_to_agno(mp, d) == \ xfs_daddr_to_agno(mp, (d) + (len) - 1))) -typedef struct xfs_timestamp { +/* + * XFS Timestamps + * ============== + * + * Traditional ondisk inode timestamps consist of signed 32-bit counters for + * seconds and nanoseconds; time zero is the Unix epoch, Jan 1 00:00:00 UTC + * 1970, which means that the timestamp epoch is the same as the Unix epoch. + * Therefore, the ondisk min and max defined here can be used directly to + * constrain the incore timestamps on a Unix system. Note that we actually + * encode a __be64 value on disk. + * + * When the bigtime feature is enabled, ondisk inode timestamps become an + * unsigned 64-bit nanoseconds counter. This means that the bigtime inode + * timestamp epoch is the start of the classic timestamp range, which is + * Dec 31 20:45:52 UTC 1901. Because the epochs are not the same, callers + * /must/ use the bigtime conversion functions when encoding and decoding raw + * timestamps. + */ +typedef __be64 xfs_timestamp_t; + +/* Legacy timestamp encoding format. */ +struct xfs_legacy_timestamp { __be32 t_sec; /* timestamp seconds */ __be32 t_nsec; /* timestamp nanoseconds */ -} xfs_timestamp_t; +}; + +/* + * Smallest possible ondisk seconds value with traditional timestamps. This + * corresponds exactly with the incore timestamp Dec 13 20:45:52 UTC 1901. + */ +#define XFS_LEGACY_TIME_MIN ((int64_t)S32_MIN) + +/* + * Largest possible ondisk seconds value with traditional timestamps. This + * corresponds exactly with the incore timestamp Jan 19 03:14:07 UTC 2038. + */ +#define XFS_LEGACY_TIME_MAX ((int64_t)S32_MAX) + +/* + * Smallest possible ondisk seconds value with bigtime timestamps. This + * corresponds (after conversion to a Unix timestamp) with the traditional + * minimum timestamp of Dec 13 20:45:52 UTC 1901. + */ +#define XFS_BIGTIME_TIME_MIN ((int64_t)0) + +/* + * Largest supported ondisk seconds value with bigtime timestamps. This + * corresponds (after conversion to a Unix timestamp) with an incore timestamp + * of Jul 2 20:20:24 UTC 2486. + * + * We round down the ondisk limit so that the bigtime quota and inode max + * timestamps will be the same. + */ +#define XFS_BIGTIME_TIME_MAX ((int64_t)((-1ULL / NSEC_PER_SEC) & ~0x3ULL)) + +/* + * Bigtime epoch is set exactly to the minimum time value that a traditional + * 32-bit timestamp can represent when using the Unix epoch as a reference. + * Hence the Unix epoch is at a fixed offset into the supported bigtime + * timestamp range. + * + * The bigtime epoch also matches the minimum value an on-disk 32-bit XFS + * timestamp can represent so we will not lose any fidelity in converting + * to/from unix and bigtime timestamps. + * + * The following conversion factor converts a seconds counter from the Unix + * epoch to the bigtime epoch. + */ +#define XFS_BIGTIME_EPOCH_OFFSET (-(int64_t)S32_MIN) + +/* Convert a timestamp from the Unix epoch to the bigtime epoch. */ +static inline uint64_t xfs_unix_to_bigtime(time64_t unix_seconds) +{ + return (uint64_t)unix_seconds + XFS_BIGTIME_EPOCH_OFFSET; +} + +/* Convert a timestamp from the bigtime epoch to the Unix epoch. */ +static inline time64_t xfs_bigtime_to_unix(uint64_t ondisk_seconds) +{ + return (time64_t)ondisk_seconds - XFS_BIGTIME_EPOCH_OFFSET; +} /* * On-disk inode structure. @@ -1061,12 +1163,22 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev) #define XFS_DIFLAG2_DAX_BIT 0 /* use DAX for this inode */ #define XFS_DIFLAG2_REFLINK_BIT 1 /* file's blocks may be shared */ #define XFS_DIFLAG2_COWEXTSIZE_BIT 2 /* copy on write extent size hint */ +#define XFS_DIFLAG2_BIGTIME_BIT 3 /* big timestamps */ + #define XFS_DIFLAG2_DAX (1 << XFS_DIFLAG2_DAX_BIT) #define XFS_DIFLAG2_REFLINK (1 << XFS_DIFLAG2_REFLINK_BIT) #define XFS_DIFLAG2_COWEXTSIZE (1 << XFS_DIFLAG2_COWEXTSIZE_BIT) +#define XFS_DIFLAG2_BIGTIME (1 << XFS_DIFLAG2_BIGTIME_BIT) #define XFS_DIFLAG2_ANY \ - (XFS_DIFLAG2_DAX | XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE) + (XFS_DIFLAG2_DAX | XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE | \ + XFS_DIFLAG2_BIGTIME) + +static inline bool xfs_dinode_has_bigtime(const struct xfs_dinode *dip) +{ + return dip->di_version >= 3 && + (dip->di_flags2 & cpu_to_be64(XFS_DIFLAG2_BIGTIME)); +} /* * Inode number format: @@ -1152,13 +1264,98 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev) #define XFS_DQTYPE_USER 0x01 /* user dquot record */ #define XFS_DQTYPE_PROJ 0x02 /* project dquot record */ #define XFS_DQTYPE_GROUP 0x04 /* group dquot record */ +#define XFS_DQTYPE_BIGTIME 0x80 /* large expiry timestamps */ /* bitmask to determine if this is a user/group/project dquot */ #define XFS_DQTYPE_REC_MASK (XFS_DQTYPE_USER | \ XFS_DQTYPE_PROJ | \ XFS_DQTYPE_GROUP) -#define XFS_DQTYPE_ANY (XFS_DQTYPE_REC_MASK) +#define XFS_DQTYPE_ANY (XFS_DQTYPE_REC_MASK | \ + XFS_DQTYPE_BIGTIME) + +/* + * XFS Quota Timers + * ================ + * + * Traditional quota grace period expiration timers are an unsigned 32-bit + * seconds counter; time zero is the Unix epoch, Jan 1 00:00:01 UTC 1970. + * Note that an expiration value of zero means that the quota limit has not + * been reached, and therefore no expiration has been set. Therefore, the + * ondisk min and max defined here can be used directly to constrain the incore + * quota expiration timestamps on a Unix system. + * + * When bigtime is enabled, we trade two bits of precision to expand the + * expiration timeout range to match that of big inode timestamps. The min and + * max recorded here are the on-disk limits, not a Unix timestamp. + * + * The grace period for each quota type is stored in the root dquot (id = 0) + * and is applied to a non-root dquot when it exceeds the soft or hard limits. + * The length of quota grace periods are unsigned 32-bit quantities measured in + * units of seconds. A value of zero means to use the default period. + */ + +/* + * Smallest possible ondisk quota expiration value with traditional timestamps. + * This corresponds exactly with the incore expiration Jan 1 00:00:01 UTC 1970. + */ +#define XFS_DQ_LEGACY_EXPIRY_MIN ((int64_t)1) + +/* + * Largest possible ondisk quota expiration value with traditional timestamps. + * This corresponds exactly with the incore expiration Feb 7 06:28:15 UTC 2106. + */ +#define XFS_DQ_LEGACY_EXPIRY_MAX ((int64_t)U32_MAX) + +/* + * Smallest possible ondisk quota expiration value with bigtime timestamps. + * This corresponds (after conversion to a Unix timestamp) with the incore + * expiration of Jan 1 00:00:04 UTC 1970. + */ +#define XFS_DQ_BIGTIME_EXPIRY_MIN (XFS_DQ_LEGACY_EXPIRY_MIN) + +/* + * Largest supported ondisk quota expiration value with bigtime timestamps. + * This corresponds (after conversion to a Unix timestamp) with an incore + * expiration of Jul 2 20:20:24 UTC 2486. + * + * The ondisk field supports values up to -1U, which corresponds to an incore + * expiration in 2514. This is beyond the maximum the bigtime inode timestamp, + * so we cap the maximum bigtime quota expiration to the max inode timestamp. + */ +#define XFS_DQ_BIGTIME_EXPIRY_MAX ((int64_t)4074815106U) + +/* + * The following conversion factors assist in converting a quota expiration + * timestamp between the incore and ondisk formats. + */ +#define XFS_DQ_BIGTIME_SHIFT (2) +#define XFS_DQ_BIGTIME_SLACK ((int64_t)(1ULL << XFS_DQ_BIGTIME_SHIFT) - 1) + +/* Convert an incore quota expiration timestamp to an ondisk bigtime value. */ +static inline uint32_t xfs_dq_unix_to_bigtime(time64_t unix_seconds) +{ + /* + * Round the expiration timestamp up to the nearest bigtime timestamp + * that we can store, to give users the most time to fix problems. + */ + return ((uint64_t)unix_seconds + XFS_DQ_BIGTIME_SLACK) >> + XFS_DQ_BIGTIME_SHIFT; +} + +/* Convert an ondisk bigtime quota expiration value to an incore timestamp. */ +static inline time64_t xfs_dq_bigtime_to_unix(uint32_t ondisk_seconds) +{ + return (time64_t)ondisk_seconds << XFS_DQ_BIGTIME_SHIFT; +} + +/* + * Default quota grace periods, ranging from zero (use the compiled defaults) + * to ~136 years. These are applied to a non-root dquot that has exceeded + * either limit. + */ +#define XFS_DQ_GRACE_MIN ((int64_t)0) +#define XFS_DQ_GRACE_MAX ((int64_t)U32_MAX) /* * This is the main portion of the on-disk representation of quota information diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index 84bcffa87753..2a2e3cfd94f0 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -249,6 +249,7 @@ typedef struct xfs_fsop_resblks { #define XFS_FSOP_GEOM_FLAGS_SPINODES (1 << 18) /* sparse inode chunks */ #define XFS_FSOP_GEOM_FLAGS_RMAPBT (1 << 19) /* reverse mapping btree */ #define XFS_FSOP_GEOM_FLAGS_REFLINK (1 << 20) /* files can share blocks */ +#define XFS_FSOP_GEOM_FLAGS_BIGTIME (1 << 21) /* 64-bit nsec timestamps */ /* * Minimum and maximum sizes need for growth checks. diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index a6b37db55169..974e71bc4a3a 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -2473,6 +2473,7 @@ xfs_ialloc_log_agi( offsetof(xfs_agi_t, agi_unlinked), offsetof(xfs_agi_t, agi_free_root), offsetof(xfs_agi_t, agi_free_level), + offsetof(xfs_agi_t, agi_iblocks), sizeof(xfs_agi_t) }; #ifdef DEBUG @@ -2806,6 +2807,10 @@ xfs_ialloc_setup_geometry( uint64_t icount; uint inodes; + igeo->new_diflags2 = 0; + if (xfs_sb_version_hasbigtime(&mp->m_sb)) + igeo->new_diflags2 |= XFS_DIFLAG2_BIGTIME; + /* Compute inode btree geometry. */ igeo->agino_log = sbp->sb_inopblog + sbp->sb_agblklog; igeo->inobt_mxr[0] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 1); diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index 3c8aebc36e64..cc919a2ee870 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -67,6 +67,25 @@ xfs_finobt_set_root( XFS_AGI_FREE_ROOT | XFS_AGI_FREE_LEVEL); } +/* Update the inode btree block counter for this btree. */ +static inline void +xfs_inobt_mod_blockcount( + struct xfs_btree_cur *cur, + int howmuch) +{ + struct xfs_buf *agbp = cur->bc_ag.agbp; + struct xfs_agi *agi = agbp->b_addr; + + if (!xfs_sb_version_hasinobtcounts(&cur->bc_mp->m_sb)) + return; + + if (cur->bc_btnum == XFS_BTNUM_FINO) + be32_add_cpu(&agi->agi_fblocks, howmuch); + else if (cur->bc_btnum == XFS_BTNUM_INO) + be32_add_cpu(&agi->agi_iblocks, howmuch); + xfs_ialloc_log_agi(cur->bc_tp, agbp, XFS_AGI_IBLOCKS); +} + STATIC int __xfs_inobt_alloc_block( struct xfs_btree_cur *cur, @@ -102,6 +121,7 @@ __xfs_inobt_alloc_block( new->s = cpu_to_be32(XFS_FSB_TO_AGBNO(args.mp, args.fsbno)); *stat = 1; + xfs_inobt_mod_blockcount(cur, 1); return 0; } @@ -134,6 +154,7 @@ __xfs_inobt_free_block( struct xfs_buf *bp, enum xfs_ag_resv_type resv) { + xfs_inobt_mod_blockcount(cur, -1); return xfs_free_extent(cur->bc_tp, XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)), 1, &XFS_RMAP_OINFO_INOBT, resv); @@ -480,19 +501,29 @@ xfs_inobt_commit_staged_btree( { struct xfs_agi *agi = agbp->b_addr; struct xbtree_afakeroot *afake = cur->bc_ag.afake; + int fields; ASSERT(cur->bc_flags & XFS_BTREE_STAGING); if (cur->bc_btnum == XFS_BTNUM_INO) { + fields = XFS_AGI_ROOT | XFS_AGI_LEVEL; agi->agi_root = cpu_to_be32(afake->af_root); agi->agi_level = cpu_to_be32(afake->af_levels); - xfs_ialloc_log_agi(tp, agbp, XFS_AGI_ROOT | XFS_AGI_LEVEL); + if (xfs_sb_version_hasinobtcounts(&cur->bc_mp->m_sb)) { + agi->agi_iblocks = cpu_to_be32(afake->af_blocks); + fields |= XFS_AGI_IBLOCKS; + } + xfs_ialloc_log_agi(tp, agbp, fields); xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_inobt_ops); } else { + fields = XFS_AGI_FREE_ROOT | XFS_AGI_FREE_LEVEL; agi->agi_free_root = cpu_to_be32(afake->af_root); agi->agi_free_level = cpu_to_be32(afake->af_levels); - xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREE_ROOT | - XFS_AGI_FREE_LEVEL); + if (xfs_sb_version_hasinobtcounts(&cur->bc_mp->m_sb)) { + agi->agi_fblocks = cpu_to_be32(afake->af_blocks); + fields |= XFS_AGI_IBLOCKS; + } + xfs_ialloc_log_agi(tp, agbp, fields); xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_finobt_ops); } } @@ -673,6 +704,28 @@ xfs_inobt_count_blocks( return error; } +/* Read finobt block count from AGI header. */ +static int +xfs_finobt_read_blocks( + struct xfs_mount *mp, + struct xfs_trans *tp, + xfs_agnumber_t agno, + xfs_extlen_t *tree_blocks) +{ + struct xfs_buf *agbp; + struct xfs_agi *agi; + int error; + + error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); + if (error) + return error; + + agi = agbp->b_addr; + *tree_blocks = be32_to_cpu(agi->agi_fblocks); + xfs_trans_brelse(tp, agbp); + return 0; +} + /* * Figure out how many blocks to reserve and how many are used by this btree. */ @@ -690,7 +743,11 @@ xfs_finobt_calc_reserves( if (!xfs_sb_version_hasfinobt(&mp->m_sb)) return 0; - error = xfs_inobt_count_blocks(mp, tp, agno, XFS_BTNUM_FINO, &tree_len); + if (xfs_sb_version_hasinobtcounts(&mp->m_sb)) + error = xfs_finobt_read_blocks(mp, tp, agno, &tree_len); + else + error = xfs_inobt_count_blocks(mp, tp, agno, XFS_BTNUM_FINO, + &tree_len); if (error) return error; diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c index 52451809c478..b4164256993d 100644 --- a/fs/xfs/libxfs/xfs_iext_tree.c +++ b/fs/xfs/libxfs/xfs_iext_tree.c @@ -603,7 +603,7 @@ xfs_iext_realloc_root( if (new_size / sizeof(struct xfs_iext_rec) == RECS_PER_LEAF) new_size = NODE_SIZE; - new = kmem_realloc(ifp->if_u1.if_root, new_size, KM_NOFS); + new = krealloc(ifp->if_u1.if_root, new_size, GFP_NOFS | __GFP_NOFAIL); memset(new + ifp->if_bytes, 0, new_size - ifp->if_bytes); ifp->if_u1.if_root = new; cur->leaf = new; diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 8d5dd08eab75..c667c63f2cb0 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -157,6 +157,36 @@ xfs_imap_to_bp( return 0; } +static inline struct timespec64 xfs_inode_decode_bigtime(uint64_t ts) +{ + struct timespec64 tv; + uint32_t n; + + tv.tv_sec = xfs_bigtime_to_unix(div_u64_rem(ts, NSEC_PER_SEC, &n)); + tv.tv_nsec = n; + + return tv; +} + +/* Convert an ondisk timestamp to an incore timestamp. */ +struct timespec64 +xfs_inode_from_disk_ts( + struct xfs_dinode *dip, + const xfs_timestamp_t ts) +{ + struct timespec64 tv; + struct xfs_legacy_timestamp *lts; + + if (xfs_dinode_has_bigtime(dip)) + return xfs_inode_decode_bigtime(be64_to_cpu(ts)); + + lts = (struct xfs_legacy_timestamp *)&ts; + tv.tv_sec = (int)be32_to_cpu(lts->t_sec); + tv.tv_nsec = (int)be32_to_cpu(lts->t_nsec); + + return tv; +} + int xfs_inode_from_disk( struct xfs_inode *ip, @@ -211,12 +241,9 @@ xfs_inode_from_disk( * a time before epoch is converted to a time long after epoch * on 64 bit systems. */ - inode->i_atime.tv_sec = (int)be32_to_cpu(from->di_atime.t_sec); - inode->i_atime.tv_nsec = (int)be32_to_cpu(from->di_atime.t_nsec); - inode->i_mtime.tv_sec = (int)be32_to_cpu(from->di_mtime.t_sec); - inode->i_mtime.tv_nsec = (int)be32_to_cpu(from->di_mtime.t_nsec); - inode->i_ctime.tv_sec = (int)be32_to_cpu(from->di_ctime.t_sec); - inode->i_ctime.tv_nsec = (int)be32_to_cpu(from->di_ctime.t_nsec); + inode->i_atime = xfs_inode_from_disk_ts(from, from->di_atime); + inode->i_mtime = xfs_inode_from_disk_ts(from, from->di_mtime); + inode->i_ctime = xfs_inode_from_disk_ts(from, from->di_ctime); to->di_size = be64_to_cpu(from->di_size); to->di_nblocks = be64_to_cpu(from->di_nblocks); @@ -229,8 +256,7 @@ xfs_inode_from_disk( if (xfs_sb_version_has_v3inode(&ip->i_mount->m_sb)) { inode_set_iversion_queried(inode, be64_to_cpu(from->di_changecount)); - to->di_crtime.tv_sec = be32_to_cpu(from->di_crtime.t_sec); - to->di_crtime.tv_nsec = be32_to_cpu(from->di_crtime.t_nsec); + to->di_crtime = xfs_inode_from_disk_ts(from, from->di_crtime); to->di_flags2 = be64_to_cpu(from->di_flags2); to->di_cowextsize = be32_to_cpu(from->di_cowextsize); } @@ -252,6 +278,25 @@ out_destroy_data_fork: return error; } +/* Convert an incore timestamp to an ondisk timestamp. */ +static inline xfs_timestamp_t +xfs_inode_to_disk_ts( + struct xfs_inode *ip, + const struct timespec64 tv) +{ + struct xfs_legacy_timestamp *lts; + xfs_timestamp_t ts; + + if (xfs_inode_has_bigtime(ip)) + return cpu_to_be64(xfs_inode_encode_bigtime(tv)); + + lts = (struct xfs_legacy_timestamp *)&ts; + lts->t_sec = cpu_to_be32(tv.tv_sec); + lts->t_nsec = cpu_to_be32(tv.tv_nsec); + + return ts; +} + void xfs_inode_to_disk( struct xfs_inode *ip, @@ -271,12 +316,9 @@ xfs_inode_to_disk( to->di_projid_hi = cpu_to_be16(from->di_projid >> 16); memset(to->di_pad, 0, sizeof(to->di_pad)); - to->di_atime.t_sec = cpu_to_be32(inode->i_atime.tv_sec); - to->di_atime.t_nsec = cpu_to_be32(inode->i_atime.tv_nsec); - to->di_mtime.t_sec = cpu_to_be32(inode->i_mtime.tv_sec); - to->di_mtime.t_nsec = cpu_to_be32(inode->i_mtime.tv_nsec); - to->di_ctime.t_sec = cpu_to_be32(inode->i_ctime.tv_sec); - to->di_ctime.t_nsec = cpu_to_be32(inode->i_ctime.tv_nsec); + to->di_atime = xfs_inode_to_disk_ts(ip, inode->i_atime); + to->di_mtime = xfs_inode_to_disk_ts(ip, inode->i_mtime); + to->di_ctime = xfs_inode_to_disk_ts(ip, inode->i_ctime); to->di_nlink = cpu_to_be32(inode->i_nlink); to->di_gen = cpu_to_be32(inode->i_generation); to->di_mode = cpu_to_be16(inode->i_mode); @@ -295,8 +337,7 @@ xfs_inode_to_disk( if (xfs_sb_version_has_v3inode(&ip->i_mount->m_sb)) { to->di_version = 3; to->di_changecount = cpu_to_be64(inode_peek_iversion(inode)); - to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.tv_sec); - to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.tv_nsec); + to->di_crtime = xfs_inode_to_disk_ts(ip, from->di_crtime); to->di_flags2 = cpu_to_be64(from->di_flags2); to->di_cowextsize = cpu_to_be32(from->di_cowextsize); to->di_ino = cpu_to_be64(ip->i_ino); @@ -310,58 +351,6 @@ xfs_inode_to_disk( } } -void -xfs_log_dinode_to_disk( - struct xfs_log_dinode *from, - struct xfs_dinode *to) -{ - to->di_magic = cpu_to_be16(from->di_magic); - to->di_mode = cpu_to_be16(from->di_mode); - to->di_version = from->di_version; - to->di_format = from->di_format; - to->di_onlink = 0; - to->di_uid = cpu_to_be32(from->di_uid); - to->di_gid = cpu_to_be32(from->di_gid); - to->di_nlink = cpu_to_be32(from->di_nlink); - to->di_projid_lo = cpu_to_be16(from->di_projid_lo); - to->di_projid_hi = cpu_to_be16(from->di_projid_hi); - memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); - - to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec); - to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec); - to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec); - to->di_mtime.t_nsec = cpu_to_be32(from->di_mtime.t_nsec); - to->di_ctime.t_sec = cpu_to_be32(from->di_ctime.t_sec); - to->di_ctime.t_nsec = cpu_to_be32(from->di_ctime.t_nsec); - - to->di_size = cpu_to_be64(from->di_size); - to->di_nblocks = cpu_to_be64(from->di_nblocks); - to->di_extsize = cpu_to_be32(from->di_extsize); - to->di_nextents = cpu_to_be32(from->di_nextents); - to->di_anextents = cpu_to_be16(from->di_anextents); - to->di_forkoff = from->di_forkoff; - to->di_aformat = from->di_aformat; - to->di_dmevmask = cpu_to_be32(from->di_dmevmask); - to->di_dmstate = cpu_to_be16(from->di_dmstate); - to->di_flags = cpu_to_be16(from->di_flags); - to->di_gen = cpu_to_be32(from->di_gen); - - if (from->di_version == 3) { - to->di_changecount = cpu_to_be64(from->di_changecount); - to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec); - to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec); - to->di_flags2 = cpu_to_be64(from->di_flags2); - to->di_cowextsize = cpu_to_be32(from->di_cowextsize); - to->di_ino = cpu_to_be64(from->di_ino); - to->di_lsn = cpu_to_be64(from->di_lsn); - memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2)); - uuid_copy(&to->di_uuid, &from->di_uuid); - to->di_flushiter = 0; - } else { - to->di_flushiter = cpu_to_be16(from->di_flushiter); - } -} - static xfs_failaddr_t xfs_dinode_verify_fork( struct xfs_dinode *dip, @@ -568,6 +557,11 @@ xfs_dinode_verify( if (fa) return fa; + /* bigtime iflag can only happen on bigtime filesystems */ + if (xfs_dinode_has_bigtime(dip) && + !xfs_sb_version_hasbigtime(&mp->m_sb)) + return __this_address; + return NULL; } diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h index 6b08b9d060c2..ef5eaf33d146 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.h +++ b/fs/xfs/libxfs/xfs_inode_buf.h @@ -17,7 +17,7 @@ struct xfs_dinode; */ struct xfs_icdinode { uint16_t di_flushiter; /* incremented on flush */ - uint32_t di_projid; /* owner's project id */ + prid_t di_projid; /* owner's project id */ xfs_fsize_t di_size; /* number of bytes in file */ xfs_rfsblock_t di_nblocks; /* # of direct & btree blocks used */ xfs_extlen_t di_extsize; /* basic/minimum extent size for file */ @@ -32,6 +32,11 @@ struct xfs_icdinode { struct timespec64 di_crtime; /* time created */ }; +static inline bool xfs_icdinode_has_bigtime(const struct xfs_icdinode *icd) +{ + return icd->di_flags2 & XFS_DIFLAG2_BIGTIME; +} + /* * Inode location information. Stored in the inode and passed to * xfs_imap_to_bp() to get a buffer and dinode for a given inode. @@ -49,8 +54,6 @@ void xfs_dinode_calc_crc(struct xfs_mount *, struct xfs_dinode *); void xfs_inode_to_disk(struct xfs_inode *ip, struct xfs_dinode *to, xfs_lsn_t lsn); int xfs_inode_from_disk(struct xfs_inode *ip, struct xfs_dinode *from); -void xfs_log_dinode_to_disk(struct xfs_log_dinode *from, - struct xfs_dinode *to); xfs_failaddr_t xfs_dinode_verify(struct xfs_mount *mp, xfs_ino_t ino, struct xfs_dinode *dip); @@ -60,4 +63,12 @@ xfs_failaddr_t xfs_inode_validate_cowextsize(struct xfs_mount *mp, uint32_t cowextsize, uint16_t mode, uint16_t flags, uint64_t flags2); +static inline uint64_t xfs_inode_encode_bigtime(struct timespec64 tv) +{ + return xfs_unix_to_bigtime(tv.tv_sec) * NSEC_PER_SEC + tv.tv_nsec; +} + +struct timespec64 xfs_inode_from_disk_ts(struct xfs_dinode *dip, + const xfs_timestamp_t ts); + #endif /* __XFS_INODE_BUF_H__ */ diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index 0cf853d42d62..7575de5cecb1 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -386,8 +386,8 @@ xfs_iroot_realloc( cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0); new_max = cur_max + rec_diff; new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max); - ifp->if_broot = kmem_realloc(ifp->if_broot, new_size, - KM_NOFS); + ifp->if_broot = krealloc(ifp->if_broot, new_size, + GFP_NOFS | __GFP_NOFAIL); op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, ifp->if_broot_bytes); np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, @@ -496,8 +496,8 @@ xfs_idata_realloc( * in size so that it can be logged and stay on word boundaries. * We enforce that here. */ - ifp->if_u1.if_data = kmem_realloc(ifp->if_u1.if_data, - roundup(new_size, 4), KM_NOFS); + ifp->if_u1.if_data = krealloc(ifp->if_u1.if_data, roundup(new_size, 4), + GFP_NOFS | __GFP_NOFAIL); ifp->if_bytes = new_size; } diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index e3400c9c71cd..8bd00da6d2a4 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h @@ -368,10 +368,13 @@ static inline int xfs_ilog_fdata(int w) * directly mirrors the xfs_dinode structure as it must contain all the same * information. */ -typedef struct xfs_ictimestamp { +typedef uint64_t xfs_ictimestamp_t; + +/* Legacy timestamp encoding format. */ +struct xfs_legacy_ictimestamp { int32_t t_sec; /* timestamp seconds */ int32_t t_nsec; /* timestamp nanoseconds */ -} xfs_ictimestamp_t; +}; /* * Define the format of the inode core that is logged. This structure must be diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h index 641132d0e39d..3cca2bfe714c 100644 --- a/fs/xfs/libxfs/xfs_log_recover.h +++ b/fs/xfs/libxfs/xfs_log_recover.h @@ -121,7 +121,6 @@ struct xlog_recover { void xlog_buf_readahead(struct xlog *log, xfs_daddr_t blkno, uint len, const struct xfs_buf_ops *ops); bool xlog_is_buffer_cancelled(struct xlog *log, xfs_daddr_t blkno, uint len); -void xlog_recover_iodone(struct xfs_buf *bp); void xlog_recover_release_intent(struct xlog *log, unsigned short intent_type, uint64_t intent_id); diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h index 076bdc7037ee..0f0af4e35032 100644 --- a/fs/xfs/libxfs/xfs_quota_defs.h +++ b/fs/xfs/libxfs/xfs_quota_defs.h @@ -23,7 +23,8 @@ typedef uint8_t xfs_dqtype_t; #define XFS_DQTYPE_STRINGS \ { XFS_DQTYPE_USER, "USER" }, \ { XFS_DQTYPE_PROJ, "PROJ" }, \ - { XFS_DQTYPE_GROUP, "GROUP" } + { XFS_DQTYPE_GROUP, "GROUP" }, \ + { XFS_DQTYPE_BIGTIME, "BIGTIME" } /* * flags for q_flags field in the dquot. @@ -143,4 +144,9 @@ extern int xfs_calc_dquots_per_chunk(unsigned int nbblks); extern void xfs_dqblk_repair(struct xfs_mount *mp, struct xfs_dqblk *dqb, xfs_dqid_t id, xfs_dqtype_t type); +struct xfs_dquot; +time64_t xfs_dquot_from_disk_ts(struct xfs_disk_dquot *ddq, + __be32 dtimer); +__be32 xfs_dquot_to_disk_ts(struct xfs_dquot *ddq, time64_t timer); + #endif /* __XFS_QUOTA_H__ */ diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index 27c39268c31f..340c83f76c80 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -2505,12 +2505,15 @@ xfs_rmap_map_extent( int whichfork, struct xfs_bmbt_irec *PREV) { + enum xfs_rmap_intent_type type = XFS_RMAP_MAP; + if (!xfs_rmap_update_is_needed(tp->t_mountp, whichfork)) return; - __xfs_rmap_add(tp, xfs_is_reflink_inode(ip) ? - XFS_RMAP_MAP_SHARED : XFS_RMAP_MAP, ip->i_ino, - whichfork, PREV); + if (whichfork != XFS_ATTR_FORK && xfs_is_reflink_inode(ip)) + type = XFS_RMAP_MAP_SHARED; + + __xfs_rmap_add(tp, type, ip->i_ino, whichfork, PREV); } /* Unmap an extent out of a file. */ @@ -2521,12 +2524,15 @@ xfs_rmap_unmap_extent( int whichfork, struct xfs_bmbt_irec *PREV) { + enum xfs_rmap_intent_type type = XFS_RMAP_UNMAP; + if (!xfs_rmap_update_is_needed(tp->t_mountp, whichfork)) return; - __xfs_rmap_add(tp, xfs_is_reflink_inode(ip) ? - XFS_RMAP_UNMAP_SHARED : XFS_RMAP_UNMAP, ip->i_ino, - whichfork, PREV); + if (whichfork != XFS_ATTR_FORK && xfs_is_reflink_inode(ip)) + type = XFS_RMAP_UNMAP_SHARED; + + __xfs_rmap_add(tp, type, ip->i_ino, whichfork, PREV); } /* @@ -2543,12 +2549,15 @@ xfs_rmap_convert_extent( int whichfork, struct xfs_bmbt_irec *PREV) { + enum xfs_rmap_intent_type type = XFS_RMAP_CONVERT; + if (!xfs_rmap_update_is_needed(mp, whichfork)) return; - __xfs_rmap_add(tp, xfs_is_reflink_inode(ip) ? - XFS_RMAP_CONVERT_SHARED : XFS_RMAP_CONVERT, ip->i_ino, - whichfork, PREV); + if (whichfork != XFS_ATTR_FORK && xfs_is_reflink_inode(ip)) + type = XFS_RMAP_CONVERT_SHARED; + + __xfs_rmap_add(tp, type, ip->i_ino, whichfork, PREV); } /* Schedule the creation of an rmap for non-file data. */ diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c index 1d9fa8a300f1..6c1aba16113c 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.c +++ b/fs/xfs/libxfs/xfs_rtbitmap.c @@ -1018,7 +1018,6 @@ xfs_rtalloc_query_range( struct xfs_mount *mp = tp->t_mountp; xfs_rtblock_t rtstart; xfs_rtblock_t rtend; - xfs_rtblock_t rem; int is_free; int error = 0; @@ -1027,13 +1026,12 @@ xfs_rtalloc_query_range( if (low_rec->ar_startext >= mp->m_sb.sb_rextents || low_rec->ar_startext == high_rec->ar_startext) return 0; - if (high_rec->ar_startext > mp->m_sb.sb_rextents) - high_rec->ar_startext = mp->m_sb.sb_rextents; + high_rec->ar_startext = min(high_rec->ar_startext, + mp->m_sb.sb_rextents - 1); /* Iterate the bitmap, looking for discrepancies. */ rtstart = low_rec->ar_startext; - rem = high_rec->ar_startext - rtstart; - while (rem) { + while (rtstart <= high_rec->ar_startext) { /* Is the first block free? */ error = xfs_rtcheck_range(mp, tp, rtstart, 1, 1, &rtend, &is_free); @@ -1042,7 +1040,7 @@ xfs_rtalloc_query_range( /* How long does the extent go for? */ error = xfs_rtfind_forw(mp, tp, rtstart, - high_rec->ar_startext - 1, &rtend); + high_rec->ar_startext, &rtend); if (error) break; @@ -1055,7 +1053,6 @@ xfs_rtalloc_query_range( break; } - rem -= rtend - rtstart + 1; rtstart = rtend + 1; } diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index ae9aaf1f34bf..5aeafa59ed27 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -954,7 +954,7 @@ xfs_log_sb( struct xfs_trans *tp) { struct xfs_mount *mp = tp->t_mountp; - struct xfs_buf *bp = xfs_trans_getsb(tp, mp); + struct xfs_buf *bp = xfs_trans_getsb(tp); mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount); mp->m_sb.sb_ifree = percpu_counter_sum(&mp->m_ifree); @@ -1084,7 +1084,7 @@ xfs_sync_sb_buf( if (error) return error; - bp = xfs_trans_getsb(tp, mp); + bp = xfs_trans_getsb(tp); xfs_log_sb(tp); xfs_trans_bhold(tp, bp); xfs_trans_set_sync(tp); @@ -1166,6 +1166,8 @@ xfs_fs_geometry( geo->flags |= XFS_FSOP_GEOM_FLAGS_RMAPBT; if (xfs_sb_version_hasreflink(sbp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_REFLINK; + if (xfs_sb_version_hasbigtime(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_BIGTIME; if (xfs_sb_version_hassector(sbp)) geo->logsectsize = sbp->sb_logsectsize; else diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h index 708feb8eac76..c795ae47b3c9 100644 --- a/fs/xfs/libxfs/xfs_shared.h +++ b/fs/xfs/libxfs/xfs_shared.h @@ -176,6 +176,9 @@ struct xfs_ino_geometry { unsigned int ialloc_align; unsigned int agino_log; /* #bits for agino in inum */ + + /* precomputed value for di_flags2 */ + uint64_t new_diflags2; }; #endif /* __XFS_SHARED_H__ */ diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c index b7e222befb08..90f1d5645052 100644 --- a/fs/xfs/libxfs/xfs_trans_inode.c +++ b/fs/xfs/libxfs/xfs_trans_inode.c @@ -132,6 +132,17 @@ xfs_trans_log_inode( } /* + * If we're updating the inode core or the timestamps and it's possible + * to upgrade this inode to bigtime format, do so now. + */ + if ((flags & (XFS_ILOG_CORE | XFS_ILOG_TIMESTAMP)) && + xfs_sb_version_hasbigtime(&ip->i_mount->m_sb) && + !xfs_inode_has_bigtime(ip)) { + ip->i_d.di_flags2 |= XFS_DIFLAG2_BIGTIME; + flags |= XFS_ILOG_CORE; + } + + /* * Record the specific change for fdatasync optimisation. This allows * fdatasync to skip log forces for inodes that are only timestamp * dirty. @@ -177,9 +188,9 @@ xfs_trans_log_inode( /* * Always OR in the bits from the ili_last_fields field. This is to - * coordinate with the xfs_iflush() and xfs_iflush_done() routines in - * the eventual clearing of the ili_fields bits. See the big comment in - * xfs_iflush() for an explanation of this coordination mechanism. + * coordinate with the xfs_iflush() and xfs_buf_inode_iodone() routines + * in the eventual clearing of the ili_fields bits. See the big comment + * in xfs_iflush() for an explanation of this coordination mechanism. */ iip->ili_fields |= (flags | iip->ili_last_fields | iversion_flags); spin_unlock(&iip->ili_lock); |