Currently each log iovec starts with an xlog_op_header in the CIL buffer.
This wastes a little bit of memory, but more importantly makes managing
the alignment of the individual log regions rather painful.
But there is almost no information in the op_header that we actually need
to keep while the item is in the CIL - the transaction ID gets overwritten
in xlog_write, the length can be derived from i_len in the xfs_log_iovec,
the client_id is set to the same value for all but one caller, leaving
only the values of oh_flags as relevant information.
Add a new i_op field to the xfs_log_iovec to encode the flags and the
special oh_clientid value for the unmount record, and instead create the
xlog_op_header on the fly while writing to the iclog. This in turn
matches what we already do for continuation records, allowing us to share
the code to write an op header and a region into the iclog in a single
helper.
The in-memory-only debug i_type field is shortened to 16 bits to better
pack this new value.
Note that before this change the i_len field in the xfs_log_iovec used to
include the length of ophdr but now doesn't.
Signed-off-by: Christoph Hellwig <hch@lst.de>
char hic_sector[XLOG_HEADER_SIZE];
} xlog_in_core_2_t;
-/* not an on-disk structure, but needed by log recovery in userspace */
-typedef struct xfs_log_iovec {
- void *i_addr; /* beginning address of region */
- int i_len; /* length in bytes of region */
- uint i_type; /* type of region */
-} xfs_log_iovec_t;
-
-
/*
* Transaction Header definitions.
*
struct xfs_log_vec lv = {
.lv_niovecs = 1,
.lv_iovecp = reg,
- .lv_bytes = reg->i_len,
+ .lv_bytes = sizeof(struct xlog_op_header) + reg->i_len,
};
LIST_HEAD (lv_chain);
struct xlog *log,
struct xlog_ticket *ticket)
{
- struct {
- struct xlog_op_header ophdr;
- struct xfs_unmount_log_format ulf;
- } unmount_rec = {
- .ophdr = {
- .oh_clientid = XFS_LOG,
- .oh_tid = cpu_to_be32(ticket->t_tid),
- .oh_flags = XLOG_UNMOUNT_TRANS,
- },
- .ulf = {
- .magic = XLOG_UNMOUNT_TYPE,
- },
+ struct xfs_unmount_log_format ulf = {
+ .magic = XLOG_UNMOUNT_TYPE,
};
- struct xfs_log_iovec reg = {
- .i_addr = &unmount_rec,
- .i_len = sizeof(unmount_rec),
- .i_type = XLOG_REG_TYPE_UNMOUNT,
+ struct xfs_log_iovec reg = {
+ .i_addr = &ulf,
+ .i_len = sizeof(ulf),
+ .i_op = XLOG_OP_UNMOUNT,
+ .i_type = XLOG_REG_TYPE_UNMOUNT,
};
return xlog_write_one_vec(log, NULL, &reg, ticket);
}
}
-static inline void
-xlog_write_iovec(
+static void
+xlog_write_region(
+ struct xlog_ticket *ticket,
struct xlog_in_core *iclog,
uint32_t *log_offset,
- void *data,
- uint32_t write_len,
+ struct xfs_log_iovec *reg,
int *bytes_left,
uint32_t *record_cnt,
uint32_t *data_cnt)
{
+ struct xlog_op_header *ophdr = iclog->ic_datap + *log_offset;
+ uint32_t rlen;
+
ASSERT(*log_offset < iclog->ic_log->l_iclog_size);
ASSERT(*log_offset % sizeof(int32_t) == 0);
- ASSERT(write_len % sizeof(int32_t) == 0);
+ ASSERT(reg->i_len % sizeof(int32_t) == 0);
+
+ *log_offset += sizeof(struct xlog_op_header);
+ if (reg->i_op != XLOG_OP_CONT_TRANS)
+ *bytes_left -= sizeof(struct xlog_op_header);
+ *data_cnt += sizeof(struct xlog_op_header);
+
+ ASSERT(iclog->ic_size - *log_offset > 0);
+ rlen = min_t(uint32_t, reg->i_len, iclog->ic_size - *log_offset);
+ if (rlen) {
+ memcpy(iclog->ic_datap + *log_offset, reg->i_addr, rlen);
+ *log_offset += rlen;
+ *bytes_left -= rlen;
+ *data_cnt += rlen;
+ reg->i_addr += rlen;
+ reg->i_len -= rlen;
+ }
+
+ ophdr->oh_tid = cpu_to_be32(ticket->t_tid);
+ ophdr->oh_len = cpu_to_be32(rlen);
+ ophdr->oh_clientid = XFS_TRANSACTION;
+ ophdr->oh_flags = 0;
+ ophdr->oh_res2 = 0;
+
+ switch (reg->i_op) {
+ case XLOG_OP_TRANS:
+ if (reg->i_len) {
+ ophdr->oh_flags |= XLOG_CONTINUE_TRANS;
+ reg->i_op = XLOG_OP_CONT_TRANS;
+ }
+ break;
+ case XLOG_OP_CONT_TRANS:
+ ophdr->oh_flags |= XLOG_WAS_CONT_TRANS;
+ if (reg->i_len)
+ ophdr->oh_flags |= XLOG_CONTINUE_TRANS;
+ else
+ ophdr->oh_flags |= XLOG_END_TRANS;
+ break;
+ case XLOG_OP_UNMOUNT:
+ ophdr->oh_clientid = XFS_LOG;
+ ophdr->oh_flags |= XLOG_UNMOUNT_TRANS;
+ break;
+ case XLOG_OP_START_TRANS:
+ ophdr->oh_flags |= XLOG_START_TRANS;
+ break;
+ case XLOG_OP_COMMIT_TRANS:
+ ophdr->oh_flags |= XLOG_COMMIT_TRANS;
+ break;
+ }
- memcpy(iclog->ic_datap + *log_offset, data, write_len);
- *log_offset += write_len;
- *bytes_left -= write_len;
(*record_cnt)++;
- *data_cnt += write_len;
}
/*
* loop will naturally skip them.
*/
for (index = 0; index < lv->lv_niovecs; index++) {
- struct xfs_log_iovec *reg = &lv->lv_iovecp[index];
- struct xlog_op_header *ophdr = reg->i_addr;
-
- ophdr->oh_tid = cpu_to_be32(ticket->t_tid);
- xlog_write_iovec(iclog, log_offset, reg->i_addr,
- reg->i_len, len, record_cnt, data_cnt);
+ xlog_write_region(ticket, iclog, log_offset,
+ &lv->lv_iovecp[index], len, record_cnt,
+ data_cnt);
+ ASSERT(lv->lv_iovecp[index].i_len == 0);
}
}
uint32_t *data_cnt)
{
struct xlog_in_core *iclog = *iclogp;
- struct xlog_op_header *ophdr;
int index = 0;
- uint32_t rlen;
int error;
/* walk the logvec, copying until we run out of space in the iclog */
for (index = 0; index < lv->lv_niovecs; index++) {
struct xfs_log_iovec *reg = &lv->lv_iovecp[index];
- uint32_t reg_offset = 0;
/*
* The first region of a continuation must have a non-zero
return error;
}
- ophdr = reg->i_addr;
- rlen = min_t(uint32_t, reg->i_len, iclog->ic_size - *log_offset);
-
- ophdr->oh_tid = cpu_to_be32(ticket->t_tid);
- ophdr->oh_len = cpu_to_be32(rlen - sizeof(struct xlog_op_header));
- if (rlen != reg->i_len)
- ophdr->oh_flags |= XLOG_CONTINUE_TRANS;
-
- xlog_write_iovec(iclog, log_offset, reg->i_addr,
- rlen, len, record_cnt, data_cnt);
+ xlog_write_region(ticket, iclog, log_offset, reg, len,
+ record_cnt, data_cnt);
/* If we wrote the whole region, move to the next. */
- if (rlen == reg->i_len)
+ if (reg->i_len == 0)
continue;
/*
if (error)
return error;
- ophdr = iclog->ic_datap + *log_offset;
- ophdr->oh_tid = cpu_to_be32(ticket->t_tid);
- ophdr->oh_clientid = XFS_TRANSACTION;
- ophdr->oh_res2 = 0;
- ophdr->oh_flags = XLOG_WAS_CONT_TRANS;
-
+ xlog_write_region(ticket, iclog, log_offset, reg, len,
+ record_cnt, data_cnt);
ticket->t_curr_res -= sizeof(struct xlog_op_header);
- *log_offset += sizeof(struct xlog_op_header);
- *data_cnt += sizeof(struct xlog_op_header);
-
- /*
- * If rlen fits in the iclog, then end the region
- * continuation. Otherwise we're going around again.
- */
- reg_offset += rlen;
- rlen = reg->i_len - reg_offset;
- if (rlen <= iclog->ic_size - *log_offset)
- ophdr->oh_flags |= XLOG_END_TRANS;
- else
- ophdr->oh_flags |= XLOG_CONTINUE_TRANS;
-
- rlen = min_t(uint32_t, rlen, iclog->ic_size - *log_offset);
- ophdr->oh_len = cpu_to_be32(rlen);
-
- xlog_write_iovec(iclog, log_offset,
- reg->i_addr + reg_offset,
- rlen, len, record_cnt, data_cnt);
-
- } while (ophdr->oh_flags & XLOG_CONTINUE_TRANS);
+ } while (reg->i_len > 0);
}
/*
return roundup(len, sizeof(uint32_t));
}
-void *xlog_format_start(struct xlog_format_buf *lfb, uint type);
+void *xlog_format_start(struct xlog_format_buf *lfb, uint16_t type);
void xlog_format_commit(struct xlog_format_buf *lfb, unsigned int data_len);
/*
static inline void *
xlog_format_copy(
struct xlog_format_buf *lfb,
- uint type,
+ uint16_t type,
void *data,
unsigned int len)
{
* the next one is naturally aligned. We'll need to account for
* that slack space here.
*
- * We also add the xlog_op_header to each region when
- * formatting, but that's not accounted to the size of the item
- * at this point. Hence we'll need an addition number of bytes
- * for each vector to hold an opheader.
- *
* Then round nbytes up to 64-bit alignment so that the initial
* buffer alignment is easy to calculate and verify.
+ *
+ * Note that this does not include the per-iovec ophdr, which only
+ * exists in the iclog buffer, but not the CIL buffer.
*/
- nbytes = xlog_item_space(niovecs, nbytes);
+ nbytes = round_up(nbytes + niovecs * sizeof(uint64_t),
+ sizeof(uint64_t));
/*
* The data buffer needs to start 64-bit aligned, so round up
unsigned int idx;
};
-/*
- * We need to make sure the buffer pointer returned is naturally aligned for the
- * biggest basic data type we put into it. We have already accounted for this
- * padding when sizing the buffer.
- *
- * However, this padding does not get written into the log, and hence we have to
- * track the space used by the log vectors separately to prevent log space hangs
- * due to inaccurate accounting (i.e. a leak) of the used log space through the
- * CIL context ticket.
- *
- * We also add space for the xlog_op_header that describes this region in the
- * log. This prepends the data region we return to the caller to copy their data
- * into, so do all the static initialisation of the ophdr now. Because the ophdr
- * is not 8 byte aligned, we have to be careful to ensure that we align the
- * start of the buffer such that the region we return to the call is 8 byte
- * aligned and packed against the tail of the ophdr.
- */
void *
xlog_format_start(
struct xlog_format_buf *lfb,
- uint type)
+ uint16_t type)
{
struct xfs_log_vec *lv = lfb->lv;
struct xfs_log_iovec *vec = &lv->lv_iovecp[lfb->idx];
- struct xlog_op_header *oph;
- uint32_t len;
- void *buf;
+ void *buf = lv->lv_buf + lv->lv_buf_used;
ASSERT(lfb->idx < lv->lv_niovecs);
-
- len = lv->lv_buf_used + sizeof(struct xlog_op_header);
- if (!IS_ALIGNED(len, sizeof(uint64_t))) {
- lv->lv_buf_used = round_up(len, sizeof(uint64_t)) -
- sizeof(struct xlog_op_header);
- }
+ ASSERT(IS_ALIGNED((unsigned long)buf, sizeof(uint64_t)));
vec->i_type = type;
- vec->i_addr = lv->lv_buf + lv->lv_buf_used;
-
- oph = vec->i_addr;
- oph->oh_clientid = XFS_TRANSACTION;
- oph->oh_res2 = 0;
- oph->oh_flags = 0;
-
- buf = vec->i_addr + sizeof(struct xlog_op_header);
- ASSERT(IS_ALIGNED((unsigned long)buf, sizeof(uint64_t)));
+ vec->i_addr = buf;
return buf;
}
{
struct xfs_log_vec *lv = lfb->lv;
struct xfs_log_iovec *vec = &lv->lv_iovecp[lfb->idx];
- struct xlog_op_header *oph = vec->i_addr;
int len;
/*
* Always round up the length to the correct alignment so callers don't
* need to know anything about this log vec layout requirement. This
* means we have to zero the area the data to be written does not cover.
- * This is complicated by fact the payload region is offset into the
- * logvec region by the opheader that tracks the payload.
*/
len = xlog_calc_iovec_len(data_len);
- if (len - data_len != 0) {
- char *buf = vec->i_addr + sizeof(struct xlog_op_header);
-
- memset(buf + data_len, 0, len - data_len);
- }
+ if (len - data_len != 0)
+ memset(vec->i_addr + data_len, 0, len - data_len);
/*
- * The opheader tracks aligned payload length, whilst the logvec tracks
- * the overall region length.
+ * We need to make sure the next buffer pointer is naturally aligned for
+ * the biggest basic data type we put into it. We have already accounted
+ * for this padding when sizing the buffer.
+ *
+ * However, this padding does not get written into the log, and hence we
+ * have to track the space used by the log vectors separately to prevent
+ * log space hangs due to inaccurate accounting (i.e. a leak) of the
+ * used log space through the CIL context ticket. The used log space
+ * also needs to account for the op_header that gets added to each
+ * region.
*/
- oph->oh_len = cpu_to_be32(len);
-
- len += sizeof(struct xlog_op_header);
- lv->lv_buf_used += len;
- lv->lv_bytes += len;
+ lv->lv_buf_used += round_up(len, sizeof(uint64_t));
+ lv->lv_bytes += sizeof(struct xlog_op_header) + len;
vec->i_len = len;
/* Catch buffer overruns */
- ASSERT((void *)lv->lv_buf + lv->lv_bytes <=
+ ASSERT((void *)lv->lv_buf + lv->lv_buf_used <=
(void *)lv + lv->lv_alloc_size);
lfb->idx++;
struct xfs_cil_ctx *ctx)
{
struct xlog *log = ctx->cil->xc_log;
- struct xlog_op_header ophdr = {
- .oh_clientid = XFS_TRANSACTION,
- .oh_tid = cpu_to_be32(ctx->ticket->t_tid),
- .oh_flags = XLOG_COMMIT_TRANS,
- };
struct xfs_log_iovec reg = {
- .i_addr = &ophdr,
- .i_len = sizeof(struct xlog_op_header),
- .i_type = XLOG_REG_TYPE_COMMIT,
+ .i_op = XLOG_OP_COMMIT_TRANS,
+ .i_type = XLOG_REG_TYPE_COMMIT,
};
int error;
}
struct xlog_cil_trans_hdr {
- struct xlog_op_header oph[2];
struct xfs_trans_header thdr;
struct xfs_log_iovec lhdr[2];
};
int num_iovecs)
{
struct xlog_ticket *tic = ctx->ticket;
- __be32 tid = cpu_to_be32(tic->t_tid);
memset(hdr, 0, sizeof(*hdr));
/* Log start record */
- hdr->oph[0].oh_tid = tid;
- hdr->oph[0].oh_clientid = XFS_TRANSACTION;
- hdr->oph[0].oh_flags = XLOG_START_TRANS;
-
- /* log iovec region pointer */
- hdr->lhdr[0].i_addr = &hdr->oph[0];
- hdr->lhdr[0].i_len = sizeof(struct xlog_op_header);
+ hdr->lhdr[0].i_op = XLOG_OP_START_TRANS;
hdr->lhdr[0].i_type = XLOG_REG_TYPE_LRHEADER;
- /* log opheader */
- hdr->oph[1].oh_tid = tid;
- hdr->oph[1].oh_clientid = XFS_TRANSACTION;
- hdr->oph[1].oh_len = cpu_to_be32(sizeof(struct xfs_trans_header));
-
/* transaction header in host byte order format */
hdr->thdr.th_magic = XFS_TRANS_HEADER_MAGIC;
hdr->thdr.th_type = XFS_TRANS_CHECKPOINT;
hdr->thdr.th_num_items = num_iovecs;
/* log iovec region pointer */
- hdr->lhdr[1].i_addr = &hdr->oph[1];
- hdr->lhdr[1].i_len = sizeof(struct xlog_op_header) +
- sizeof(struct xfs_trans_header);
+ hdr->lhdr[1].i_addr = &hdr->thdr;
+ hdr->lhdr[1].i_len = sizeof(struct xfs_trans_header);
hdr->lhdr[1].i_type = XLOG_REG_TYPE_TRANSHDR;
lvhdr->lv_niovecs = 2;
- lvhdr->lv_iovecp = &hdr->lhdr[0];
- lvhdr->lv_bytes = hdr->lhdr[0].i_len + hdr->lhdr[1].i_len;
+ lvhdr->lv_iovecp = hdr->lhdr;
+ lvhdr->lv_bytes = 2 * sizeof(struct xlog_op_header) +
+ sizeof(struct xfs_trans_header);
tic->t_curr_res -= lvhdr->lv_bytes;
}
struct xlog_ticket;
struct xfs_mount;
+enum xlog_op_type {
+ XLOG_OP_TRANS = 0,
+ XLOG_OP_CONT_TRANS,
+ XLOG_OP_START_TRANS,
+ XLOG_OP_COMMIT_TRANS,
+ XLOG_OP_UNMOUNT,
+} __packed;
+
+struct xfs_log_iovec {
+ void *i_addr;/* beginning address of region */
+ int i_len; /* length in bytes of region */
+ enum xlog_op_type i_op; /* log operation */
+ uint16_t i_type; /* type of region (debug only) */
+};
+
/*
* get client id from packed copy.
*