1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Copyright (c) 2008-2010, 2013 Dave Chinner |
4 | * All Rights Reserved. |
5 | */ |
6 | #include "xfs.h" |
7 | #include "xfs_fs.h" |
8 | #include "xfs_shared.h" |
9 | #include "xfs_format.h" |
10 | #include "xfs_log_format.h" |
11 | #include "xfs_trans_resv.h" |
12 | #include "xfs_mount.h" |
13 | #include "xfs_inode.h" |
14 | #include "xfs_trans.h" |
15 | #include "xfs_trans_priv.h" |
16 | #include "xfs_icreate_item.h" |
17 | #include "xfs_log.h" |
18 | #include "xfs_log_priv.h" |
19 | #include "xfs_log_recover.h" |
20 | #include "xfs_ialloc.h" |
21 | #include "xfs_trace.h" |
22 | |
23 | struct kmem_cache *xfs_icreate_cache; /* slab cache that struct xfs_icreate_item objects are allocated from and freed back to */ |
24 | |
25 | static inline struct xfs_icreate_item *ICR_ITEM(struct xfs_log_item *lip) |
26 | { |
27 | return container_of(lip, struct xfs_icreate_item, ic_item); |
28 | } |
29 | |
30 | /* |
31 | * This returns the number of iovecs needed to log the given inode item. |
32 | * |
33 | * We only need one iovec for the icreate log structure. |
34 | */ |
35 | STATIC void |
36 | xfs_icreate_item_size( |
37 | struct xfs_log_item *lip, |
38 | int *nvecs, |
39 | int *nbytes) |
40 | { |
41 | *nvecs += 1; |
42 | *nbytes += sizeof(struct xfs_icreate_log); |
43 | } |
44 | |
45 | /* |
46 | * This is called to fill in the vector of log iovecs for the |
47 | * given inode create log item. |
48 | */ |
49 | STATIC void |
50 | xfs_icreate_item_format( |
51 | struct xfs_log_item *lip, |
52 | struct xfs_log_vec *lv) |
53 | { |
54 | struct xfs_icreate_item *icp = ICR_ITEM(lip); |
55 | struct xfs_log_iovec *vecp = NULL; |
56 | |
57 | xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ICREATE, |
58 | &icp->ic_format, |
59 | sizeof(struct xfs_icreate_log)); |
60 | } |
61 | |
62 | STATIC void |
63 | xfs_icreate_item_release( |
64 | struct xfs_log_item *lip) |
65 | { |
66 | kvfree(addr: ICR_ITEM(lip)->ic_item.li_lv_shadow); |
67 | kmem_cache_free(s: xfs_icreate_cache, objp: ICR_ITEM(lip)); |
68 | } |
69 | |
70 | static const struct xfs_item_ops xfs_icreate_item_ops = { |
71 | .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED, |
72 | .iop_size = xfs_icreate_item_size, |
73 | .iop_format = xfs_icreate_item_format, |
74 | .iop_release = xfs_icreate_item_release, |
75 | }; |
76 | |
77 | |
78 | /* |
79 | * Initialize the inode log item for a newly allocated (in-core) inode. |
80 | * |
81 | * Inode extents can only reside within an AG. Hence specify the starting |
82 | * block for the inode chunk by offset within an AG as well as the |
83 | * length of the allocated extent. |
84 | * |
85 | * This joins the item to the transaction and marks it dirty so |
86 | * that we don't need a separate call to do this, nor does the |
87 | * caller need to know anything about the icreate item. |
88 | */ |
89 | void |
90 | xfs_icreate_log( |
91 | struct xfs_trans *tp, |
92 | xfs_agnumber_t agno, |
93 | xfs_agblock_t agbno, |
94 | unsigned int count, |
95 | unsigned int inode_size, |
96 | xfs_agblock_t length, |
97 | unsigned int generation) |
98 | { |
99 | struct xfs_icreate_item *icp; |
100 | |
101 | icp = kmem_cache_zalloc(k: xfs_icreate_cache, GFP_KERNEL | __GFP_NOFAIL); |
102 | |
103 | xfs_log_item_init(tp->t_mountp, &icp->ic_item, XFS_LI_ICREATE, |
104 | &xfs_icreate_item_ops); |
105 | |
106 | icp->ic_format.icl_type = XFS_LI_ICREATE; |
107 | icp->ic_format.icl_size = 1; /* single vector */ |
108 | icp->ic_format.icl_ag = cpu_to_be32(agno); |
109 | icp->ic_format.icl_agbno = cpu_to_be32(agbno); |
110 | icp->ic_format.icl_count = cpu_to_be32(count); |
111 | icp->ic_format.icl_isize = cpu_to_be32(inode_size); |
112 | icp->ic_format.icl_length = cpu_to_be32(length); |
113 | icp->ic_format.icl_gen = cpu_to_be32(generation); |
114 | |
115 | xfs_trans_add_item(tp, &icp->ic_item); |
116 | tp->t_flags |= XFS_TRANS_DIRTY; |
117 | set_bit(XFS_LI_DIRTY, addr: &icp->ic_item.li_flags); |
118 | } |
119 | |
120 | static enum xlog_recover_reorder |
121 | xlog_recover_icreate_reorder( |
122 | struct xlog_recover_item *item) |
123 | { |
124 | /* |
125 | * Inode allocation buffers must be replayed before subsequent inode |
126 | * items try to modify those buffers. ICREATE items are the logical |
127 | * equivalent of logging a newly initialized inode buffer, so recover |
128 | * these at the same time that we recover logged buffers. |
129 | */ |
130 | return XLOG_REORDER_BUFFER_LIST; |
131 | } |
132 | |
133 | /* |
134 | * This routine is called when an inode create format structure is found in a |
135 | * committed transaction in the log. It's purpose is to initialise the inodes |
136 | * being allocated on disk. This requires us to get inode cluster buffers that |
137 | * match the range to be initialised, stamped with inode templates and written |
138 | * by delayed write so that subsequent modifications will hit the cached buffer |
139 | * and only need writing out at the end of recovery. |
140 | */ |
141 | STATIC int |
142 | xlog_recover_icreate_commit_pass2( |
143 | struct xlog *log, |
144 | struct list_head *buffer_list, |
145 | struct xlog_recover_item *item, |
146 | xfs_lsn_t lsn) |
147 | { |
148 | struct xfs_mount *mp = log->l_mp; |
149 | struct xfs_icreate_log *icl; |
150 | struct xfs_ino_geometry *igeo = M_IGEO(mp); |
151 | xfs_agnumber_t agno; |
152 | xfs_agblock_t agbno; |
153 | unsigned int count; |
154 | unsigned int isize; |
155 | xfs_agblock_t length; |
156 | int bb_per_cluster; |
157 | int cancel_count; |
158 | int nbufs; |
159 | int i; |
160 | |
161 | icl = (struct xfs_icreate_log *)item->ri_buf[0].i_addr; |
162 | if (icl->icl_type != XFS_LI_ICREATE) { |
163 | xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad type" ); |
164 | return -EINVAL; |
165 | } |
166 | |
167 | if (icl->icl_size != 1) { |
168 | xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad icl size" ); |
169 | return -EINVAL; |
170 | } |
171 | |
172 | agno = be32_to_cpu(icl->icl_ag); |
173 | if (agno >= mp->m_sb.sb_agcount) { |
174 | xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agno" ); |
175 | return -EINVAL; |
176 | } |
177 | agbno = be32_to_cpu(icl->icl_agbno); |
178 | if (!agbno || agbno == NULLAGBLOCK || agbno >= mp->m_sb.sb_agblocks) { |
179 | xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agbno" ); |
180 | return -EINVAL; |
181 | } |
182 | isize = be32_to_cpu(icl->icl_isize); |
183 | if (isize != mp->m_sb.sb_inodesize) { |
184 | xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad isize" ); |
185 | return -EINVAL; |
186 | } |
187 | count = be32_to_cpu(icl->icl_count); |
188 | if (!count) { |
189 | xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count" ); |
190 | return -EINVAL; |
191 | } |
192 | length = be32_to_cpu(icl->icl_length); |
193 | if (!length || length >= mp->m_sb.sb_agblocks) { |
194 | xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad length" ); |
195 | return -EINVAL; |
196 | } |
197 | |
198 | /* |
199 | * The inode chunk is either full or sparse and we only support |
200 | * m_ino_geo.ialloc_min_blks sized sparse allocations at this time. |
201 | */ |
202 | if (length != igeo->ialloc_blks && |
203 | length != igeo->ialloc_min_blks) { |
204 | xfs_warn(log->l_mp, |
205 | "%s: unsupported chunk length" , __func__); |
206 | return -EINVAL; |
207 | } |
208 | |
209 | /* verify inode count is consistent with extent length */ |
210 | if ((count >> mp->m_sb.sb_inopblog) != length) { |
211 | xfs_warn(log->l_mp, |
212 | "%s: inconsistent inode count and chunk length" , |
213 | __func__); |
214 | return -EINVAL; |
215 | } |
216 | |
217 | /* |
218 | * The icreate transaction can cover multiple cluster buffers and these |
219 | * buffers could have been freed and reused. Check the individual |
220 | * buffers for cancellation so we don't overwrite anything written after |
221 | * a cancellation. |
222 | */ |
223 | bb_per_cluster = XFS_FSB_TO_BB(mp, igeo->blocks_per_cluster); |
224 | nbufs = length / igeo->blocks_per_cluster; |
225 | for (i = 0, cancel_count = 0; i < nbufs; i++) { |
226 | xfs_daddr_t daddr; |
227 | |
228 | daddr = XFS_AGB_TO_DADDR(mp, agno, |
229 | agbno + i * igeo->blocks_per_cluster); |
230 | if (xlog_is_buffer_cancelled(log, daddr, bb_per_cluster)) |
231 | cancel_count++; |
232 | } |
233 | |
234 | /* |
235 | * We currently only use icreate for a single allocation at a time. This |
236 | * means we should expect either all or none of the buffers to be |
237 | * cancelled. Be conservative and skip replay if at least one buffer is |
238 | * cancelled, but warn the user that something is awry if the buffers |
239 | * are not consistent. |
240 | * |
241 | * XXX: This must be refined to only skip cancelled clusters once we use |
242 | * icreate for multiple chunk allocations. |
243 | */ |
244 | ASSERT(!cancel_count || cancel_count == nbufs); |
245 | if (cancel_count) { |
246 | if (cancel_count != nbufs) |
247 | xfs_warn(mp, |
248 | "WARNING: partial inode chunk cancellation, skipped icreate." ); |
249 | trace_xfs_log_recover_icreate_cancel(log, in_f: icl); |
250 | return 0; |
251 | } |
252 | |
253 | trace_xfs_log_recover_icreate_recover(log, in_f: icl); |
254 | return xfs_ialloc_inode_init(mp, NULL, buffer_list, count, agno, agbno, |
255 | length, be32_to_cpu(icl->icl_gen)); |
256 | } |
257 | |
258 | const struct xlog_recover_item_ops xlog_icreate_item_ops = { |
259 | .item_type = XFS_LI_ICREATE, |
260 | .reorder = xlog_recover_icreate_reorder, |
261 | .commit_pass2 = xlog_recover_icreate_commit_pass2, |
262 | }; |
263 | |