1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. |
4 | * Copyright (C) 2010 Red Hat, Inc. |
5 | * All Rights Reserved. |
6 | */ |
7 | #include "xfs.h" |
8 | #include "xfs_fs.h" |
9 | #include "xfs_shared.h" |
10 | #include "xfs_format.h" |
11 | #include "xfs_log_format.h" |
12 | #include "xfs_trans_resv.h" |
13 | #include "xfs_mount.h" |
14 | #include "xfs_da_format.h" |
15 | #include "xfs_da_btree.h" |
16 | #include "xfs_inode.h" |
17 | #include "xfs_bmap_btree.h" |
18 | #include "xfs_quota.h" |
19 | #include "xfs_trans.h" |
20 | #include "xfs_qm.h" |
21 | #include "xfs_trans_space.h" |
22 | #include "xfs_rtbitmap.h" |
23 | |
/*
 * Readable selectors for the boolean 'alloc' argument of
 * xfs_calc_inode_chunk_res().
 */
#define _ALLOC true	/* reserve for an inode chunk allocation */
#define _FREE false	/* reserve for an inode chunk free */
26 | |
27 | /* |
28 | * A buffer has a format structure overhead in the log in addition |
29 | * to the data, so we need to take this into account when reserving |
30 | * space in a transaction for a buffer. Round the space required up |
31 | * to a multiple of 128 bytes so that we don't change the historical |
32 | * reservation that has been used for this overhead. |
33 | */ |
34 | STATIC uint |
35 | xfs_buf_log_overhead(void) |
36 | { |
37 | return round_up(sizeof(struct xlog_op_header) + |
38 | sizeof(struct xfs_buf_log_format), 128); |
39 | } |
40 | |
41 | /* |
 * Calculate the transaction log reservation per item in bytes.
43 | * |
44 | * The nbufs argument is used to indicate the number of items that |
45 | * will be changed in a transaction. size is used to tell how many |
46 | * bytes should be reserved per item. |
47 | */ |
48 | STATIC uint |
49 | xfs_calc_buf_res( |
50 | uint nbufs, |
51 | uint size) |
52 | { |
53 | return nbufs * (size + xfs_buf_log_overhead()); |
54 | } |
55 | |
56 | /* |
57 | * Per-extent log reservation for the btree changes involved in freeing or |
58 | * allocating an extent. In classic XFS there were two trees that will be |
59 | * modified (bnobt + cntbt). With rmap enabled, there are three trees |
60 | * (rmapbt). The number of blocks reserved is based on the formula: |
61 | * |
62 | * num trees * ((2 blocks/level * max depth) - 1) |
63 | * |
64 | * Keep in mind that max depth is calculated separately for each type of tree. |
65 | */ |
66 | uint |
67 | xfs_allocfree_block_count( |
68 | struct xfs_mount *mp, |
69 | uint num_ops) |
70 | { |
71 | uint blocks; |
72 | |
73 | blocks = num_ops * 2 * (2 * mp->m_alloc_maxlevels - 1); |
74 | if (xfs_has_rmapbt(mp)) |
75 | blocks += num_ops * (2 * mp->m_rmap_maxlevels - 1); |
76 | |
77 | return blocks; |
78 | } |
79 | |
80 | /* |
81 | * Per-extent log reservation for refcount btree changes. These are never done |
82 | * in the same transaction as an allocation or a free, so we compute them |
83 | * separately. |
84 | */ |
85 | static unsigned int |
86 | xfs_refcountbt_block_count( |
87 | struct xfs_mount *mp, |
88 | unsigned int num_ops) |
89 | { |
90 | return num_ops * (2 * mp->m_refc_maxlevels - 1); |
91 | } |
92 | |
93 | /* |
94 | * Logging inodes is really tricksy. They are logged in memory format, |
95 | * which means that what we write into the log doesn't directly translate into |
96 | * the amount of space they use on disk. |
97 | * |
98 | * Case in point - btree format forks in memory format use more space than the |
99 | * on-disk format. In memory, the buffer contains a normal btree block header so |
100 | * the btree code can treat it as though it is just another generic buffer. |
101 | * However, when we write it to the inode fork, we don't write all of this |
102 | * header as it isn't needed. e.g. the root is only ever in the inode, so |
103 | * there's no need for sibling pointers which would waste 16 bytes of space. |
104 | * |
 * Hence when we have an inode with a maximally sized btree format fork, the
 * amount of information we actually log is greater than the size of the inode
107 | * on disk. Hence we need an inode reservation function that calculates all this |
108 | * correctly. So, we log: |
109 | * |
110 | * - 4 log op headers for object |
111 | * - for the ilf, the inode core and 2 forks |
112 | * - inode log format object |
113 | * - the inode core |
114 | * - two inode forks containing bmap btree root blocks. |
115 | * - the btree data contained by both forks will fit into the inode size, |
116 | * hence when combined with the inode core above, we have a total of the |
117 | * actual inode size. |
118 | * - the BMBT headers need to be accounted separately, as they are |
119 | * additional to the records and pointers that fit inside the inode |
120 | * forks. |
121 | */ |
122 | STATIC uint |
123 | xfs_calc_inode_res( |
124 | struct xfs_mount *mp, |
125 | uint ninodes) |
126 | { |
127 | return ninodes * |
128 | (4 * sizeof(struct xlog_op_header) + |
129 | sizeof(struct xfs_inode_log_format) + |
130 | mp->m_sb.sb_inodesize + |
131 | 2 * XFS_BMBT_BLOCK_LEN(mp)); |
132 | } |
133 | |
134 | /* |
135 | * Inode btree record insertion/removal modifies the inode btree and free space |
136 | * btrees (since the inobt does not use the agfl). This requires the following |
137 | * reservation: |
138 | * |
139 | * the inode btree: max depth * blocksize |
140 | * the allocation btrees: 2 trees * (max depth - 1) * block size |
141 | * |
142 | * The caller must account for SB and AG header modifications, etc. |
143 | */ |
144 | STATIC uint |
145 | xfs_calc_inobt_res( |
146 | struct xfs_mount *mp) |
147 | { |
148 | return xfs_calc_buf_res(M_IGEO(mp)->inobt_maxlevels, |
149 | XFS_FSB_TO_B(mp, 1)) + |
150 | xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1), |
151 | XFS_FSB_TO_B(mp, 1)); |
152 | } |
153 | |
154 | /* |
155 | * The free inode btree is a conditional feature. The behavior differs slightly |
156 | * from that of the traditional inode btree in that the finobt tracks records |
157 | * for inode chunks with at least one free inode. A record can be removed from |
158 | * the tree during individual inode allocation. Therefore the finobt |
159 | * reservation is unconditional for both the inode chunk allocation and |
160 | * individual inode allocation (modify) cases. |
161 | * |
162 | * Behavior aside, the reservation for finobt modification is equivalent to the |
163 | * traditional inobt: cover a full finobt shape change plus block allocation. |
164 | */ |
165 | STATIC uint |
166 | xfs_calc_finobt_res( |
167 | struct xfs_mount *mp) |
168 | { |
169 | if (!xfs_has_finobt(mp)) |
170 | return 0; |
171 | |
172 | return xfs_calc_inobt_res(mp); |
173 | } |
174 | |
175 | /* |
176 | * Calculate the reservation required to allocate or free an inode chunk. This |
177 | * includes: |
178 | * |
179 | * the allocation btrees: 2 trees * (max depth - 1) * block size |
180 | * the inode chunk: m_ino_geo.ialloc_blks * N |
181 | * |
182 | * The size N of the inode chunk reservation depends on whether it is for |
183 | * allocation or free and which type of create transaction is in use. An inode |
184 | * chunk free always invalidates the buffers and only requires reservation for |
185 | * headers (N == 0). An inode chunk allocation requires a chunk sized |
186 | * reservation on v4 and older superblocks to initialize the chunk. No chunk |
187 | * reservation is required for allocation on v5 supers, which use ordered |
188 | * buffers to initialize. |
189 | */ |
190 | STATIC uint |
191 | xfs_calc_inode_chunk_res( |
192 | struct xfs_mount *mp, |
193 | bool alloc) |
194 | { |
195 | uint res, size = 0; |
196 | |
197 | res = xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1), |
198 | XFS_FSB_TO_B(mp, 1)); |
199 | if (alloc) { |
200 | /* icreate tx uses ordered buffers */ |
201 | if (xfs_has_v3inodes(mp)) |
202 | return res; |
203 | size = XFS_FSB_TO_B(mp, 1); |
204 | } |
205 | |
206 | res += xfs_calc_buf_res(M_IGEO(mp)->ialloc_blks, size); |
207 | return res; |
208 | } |
209 | |
210 | /* |
211 | * Per-extent log reservation for the btree changes involved in freeing or |
212 | * allocating a realtime extent. We have to be able to log as many rtbitmap |
213 | * blocks as needed to mark inuse XFS_BMBT_MAX_EXTLEN blocks' worth of realtime |
214 | * extents, as well as the realtime summary block. |
215 | */ |
216 | static unsigned int |
217 | xfs_rtalloc_block_count( |
218 | struct xfs_mount *mp, |
219 | unsigned int num_ops) |
220 | { |
221 | unsigned int rtbmp_blocks; |
222 | xfs_rtxlen_t rtxlen; |
223 | |
224 | rtxlen = xfs_extlen_to_rtxlen(mp, XFS_MAX_BMBT_EXTLEN); |
225 | rtbmp_blocks = xfs_rtbitmap_blockcount(mp, rtxlen); |
226 | return (rtbmp_blocks + 1) * num_ops; |
227 | } |
228 | |
229 | /* |
230 | * Various log reservation values. |
231 | * |
232 | * These are based on the size of the file system block because that is what |
233 | * most transactions manipulate. Each adds in an additional 128 bytes per |
234 | * item logged to try to account for the overhead of the transaction mechanism. |
235 | * |
236 | * Note: Most of the reservations underestimate the number of allocation |
237 | * groups into which they could free extents in the xfs_defer_finish() call. |
238 | * This is because the number in the worst case is quite high and quite |
239 | * unusual. In order to fix this we need to change xfs_defer_finish() to free |
240 | * extents in only a single AG at a time. This will require changes to the |
241 | * EFI code as well, however, so that the EFI for the extents not freed is |
242 | * logged again in each transaction. See SGI PV #261917. |
243 | * |
244 | * Reservation functions here avoid a huge stack in xfs_trans_init due to |
245 | * register overflow from temporaries in the calculations. |
246 | */ |
247 | |
248 | /* |
249 | * Compute the log reservation required to handle the refcount update |
250 | * transaction. Refcount updates are always done via deferred log items. |
251 | * |
252 | * This is calculated as: |
253 | * Data device refcount updates (t1): |
254 | * the agfs of the ags containing the blocks: nr_ops * sector size |
255 | * the refcount btrees: nr_ops * 1 trees * (2 * max depth - 1) * block size |
256 | */ |
257 | static unsigned int |
258 | xfs_calc_refcountbt_reservation( |
259 | struct xfs_mount *mp, |
260 | unsigned int nr_ops) |
261 | { |
262 | unsigned int blksz = XFS_FSB_TO_B(mp, 1); |
263 | |
264 | if (!xfs_has_reflink(mp)) |
265 | return 0; |
266 | |
267 | return xfs_calc_buf_res(nr_ops, mp->m_sb.sb_sectsize) + |
268 | xfs_calc_buf_res(xfs_refcountbt_block_count(mp, num_ops: nr_ops), blksz); |
269 | } |
270 | |
271 | /* |
272 | * In a write transaction we can allocate a maximum of 2 |
273 | * extents. This gives (t1): |
274 | * the inode getting the new extents: inode size |
275 | * the inode's bmap btree: max depth * block size |
276 | * the agfs of the ags from which the extents are allocated: 2 * sector |
277 | * the superblock free block counter: sector size |
278 | * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size |
279 | * Or, if we're writing to a realtime file (t2): |
280 | * the inode getting the new extents: inode size |
281 | * the inode's bmap btree: max depth * block size |
282 | * the agfs of the ags from which the extents are allocated: 2 * sector |
283 | * the superblock free block counter: sector size |
284 | * the realtime bitmap: ((XFS_BMBT_MAX_EXTLEN / rtextsize) / NBBY) bytes |
285 | * the realtime summary: 1 block |
286 | * the allocation btrees: 2 trees * (2 * max depth - 1) * block size |
287 | * And the bmap_finish transaction can free bmap blocks in a join (t3): |
288 | * the agfs of the ags containing the blocks: 2 * sector size |
289 | * the agfls of the ags containing the blocks: 2 * sector size |
290 | * the super block free block counter: sector size |
291 | * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size |
292 | * And any refcount updates that happen in a separate transaction (t4). |
293 | */ |
294 | STATIC uint |
295 | xfs_calc_write_reservation( |
296 | struct xfs_mount *mp, |
297 | bool for_minlogsize) |
298 | { |
299 | unsigned int t1, t2, t3, t4; |
300 | unsigned int blksz = XFS_FSB_TO_B(mp, 1); |
301 | |
302 | t1 = xfs_calc_inode_res(mp, 1) + |
303 | xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), blksz) + |
304 | xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + |
305 | xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2), blksz); |
306 | |
307 | if (xfs_has_realtime(mp)) { |
308 | t2 = xfs_calc_inode_res(mp, 1) + |
309 | xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), |
310 | blksz) + |
311 | xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + |
312 | xfs_calc_buf_res(xfs_rtalloc_block_count(mp, 1), blksz) + |
313 | xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1), blksz); |
314 | } else { |
315 | t2 = 0; |
316 | } |
317 | |
318 | t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) + |
319 | xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2), blksz); |
320 | |
321 | /* |
322 | * In the early days of reflink, we included enough reservation to log |
323 | * two refcountbt splits for each transaction. The codebase runs |
324 | * refcountbt updates in separate transactions now, so to compute the |
325 | * minimum log size, add the refcountbtree splits back to t1 and t3 and |
326 | * do not account them separately as t4. Reflink did not support |
327 | * realtime when the reservations were established, so no adjustment to |
328 | * t2 is needed. |
329 | */ |
330 | if (for_minlogsize) { |
331 | unsigned int adj = 0; |
332 | |
333 | if (xfs_has_reflink(mp)) |
334 | adj = xfs_calc_buf_res( |
335 | xfs_refcountbt_block_count(mp, 2), |
336 | blksz); |
337 | t1 += adj; |
338 | t3 += adj; |
339 | return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3); |
340 | } |
341 | |
342 | t4 = xfs_calc_refcountbt_reservation(mp, 1); |
343 | return XFS_DQUOT_LOGRES(mp) + max(t4, max3(t1, t2, t3)); |
344 | } |
345 | |
346 | unsigned int |
347 | xfs_calc_write_reservation_minlogsize( |
348 | struct xfs_mount *mp) |
349 | { |
350 | return xfs_calc_write_reservation(mp, true); |
351 | } |
352 | |
353 | /* |
354 | * In truncating a file we free up to two extents at once. We can modify (t1): |
355 | * the inode being truncated: inode size |
356 | * the inode's bmap btree: (max depth + 1) * block size |
357 | * And the bmap_finish transaction can free the blocks and bmap blocks (t2): |
358 | * the agf for each of the ags: 4 * sector size |
359 | * the agfl for each of the ags: 4 * sector size |
360 | * the super block to reflect the freed blocks: sector size |
361 | * worst case split in allocation btrees per extent assuming 4 extents: |
362 | * 4 exts * 2 trees * (2 * max depth - 1) * block size |
363 | * Or, if it's a realtime file (t3): |
364 | * the agf for each of the ags: 2 * sector size |
365 | * the agfl for each of the ags: 2 * sector size |
366 | * the super block to reflect the freed blocks: sector size |
367 | * the realtime bitmap: |
368 | * 2 exts * ((XFS_BMBT_MAX_EXTLEN / rtextsize) / NBBY) bytes |
369 | * the realtime summary: 2 exts * 1 block |
370 | * worst case split in allocation btrees per extent assuming 2 extents: |
371 | * 2 exts * 2 trees * (2 * max depth - 1) * block size |
372 | * And any refcount updates that happen in a separate transaction (t4). |
373 | */ |
374 | STATIC uint |
375 | xfs_calc_itruncate_reservation( |
376 | struct xfs_mount *mp, |
377 | bool for_minlogsize) |
378 | { |
379 | unsigned int t1, t2, t3, t4; |
380 | unsigned int blksz = XFS_FSB_TO_B(mp, 1); |
381 | |
382 | t1 = xfs_calc_inode_res(mp, 1) + |
383 | xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1, blksz); |
384 | |
385 | t2 = xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) + |
386 | xfs_calc_buf_res(xfs_allocfree_block_count(mp, 4), blksz); |
387 | |
388 | if (xfs_has_realtime(mp)) { |
389 | t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) + |
390 | xfs_calc_buf_res(xfs_rtalloc_block_count(mp, 2), blksz) + |
391 | xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2), blksz); |
392 | } else { |
393 | t3 = 0; |
394 | } |
395 | |
396 | /* |
397 | * In the early days of reflink, we included enough reservation to log |
398 | * four refcountbt splits in the same transaction as bnobt/cntbt |
399 | * updates. The codebase runs refcountbt updates in separate |
400 | * transactions now, so to compute the minimum log size, add the |
401 | * refcount btree splits back here and do not compute them separately |
402 | * as t4. Reflink did not support realtime when the reservations were |
403 | * established, so do not adjust t3. |
404 | */ |
405 | if (for_minlogsize) { |
406 | if (xfs_has_reflink(mp)) |
407 | t2 += xfs_calc_buf_res( |
408 | xfs_refcountbt_block_count(mp, 4), |
409 | blksz); |
410 | |
411 | return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3); |
412 | } |
413 | |
414 | t4 = xfs_calc_refcountbt_reservation(mp, 2); |
415 | return XFS_DQUOT_LOGRES(mp) + max(t4, max3(t1, t2, t3)); |
416 | } |
417 | |
418 | unsigned int |
419 | xfs_calc_itruncate_reservation_minlogsize( |
420 | struct xfs_mount *mp) |
421 | { |
422 | return xfs_calc_itruncate_reservation(mp, true); |
423 | } |
424 | |
425 | /* |
 * In renaming a file we can modify:
427 | * the five inodes involved: 5 * inode size |
428 | * the two directory btrees: 2 * (max depth + v2) * dir block size |
429 | * the two directory bmap btrees: 2 * max depth * block size |
430 | * And the bmap_finish transaction can free dir and bmap blocks (two sets |
431 | * of bmap blocks) giving: |
432 | * the agf for the ags in which the blocks live: 3 * sector size |
433 | * the agfl for the ags in which the blocks live: 3 * sector size |
434 | * the superblock for the free block count: sector size |
435 | * the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size |
436 | */ |
437 | STATIC uint |
438 | xfs_calc_rename_reservation( |
439 | struct xfs_mount *mp) |
440 | { |
441 | return XFS_DQUOT_LOGRES(mp) + |
442 | max((xfs_calc_inode_res(mp, 5) + |
443 | xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp), |
444 | XFS_FSB_TO_B(mp, 1))), |
445 | (xfs_calc_buf_res(7, mp->m_sb.sb_sectsize) + |
446 | xfs_calc_buf_res(xfs_allocfree_block_count(mp, 3), |
447 | XFS_FSB_TO_B(mp, 1)))); |
448 | } |
449 | |
450 | /* |
451 | * For removing an inode from unlinked list at first, we can modify: |
452 | * the agi hash list and counters: sector size |
453 | * the on disk inode before ours in the agi hash list: inode cluster size |
454 | * the on disk inode in the agi hash list: inode cluster size |
455 | */ |
456 | STATIC uint |
457 | xfs_calc_iunlink_remove_reservation( |
458 | struct xfs_mount *mp) |
459 | { |
460 | return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + |
461 | 2 * M_IGEO(mp)->inode_cluster_size; |
462 | } |
463 | |
464 | /* |
465 | * For creating a link to an inode: |
466 | * the parent directory inode: inode size |
467 | * the linked inode: inode size |
468 | * the directory btree could split: (max depth + v2) * dir block size |
469 | * the directory bmap btree could join or split: (max depth + v2) * blocksize |
470 | * And the bmap_finish transaction can free some bmap blocks giving: |
471 | * the agf for the ag in which the blocks live: sector size |
472 | * the agfl for the ag in which the blocks live: sector size |
473 | * the superblock for the free block count: sector size |
474 | * the allocation btrees: 2 trees * (2 * max depth - 1) * block size |
475 | */ |
476 | STATIC uint |
477 | xfs_calc_link_reservation( |
478 | struct xfs_mount *mp) |
479 | { |
480 | return XFS_DQUOT_LOGRES(mp) + |
481 | xfs_calc_iunlink_remove_reservation(mp) + |
482 | max((xfs_calc_inode_res(mp, 2) + |
483 | xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), |
484 | XFS_FSB_TO_B(mp, 1))), |
485 | (xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + |
486 | xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1), |
487 | XFS_FSB_TO_B(mp, 1)))); |
488 | } |
489 | |
490 | /* |
491 | * For adding an inode to unlinked list we can modify: |
492 | * the agi hash list: sector size |
493 | * the on disk inode: inode cluster size |
494 | */ |
495 | STATIC uint |
496 | xfs_calc_iunlink_add_reservation(xfs_mount_t *mp) |
497 | { |
498 | return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + |
499 | M_IGEO(mp)->inode_cluster_size; |
500 | } |
501 | |
502 | /* |
503 | * For removing a directory entry we can modify: |
504 | * the parent directory inode: inode size |
505 | * the removed inode: inode size |
506 | * the directory btree could join: (max depth + v2) * dir block size |
507 | * the directory bmap btree could join or split: (max depth + v2) * blocksize |
508 | * And the bmap_finish transaction can free the dir and bmap blocks giving: |
509 | * the agf for the ag in which the blocks live: 2 * sector size |
510 | * the agfl for the ag in which the blocks live: 2 * sector size |
511 | * the superblock for the free block count: sector size |
512 | * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size |
513 | */ |
514 | STATIC uint |
515 | xfs_calc_remove_reservation( |
516 | struct xfs_mount *mp) |
517 | { |
518 | return XFS_DQUOT_LOGRES(mp) + |
519 | xfs_calc_iunlink_add_reservation(mp) + |
520 | max((xfs_calc_inode_res(mp, 2) + |
521 | xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), |
522 | XFS_FSB_TO_B(mp, 1))), |
523 | (xfs_calc_buf_res(4, mp->m_sb.sb_sectsize) + |
524 | xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2), |
525 | XFS_FSB_TO_B(mp, 1)))); |
526 | } |
527 | |
528 | /* |
529 | * For create, break it in to the two cases that the transaction |
530 | * covers. We start with the modify case - allocation done by modification |
531 | * of the state of existing inodes - and the allocation case. |
532 | */ |
533 | |
534 | /* |
535 | * For create we can modify: |
536 | * the parent directory inode: inode size |
537 | * the new inode: inode size |
538 | * the inode btree entry: block size |
539 | * the superblock for the nlink flag: sector size |
540 | * the directory btree: (max depth + v2) * dir block size |
541 | * the directory inode's bmap btree: (max depth + v2) * block size |
542 | * the finobt (record modification and allocation btrees) |
543 | */ |
544 | STATIC uint |
545 | xfs_calc_create_resv_modify( |
546 | struct xfs_mount *mp) |
547 | { |
548 | return xfs_calc_inode_res(mp, 2) + |
549 | xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + |
550 | (uint)XFS_FSB_TO_B(mp, 1) + |
551 | xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1)) + |
552 | xfs_calc_finobt_res(mp); |
553 | } |
554 | |
555 | /* |
556 | * For icreate we can allocate some inodes giving: |
557 | * the agi and agf of the ag getting the new inodes: 2 * sectorsize |
558 | * the superblock for the nlink flag: sector size |
559 | * the inode chunk (allocation, optional init) |
560 | * the inobt (record insertion) |
561 | * the finobt (optional, record insertion) |
562 | */ |
563 | STATIC uint |
564 | xfs_calc_icreate_resv_alloc( |
565 | struct xfs_mount *mp) |
566 | { |
567 | return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + |
568 | mp->m_sb.sb_sectsize + |
569 | xfs_calc_inode_chunk_res(mp, _ALLOC) + |
570 | xfs_calc_inobt_res(mp) + |
571 | xfs_calc_finobt_res(mp); |
572 | } |
573 | |
574 | STATIC uint |
575 | xfs_calc_icreate_reservation(xfs_mount_t *mp) |
576 | { |
577 | return XFS_DQUOT_LOGRES(mp) + |
578 | max(xfs_calc_icreate_resv_alloc(mp), |
579 | xfs_calc_create_resv_modify(mp)); |
580 | } |
581 | |
582 | STATIC uint |
583 | xfs_calc_create_tmpfile_reservation( |
584 | struct xfs_mount *mp) |
585 | { |
586 | uint res = XFS_DQUOT_LOGRES(mp); |
587 | |
588 | res += xfs_calc_icreate_resv_alloc(mp); |
589 | return res + xfs_calc_iunlink_add_reservation(mp); |
590 | } |
591 | |
592 | /* |
593 | * Making a new directory is the same as creating a new file. |
594 | */ |
595 | STATIC uint |
596 | xfs_calc_mkdir_reservation( |
597 | struct xfs_mount *mp) |
598 | { |
599 | return xfs_calc_icreate_reservation(mp); |
600 | } |
601 | |
602 | |
603 | /* |
 * Making a new symlink is the same as creating a new file, but
605 | * with the added blocks for remote symlink data which can be up to 1kB in |
606 | * length (XFS_SYMLINK_MAXLEN). |
607 | */ |
608 | STATIC uint |
609 | xfs_calc_symlink_reservation( |
610 | struct xfs_mount *mp) |
611 | { |
612 | return xfs_calc_icreate_reservation(mp) + |
613 | xfs_calc_buf_res(1, XFS_SYMLINK_MAXLEN); |
614 | } |
615 | |
616 | /* |
617 | * In freeing an inode we can modify: |
618 | * the inode being freed: inode size |
619 | * the super block free inode counter, AGF and AGFL: sector size |
620 | * the on disk inode (agi unlinked list removal) |
621 | * the inode chunk (invalidated, headers only) |
622 | * the inode btree |
623 | * the finobt (record insertion, removal or modification) |
624 | * |
625 | * Note that the inode chunk res. includes an allocfree res. for freeing of the |
626 | * inode chunk. This is technically extraneous because the inode chunk free is |
627 | * deferred (it occurs after a transaction roll). Include the extra reservation |
628 | * anyways since we've had reports of ifree transaction overruns due to too many |
629 | * agfl fixups during inode chunk frees. |
630 | */ |
631 | STATIC uint |
632 | xfs_calc_ifree_reservation( |
633 | struct xfs_mount *mp) |
634 | { |
635 | return XFS_DQUOT_LOGRES(mp) + |
636 | xfs_calc_inode_res(mp, 1) + |
637 | xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + |
638 | xfs_calc_iunlink_remove_reservation(mp) + |
639 | xfs_calc_inode_chunk_res(mp, _FREE) + |
640 | xfs_calc_inobt_res(mp) + |
641 | xfs_calc_finobt_res(mp); |
642 | } |
643 | |
644 | /* |
645 | * When only changing the inode we log the inode and possibly the superblock |
646 | * We also add a bit of slop for the transaction stuff. |
647 | */ |
648 | STATIC uint |
649 | xfs_calc_ichange_reservation( |
650 | struct xfs_mount *mp) |
651 | { |
652 | return XFS_DQUOT_LOGRES(mp) + |
653 | xfs_calc_inode_res(mp, 1) + |
654 | xfs_calc_buf_res(1, mp->m_sb.sb_sectsize); |
655 | |
656 | } |
657 | |
658 | /* |
659 | * Growing the data section of the filesystem. |
660 | * superblock |
661 | * agi and agf |
662 | * allocation btrees |
663 | */ |
664 | STATIC uint |
665 | xfs_calc_growdata_reservation( |
666 | struct xfs_mount *mp) |
667 | { |
668 | return xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + |
669 | xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1), |
670 | XFS_FSB_TO_B(mp, 1)); |
671 | } |
672 | |
673 | /* |
674 | * Growing the rt section of the filesystem. |
675 | * In the first set of transactions (ALLOC) we allocate space to the |
676 | * bitmap or summary files. |
677 | * superblock: sector size |
678 | * agf of the ag from which the extent is allocated: sector size |
679 | * bmap btree for bitmap/summary inode: max depth * blocksize |
680 | * bitmap/summary inode: inode size |
681 | * allocation btrees for 1 block alloc: 2 * (2 * maxdepth - 1) * blocksize |
682 | */ |
683 | STATIC uint |
684 | xfs_calc_growrtalloc_reservation( |
685 | struct xfs_mount *mp) |
686 | { |
687 | return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + |
688 | xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), |
689 | XFS_FSB_TO_B(mp, 1)) + |
690 | xfs_calc_inode_res(mp, 1) + |
691 | xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1), |
692 | XFS_FSB_TO_B(mp, 1)); |
693 | } |
694 | |
695 | /* |
696 | * Growing the rt section of the filesystem. |
697 | * In the second set of transactions (ZERO) we zero the new metadata blocks. |
698 | * one bitmap/summary block: blocksize |
699 | */ |
700 | STATIC uint |
701 | xfs_calc_growrtzero_reservation( |
702 | struct xfs_mount *mp) |
703 | { |
704 | return xfs_calc_buf_res(1, mp->m_sb.sb_blocksize); |
705 | } |
706 | |
707 | /* |
708 | * Growing the rt section of the filesystem. |
709 | * In the third set of transactions (FREE) we update metadata without |
710 | * allocating any new blocks. |
711 | * superblock: sector size |
712 | * bitmap inode: inode size |
713 | * summary inode: inode size |
714 | * one bitmap block: blocksize |
715 | * summary blocks: new summary size |
716 | */ |
717 | STATIC uint |
718 | xfs_calc_growrtfree_reservation( |
719 | struct xfs_mount *mp) |
720 | { |
721 | return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + |
722 | xfs_calc_inode_res(mp, 2) + |
723 | xfs_calc_buf_res(1, mp->m_sb.sb_blocksize) + |
724 | xfs_calc_buf_res(1, mp->m_rsumsize); |
725 | } |
726 | |
727 | /* |
728 | * Logging the inode modification timestamp on a synchronous write. |
729 | * inode |
730 | */ |
731 | STATIC uint |
732 | xfs_calc_swrite_reservation( |
733 | struct xfs_mount *mp) |
734 | { |
735 | return xfs_calc_inode_res(mp, 1); |
736 | } |
737 | |
738 | /* |
739 | * Logging the inode mode bits when writing a setuid/setgid file |
740 | * inode |
741 | */ |
742 | STATIC uint |
743 | xfs_calc_writeid_reservation( |
744 | struct xfs_mount *mp) |
745 | { |
746 | return xfs_calc_inode_res(mp, 1); |
747 | } |
748 | |
749 | /* |
750 | * Converting the inode from non-attributed to attributed. |
751 | * the inode being converted: inode size |
752 | * agf block and superblock (for block allocation) |
753 | * the new block (directory sized) |
754 | * bmap blocks for the new directory block |
755 | * allocation btrees |
756 | */ |
757 | STATIC uint |
758 | xfs_calc_addafork_reservation( |
759 | struct xfs_mount *mp) |
760 | { |
761 | return XFS_DQUOT_LOGRES(mp) + |
762 | xfs_calc_inode_res(mp, 1) + |
763 | xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + |
764 | xfs_calc_buf_res(1, mp->m_dir_geo->blksize) + |
765 | xfs_calc_buf_res(XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1, |
766 | XFS_FSB_TO_B(mp, 1)) + |
767 | xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1), |
768 | XFS_FSB_TO_B(mp, 1)); |
769 | } |
770 | |
771 | /* |
772 | * Removing the attribute fork of a file |
773 | * the inode being truncated: inode size |
774 | * the inode's bmap btree: max depth * block size |
775 | * And the bmap_finish transaction can free the blocks and bmap blocks: |
776 | * the agf for each of the ags: 4 * sector size |
777 | * the agfl for each of the ags: 4 * sector size |
778 | * the super block to reflect the freed blocks: sector size |
779 | * worst case split in allocation btrees per extent assuming 4 extents: |
780 | * 4 exts * 2 trees * (2 * max depth - 1) * block size |
781 | */ |
782 | STATIC uint |
783 | xfs_calc_attrinval_reservation( |
784 | struct xfs_mount *mp) |
785 | { |
786 | return max((xfs_calc_inode_res(mp, 1) + |
787 | xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK), |
788 | XFS_FSB_TO_B(mp, 1))), |
789 | (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) + |
790 | xfs_calc_buf_res(xfs_allocfree_block_count(mp, 4), |
791 | XFS_FSB_TO_B(mp, 1)))); |
792 | } |
793 | |
794 | /* |
795 | * Setting an attribute at mount time. |
796 | * the inode getting the attribute |
797 | * the superblock for allocations |
798 | * the agfs extents are allocated from |
799 | * the attribute btree * max depth |
800 | * the inode allocation btree |
801 | * Since attribute transaction space is dependent on the size of the attribute, |
 * the calculation is done partially at mount time and partially at runtime (see
803 | * below). |
804 | */ |
805 | STATIC uint |
806 | xfs_calc_attrsetm_reservation( |
807 | struct xfs_mount *mp) |
808 | { |
809 | return XFS_DQUOT_LOGRES(mp) + |
810 | xfs_calc_inode_res(mp, 1) + |
811 | xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + |
812 | xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH, XFS_FSB_TO_B(mp, 1)); |
813 | } |
814 | |
815 | /* |
816 | * Setting an attribute at runtime, transaction space unit per block. |
817 | * the superblock for allocations: sector size |
818 | * the inode bmap btree could join or split: max depth * block size |
819 | * Since the runtime attribute transaction space is dependent on the total |
820 | * blocks needed for the 1st bmap, here we calculate out the space unit for |
821 | * one block so that the caller could figure out the total space according |
 * to the attribute extent length in blocks by:
823 | * ext * M_RES(mp)->tr_attrsetrt.tr_logres |
824 | */ |
825 | STATIC uint |
826 | xfs_calc_attrsetrt_reservation( |
827 | struct xfs_mount *mp) |
828 | { |
829 | return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + |
830 | xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK), |
831 | XFS_FSB_TO_B(mp, 1)); |
832 | } |
833 | |
834 | /* |
835 | * Removing an attribute. |
836 | * the inode: inode size |
837 | * the attribute btree could join: max depth * block size |
838 | * the inode bmap btree could join or split: max depth * block size |
839 | * And the bmap_finish transaction can free the attr blocks freed giving: |
840 | * the agf for the ag in which the blocks live: 2 * sector size |
841 | * the agfl for the ag in which the blocks live: 2 * sector size |
842 | * the superblock for the free block count: sector size |
843 | * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size |
844 | */ |
845 | STATIC uint |
846 | xfs_calc_attrrm_reservation( |
847 | struct xfs_mount *mp) |
848 | { |
849 | return XFS_DQUOT_LOGRES(mp) + |
850 | max((xfs_calc_inode_res(mp, 1) + |
851 | xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH, |
852 | XFS_FSB_TO_B(mp, 1)) + |
853 | (uint)XFS_FSB_TO_B(mp, |
854 | XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + |
855 | xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), 0)), |
856 | (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) + |
857 | xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2), |
858 | XFS_FSB_TO_B(mp, 1)))); |
859 | } |
860 | |
861 | /* |
862 | * Clearing a bad agino number in an agi hash bucket. |
863 | */ |
864 | STATIC uint |
865 | xfs_calc_clear_agi_bucket_reservation( |
866 | struct xfs_mount *mp) |
867 | { |
868 | return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize); |
869 | } |
870 | |
871 | /* |
872 | * Adjusting quota limits. |
873 | * the disk quota buffer: sizeof(struct xfs_disk_dquot) |
874 | */ |
875 | STATIC uint |
876 | xfs_calc_qm_setqlim_reservation(void) |
877 | { |
878 | return xfs_calc_buf_res(1, sizeof(struct xfs_disk_dquot)); |
879 | } |
880 | |
881 | /* |
882 | * Allocating quota on disk if needed. |
883 | * the write transaction log space for quota file extent allocation |
884 | * the unit of quota allocation: one system block size |
885 | */ |
886 | STATIC uint |
887 | xfs_calc_qm_dqalloc_reservation( |
888 | struct xfs_mount *mp, |
889 | bool for_minlogsize) |
890 | { |
891 | return xfs_calc_write_reservation(mp, for_minlogsize) + |
892 | xfs_calc_buf_res(1, |
893 | XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) - 1); |
894 | } |
895 | |
896 | unsigned int |
897 | xfs_calc_qm_dqalloc_reservation_minlogsize( |
898 | struct xfs_mount *mp) |
899 | { |
900 | return xfs_calc_qm_dqalloc_reservation(mp, true); |
901 | } |
902 | |
903 | /* |
904 | * Syncing the incore super block changes to disk. |
905 | * the super block to reflect the changes: sector size |
906 | */ |
907 | STATIC uint |
908 | xfs_calc_sb_reservation( |
909 | struct xfs_mount *mp) |
910 | { |
911 | return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize); |
912 | } |
913 | |
914 | void |
915 | xfs_trans_resv_calc( |
916 | struct xfs_mount *mp, |
917 | struct xfs_trans_resv *resp) |
918 | { |
919 | int logcount_adj = 0; |
920 | |
921 | /* |
922 | * The following transactions are logged in physical format and |
923 | * require a permanent reservation on space. |
924 | */ |
925 | resp->tr_write.tr_logres = xfs_calc_write_reservation(mp, false); |
926 | resp->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT; |
927 | resp->tr_write.tr_logflags |= XFS_TRANS_PERM_LOG_RES; |
928 | |
929 | resp->tr_itruncate.tr_logres = xfs_calc_itruncate_reservation(mp, false); |
930 | resp->tr_itruncate.tr_logcount = XFS_ITRUNCATE_LOG_COUNT; |
931 | resp->tr_itruncate.tr_logflags |= XFS_TRANS_PERM_LOG_RES; |
932 | |
933 | resp->tr_rename.tr_logres = xfs_calc_rename_reservation(mp); |
934 | resp->tr_rename.tr_logcount = XFS_RENAME_LOG_COUNT; |
935 | resp->tr_rename.tr_logflags |= XFS_TRANS_PERM_LOG_RES; |
936 | |
937 | resp->tr_link.tr_logres = xfs_calc_link_reservation(mp); |
938 | resp->tr_link.tr_logcount = XFS_LINK_LOG_COUNT; |
939 | resp->tr_link.tr_logflags |= XFS_TRANS_PERM_LOG_RES; |
940 | |
941 | resp->tr_remove.tr_logres = xfs_calc_remove_reservation(mp); |
942 | resp->tr_remove.tr_logcount = XFS_REMOVE_LOG_COUNT; |
943 | resp->tr_remove.tr_logflags |= XFS_TRANS_PERM_LOG_RES; |
944 | |
945 | resp->tr_symlink.tr_logres = xfs_calc_symlink_reservation(mp); |
946 | resp->tr_symlink.tr_logcount = XFS_SYMLINK_LOG_COUNT; |
947 | resp->tr_symlink.tr_logflags |= XFS_TRANS_PERM_LOG_RES; |
948 | |
949 | resp->tr_create.tr_logres = xfs_calc_icreate_reservation(mp); |
950 | resp->tr_create.tr_logcount = XFS_CREATE_LOG_COUNT; |
951 | resp->tr_create.tr_logflags |= XFS_TRANS_PERM_LOG_RES; |
952 | |
953 | resp->tr_create_tmpfile.tr_logres = |
954 | xfs_calc_create_tmpfile_reservation(mp); |
955 | resp->tr_create_tmpfile.tr_logcount = XFS_CREATE_TMPFILE_LOG_COUNT; |
956 | resp->tr_create_tmpfile.tr_logflags |= XFS_TRANS_PERM_LOG_RES; |
957 | |
958 | resp->tr_mkdir.tr_logres = xfs_calc_mkdir_reservation(mp); |
959 | resp->tr_mkdir.tr_logcount = XFS_MKDIR_LOG_COUNT; |
960 | resp->tr_mkdir.tr_logflags |= XFS_TRANS_PERM_LOG_RES; |
961 | |
962 | resp->tr_ifree.tr_logres = xfs_calc_ifree_reservation(mp); |
963 | resp->tr_ifree.tr_logcount = XFS_INACTIVE_LOG_COUNT; |
964 | resp->tr_ifree.tr_logflags |= XFS_TRANS_PERM_LOG_RES; |
965 | |
966 | resp->tr_addafork.tr_logres = xfs_calc_addafork_reservation(mp); |
967 | resp->tr_addafork.tr_logcount = XFS_ADDAFORK_LOG_COUNT; |
968 | resp->tr_addafork.tr_logflags |= XFS_TRANS_PERM_LOG_RES; |
969 | |
970 | resp->tr_attrinval.tr_logres = xfs_calc_attrinval_reservation(mp); |
971 | resp->tr_attrinval.tr_logcount = XFS_ATTRINVAL_LOG_COUNT; |
972 | resp->tr_attrinval.tr_logflags |= XFS_TRANS_PERM_LOG_RES; |
973 | |
974 | resp->tr_attrsetm.tr_logres = xfs_calc_attrsetm_reservation(mp); |
975 | resp->tr_attrsetm.tr_logcount = XFS_ATTRSET_LOG_COUNT; |
976 | resp->tr_attrsetm.tr_logflags |= XFS_TRANS_PERM_LOG_RES; |
977 | |
978 | resp->tr_attrrm.tr_logres = xfs_calc_attrrm_reservation(mp); |
979 | resp->tr_attrrm.tr_logcount = XFS_ATTRRM_LOG_COUNT; |
980 | resp->tr_attrrm.tr_logflags |= XFS_TRANS_PERM_LOG_RES; |
981 | |
982 | resp->tr_growrtalloc.tr_logres = xfs_calc_growrtalloc_reservation(mp); |
983 | resp->tr_growrtalloc.tr_logcount = XFS_DEFAULT_PERM_LOG_COUNT; |
984 | resp->tr_growrtalloc.tr_logflags |= XFS_TRANS_PERM_LOG_RES; |
985 | |
986 | resp->tr_qm_dqalloc.tr_logres = xfs_calc_qm_dqalloc_reservation(mp, |
987 | false); |
988 | resp->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT; |
989 | resp->tr_qm_dqalloc.tr_logflags |= XFS_TRANS_PERM_LOG_RES; |
990 | |
991 | /* |
992 | * The following transactions are logged in logical format with |
993 | * a default log count. |
994 | */ |
995 | resp->tr_qm_setqlim.tr_logres = xfs_calc_qm_setqlim_reservation(); |
996 | resp->tr_qm_setqlim.tr_logcount = XFS_DEFAULT_LOG_COUNT; |
997 | |
998 | resp->tr_sb.tr_logres = xfs_calc_sb_reservation(mp); |
999 | resp->tr_sb.tr_logcount = XFS_DEFAULT_LOG_COUNT; |
1000 | |
1001 | /* growdata requires permanent res; it can free space to the last AG */ |
1002 | resp->tr_growdata.tr_logres = xfs_calc_growdata_reservation(mp); |
1003 | resp->tr_growdata.tr_logcount = XFS_DEFAULT_PERM_LOG_COUNT; |
1004 | resp->tr_growdata.tr_logflags |= XFS_TRANS_PERM_LOG_RES; |
1005 | |
1006 | /* The following transaction are logged in logical format */ |
1007 | resp->tr_ichange.tr_logres = xfs_calc_ichange_reservation(mp); |
1008 | resp->tr_fsyncts.tr_logres = xfs_calc_swrite_reservation(mp); |
1009 | resp->tr_writeid.tr_logres = xfs_calc_writeid_reservation(mp); |
1010 | resp->tr_attrsetrt.tr_logres = xfs_calc_attrsetrt_reservation(mp); |
1011 | resp->tr_clearagi.tr_logres = xfs_calc_clear_agi_bucket_reservation(mp); |
1012 | resp->tr_growrtzero.tr_logres = xfs_calc_growrtzero_reservation(mp); |
1013 | resp->tr_growrtfree.tr_logres = xfs_calc_growrtfree_reservation(mp); |
1014 | |
1015 | /* |
1016 | * Add one logcount for BUI items that appear with rmap or reflink, |
1017 | * one logcount for refcount intent items, and one logcount for rmap |
1018 | * intent items. |
1019 | */ |
1020 | if (xfs_has_reflink(mp) || xfs_has_rmapbt(mp)) |
1021 | logcount_adj++; |
1022 | if (xfs_has_reflink(mp)) |
1023 | logcount_adj++; |
1024 | if (xfs_has_rmapbt(mp)) |
1025 | logcount_adj++; |
1026 | |
1027 | resp->tr_itruncate.tr_logcount += logcount_adj; |
1028 | resp->tr_write.tr_logcount += logcount_adj; |
1029 | resp->tr_qm_dqalloc.tr_logcount += logcount_adj; |
1030 | } |
1031 | |