PageRenderTime 76ms CodeModel.GetById 11ms RepoModel.GetById 0ms app.codeStats 1ms

/fs/ocfs2/alloc.c

https://gitlab.com/LiquidSmooth-Devices/android_kernel_htc_msm8974
C | 6079 lines | 4928 code | 1109 blank | 42 comment | 781 complexity | 34cfb83c6db173800841d0da37a34953 MD5 | raw file
Possible License(s): GPL-2.0
  1. /* -*- mode: c; c-basic-offset: 8; -*-
  2. * vim: noexpandtab sw=8 ts=8 sts=0:
  3. *
  4. * alloc.c
  5. *
  6. * Extent allocs and frees
  7. *
  8. * Copyright (C) 2002, 2004 Oracle. All rights reserved.
  9. *
  10. * This program is free software; you can redistribute it and/or
  11. * modify it under the terms of the GNU General Public
  12. * License as published by the Free Software Foundation; either
  13. * version 2 of the License, or (at your option) any later version.
  14. *
  15. * This program is distributed in the hope that it will be useful,
  16. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  18. * General Public License for more details.
  19. *
  20. * You should have received a copy of the GNU General Public
  21. * License along with this program; if not, write to the
  22. * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  23. * Boston, MA 02111-1307, USA.
  24. */
  25. #include <linux/fs.h>
  26. #include <linux/types.h>
  27. #include <linux/slab.h>
  28. #include <linux/highmem.h>
  29. #include <linux/swap.h>
  30. #include <linux/quotaops.h>
  31. #include <linux/blkdev.h>
  32. #include <cluster/masklog.h>
  33. #include "ocfs2.h"
  34. #include "alloc.h"
  35. #include "aops.h"
  36. #include "blockcheck.h"
  37. #include "dlmglue.h"
  38. #include "extent_map.h"
  39. #include "inode.h"
  40. #include "journal.h"
  41. #include "localalloc.h"
  42. #include "suballoc.h"
  43. #include "sysfile.h"
  44. #include "file.h"
  45. #include "super.h"
  46. #include "uptodate.h"
  47. #include "xattr.h"
  48. #include "refcounttree.h"
  49. #include "ocfs2_trace.h"
  50. #include "buffer_head_io.h"
  51. enum ocfs2_contig_type {
  52. CONTIG_NONE = 0,
  53. CONTIG_LEFT,
  54. CONTIG_RIGHT,
  55. CONTIG_LEFTRIGHT,
  56. };
  57. static enum ocfs2_contig_type
  58. ocfs2_extent_rec_contig(struct super_block *sb,
  59. struct ocfs2_extent_rec *ext,
  60. struct ocfs2_extent_rec *insert_rec);
  61. struct ocfs2_extent_tree_operations {
  62. void (*eo_set_last_eb_blk)(struct ocfs2_extent_tree *et,
  63. u64 blkno);
  64. u64 (*eo_get_last_eb_blk)(struct ocfs2_extent_tree *et);
  65. void (*eo_update_clusters)(struct ocfs2_extent_tree *et,
  66. u32 new_clusters);
  67. void (*eo_extent_map_insert)(struct ocfs2_extent_tree *et,
  68. struct ocfs2_extent_rec *rec);
  69. void (*eo_extent_map_truncate)(struct ocfs2_extent_tree *et,
  70. u32 clusters);
  71. int (*eo_insert_check)(struct ocfs2_extent_tree *et,
  72. struct ocfs2_extent_rec *rec);
  73. int (*eo_sanity_check)(struct ocfs2_extent_tree *et);
  74. void (*eo_fill_root_el)(struct ocfs2_extent_tree *et);
  75. void (*eo_fill_max_leaf_clusters)(struct ocfs2_extent_tree *et);
  76. enum ocfs2_contig_type
  77. (*eo_extent_contig)(struct ocfs2_extent_tree *et,
  78. struct ocfs2_extent_rec *ext,
  79. struct ocfs2_extent_rec *insert_rec);
  80. };
  81. static u64 ocfs2_dinode_get_last_eb_blk(struct ocfs2_extent_tree *et);
  82. static void ocfs2_dinode_set_last_eb_blk(struct ocfs2_extent_tree *et,
  83. u64 blkno);
  84. static void ocfs2_dinode_update_clusters(struct ocfs2_extent_tree *et,
  85. u32 clusters);
  86. static void ocfs2_dinode_extent_map_insert(struct ocfs2_extent_tree *et,
  87. struct ocfs2_extent_rec *rec);
  88. static void ocfs2_dinode_extent_map_truncate(struct ocfs2_extent_tree *et,
  89. u32 clusters);
  90. static int ocfs2_dinode_insert_check(struct ocfs2_extent_tree *et,
  91. struct ocfs2_extent_rec *rec);
  92. static int ocfs2_dinode_sanity_check(struct ocfs2_extent_tree *et);
  93. static void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et);
  94. static struct ocfs2_extent_tree_operations ocfs2_dinode_et_ops = {
  95. .eo_set_last_eb_blk = ocfs2_dinode_set_last_eb_blk,
  96. .eo_get_last_eb_blk = ocfs2_dinode_get_last_eb_blk,
  97. .eo_update_clusters = ocfs2_dinode_update_clusters,
  98. .eo_extent_map_insert = ocfs2_dinode_extent_map_insert,
  99. .eo_extent_map_truncate = ocfs2_dinode_extent_map_truncate,
  100. .eo_insert_check = ocfs2_dinode_insert_check,
  101. .eo_sanity_check = ocfs2_dinode_sanity_check,
  102. .eo_fill_root_el = ocfs2_dinode_fill_root_el,
  103. };
  104. static void ocfs2_dinode_set_last_eb_blk(struct ocfs2_extent_tree *et,
  105. u64 blkno)
  106. {
  107. struct ocfs2_dinode *di = et->et_object;
  108. BUG_ON(et->et_ops != &ocfs2_dinode_et_ops);
  109. di->i_last_eb_blk = cpu_to_le64(blkno);
  110. }
  111. static u64 ocfs2_dinode_get_last_eb_blk(struct ocfs2_extent_tree *et)
  112. {
  113. struct ocfs2_dinode *di = et->et_object;
  114. BUG_ON(et->et_ops != &ocfs2_dinode_et_ops);
  115. return le64_to_cpu(di->i_last_eb_blk);
  116. }
  117. static void ocfs2_dinode_update_clusters(struct ocfs2_extent_tree *et,
  118. u32 clusters)
  119. {
  120. struct ocfs2_inode_info *oi = cache_info_to_inode(et->et_ci);
  121. struct ocfs2_dinode *di = et->et_object;
  122. le32_add_cpu(&di->i_clusters, clusters);
  123. spin_lock(&oi->ip_lock);
  124. oi->ip_clusters = le32_to_cpu(di->i_clusters);
  125. spin_unlock(&oi->ip_lock);
  126. }
  127. static void ocfs2_dinode_extent_map_insert(struct ocfs2_extent_tree *et,
  128. struct ocfs2_extent_rec *rec)
  129. {
  130. struct inode *inode = &cache_info_to_inode(et->et_ci)->vfs_inode;
  131. ocfs2_extent_map_insert_rec(inode, rec);
  132. }
  133. static void ocfs2_dinode_extent_map_truncate(struct ocfs2_extent_tree *et,
  134. u32 clusters)
  135. {
  136. struct inode *inode = &cache_info_to_inode(et->et_ci)->vfs_inode;
  137. ocfs2_extent_map_trunc(inode, clusters);
  138. }
  139. static int ocfs2_dinode_insert_check(struct ocfs2_extent_tree *et,
  140. struct ocfs2_extent_rec *rec)
  141. {
  142. struct ocfs2_inode_info *oi = cache_info_to_inode(et->et_ci);
  143. struct ocfs2_super *osb = OCFS2_SB(oi->vfs_inode.i_sb);
  144. BUG_ON(oi->ip_dyn_features & OCFS2_INLINE_DATA_FL);
  145. mlog_bug_on_msg(!ocfs2_sparse_alloc(osb) &&
  146. (oi->ip_clusters != le32_to_cpu(rec->e_cpos)),
  147. "Device %s, asking for sparse allocation: inode %llu, "
  148. "cpos %u, clusters %u\n",
  149. osb->dev_str,
  150. (unsigned long long)oi->ip_blkno,
  151. rec->e_cpos, oi->ip_clusters);
  152. return 0;
  153. }
  154. static int ocfs2_dinode_sanity_check(struct ocfs2_extent_tree *et)
  155. {
  156. struct ocfs2_dinode *di = et->et_object;
  157. BUG_ON(et->et_ops != &ocfs2_dinode_et_ops);
  158. BUG_ON(!OCFS2_IS_VALID_DINODE(di));
  159. return 0;
  160. }
  161. static void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et)
  162. {
  163. struct ocfs2_dinode *di = et->et_object;
  164. et->et_root_el = &di->id2.i_list;
  165. }
  166. static void ocfs2_xattr_value_fill_root_el(struct ocfs2_extent_tree *et)
  167. {
  168. struct ocfs2_xattr_value_buf *vb = et->et_object;
  169. et->et_root_el = &vb->vb_xv->xr_list;
  170. }
  171. static void ocfs2_xattr_value_set_last_eb_blk(struct ocfs2_extent_tree *et,
  172. u64 blkno)
  173. {
  174. struct ocfs2_xattr_value_buf *vb = et->et_object;
  175. vb->vb_xv->xr_last_eb_blk = cpu_to_le64(blkno);
  176. }
  177. static u64 ocfs2_xattr_value_get_last_eb_blk(struct ocfs2_extent_tree *et)
  178. {
  179. struct ocfs2_xattr_value_buf *vb = et->et_object;
  180. return le64_to_cpu(vb->vb_xv->xr_last_eb_blk);
  181. }
  182. static void ocfs2_xattr_value_update_clusters(struct ocfs2_extent_tree *et,
  183. u32 clusters)
  184. {
  185. struct ocfs2_xattr_value_buf *vb = et->et_object;
  186. le32_add_cpu(&vb->vb_xv->xr_clusters, clusters);
  187. }
  188. static struct ocfs2_extent_tree_operations ocfs2_xattr_value_et_ops = {
  189. .eo_set_last_eb_blk = ocfs2_xattr_value_set_last_eb_blk,
  190. .eo_get_last_eb_blk = ocfs2_xattr_value_get_last_eb_blk,
  191. .eo_update_clusters = ocfs2_xattr_value_update_clusters,
  192. .eo_fill_root_el = ocfs2_xattr_value_fill_root_el,
  193. };
  194. static void ocfs2_xattr_tree_fill_root_el(struct ocfs2_extent_tree *et)
  195. {
  196. struct ocfs2_xattr_block *xb = et->et_object;
  197. et->et_root_el = &xb->xb_attrs.xb_root.xt_list;
  198. }
  199. static void ocfs2_xattr_tree_fill_max_leaf_clusters(struct ocfs2_extent_tree *et)
  200. {
  201. struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
  202. et->et_max_leaf_clusters =
  203. ocfs2_clusters_for_bytes(sb, OCFS2_MAX_XATTR_TREE_LEAF_SIZE);
  204. }
  205. static void ocfs2_xattr_tree_set_last_eb_blk(struct ocfs2_extent_tree *et,
  206. u64 blkno)
  207. {
  208. struct ocfs2_xattr_block *xb = et->et_object;
  209. struct ocfs2_xattr_tree_root *xt = &xb->xb_attrs.xb_root;
  210. xt->xt_last_eb_blk = cpu_to_le64(blkno);
  211. }
  212. static u64 ocfs2_xattr_tree_get_last_eb_blk(struct ocfs2_extent_tree *et)
  213. {
  214. struct ocfs2_xattr_block *xb = et->et_object;
  215. struct ocfs2_xattr_tree_root *xt = &xb->xb_attrs.xb_root;
  216. return le64_to_cpu(xt->xt_last_eb_blk);
  217. }
  218. static void ocfs2_xattr_tree_update_clusters(struct ocfs2_extent_tree *et,
  219. u32 clusters)
  220. {
  221. struct ocfs2_xattr_block *xb = et->et_object;
  222. le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, clusters);
  223. }
  224. static struct ocfs2_extent_tree_operations ocfs2_xattr_tree_et_ops = {
  225. .eo_set_last_eb_blk = ocfs2_xattr_tree_set_last_eb_blk,
  226. .eo_get_last_eb_blk = ocfs2_xattr_tree_get_last_eb_blk,
  227. .eo_update_clusters = ocfs2_xattr_tree_update_clusters,
  228. .eo_fill_root_el = ocfs2_xattr_tree_fill_root_el,
  229. .eo_fill_max_leaf_clusters = ocfs2_xattr_tree_fill_max_leaf_clusters,
  230. };
  231. static void ocfs2_dx_root_set_last_eb_blk(struct ocfs2_extent_tree *et,
  232. u64 blkno)
  233. {
  234. struct ocfs2_dx_root_block *dx_root = et->et_object;
  235. dx_root->dr_last_eb_blk = cpu_to_le64(blkno);
  236. }
  237. static u64 ocfs2_dx_root_get_last_eb_blk(struct ocfs2_extent_tree *et)
  238. {
  239. struct ocfs2_dx_root_block *dx_root = et->et_object;
  240. return le64_to_cpu(dx_root->dr_last_eb_blk);
  241. }
  242. static void ocfs2_dx_root_update_clusters(struct ocfs2_extent_tree *et,
  243. u32 clusters)
  244. {
  245. struct ocfs2_dx_root_block *dx_root = et->et_object;
  246. le32_add_cpu(&dx_root->dr_clusters, clusters);
  247. }
  248. static int ocfs2_dx_root_sanity_check(struct ocfs2_extent_tree *et)
  249. {
  250. struct ocfs2_dx_root_block *dx_root = et->et_object;
  251. BUG_ON(!OCFS2_IS_VALID_DX_ROOT(dx_root));
  252. return 0;
  253. }
  254. static void ocfs2_dx_root_fill_root_el(struct ocfs2_extent_tree *et)
  255. {
  256. struct ocfs2_dx_root_block *dx_root = et->et_object;
  257. et->et_root_el = &dx_root->dr_list;
  258. }
  259. static struct ocfs2_extent_tree_operations ocfs2_dx_root_et_ops = {
  260. .eo_set_last_eb_blk = ocfs2_dx_root_set_last_eb_blk,
  261. .eo_get_last_eb_blk = ocfs2_dx_root_get_last_eb_blk,
  262. .eo_update_clusters = ocfs2_dx_root_update_clusters,
  263. .eo_sanity_check = ocfs2_dx_root_sanity_check,
  264. .eo_fill_root_el = ocfs2_dx_root_fill_root_el,
  265. };
  266. static void ocfs2_refcount_tree_fill_root_el(struct ocfs2_extent_tree *et)
  267. {
  268. struct ocfs2_refcount_block *rb = et->et_object;
  269. et->et_root_el = &rb->rf_list;
  270. }
  271. static void ocfs2_refcount_tree_set_last_eb_blk(struct ocfs2_extent_tree *et,
  272. u64 blkno)
  273. {
  274. struct ocfs2_refcount_block *rb = et->et_object;
  275. rb->rf_last_eb_blk = cpu_to_le64(blkno);
  276. }
  277. static u64 ocfs2_refcount_tree_get_last_eb_blk(struct ocfs2_extent_tree *et)
  278. {
  279. struct ocfs2_refcount_block *rb = et->et_object;
  280. return le64_to_cpu(rb->rf_last_eb_blk);
  281. }
  282. static void ocfs2_refcount_tree_update_clusters(struct ocfs2_extent_tree *et,
  283. u32 clusters)
  284. {
  285. struct ocfs2_refcount_block *rb = et->et_object;
  286. le32_add_cpu(&rb->rf_clusters, clusters);
  287. }
  288. static enum ocfs2_contig_type
  289. ocfs2_refcount_tree_extent_contig(struct ocfs2_extent_tree *et,
  290. struct ocfs2_extent_rec *ext,
  291. struct ocfs2_extent_rec *insert_rec)
  292. {
  293. return CONTIG_NONE;
  294. }
  295. static struct ocfs2_extent_tree_operations ocfs2_refcount_tree_et_ops = {
  296. .eo_set_last_eb_blk = ocfs2_refcount_tree_set_last_eb_blk,
  297. .eo_get_last_eb_blk = ocfs2_refcount_tree_get_last_eb_blk,
  298. .eo_update_clusters = ocfs2_refcount_tree_update_clusters,
  299. .eo_fill_root_el = ocfs2_refcount_tree_fill_root_el,
  300. .eo_extent_contig = ocfs2_refcount_tree_extent_contig,
  301. };
  302. static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et,
  303. struct ocfs2_caching_info *ci,
  304. struct buffer_head *bh,
  305. ocfs2_journal_access_func access,
  306. void *obj,
  307. struct ocfs2_extent_tree_operations *ops)
  308. {
  309. et->et_ops = ops;
  310. et->et_root_bh = bh;
  311. et->et_ci = ci;
  312. et->et_root_journal_access = access;
  313. if (!obj)
  314. obj = (void *)bh->b_data;
  315. et->et_object = obj;
  316. et->et_ops->eo_fill_root_el(et);
  317. if (!et->et_ops->eo_fill_max_leaf_clusters)
  318. et->et_max_leaf_clusters = 0;
  319. else
  320. et->et_ops->eo_fill_max_leaf_clusters(et);
  321. }
  322. void ocfs2_init_dinode_extent_tree(struct ocfs2_extent_tree *et,
  323. struct ocfs2_caching_info *ci,
  324. struct buffer_head *bh)
  325. {
  326. __ocfs2_init_extent_tree(et, ci, bh, ocfs2_journal_access_di,
  327. NULL, &ocfs2_dinode_et_ops);
  328. }
  329. void ocfs2_init_xattr_tree_extent_tree(struct ocfs2_extent_tree *et,
  330. struct ocfs2_caching_info *ci,
  331. struct buffer_head *bh)
  332. {
  333. __ocfs2_init_extent_tree(et, ci, bh, ocfs2_journal_access_xb,
  334. NULL, &ocfs2_xattr_tree_et_ops);
  335. }
  336. void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et,
  337. struct ocfs2_caching_info *ci,
  338. struct ocfs2_xattr_value_buf *vb)
  339. {
  340. __ocfs2_init_extent_tree(et, ci, vb->vb_bh, vb->vb_access, vb,
  341. &ocfs2_xattr_value_et_ops);
  342. }
  343. void ocfs2_init_dx_root_extent_tree(struct ocfs2_extent_tree *et,
  344. struct ocfs2_caching_info *ci,
  345. struct buffer_head *bh)
  346. {
  347. __ocfs2_init_extent_tree(et, ci, bh, ocfs2_journal_access_dr,
  348. NULL, &ocfs2_dx_root_et_ops);
  349. }
  350. void ocfs2_init_refcount_extent_tree(struct ocfs2_extent_tree *et,
  351. struct ocfs2_caching_info *ci,
  352. struct buffer_head *bh)
  353. {
  354. __ocfs2_init_extent_tree(et, ci, bh, ocfs2_journal_access_rb,
  355. NULL, &ocfs2_refcount_tree_et_ops);
  356. }
  357. static inline void ocfs2_et_set_last_eb_blk(struct ocfs2_extent_tree *et,
  358. u64 new_last_eb_blk)
  359. {
  360. et->et_ops->eo_set_last_eb_blk(et, new_last_eb_blk);
  361. }
  362. static inline u64 ocfs2_et_get_last_eb_blk(struct ocfs2_extent_tree *et)
  363. {
  364. return et->et_ops->eo_get_last_eb_blk(et);
  365. }
  366. static inline void ocfs2_et_update_clusters(struct ocfs2_extent_tree *et,
  367. u32 clusters)
  368. {
  369. et->et_ops->eo_update_clusters(et, clusters);
  370. }
  371. static inline void ocfs2_et_extent_map_insert(struct ocfs2_extent_tree *et,
  372. struct ocfs2_extent_rec *rec)
  373. {
  374. if (et->et_ops->eo_extent_map_insert)
  375. et->et_ops->eo_extent_map_insert(et, rec);
  376. }
  377. static inline void ocfs2_et_extent_map_truncate(struct ocfs2_extent_tree *et,
  378. u32 clusters)
  379. {
  380. if (et->et_ops->eo_extent_map_truncate)
  381. et->et_ops->eo_extent_map_truncate(et, clusters);
  382. }
  383. static inline int ocfs2_et_root_journal_access(handle_t *handle,
  384. struct ocfs2_extent_tree *et,
  385. int type)
  386. {
  387. return et->et_root_journal_access(handle, et->et_ci, et->et_root_bh,
  388. type);
  389. }
  390. static inline enum ocfs2_contig_type
  391. ocfs2_et_extent_contig(struct ocfs2_extent_tree *et,
  392. struct ocfs2_extent_rec *rec,
  393. struct ocfs2_extent_rec *insert_rec)
  394. {
  395. if (et->et_ops->eo_extent_contig)
  396. return et->et_ops->eo_extent_contig(et, rec, insert_rec);
  397. return ocfs2_extent_rec_contig(
  398. ocfs2_metadata_cache_get_super(et->et_ci),
  399. rec, insert_rec);
  400. }
  401. static inline int ocfs2_et_insert_check(struct ocfs2_extent_tree *et,
  402. struct ocfs2_extent_rec *rec)
  403. {
  404. int ret = 0;
  405. if (et->et_ops->eo_insert_check)
  406. ret = et->et_ops->eo_insert_check(et, rec);
  407. return ret;
  408. }
  409. static inline int ocfs2_et_sanity_check(struct ocfs2_extent_tree *et)
  410. {
  411. int ret = 0;
  412. if (et->et_ops->eo_sanity_check)
  413. ret = et->et_ops->eo_sanity_check(et);
  414. return ret;
  415. }
  416. static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt,
  417. struct ocfs2_extent_block *eb);
  418. static void ocfs2_adjust_rightmost_records(handle_t *handle,
  419. struct ocfs2_extent_tree *et,
  420. struct ocfs2_path *path,
  421. struct ocfs2_extent_rec *insert_rec);
  422. void ocfs2_reinit_path(struct ocfs2_path *path, int keep_root)
  423. {
  424. int i, start = 0, depth = 0;
  425. struct ocfs2_path_item *node;
  426. if (keep_root)
  427. start = 1;
  428. for(i = start; i < path_num_items(path); i++) {
  429. node = &path->p_node[i];
  430. brelse(node->bh);
  431. node->bh = NULL;
  432. node->el = NULL;
  433. }
  434. if (keep_root)
  435. depth = le16_to_cpu(path_root_el(path)->l_tree_depth);
  436. else
  437. path_root_access(path) = NULL;
  438. path->p_tree_depth = depth;
  439. }
  440. void ocfs2_free_path(struct ocfs2_path *path)
  441. {
  442. if (path) {
  443. ocfs2_reinit_path(path, 0);
  444. kfree(path);
  445. }
  446. }
  447. static void ocfs2_cp_path(struct ocfs2_path *dest, struct ocfs2_path *src)
  448. {
  449. int i;
  450. BUG_ON(path_root_bh(dest) != path_root_bh(src));
  451. BUG_ON(path_root_el(dest) != path_root_el(src));
  452. BUG_ON(path_root_access(dest) != path_root_access(src));
  453. ocfs2_reinit_path(dest, 1);
  454. for(i = 1; i < OCFS2_MAX_PATH_DEPTH; i++) {
  455. dest->p_node[i].bh = src->p_node[i].bh;
  456. dest->p_node[i].el = src->p_node[i].el;
  457. if (dest->p_node[i].bh)
  458. get_bh(dest->p_node[i].bh);
  459. }
  460. }
  461. static void ocfs2_mv_path(struct ocfs2_path *dest, struct ocfs2_path *src)
  462. {
  463. int i;
  464. BUG_ON(path_root_bh(dest) != path_root_bh(src));
  465. BUG_ON(path_root_access(dest) != path_root_access(src));
  466. for(i = 1; i < OCFS2_MAX_PATH_DEPTH; i++) {
  467. brelse(dest->p_node[i].bh);
  468. dest->p_node[i].bh = src->p_node[i].bh;
  469. dest->p_node[i].el = src->p_node[i].el;
  470. src->p_node[i].bh = NULL;
  471. src->p_node[i].el = NULL;
  472. }
  473. }
  474. static inline void ocfs2_path_insert_eb(struct ocfs2_path *path, int index,
  475. struct buffer_head *eb_bh)
  476. {
  477. struct ocfs2_extent_block *eb = (struct ocfs2_extent_block *)eb_bh->b_data;
  478. BUG_ON(index == 0);
  479. path->p_node[index].bh = eb_bh;
  480. path->p_node[index].el = &eb->h_list;
  481. }
  482. static struct ocfs2_path *ocfs2_new_path(struct buffer_head *root_bh,
  483. struct ocfs2_extent_list *root_el,
  484. ocfs2_journal_access_func access)
  485. {
  486. struct ocfs2_path *path;
  487. BUG_ON(le16_to_cpu(root_el->l_tree_depth) >= OCFS2_MAX_PATH_DEPTH);
  488. path = kzalloc(sizeof(*path), GFP_NOFS);
  489. if (path) {
  490. path->p_tree_depth = le16_to_cpu(root_el->l_tree_depth);
  491. get_bh(root_bh);
  492. path_root_bh(path) = root_bh;
  493. path_root_el(path) = root_el;
  494. path_root_access(path) = access;
  495. }
  496. return path;
  497. }
  498. struct ocfs2_path *ocfs2_new_path_from_path(struct ocfs2_path *path)
  499. {
  500. return ocfs2_new_path(path_root_bh(path), path_root_el(path),
  501. path_root_access(path));
  502. }
  503. struct ocfs2_path *ocfs2_new_path_from_et(struct ocfs2_extent_tree *et)
  504. {
  505. return ocfs2_new_path(et->et_root_bh, et->et_root_el,
  506. et->et_root_journal_access);
  507. }
  508. int ocfs2_path_bh_journal_access(handle_t *handle,
  509. struct ocfs2_caching_info *ci,
  510. struct ocfs2_path *path,
  511. int idx)
  512. {
  513. ocfs2_journal_access_func access = path_root_access(path);
  514. if (!access)
  515. access = ocfs2_journal_access;
  516. if (idx)
  517. access = ocfs2_journal_access_eb;
  518. return access(handle, ci, path->p_node[idx].bh,
  519. OCFS2_JOURNAL_ACCESS_WRITE);
  520. }
  521. int ocfs2_journal_access_path(struct ocfs2_caching_info *ci,
  522. handle_t *handle,
  523. struct ocfs2_path *path)
  524. {
  525. int i, ret = 0;
  526. if (!path)
  527. goto out;
  528. for(i = 0; i < path_num_items(path); i++) {
  529. ret = ocfs2_path_bh_journal_access(handle, ci, path, i);
  530. if (ret < 0) {
  531. mlog_errno(ret);
  532. goto out;
  533. }
  534. }
  535. out:
  536. return ret;
  537. }
  538. int ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster)
  539. {
  540. int ret = -1;
  541. int i;
  542. struct ocfs2_extent_rec *rec;
  543. u32 rec_end, rec_start, clusters;
  544. for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
  545. rec = &el->l_recs[i];
  546. rec_start = le32_to_cpu(rec->e_cpos);
  547. clusters = ocfs2_rec_clusters(el, rec);
  548. rec_end = rec_start + clusters;
  549. if (v_cluster >= rec_start && v_cluster < rec_end) {
  550. ret = i;
  551. break;
  552. }
  553. }
  554. return ret;
  555. }
  556. static int ocfs2_block_extent_contig(struct super_block *sb,
  557. struct ocfs2_extent_rec *ext,
  558. u64 blkno)
  559. {
  560. u64 blk_end = le64_to_cpu(ext->e_blkno);
  561. blk_end += ocfs2_clusters_to_blocks(sb,
  562. le16_to_cpu(ext->e_leaf_clusters));
  563. return blkno == blk_end;
  564. }
  565. static int ocfs2_extents_adjacent(struct ocfs2_extent_rec *left,
  566. struct ocfs2_extent_rec *right)
  567. {
  568. u32 left_range;
  569. left_range = le32_to_cpu(left->e_cpos) +
  570. le16_to_cpu(left->e_leaf_clusters);
  571. return (left_range == le32_to_cpu(right->e_cpos));
  572. }
  573. static enum ocfs2_contig_type
  574. ocfs2_extent_rec_contig(struct super_block *sb,
  575. struct ocfs2_extent_rec *ext,
  576. struct ocfs2_extent_rec *insert_rec)
  577. {
  578. u64 blkno = le64_to_cpu(insert_rec->e_blkno);
  579. /*
  580. * Refuse to coalesce extent records with different flag
  581. * fields - we don't want to mix unwritten extents with user
  582. * data.
  583. */
  584. if (ext->e_flags != insert_rec->e_flags)
  585. return CONTIG_NONE;
  586. if (ocfs2_extents_adjacent(ext, insert_rec) &&
  587. ocfs2_block_extent_contig(sb, ext, blkno))
  588. return CONTIG_RIGHT;
  589. blkno = le64_to_cpu(ext->e_blkno);
  590. if (ocfs2_extents_adjacent(insert_rec, ext) &&
  591. ocfs2_block_extent_contig(sb, insert_rec, blkno))
  592. return CONTIG_LEFT;
  593. return CONTIG_NONE;
  594. }
  595. enum ocfs2_append_type {
  596. APPEND_NONE = 0,
  597. APPEND_TAIL,
  598. };
  599. enum ocfs2_split_type {
  600. SPLIT_NONE = 0,
  601. SPLIT_LEFT,
  602. SPLIT_RIGHT,
  603. };
  604. struct ocfs2_insert_type {
  605. enum ocfs2_split_type ins_split;
  606. enum ocfs2_append_type ins_appending;
  607. enum ocfs2_contig_type ins_contig;
  608. int ins_contig_index;
  609. int ins_tree_depth;
  610. };
  611. struct ocfs2_merge_ctxt {
  612. enum ocfs2_contig_type c_contig_type;
  613. int c_has_empty_extent;
  614. int c_split_covers_rec;
  615. };
  616. static int ocfs2_validate_extent_block(struct super_block *sb,
  617. struct buffer_head *bh)
  618. {
  619. int rc;
  620. struct ocfs2_extent_block *eb =
  621. (struct ocfs2_extent_block *)bh->b_data;
  622. trace_ocfs2_validate_extent_block((unsigned long long)bh->b_blocknr);
  623. BUG_ON(!buffer_uptodate(bh));
  624. rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &eb->h_check);
  625. if (rc) {
  626. mlog(ML_ERROR, "Checksum failed for extent block %llu\n",
  627. (unsigned long long)bh->b_blocknr);
  628. return rc;
  629. }
  630. if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
  631. ocfs2_error(sb,
  632. "Extent block #%llu has bad signature %.*s",
  633. (unsigned long long)bh->b_blocknr, 7,
  634. eb->h_signature);
  635. return -EINVAL;
  636. }
  637. if (le64_to_cpu(eb->h_blkno) != bh->b_blocknr) {
  638. ocfs2_error(sb,
  639. "Extent block #%llu has an invalid h_blkno "
  640. "of %llu",
  641. (unsigned long long)bh->b_blocknr,
  642. (unsigned long long)le64_to_cpu(eb->h_blkno));
  643. return -EINVAL;
  644. }
  645. if (le32_to_cpu(eb->h_fs_generation) != OCFS2_SB(sb)->fs_generation) {
  646. ocfs2_error(sb,
  647. "Extent block #%llu has an invalid "
  648. "h_fs_generation of #%u",
  649. (unsigned long long)bh->b_blocknr,
  650. le32_to_cpu(eb->h_fs_generation));
  651. return -EINVAL;
  652. }
  653. return 0;
  654. }
  655. int ocfs2_read_extent_block(struct ocfs2_caching_info *ci, u64 eb_blkno,
  656. struct buffer_head **bh)
  657. {
  658. int rc;
  659. struct buffer_head *tmp = *bh;
  660. rc = ocfs2_read_block(ci, eb_blkno, &tmp,
  661. ocfs2_validate_extent_block);
  662. if (!rc && !*bh)
  663. *bh = tmp;
  664. return rc;
  665. }
  666. int ocfs2_num_free_extents(struct ocfs2_super *osb,
  667. struct ocfs2_extent_tree *et)
  668. {
  669. int retval;
  670. struct ocfs2_extent_list *el = NULL;
  671. struct ocfs2_extent_block *eb;
  672. struct buffer_head *eb_bh = NULL;
  673. u64 last_eb_blk = 0;
  674. el = et->et_root_el;
  675. last_eb_blk = ocfs2_et_get_last_eb_blk(et);
  676. if (last_eb_blk) {
  677. retval = ocfs2_read_extent_block(et->et_ci, last_eb_blk,
  678. &eb_bh);
  679. if (retval < 0) {
  680. mlog_errno(retval);
  681. goto bail;
  682. }
  683. eb = (struct ocfs2_extent_block *) eb_bh->b_data;
  684. el = &eb->h_list;
  685. }
  686. BUG_ON(el->l_tree_depth != 0);
  687. retval = le16_to_cpu(el->l_count) - le16_to_cpu(el->l_next_free_rec);
  688. bail:
  689. brelse(eb_bh);
  690. trace_ocfs2_num_free_extents(retval);
  691. return retval;
  692. }
  693. static int ocfs2_create_new_meta_bhs(handle_t *handle,
  694. struct ocfs2_extent_tree *et,
  695. int wanted,
  696. struct ocfs2_alloc_context *meta_ac,
  697. struct buffer_head *bhs[])
  698. {
  699. int count, status, i;
  700. u16 suballoc_bit_start;
  701. u32 num_got;
  702. u64 suballoc_loc, first_blkno;
  703. struct ocfs2_super *osb =
  704. OCFS2_SB(ocfs2_metadata_cache_get_super(et->et_ci));
  705. struct ocfs2_extent_block *eb;
  706. count = 0;
  707. while (count < wanted) {
  708. status = ocfs2_claim_metadata(handle,
  709. meta_ac,
  710. wanted - count,
  711. &suballoc_loc,
  712. &suballoc_bit_start,
  713. &num_got,
  714. &first_blkno);
  715. if (status < 0) {
  716. mlog_errno(status);
  717. goto bail;
  718. }
  719. for(i = count; i < (num_got + count); i++) {
  720. bhs[i] = sb_getblk(osb->sb, first_blkno);
  721. if (bhs[i] == NULL) {
  722. status = -EIO;
  723. mlog_errno(status);
  724. goto bail;
  725. }
  726. ocfs2_set_new_buffer_uptodate(et->et_ci, bhs[i]);
  727. status = ocfs2_journal_access_eb(handle, et->et_ci,
  728. bhs[i],
  729. OCFS2_JOURNAL_ACCESS_CREATE);
  730. if (status < 0) {
  731. mlog_errno(status);
  732. goto bail;
  733. }
  734. memset(bhs[i]->b_data, 0, osb->sb->s_blocksize);
  735. eb = (struct ocfs2_extent_block *) bhs[i]->b_data;
  736. strcpy(eb->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE);
  737. eb->h_blkno = cpu_to_le64(first_blkno);
  738. eb->h_fs_generation = cpu_to_le32(osb->fs_generation);
  739. eb->h_suballoc_slot =
  740. cpu_to_le16(meta_ac->ac_alloc_slot);
  741. eb->h_suballoc_loc = cpu_to_le64(suballoc_loc);
  742. eb->h_suballoc_bit = cpu_to_le16(suballoc_bit_start);
  743. eb->h_list.l_count =
  744. cpu_to_le16(ocfs2_extent_recs_per_eb(osb->sb));
  745. suballoc_bit_start++;
  746. first_blkno++;
  747. ocfs2_journal_dirty(handle, bhs[i]);
  748. }
  749. count += num_got;
  750. }
  751. status = 0;
  752. bail:
  753. if (status < 0) {
  754. for(i = 0; i < wanted; i++) {
  755. brelse(bhs[i]);
  756. bhs[i] = NULL;
  757. }
  758. mlog_errno(status);
  759. }
  760. return status;
  761. }
  762. static inline u32 ocfs2_sum_rightmost_rec(struct ocfs2_extent_list *el)
  763. {
  764. int i;
  765. i = le16_to_cpu(el->l_next_free_rec) - 1;
  766. return le32_to_cpu(el->l_recs[i].e_cpos) +
  767. ocfs2_rec_clusters(el, &el->l_recs[i]);
  768. }
  769. static int ocfs2_adjust_rightmost_branch(handle_t *handle,
  770. struct ocfs2_extent_tree *et)
  771. {
  772. int status;
  773. struct ocfs2_path *path = NULL;
  774. struct ocfs2_extent_list *el;
  775. struct ocfs2_extent_rec *rec;
  776. path = ocfs2_new_path_from_et(et);
  777. if (!path) {
  778. status = -ENOMEM;
  779. return status;
  780. }
  781. status = ocfs2_find_path(et->et_ci, path, UINT_MAX);
  782. if (status < 0) {
  783. mlog_errno(status);
  784. goto out;
  785. }
  786. status = ocfs2_extend_trans(handle, path_num_items(path));
  787. if (status < 0) {
  788. mlog_errno(status);
  789. goto out;
  790. }
  791. status = ocfs2_journal_access_path(et->et_ci, handle, path);
  792. if (status < 0) {
  793. mlog_errno(status);
  794. goto out;
  795. }
  796. el = path_leaf_el(path);
  797. rec = &el->l_recs[le16_to_cpu(el->l_next_free_rec) - 1];
  798. ocfs2_adjust_rightmost_records(handle, et, path, rec);
  799. out:
  800. ocfs2_free_path(path);
  801. return status;
  802. }
  803. static int ocfs2_add_branch(handle_t *handle,
  804. struct ocfs2_extent_tree *et,
  805. struct buffer_head *eb_bh,
  806. struct buffer_head **last_eb_bh,
  807. struct ocfs2_alloc_context *meta_ac)
  808. {
  809. int status, new_blocks, i;
  810. u64 next_blkno, new_last_eb_blk;
  811. struct buffer_head *bh;
  812. struct buffer_head **new_eb_bhs = NULL;
  813. struct ocfs2_extent_block *eb;
  814. struct ocfs2_extent_list *eb_el;
  815. struct ocfs2_extent_list *el;
  816. u32 new_cpos, root_end;
  817. BUG_ON(!last_eb_bh || !*last_eb_bh);
  818. if (eb_bh) {
  819. eb = (struct ocfs2_extent_block *) eb_bh->b_data;
  820. el = &eb->h_list;
  821. } else
  822. el = et->et_root_el;
  823. BUG_ON(!el->l_tree_depth);
  824. new_blocks = le16_to_cpu(el->l_tree_depth);
  825. eb = (struct ocfs2_extent_block *)(*last_eb_bh)->b_data;
  826. new_cpos = ocfs2_sum_rightmost_rec(&eb->h_list);
  827. root_end = ocfs2_sum_rightmost_rec(et->et_root_el);
  828. if (root_end > new_cpos) {
  829. trace_ocfs2_adjust_rightmost_branch(
  830. (unsigned long long)
  831. ocfs2_metadata_cache_owner(et->et_ci),
  832. root_end, new_cpos);
  833. status = ocfs2_adjust_rightmost_branch(handle, et);
  834. if (status) {
  835. mlog_errno(status);
  836. goto bail;
  837. }
  838. }
  839. new_eb_bhs = kcalloc(new_blocks, sizeof(struct buffer_head *),
  840. GFP_KERNEL);
  841. if (!new_eb_bhs) {
  842. status = -ENOMEM;
  843. mlog_errno(status);
  844. goto bail;
  845. }
  846. status = ocfs2_create_new_meta_bhs(handle, et, new_blocks,
  847. meta_ac, new_eb_bhs);
  848. if (status < 0) {
  849. mlog_errno(status);
  850. goto bail;
  851. }
  852. next_blkno = new_last_eb_blk = 0;
  853. for(i = 0; i < new_blocks; i++) {
  854. bh = new_eb_bhs[i];
  855. eb = (struct ocfs2_extent_block *) bh->b_data;
  856. BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb));
  857. eb_el = &eb->h_list;
  858. status = ocfs2_journal_access_eb(handle, et->et_ci, bh,
  859. OCFS2_JOURNAL_ACCESS_CREATE);
  860. if (status < 0) {
  861. mlog_errno(status);
  862. goto bail;
  863. }
  864. eb->h_next_leaf_blk = 0;
  865. eb_el->l_tree_depth = cpu_to_le16(i);
  866. eb_el->l_next_free_rec = cpu_to_le16(1);
  867. eb_el->l_recs[0].e_cpos = cpu_to_le32(new_cpos);
  868. eb_el->l_recs[0].e_blkno = cpu_to_le64(next_blkno);
  869. eb_el->l_recs[0].e_int_clusters = cpu_to_le32(0);
  870. if (!eb_el->l_tree_depth)
  871. new_last_eb_blk = le64_to_cpu(eb->h_blkno);
  872. ocfs2_journal_dirty(handle, bh);
  873. next_blkno = le64_to_cpu(eb->h_blkno);
  874. }
  875. status = ocfs2_journal_access_eb(handle, et->et_ci, *last_eb_bh,
  876. OCFS2_JOURNAL_ACCESS_WRITE);
  877. if (status < 0) {
  878. mlog_errno(status);
  879. goto bail;
  880. }
  881. status = ocfs2_et_root_journal_access(handle, et,
  882. OCFS2_JOURNAL_ACCESS_WRITE);
  883. if (status < 0) {
  884. mlog_errno(status);
  885. goto bail;
  886. }
  887. if (eb_bh) {
  888. status = ocfs2_journal_access_eb(handle, et->et_ci, eb_bh,
  889. OCFS2_JOURNAL_ACCESS_WRITE);
  890. if (status < 0) {
  891. mlog_errno(status);
  892. goto bail;
  893. }
  894. }
  895. i = le16_to_cpu(el->l_next_free_rec);
  896. el->l_recs[i].e_blkno = cpu_to_le64(next_blkno);
  897. el->l_recs[i].e_cpos = cpu_to_le32(new_cpos);
  898. el->l_recs[i].e_int_clusters = 0;
  899. le16_add_cpu(&el->l_next_free_rec, 1);
  900. ocfs2_et_set_last_eb_blk(et, new_last_eb_blk);
  901. eb = (struct ocfs2_extent_block *) (*last_eb_bh)->b_data;
  902. eb->h_next_leaf_blk = cpu_to_le64(new_last_eb_blk);
  903. ocfs2_journal_dirty(handle, *last_eb_bh);
  904. ocfs2_journal_dirty(handle, et->et_root_bh);
  905. if (eb_bh)
  906. ocfs2_journal_dirty(handle, eb_bh);
  907. brelse(*last_eb_bh);
  908. get_bh(new_eb_bhs[0]);
  909. *last_eb_bh = new_eb_bhs[0];
  910. status = 0;
  911. bail:
  912. if (new_eb_bhs) {
  913. for (i = 0; i < new_blocks; i++)
  914. brelse(new_eb_bhs[i]);
  915. kfree(new_eb_bhs);
  916. }
  917. return status;
  918. }
/*
 * Grow the b-tree by one level: allocate a single new extent block,
 * copy the entire root extent list into it, then collapse the root to
 * a single record that points at the new block.
 *
 * On success, *ret_new_eb_bh holds a reference to the new extent
 * block's buffer (caller must brelse() it).  Returns 0 on success,
 * negative errno on failure.
 */
static int ocfs2_shift_tree_depth(handle_t *handle,
				  struct ocfs2_extent_tree *et,
				  struct ocfs2_alloc_context *meta_ac,
				  struct buffer_head **ret_new_eb_bh)
{
	int status, i;
	u32 new_clusters;
	struct buffer_head *new_eb_bh = NULL;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_list *root_el;
	struct ocfs2_extent_list *eb_el;

	status = ocfs2_create_new_meta_bhs(handle, et, 1, meta_ac,
					   &new_eb_bh);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	eb = (struct ocfs2_extent_block *) new_eb_bh->b_data;
	/* ocfs2_create_new_meta_bhs() is expected to hand back a valid,
	 * initialized extent block. */
	BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb));

	eb_el = &eb->h_list;
	root_el = et->et_root_el;

	status = ocfs2_journal_access_eb(handle, et->et_ci, new_eb_bh,
					 OCFS2_JOURNAL_ACCESS_CREATE);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	/* Copy the root's depth and records wholesale into the new
	 * child block. */
	eb_el->l_tree_depth = root_el->l_tree_depth;
	eb_el->l_next_free_rec = root_el->l_next_free_rec;
	for (i = 0; i < le16_to_cpu(root_el->l_next_free_rec); i++)
		eb_el->l_recs[i] = root_el->l_recs[i];

	ocfs2_journal_dirty(handle, new_eb_bh);

	status = ocfs2_et_root_journal_access(handle, et,
					      OCFS2_JOURNAL_ACCESS_WRITE);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	new_clusters = ocfs2_sum_rightmost_rec(eb_el);

	/* The root now holds exactly one record, spanning everything
	 * the new child covers; zero out the rest. */
	le16_add_cpu(&root_el->l_tree_depth, 1);
	root_el->l_recs[0].e_cpos = 0;
	root_el->l_recs[0].e_blkno = eb->h_blkno;
	root_el->l_recs[0].e_int_clusters = cpu_to_le32(new_clusters);
	for (i = 1; i < le16_to_cpu(root_el->l_next_free_rec); i++)
		memset(&root_el->l_recs[i], 0, sizeof(struct ocfs2_extent_rec));
	root_el->l_next_free_rec = cpu_to_le16(1);

	/* First depth shift (0 -> 1): the new block is also the
	 * rightmost leaf, so record it as last_eb_blk. */
	if (root_el->l_tree_depth == cpu_to_le16(1))
		ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno));

	ocfs2_journal_dirty(handle, et->et_root_bh);

	*ret_new_eb_bh = new_eb_bh;
	new_eb_bh = NULL;	/* ownership passed to caller */
	status = 0;
bail:
	brelse(new_eb_bh);

	return status;
}
/*
 * Walk down the rightmost edge of the tree looking for the lowest
 * interior extent block that still has a free record slot.
 *
 * Returns 0 with *target_bh set to a referenced buffer (or NULL when
 * no non-root node had space), 1 when the entire right edge including
 * the root is full (caller must shift tree depth), or negative errno
 * on error.
 */
static int ocfs2_find_branch_target(struct ocfs2_extent_tree *et,
				    struct buffer_head **target_bh)
{
	int status = 0, i;
	u64 blkno;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_list *el;
	struct buffer_head *bh = NULL;
	struct buffer_head *lowest_bh = NULL;

	*target_bh = NULL;

	el = et->et_root_el;

	/* Stop above the leaves: depth-1 nodes point at leaf blocks. */
	while(le16_to_cpu(el->l_tree_depth) > 1) {
		if (le16_to_cpu(el->l_next_free_rec) == 0) {
			ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
				    "Owner %llu has empty "
				    "extent list (next_free_rec == 0)",
				    (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci));
			status = -EIO;
			goto bail;
		}
		/* Always follow the last (rightmost) record. */
		i = le16_to_cpu(el->l_next_free_rec) - 1;
		blkno = le64_to_cpu(el->l_recs[i].e_blkno);
		if (!blkno) {
			ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
				    "Owner %llu has extent "
				    "list where extent # %d has no physical "
				    "block start",
				    (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci), i);
			status = -EIO;
			goto bail;
		}

		brelse(bh);
		bh = NULL;

		status = ocfs2_read_extent_block(et->et_ci, blkno, &bh);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}

		eb = (struct ocfs2_extent_block *) bh->b_data;
		el = &eb->h_list;

		/* Track the deepest node seen so far with a free slot. */
		if (le16_to_cpu(el->l_next_free_rec) <
		    le16_to_cpu(el->l_count)) {
			brelse(lowest_bh);
			lowest_bh = bh;
			get_bh(lowest_bh);
		}
	}

	/* No interior node had room: if the root is also full, signal
	 * the caller (via status == 1) to deepen the tree. */
	el = et->et_root_el;
	if (!lowest_bh && (el->l_next_free_rec == el->l_count))
		status = 1;

	*target_bh = lowest_bh;
bail:
	brelse(bh);

	return status;
}
/*
 * Grow the b-tree so there is room for another record on the rightmost
 * edge: either add a branch under an existing node with free space, or
 * — when the whole right edge including the root is full — shift the
 * tree one level deeper first.
 *
 * *last_eb_bh must reference the current rightmost leaf on entry and
 * is updated (reference swapped) to the new rightmost leaf.
 * *final_depth, if non-NULL, receives the resulting tree depth.
 */
static int ocfs2_grow_tree(handle_t *handle, struct ocfs2_extent_tree *et,
			   int *final_depth, struct buffer_head **last_eb_bh,
			   struct ocfs2_alloc_context *meta_ac)
{
	int ret, shift;
	struct ocfs2_extent_list *el = et->et_root_el;
	int depth = le16_to_cpu(el->l_tree_depth);
	struct buffer_head *bh = NULL;

	BUG_ON(meta_ac == NULL);

	shift = ocfs2_find_branch_target(et, &bh);
	if (shift < 0) {
		ret = shift;
		mlog_errno(ret);
		goto out;
	}

	/* shift == 1: tree (root included) is full; deepen it before a
	 * branch can be added. */
	if (shift) {
		/* ocfs2_find_branch_target() returns no buffer here. */
		BUG_ON(bh);
		trace_ocfs2_grow_tree(
			(unsigned long long)
			ocfs2_metadata_cache_owner(et->et_ci),
			depth);

		ret = ocfs2_shift_tree_depth(handle, et, meta_ac, &bh);
		if (ret < 0) {
			mlog_errno(ret);
			goto out;
		}
		depth++;
		if (depth == 1) {
			/* Special case: a depth-0 root became depth 1.
			 * The new child is also the rightmost leaf and
			 * already has room, so no branch is needed. */
			BUG_ON(*last_eb_bh);
			get_bh(bh);
			*last_eb_bh = bh;
			goto out;
		}
	}

	/* Add the new rightmost branch, hanging off 'bh' if one was
	 * found, otherwise off the root. */
	ret = ocfs2_add_branch(handle, et, bh, last_eb_bh,
			       meta_ac);
	if (ret < 0) {
		mlog_errno(ret);
		goto out;
	}

out:
	if (final_depth)
		*final_depth = depth;
	brelse(bh);
	return ret;
}
  1076. static void ocfs2_shift_records_right(struct ocfs2_extent_list *el)
  1077. {
  1078. int next_free = le16_to_cpu(el->l_next_free_rec);
  1079. int count = le16_to_cpu(el->l_count);
  1080. unsigned int num_bytes;
  1081. BUG_ON(!next_free);
  1082. BUG_ON(next_free >= count);
  1083. num_bytes = sizeof(struct ocfs2_extent_rec) * next_free;
  1084. memmove(&el->l_recs[1], &el->l_recs[0], num_bytes);
  1085. }
  1086. static void ocfs2_rotate_leaf(struct ocfs2_extent_list *el,
  1087. struct ocfs2_extent_rec *insert_rec)
  1088. {
  1089. int i, insert_index, next_free, has_empty, num_bytes;
  1090. u32 insert_cpos = le32_to_cpu(insert_rec->e_cpos);
  1091. struct ocfs2_extent_rec *rec;
  1092. next_free = le16_to_cpu(el->l_next_free_rec);
  1093. has_empty = ocfs2_is_empty_extent(&el->l_recs[0]);
  1094. BUG_ON(!next_free);
  1095. BUG_ON(el->l_next_free_rec == el->l_count && !has_empty);
  1096. if (has_empty) {
  1097. for(i = 0; i < (next_free - 1); i++)
  1098. el->l_recs[i] = el->l_recs[i+1];
  1099. next_free--;
  1100. }
  1101. for(i = 0; i < next_free; i++) {
  1102. rec = &el->l_recs[i];
  1103. if (insert_cpos < le32_to_cpu(rec->e_cpos))
  1104. break;
  1105. }
  1106. insert_index = i;
  1107. trace_ocfs2_rotate_leaf(insert_cpos, insert_index,
  1108. has_empty, next_free,
  1109. le16_to_cpu(el->l_count));
  1110. BUG_ON(insert_index < 0);
  1111. BUG_ON(insert_index >= le16_to_cpu(el->l_count));
  1112. BUG_ON(insert_index > next_free);
  1113. if (insert_index != next_free) {
  1114. BUG_ON(next_free >= le16_to_cpu(el->l_count));
  1115. num_bytes = next_free - insert_index;
  1116. num_bytes *= sizeof(struct ocfs2_extent_rec);
  1117. memmove(&el->l_recs[insert_index + 1],
  1118. &el->l_recs[insert_index],
  1119. num_bytes);
  1120. }
  1121. next_free++;
  1122. el->l_next_free_rec = cpu_to_le16(next_free);
  1123. BUG_ON(le16_to_cpu(el->l_next_free_rec) > le16_to_cpu(el->l_count));
  1124. el->l_recs[insert_index] = *insert_rec;
  1125. }
  1126. static void ocfs2_remove_empty_extent(struct ocfs2_extent_list *el)
  1127. {
  1128. int size, num_recs = le16_to_cpu(el->l_next_free_rec);
  1129. BUG_ON(num_recs == 0);
  1130. if (ocfs2_is_empty_extent(&el->l_recs[0])) {
  1131. num_recs--;
  1132. size = num_recs * sizeof(struct ocfs2_extent_rec);
  1133. memmove(&el->l_recs[0], &el->l_recs[1], size);
  1134. memset(&el->l_recs[num_recs], 0,
  1135. sizeof(struct ocfs2_extent_rec));
  1136. el->l_next_free_rec = cpu_to_le16(num_recs);
  1137. }
  1138. }
  1139. static void ocfs2_create_empty_extent(struct ocfs2_extent_list *el)
  1140. {
  1141. int next_free = le16_to_cpu(el->l_next_free_rec);
  1142. BUG_ON(le16_to_cpu(el->l_tree_depth) != 0);
  1143. if (next_free == 0)
  1144. goto set_and_inc;
  1145. if (ocfs2_is_empty_extent(&el->l_recs[0]))
  1146. return;
  1147. mlog_bug_on_msg(el->l_count == el->l_next_free_rec,
  1148. "Asked to create an empty extent in a full list:\n"
  1149. "count = %u, tree depth = %u",
  1150. le16_to_cpu(el->l_count),
  1151. le16_to_cpu(el->l_tree_depth));
  1152. ocfs2_shift_records_right(el);
  1153. set_and_inc:
  1154. le16_add_cpu(&el->l_next_free_rec, 1);
  1155. memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec));
  1156. }
  1157. int ocfs2_find_subtree_root(struct ocfs2_extent_tree *et,
  1158. struct ocfs2_path *left,
  1159. struct ocfs2_path *right)
  1160. {
  1161. int i = 0;
  1162. BUG_ON(path_root_bh(left) != path_root_bh(right));
  1163. do {
  1164. i++;
  1165. mlog_bug_on_msg(i > left->p_tree_depth,
  1166. "Owner %llu, left depth %u, right depth %u\n"
  1167. "left leaf blk %llu, right leaf blk %llu\n",
  1168. (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
  1169. left->p_tree_depth, right->p_tree_depth,
  1170. (unsigned long long)path_leaf_bh(left)->b_blocknr,
  1171. (unsigned long long)path_leaf_bh(right)->b_blocknr);
  1172. } while (left->p_node[i].bh->b_blocknr ==
  1173. right->p_node[i].bh->b_blocknr);
  1174. return i - 1;
  1175. }
  1176. typedef void (path_insert_t)(void *, struct buffer_head *);
/*
 * Walk from root_el down to the leaf covering cluster offset 'cpos',
 * invoking func(data, bh) on each extent-block buffer visited (the
 * root itself is never passed to func).
 *
 * Returns 0 on success, -EROFS when an on-disk extent list fails
 * validation, or the error from ocfs2_read_extent_block().
 */
static int __ocfs2_find_path(struct ocfs2_caching_info *ci,
			     struct ocfs2_extent_list *root_el, u32 cpos,
			     path_insert_t *func, void *data)
{
	int i, ret = 0;
	u32 range;
	u64 blkno;
	struct buffer_head *bh = NULL;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_list *el;
	struct ocfs2_extent_rec *rec;

	el = root_el;
	while (el->l_tree_depth) {
		if (le16_to_cpu(el->l_next_free_rec) == 0) {
			ocfs2_error(ocfs2_metadata_cache_get_super(ci),
				    "Owner %llu has empty extent list at "
				    "depth %u\n",
				    (unsigned long long)ocfs2_metadata_cache_owner(ci),
				    le16_to_cpu(el->l_tree_depth));
			ret = -EROFS;
			goto out;
		}

		/*
		 * Pick the record whose range covers cpos; if none of
		 * the first next_free_rec - 1 records matches, the loop
		 * falls through leaving i at the last record, which
		 * covers everything to the right.
		 */
		for(i = 0; i < le16_to_cpu(el->l_next_free_rec) - 1; i++) {
			rec = &el->l_recs[i];

			range = le32_to_cpu(rec->e_cpos) +
				ocfs2_rec_clusters(el, rec);
			if (cpos >= le32_to_cpu(rec->e_cpos) && cpos < range)
			    break;
		}

		blkno = le64_to_cpu(el->l_recs[i].e_blkno);
		if (blkno == 0) {
			ocfs2_error(ocfs2_metadata_cache_get_super(ci),
				    "Owner %llu has bad blkno in extent list "
				    "at depth %u (index %d)\n",
				    (unsigned long long)ocfs2_metadata_cache_owner(ci),
				    le16_to_cpu(el->l_tree_depth), i);
			ret = -EROFS;
			goto out;
		}

		brelse(bh);
		bh = NULL;
		ret = ocfs2_read_extent_block(ci, blkno, &bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		eb = (struct ocfs2_extent_block *) bh->b_data;
		el = &eb->h_list;

		/* Sanity-check the child's list before descending. */
		if (le16_to_cpu(el->l_next_free_rec) >
		    le16_to_cpu(el->l_count)) {
			ocfs2_error(ocfs2_metadata_cache_get_super(ci),
				    "Owner %llu has bad count in extent list "
				    "at block %llu (next free=%u, count=%u)\n",
				    (unsigned long long)ocfs2_metadata_cache_owner(ci),
				    (unsigned long long)bh->b_blocknr,
				    le16_to_cpu(el->l_next_free_rec),
				    le16_to_cpu(el->l_count));
			ret = -EROFS;
			goto out;
		}

		if (func)
			func(data, bh);
	}

out:
	/* Catches any buffer left over from the final loop pass. */
	brelse(bh);
	return ret;
}
/* Cookie passed through __ocfs2_find_path() to find_path_ins(). */
struct find_path_data {
	int index;		/* next slot to fill in path->p_node */
	struct ocfs2_path *path;
};
  1248. static void find_path_ins(void *data, struct buffer_head *bh)
  1249. {
  1250. struct find_path_data *fp = data;
  1251. get_bh(bh);
  1252. ocfs2_path_insert_eb(fp->path, fp->index, bh);
  1253. fp->index++;
  1254. }
  1255. int ocfs2_find_path(struct ocfs2_caching_info *ci,
  1256. struct ocfs2_path *path, u32 cpos)
  1257. {
  1258. struct find_path_data data;
  1259. data.index = 1;
  1260. data.path = path;
  1261. return __ocfs2_find_path(ci, path_root_el(path), cpos,
  1262. find_path_ins, &data);
  1263. }
  1264. static void find_leaf_ins(void *data, struct buffer_head *bh)
  1265. {
  1266. struct ocfs2_extent_block *eb =(struct ocfs2_extent_block *)bh->b_data;
  1267. struct ocfs2_extent_list *el = &eb->h_list;
  1268. struct buffer_head **ret = data;
  1269. if (le16_to_cpu(el->l_tree_depth) == 0) {
  1270. get_bh(bh);
  1271. *ret = bh;
  1272. }
  1273. }
  1274. int ocfs2_find_leaf(struct ocfs2_caching_info *ci,
  1275. struct ocfs2_extent_list *root_el, u32 cpos,
  1276. struct buffer_head **leaf_bh)
  1277. {
  1278. int ret;
  1279. struct buffer_head *bh = NULL;
  1280. ret = __ocfs2_find_path(ci, root_el, cpos, find_leaf_ins, &bh);
  1281. if (ret) {
  1282. mlog_errno(ret);
  1283. goto out;
  1284. }
  1285. *leaf_bh = bh;
  1286. out:
  1287. return ret;
  1288. }
  1289. static void ocfs2_adjust_adjacent_records(struct ocfs2_extent_rec *left_rec,
  1290. struct ocfs2_extent_list *left_child_el,
  1291. struct ocfs2_extent_rec *right_rec,
  1292. struct ocfs2_extent_list *right_child_el)
  1293. {
  1294. u32 left_clusters, right_end;
  1295. left_clusters = le32_to_cpu(right_child_el->l_recs[0].e_cpos);
  1296. if (!ocfs2_rec_clusters(right_child_el, &right_child_el->l_recs[0])) {
  1297. BUG_ON(right_child_el->l_tree_depth);
  1298. BUG_ON(le16_to_cpu(right_child_el->l_next_free_rec) <= 1);
  1299. left_clusters = le32_to_cpu(right_child_el->l_recs[1].e_cpos);
  1300. }
  1301. left_clusters -= le32_to_cpu(left_rec->e_cpos);
  1302. left_rec->e_int_clusters = cpu_to_le32(left_clusters);
  1303. right_end = le32_to_cpu(right_rec->e_cpos);
  1304. right_end += le32_to_cpu(right_rec->e_int_clusters);
  1305. right_rec->e_cpos = left_rec->e_cpos;
  1306. le32_add_cpu(&right_rec->e_cpos, left_clusters);
  1307. right_end -= le32_to_cpu(right_rec->e_cpos);
  1308. right_rec->e_int_clusters = cpu_to_le32(right_end);
  1309. }
  1310. static void ocfs2_adjust_root_records(struct ocfs2_extent_list *root_el,
  1311. struct ocfs2_extent_list *left_el,
  1312. struct ocfs2_extent_list *right_el,
  1313. u64 left_el_blkno)
  1314. {
  1315. int i;
  1316. BUG_ON(le16_to_cpu(root_el->l_tree_depth) <=
  1317. le16_to_cpu(left_el->l_tree_depth));
  1318. for(i = 0; i < le16_to_cpu(root_el->l_next_free_rec) - 1; i++) {
  1319. if (le64_to_cpu(root_el->l_recs[i].e_blkno) == left_el_blkno)
  1320. break;
  1321. }
  1322. BUG_ON(i >= (le16_to_cpu(root_el->l_next_free_rec) - 1));
  1323. ocfs2_adjust_adjacent_records(&root_el->l_recs[i], left_el,
  1324. &root_el->l_recs[i + 1], right_el);
  1325. }
/*
 * After records moved between the leaves of two adjacent paths, walk
 * back up from the leaves toward the subtree root, fixing the
 * cpos/cluster boundaries of the adjacent interior records at each
 * level, then adjust the pair of records in the subtree root itself.
 * All touched buffers are journal-dirtied; the caller must already
 * hold journal access on every node of both paths.
 */
static void ocfs2_complete_edge_insert(handle_t *handle,
				       struct ocfs2_path *left_path,
				       struct ocfs2_path *right_path,
				       int subtree_index)
{
	int i, idx;
	struct ocfs2_extent_list *el, *left_el, *right_el;
	struct ocfs2_extent_rec *left_rec, *right_rec;
	struct buffer_head *root_bh = left_path->p_node[subtree_index].bh;

	/* Start from the child lists (the leaves) and work upward. */
	left_el = path_leaf_el(left_path);
	right_el = path_leaf_el(right_path);
	for(i = left_path->p_tree_depth - 1; i > subtree_index; i--) {
		trace_ocfs2_complete_edge_insert(i);

		/* Left record is the parent's last; right record is
		 * the neighbouring parent's first. */
		el = left_path->p_node[i].el;
		idx = le16_to_cpu(left_el->l_next_free_rec) - 1;
		left_rec = &el->l_recs[idx];

		el = right_path->p_node[i].el;
		right_rec = &el->l_recs[0];

		ocfs2_adjust_adjacent_records(left_rec, left_el, right_rec,
					      right_el);

		ocfs2_journal_dirty(handle, left_path->p_node[i].bh);
		ocfs2_journal_dirty(handle, right_path->p_node[i].bh);

		/* These become the child lists for the next level up. */
		left_el = left_path->p_node[i].el;
		right_el = right_path->p_node[i].el;
	}

	/*
	 * At the subtree root the two records live in the same list,
	 * so they need the dedicated root adjustment.
	 */
	el = left_path->p_node[subtree_index].el;
	left_el = left_path->p_node[subtree_index + 1].el;
	right_el = right_path->p_node[subtree_index + 1].el;

	ocfs2_adjust_root_records(el, left_el, right_el,
				  left_path->p_node[subtree_index + 1].bh->b_blocknr);

	root_bh = left_path->p_node[subtree_index].bh;

	ocfs2_journal_dirty(handle, root_bh);
}
/*
 * Rotate one record rightward within the subtree rooted at
 * subtree_index: create an empty record at the head of the right
 * leaf, move the left leaf's last record into it, leave an empty
 * record at the head of the (full) left leaf, then repair the
 * interior boundaries with ocfs2_complete_edge_insert().
 *
 * The left leaf must be completely full; returns 0 without doing
 * anything if it already starts with an empty record.
 */
static int ocfs2_rotate_subtree_right(handle_t *handle,
				      struct ocfs2_extent_tree *et,
				      struct ocfs2_path *left_path,
				      struct ocfs2_path *right_path,
				      int subtree_index)
{
	int ret, i;
	struct buffer_head *right_leaf_bh;
	struct buffer_head *left_leaf_bh = NULL;
	struct buffer_head *root_bh;
	struct ocfs2_extent_list *right_el, *left_el;
	struct ocfs2_extent_rec move_rec;

	left_leaf_bh = path_leaf_bh(left_path);
	left_el = path_leaf_el(left_path);

	if (left_el->l_next_free_rec != left_el->l_count) {
		ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
			    "Inode %llu has non-full interior leaf node %llu"
			    "(next free = %u)",
			    (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
			    (unsigned long long)left_leaf_bh->b_blocknr,
			    le16_to_cpu(left_el->l_next_free_rec));
		return -EROFS;
	}

	/* Left leaf already has the empty record a previous rotation
	 * would have left behind — nothing to do. */
	if (ocfs2_is_empty_extent(&left_el->l_recs[0]))
		return 0;

	root_bh = left_path->p_node[subtree_index].bh;
	BUG_ON(root_bh != right_path->p_node[subtree_index].bh);

	/* Get journal access on the subtree root and on every node of
	 * both paths below it before modifying anything. */
	ret = ocfs2_path_bh_journal_access(handle, et->et_ci, right_path,
					   subtree_index);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	for(i = subtree_index + 1; i < path_num_items(right_path); i++) {
		ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
						   right_path, i);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
						   left_path, i);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
	}

	right_leaf_bh = path_leaf_bh(right_path);
	right_el = path_leaf_el(right_path);

	/* This is a code error, not a disk corruption. */
	mlog_bug_on_msg(!right_el->l_next_free_rec, "Inode %llu: Rotate fails "
			"because rightmost leaf block %llu is empty\n",
			(unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
			(unsigned long long)right_leaf_bh->b_blocknr);

	ocfs2_create_empty_extent(right_el);

	ocfs2_journal_dirty(handle, right_leaf_bh);

	/* Do the copy now. */
	i = le16_to_cpu(left_el->l_next_free_rec) - 1;
	move_rec = left_el->l_recs[i];
	right_el->l_recs[0] = move_rec;

	/*
	 * Clear out the record we just copied and shift everything
	 * over, leaving an empty extent in the left leaf.
	 */
	le16_add_cpu(&left_el->l_next_free_rec, -1);
	ocfs2_shift_records_right(left_el);
	memset(&left_el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec));
	le16_add_cpu(&left_el->l_next_free_rec, 1);

	ocfs2_journal_dirty(handle, left_leaf_bh);

	ocfs2_complete_edge_insert(handle, left_path, right_path,
				   subtree_index);

out:
	return ret;
}
  1427. int ocfs2_find_cpos_for_left_leaf(struct super_block *sb,
  1428. struct ocfs2_path *path, u32 *cpos)
  1429. {
  1430. int i, j, ret = 0;
  1431. u64 blkno;
  1432. struct ocfs2_extent_list *el;
  1433. BUG_ON(path->p_tree_depth == 0);
  1434. *cpos = 0;
  1435. blkno = path_leaf_bh(path)->b_blocknr;
  1436. i = path->p_tree_depth - 1;
  1437. while (i >= 0) {
  1438. el = path->p_node[i].el;
  1439. for(j = 0; j < le16_to_cpu(el->l_next_free_rec); j++) {
  1440. if (le64_to_cpu(el->l_recs[j].e_blkno) == blkno) {
  1441. if (j == 0) {
  1442. if (i == 0) {
  1443. goto out;
  1444. }
  1445. goto next_node;
  1446. }
  1447. *cpos = le32_to_cpu(el->l_recs[j - 1].e_cpos);
  1448. *cpos = *cpos + ocfs2_rec_clusters(el,
  1449. &el->l_recs[j - 1]);
  1450. *cpos = *cpos - 1;
  1451. goto out;
  1452. }
  1453. }
  1454. ocfs2_error(sb,
  1455. "Invalid extent tree at extent block %llu\n",
  1456. (unsigned long long)blkno);
  1457. ret = -EROFS;
  1458. goto out;
  1459. next_node:
  1460. blkno = path->p_node[i].bh->b_blocknr;
  1461. i--;
  1462. }
  1463. out:
  1464. return ret;
  1465. }
  1466. static int ocfs2_extend_rotate_transaction(handle_t *handle, int subtree_depth,
  1467. int op_credits,
  1468. struct ocfs2_path *path)
  1469. {
  1470. int ret = 0;
  1471. int credits = (path->p_tree_depth - subtree_depth) * 2 + 1 + op_credits;
  1472. if (handle->h_buffer_credits < credits)
  1473. ret = ocfs2_extend_trans(handle,
  1474. credits - handle->h_buffer_credits);
  1475. return ret;
  1476. }
  1477. static int ocfs2_rotate_requires_path_adjustment(struct ocfs2_path *left_path,
  1478. u32 insert_cpos)
  1479. {
  1480. struct ocfs2_extent_list *left_el;
  1481. struct ocfs2_extent_rec *rec;
  1482. int next_free;
  1483. left_el = path_leaf_el(left_path);
  1484. next_free = le16_to_cpu(left_el->l_next_free_rec);
  1485. rec = &left_el->l_recs[next_free - 1];
  1486. if (insert_cpos > le32_to_cpu(rec->e_cpos))
  1487. return 1;
  1488. return 0;
  1489. }
  1490. static int ocfs2_leftmost_rec_contains(struct ocfs2_extent_list *el, u32 cpos)
  1491. {
  1492. int next_free = le16_to_cpu(el->l_next_free_rec);
  1493. unsigned int range;
  1494. struct ocfs2_extent_rec *rec;
  1495. if (next_free == 0)
  1496. return 0;
  1497. rec = &el->l_recs[0];
  1498. if (ocfs2_is_empty_extent(rec)) {
  1499. if (next_free == 1)
  1500. return 0;
  1501. rec = &el->l_recs[1];
  1502. }
  1503. range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec);
  1504. if (cpos >= le32_to_cpu(rec->e_cpos) && cpos < range)
  1505. return 1;
  1506. return 0;
  1507. }
/*
 * Repeatedly rotate records rightward, one leaf pair at a time, until
 * the leaf that will receive an insert at 'insert_cpos' has an empty
 * record at its head.  right_path is advanced leftward as rotation
 * proceeds (via ocfs2_mv_path()).
 *
 * When the rotation determines the caller should insert into the left
 * path instead (SPLIT_NONE past-the-end case, or a split whose target
 * ended up in the rotated-into leaf), *ret_left_path is set and
 * OWNERSHIP of left_path transfers to the caller; otherwise it is
 * freed here.
 */
static int ocfs2_rotate_tree_right(handle_t *handle,
				   struct ocfs2_extent_tree *et,
				   enum ocfs2_split_type split,
				   u32 insert_cpos,
				   struct ocfs2_path *right_path,
				   struct ocfs2_path **ret_left_path)
{
	int ret, start, orig_credits = handle->h_buffer_credits;
	u32 cpos;
	struct ocfs2_path *left_path = NULL;
	struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);

	*ret_left_path = NULL;

	left_path = ocfs2_new_path_from_path(right_path);
	if (!left_path) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_find_cpos_for_left_leaf(sb, right_path, &cpos);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	trace_ocfs2_rotate_tree_right(
		(unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
		insert_cpos, cpos);

	/* cpos == 0 means we've reached the leftmost leaf. */
	while (cpos && insert_cpos <= cpos) {
		trace_ocfs2_rotate_tree_right(
			(unsigned long long)
			ocfs2_metadata_cache_owner(et->et_ci),
			insert_cpos, cpos);

		ret = ocfs2_find_path(et->et_ci, left_path, cpos);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		mlog_bug_on_msg(path_leaf_bh(left_path) ==
				path_leaf_bh(right_path),
				"Owner %llu: error during insert of %u "
				"(left path cpos %u) results in two identical "
				"paths ending at %llu\n",
				(unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
				insert_cpos, cpos,
				(unsigned long long)
				path_leaf_bh(left_path)->b_blocknr);

		/* Plain insert past the left leaf's last record: the
		 * caller should use the left path directly. */
		if (split == SPLIT_NONE &&
		    ocfs2_rotate_requires_path_adjustment(left_path,
							  insert_cpos)) {
			*ret_left_path = left_path;
			goto out_ret_path;
		}

		start = ocfs2_find_subtree_root(et, left_path, right_path);

		trace_ocfs2_rotate_subtree(start,
			(unsigned long long)
			right_path->p_node[start].bh->b_blocknr,
			right_path->p_tree_depth);

		ret = ocfs2_extend_rotate_transaction(handle, start,
						      orig_credits, right_path);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		ret = ocfs2_rotate_subtree_right(handle, et, left_path,
						 right_path, start);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		/* Split target rotated into the right leaf: stop and
		 * hand both paths back. */
		if (split != SPLIT_NONE &&
		    ocfs2_leftmost_rec_contains(path_leaf_el(right_path),
						insert_cpos)) {
			*ret_left_path = left_path;
			goto out_ret_path;
		}

		/* Step one leaf to the left and continue. */
		ocfs2_mv_path(right_path, left_path);

		ret = ocfs2_find_cpos_for_left_leaf(sb, right_path, &cpos);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
	}

out:
	ocfs2_free_path(left_path);

out_ret_path:
	return ret;
}
/*
 * After 'path' has become the new rightmost branch (e.g. the old
 * rightmost subtree was unlinked), recompute e_int_clusters of the
 * last record at every interior level so each covers exactly up to
 * the end of the rightmost leaf.  The leaf on 'path' must really be
 * the rightmost one (h_next_leaf_blk == 0).
 */
static int ocfs2_update_edge_lengths(handle_t *handle,
				     struct ocfs2_extent_tree *et,
				     int subtree_index, struct ocfs2_path *path)
{
	int i, idx, ret;
	struct ocfs2_extent_rec *rec;
	struct ocfs2_extent_list *el;
	struct ocfs2_extent_block *eb;
	u32 range;

	ret = ocfs2_extend_trans(handle, subtree_index);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_journal_access_path(et->et_ci, handle, path);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/* Path should always be rightmost, with no next leaf. */
	eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data;
	BUG_ON(eb->h_next_leaf_blk != 0ULL);

	/* End of the leaf's last record = end of the whole branch. */
	el = &eb->h_list;
	BUG_ON(le16_to_cpu(el->l_next_free_rec) == 0);
	idx = le16_to_cpu(el->l_next_free_rec) - 1;
	rec = &el->l_recs[idx];
	range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec);

	for (i = 0; i < path->p_tree_depth; i++) {
		el = path->p_node[i].el;
		idx = le16_to_cpu(el->l_next_free_rec) - 1;
		rec = &el->l_recs[idx];

		/* e_int_clusters = range - e_cpos */
		rec->e_int_clusters = cpu_to_le32(range);
		le32_add_cpu(&rec->e_int_clusters, -le32_to_cpu(rec->e_cpos));

		ocfs2_journal_dirty(handle, path->p_node[i].bh);
	}
out:
	return ret;
}
/*
 * Release the extent blocks on 'path' from level 'unlink_start' down:
 * mark each list empty on disk, drop the block from the metadata
 * cache, and queue it on 'dealloc' for later freeing.  A block that
 * unexpectedly still has more than one record is logged and skipped
 * (not freed) rather than corrupting live data.
 */
static void ocfs2_unlink_path(handle_t *handle,
			      struct ocfs2_extent_tree *et,
			      struct ocfs2_cached_dealloc_ctxt *dealloc,
			      struct ocfs2_path *path, int unlink_start)
{
	int ret, i;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_list *el;
	struct buffer_head *bh;

	for(i = unlink_start; i < path_num_items(path); i++) {
		bh = path->p_node[i].bh;

		eb = (struct ocfs2_extent_block *)bh->b_data;
		el = &eb->h_list;
		if (le16_to_cpu(el->l_next_free_rec) > 1) {
			mlog(ML_ERROR,
			     "Inode %llu, attempted to remove extent block "
			     "%llu with %u records\n",
			     (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
			     (unsigned long long)le64_to_cpu(eb->h_blkno),
			     le16_to_cpu(el->l_next_free_rec));

			ocfs2_journal_dirty(handle, bh);
			ocfs2_remove_from_cache(et->et_ci, bh);
			continue;
		}

		el->l_next_free_rec = 0;
		memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec));

		ocfs2_journal_dirty(handle, bh);

		ret = ocfs2_cache_extent_block_free(dealloc, eb);
		if (ret)
			mlog_errno(ret);

		ocfs2_remove_from_cache(et->et_ci, bh);
	}
}
/*
 * Detach right_path's branch from the tree: zero the subtree root's
 * record pointing at right_path's child, clear the left leaf's
 * h_next_leaf_blk (it becomes the rightmost leaf), and free the now
 * unreachable blocks of right_path below the subtree root.
 */
static void ocfs2_unlink_subtree(handle_t *handle,
				 struct ocfs2_extent_tree *et,
				 struct ocfs2_path *left_path,
				 struct ocfs2_path *right_path,
				 int subtree_index,
				 struct ocfs2_cached_dealloc_ctxt *dealloc)
{
	int i;
	struct buffer_head *root_bh = left_path->p_node[subtree_index].bh;
	struct ocfs2_extent_list *root_el = left_path->p_node[subtree_index].el;
	struct ocfs2_extent_list *el;
	struct ocfs2_extent_block *eb;

	el = path_leaf_el(left_path);

	eb = (struct ocfs2_extent_block *)right_path->p_node[subtree_index + 1].bh->b_data;

	/* Find the root record pointing at right_path's child; it
	 * cannot be record 0 (that one leads to left_path). */
	for(i = 1; i < le16_to_cpu(root_el->l_next_free_rec); i++)
		if (root_el->l_recs[i].e_blkno == eb->h_blkno)
			break;

	BUG_ON(i >= le16_to_cpu(root_el->l_next_free_rec));

	memset(&root_el->l_recs[i], 0, sizeof(struct ocfs2_extent_rec));
	le16_add_cpu(&root_el->l_next_free_rec, -1);

	eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data;
	eb->h_next_leaf_blk = 0;

	ocfs2_journal_dirty(handle, root_bh);
	ocfs2_journal_dirty(handle, path_leaf_bh(left_path));

	ocfs2_unlink_path(handle, et, dealloc, right_path,
			  subtree_index + 1);
}
/*
 * Rotate one record leftward within the subtree at subtree_index: the
 * right leaf's first record moves into the empty record at the head
 * of the left leaf.  If that empties the rightmost branch entirely,
 * the branch is unlinked and freed (*deleted set to 1), making the
 * left leaf the new rightmost one.
 *
 * Returns 0 (possibly a no-op when the left leaf has no empty head
 * record), -EAGAIN when an interior empty right leaf needs a rotation
 * further right first, or negative errno.
 */
static int ocfs2_rotate_subtree_left(handle_t *handle,
				     struct ocfs2_extent_tree *et,
				     struct ocfs2_path *left_path,
				     struct ocfs2_path *right_path,
				     int subtree_index,
				     struct ocfs2_cached_dealloc_ctxt *dealloc,
				     int *deleted)
{
	int ret, i, del_right_subtree = 0, right_has_empty = 0;
	struct buffer_head *root_bh, *et_root_bh = path_root_bh(right_path);
	struct ocfs2_extent_list *right_leaf_el, *left_leaf_el;
	struct ocfs2_extent_block *eb;

	*deleted = 0;

	right_leaf_el = path_leaf_el(right_path);
	left_leaf_el = path_leaf_el(left_path);
	root_bh = left_path->p_node[subtree_index].bh;
	BUG_ON(root_bh != right_path->p_node[subtree_index].bh);

	/* Nothing to rotate into — the left leaf has no empty slot. */
	if (!ocfs2_is_empty_extent(&left_leaf_el->l_recs[0]))
		return 0;

	eb = (struct ocfs2_extent_block *)path_leaf_bh(right_path)->b_data;
	if (ocfs2_is_empty_extent(&right_leaf_el->l_recs[0])) {
		/* An empty head in a non-rightmost leaf must first be
		 * rotated away to the right — ask the caller to retry. */
		if (eb->h_next_leaf_blk != 0ULL)
			return -EAGAIN;

		if (le16_to_cpu(right_leaf_el->l_next_free_rec) > 1) {
			ret = ocfs2_journal_access_eb(handle, et->et_ci,
						      path_leaf_bh(right_path),
						      OCFS2_JOURNAL_ACCESS_WRITE);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}

			ocfs2_remove_empty_extent(right_leaf_el);
		} else
			right_has_empty = 1;
	}

	/* Rightmost leaf about to lose its only record: the whole
	 * right branch will be unlinked, so the root (which holds
	 * last_eb_blk) needs journal access too. */
	if (eb->h_next_leaf_blk == 0ULL &&
	    le16_to_cpu(right_leaf_el->l_next_free_rec) == 1) {
		ret = ocfs2_et_root_journal_access(handle, et,
						   OCFS2_JOURNAL_ACCESS_WRITE);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		del_right_subtree = 1;
	}

	BUG_ON(right_has_empty && !del_right_subtree);

	ret = ocfs2_path_bh_journal_access(handle, et->et_ci, right_path,
					   subtree_index);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	for(i = subtree_index + 1; i < path_num_items(right_path); i++) {
		ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
						   right_path, i);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
						   left_path, i);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
	}

	if (!right_has_empty) {
		/* Move the right leaf's first record into the left
		 * leaf's empty head slot. */
		ocfs2_rotate_leaf(left_leaf_el, &right_leaf_el->l_recs[0]);
		memset(&right_leaf_el->l_recs[0], 0,
		       sizeof(struct ocfs2_extent_rec));
	}
	if (eb->h_next_leaf_blk == 0ULL) {
		/* Rightmost leaf may keep an empty head record only if
		 * it is not the rightmost; trim it here. */
		ocfs2_remove_empty_extent(right_leaf_el);
	}

	ocfs2_journal_dirty(handle, path_leaf_bh(left_path));
	ocfs2_journal_dirty(handle, path_leaf_bh(right_path));

	if (del_right_subtree) {
		ocfs2_unlink_subtree(handle, et, left_path, right_path,
				     subtree_index, dealloc);
		ret = ocfs2_update_edge_lengths(handle, et, subtree_index,
						left_path);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		/* The left leaf is now the rightmost one. */
		eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data;
		ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno));

		/* Removal of the empty extent was deferred until the
		 * rotated record had somewhere to land. */
		if (right_has_empty)
			ocfs2_remove_empty_extent(left_leaf_el);

		ocfs2_journal_dirty(handle, et_root_bh);

		*deleted = 1;
	} else
		ocfs2_complete_edge_insert(handle, left_path, right_path,
					   subtree_index);

out:
	return ret;
}
  1788. int ocfs2_find_cpos_for_right_leaf(struct super_block *sb,
  1789. struct ocfs2_path *path, u32 *cpos)
  1790. {
  1791. int i, j, ret = 0;
  1792. u64 blkno;
  1793. struct ocfs2_extent_list *el;
  1794. *cpos = 0;
  1795. if (path->p_tree_depth == 0)
  1796. return 0;
  1797. blkno = path_leaf_bh(path)->b_blocknr;
  1798. i = path->p_tree_depth - 1;
  1799. while (i >= 0) {
  1800. int next_free;
  1801. el = path->p_node[i].el;
  1802. next_free = le16_to_cpu(el->l_next_free_rec);
  1803. for(j = 0; j < le16_to_cpu(el->l_next_free_rec); j++) {
  1804. if (le64_to_cpu(el->l_recs[j].e_blkno) == blkno) {
  1805. if (j == (next_free - 1)) {
  1806. if (i == 0) {
  1807. goto out;
  1808. }
  1809. goto next_node;
  1810. }
  1811. *cpos = le32_to_cpu(el->l_recs[j + 1].e_cpos);
  1812. goto out;
  1813. }
  1814. }
  1815. ocfs2_error(sb,
  1816. "Invalid extent tree at extent block %llu\n",
  1817. (unsigned long long)blkno);
  1818. ret = -EROFS;
  1819. goto out;
  1820. next_node:
  1821. blkno = path->p_node[i].bh->b_blocknr;
  1822. i--;
  1823. }
  1824. out:
  1825. return ret;
  1826. }
  1827. static int ocfs2_rotate_rightmost_leaf_left(handle_t *handle,
  1828. struct ocfs2_extent_tree *et,
  1829. struct ocfs2_path *path)
  1830. {
  1831. int ret;
  1832. struct buffer_head *bh = path_leaf_bh(path);
  1833. struct ocfs2_extent_list *el = path_leaf_el(path);
  1834. if (!ocfs2_is_empty_extent(&el->l_recs[0]))
  1835. return 0;
  1836. ret = ocfs2_path_bh_journal_access(handle, et->et_ci, path,
  1837. path_num_items(path) - 1);
  1838. if (ret) {
  1839. mlog_errno(ret);
  1840. goto out;
  1841. }
  1842. ocfs2_remove_empty_extent(el);
  1843. ocfs2_journal_dirty(handle, bh);
  1844. out:
  1845. return ret;
  1846. }
/*
 * Core of the left rotation: starting at @path (whose leaf MUST have an
 * empty 0th record), rotate records leftward subtree by subtree until
 * the empty record has been consumed or the rightmost branch has been
 * deleted.
 *
 * If a right leaf is found completely empty but is not the rightmost
 * leaf, ocfs2_rotate_subtree_left() returns -EAGAIN; ownership of that
 * right path is handed back to the caller via *empty_extent_path so the
 * rotation can be restarted from there.
 */
static int __ocfs2_rotate_tree_left(handle_t *handle,
				    struct ocfs2_extent_tree *et,
				    int orig_credits,
				    struct ocfs2_path *path,
				    struct ocfs2_cached_dealloc_ctxt *dealloc,
				    struct ocfs2_path **empty_extent_path)
{
	int ret, subtree_root, deleted;
	u32 right_cpos;
	struct ocfs2_path *left_path = NULL;
	struct ocfs2_path *right_path = NULL;
	struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);

	/* The caller must hand us a leaf with an empty 0th record. */
	BUG_ON(!ocfs2_is_empty_extent(&(path_leaf_el(path)->l_recs[0])));

	*empty_extent_path = NULL;

	ret = ocfs2_find_cpos_for_right_leaf(sb, path, &right_cpos);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	left_path = ocfs2_new_path_from_path(path);
	if (!left_path) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	ocfs2_cp_path(left_path, path);

	right_path = ocfs2_new_path_from_path(path);
	if (!right_path) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	/* right_cpos == 0 means we've reached the rightmost leaf. */
	while (right_cpos) {
		ret = ocfs2_find_path(et->et_ci, right_path, right_cpos);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		subtree_root = ocfs2_find_subtree_root(et, left_path,
						       right_path);

		trace_ocfs2_rotate_subtree(subtree_root,
		     (unsigned long long)
		     right_path->p_node[subtree_root].bh->b_blocknr,
		     right_path->p_tree_depth);

		ret = ocfs2_extend_rotate_transaction(handle, subtree_root,
						      orig_credits, left_path);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		/*
		 * Caller might still want to make changes to the
		 * tree root, so re-add it to the journal here.
		 */
		ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
						   left_path, 0);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		ret = ocfs2_rotate_subtree_left(handle, et, left_path,
						right_path, subtree_root,
						dealloc, &deleted);
		if (ret == -EAGAIN) {
			/*
			 * The rotation has to temporarily stop due to
			 * the right subtree having an empty
			 * extent. Pass it back to the caller for a
			 * fixup.
			 */
			*empty_extent_path = right_path;
			right_path = NULL;
			goto out;
		}
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		/*
		 * The subtree rotate might have removed records on
		 * the rightmost edge. If so, then rotation is
		 * complete.
		 */
		if (deleted)
			break;

		/* Step right: old right path becomes the new left path. */
		ocfs2_mv_path(left_path, right_path);

		ret = ocfs2_find_cpos_for_right_leaf(sb, left_path,
						     &right_cpos);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
	}

out:
	ocfs2_free_path(right_path);
	ocfs2_free_path(left_path);

	return ret;
}
/*
 * Remove the rightmost extent branch from the tree, queueing its
 * blocks on @dealloc.  If a branch remains to the left, it becomes the
 * new rightmost path; otherwise the root is reverted to holding
 * extents in-line (tree depth 0).
 */
static int ocfs2_remove_rightmost_path(handle_t *handle,
				       struct ocfs2_extent_tree *et,
				       struct ocfs2_path *path,
				       struct ocfs2_cached_dealloc_ctxt *dealloc)
{
	int ret, subtree_index;
	u32 cpos;
	struct ocfs2_path *left_path = NULL;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_list *el;

	ret = ocfs2_et_sanity_check(et);
	if (ret)
		goto out;
	/*
	 * There's two ways we handle this depending on
	 * whether path is the only existing one.
	 */
	ret = ocfs2_extend_rotate_transaction(handle, 0,
					      handle->h_buffer_credits,
					      path);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_journal_access_path(et->et_ci, handle, path);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_find_cpos_for_left_leaf(ocfs2_metadata_cache_get_super(et->et_ci),
					    path, &cpos);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	if (cpos) {
		/*
		 * We have a path to the left of this one - it needs
		 * an update too.
		 */
		left_path = ocfs2_new_path_from_path(path);
		if (!left_path) {
			ret = -ENOMEM;
			mlog_errno(ret);
			goto out;
		}

		ret = ocfs2_find_path(et->et_ci, left_path, cpos);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		ret = ocfs2_journal_access_path(et->et_ci, handle, left_path);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		subtree_index = ocfs2_find_subtree_root(et, left_path, path);

		ocfs2_unlink_subtree(handle, et, left_path, path,
				     subtree_index, dealloc);
		ret = ocfs2_update_edge_lengths(handle, et, subtree_index,
						left_path);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		/* The left leaf is now the rightmost in the tree. */
		eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data;
		ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno));
	} else {
		/*
		 * 'path' is also the leftmost path which
		 * means it must be the only one. This gets
		 * handled differently because we want to
		 * revert the root back to having extents
		 * in-line.
		 */
		ocfs2_unlink_path(handle, et, dealloc, path, 1);

		el = et->et_root_el;
		el->l_tree_depth = 0;
		el->l_next_free_rec = 0;
		memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec));

		ocfs2_et_set_last_eb_blk(et, 0);
	}

	ocfs2_journal_dirty(handle, path_root_bh(path));

out:
	ocfs2_free_path(left_path);
	return ret;
}
/*
 * Left rotation of btree records.
 *
 * In many ways, this is (unsurprisingly) the opposite of right
 * rotation. We start at some non-rightmost path containing an empty
 * extent in the leaf block. The code works its way to the rightmost
 * path by rotating records to the left in every subtree.
 *
 * If the path's leaf has no empty extent, this is a no-op.
 */
static int ocfs2_rotate_tree_left(handle_t *handle,
				  struct ocfs2_extent_tree *et,
				  struct ocfs2_path *path,
				  struct ocfs2_cached_dealloc_ctxt *dealloc)
{
	int ret, orig_credits = handle->h_buffer_credits;
	struct ocfs2_path *tmp_path = NULL, *restart_path = NULL;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_list *el;

	el = path_leaf_el(path);
	if (!ocfs2_is_empty_extent(&el->l_recs[0]))
		return 0;

	if (path->p_tree_depth == 0) {
rightmost_no_delete:
		/*
		 * In-inode extents. This is trivially handled, so do
		 * it up front.
		 */
		ret = ocfs2_rotate_rightmost_leaf_left(handle, et, path);
		if (ret)
			mlog_errno(ret);
		goto out;
	}

	/*
	 * Handle rightmost branch now. There's several cases:
	 *  1) simple rotation leaving records in there. That's trivial.
	 *  2) rotation requiring a branch delete - there's no more
	 *     records left. Two cases of this:
	 *     a) There are branches to the left.
	 *     b) This is also the leftmost (the only) branch.
	 *
	 *  1) is handled via ocfs2_rotate_rightmost_leaf_left()
	 *  2a) we need the left branch so that we can update it with
	 *      the unlink
	 *  2b) we need to bring the root back to inline extents.
	 */
	eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data;
	el = &eb->h_list;
	if (eb->h_next_leaf_blk == 0) {
		/*
		 * This gets a bit tricky if we're going to delete the
		 * rightmost path. Get the other cases out of the way
		 * 1st.
		 */
		if (le16_to_cpu(el->l_next_free_rec) > 1)
			goto rightmost_no_delete;

		if (le16_to_cpu(el->l_next_free_rec) == 0) {
			ret = -EIO;
			ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
				    "Owner %llu has empty extent block at %llu",
				    (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
				    (unsigned long long)le64_to_cpu(eb->h_blkno));
			goto out;
		}

		/*
		 * NOTE(review): 'path' contents are stale after this
		 * call - the branch it described has been deleted.
		 */
		ret = ocfs2_remove_rightmost_path(handle, et, path,
						  dealloc);
		if (ret)
			mlog_errno(ret);
		goto out;
	}

	/*
	 * Non-rightmost branch: loop, restarting from any path that
	 * __ocfs2_rotate_tree_left() hands back via -EAGAIN.
	 */
try_rotate:
	ret = __ocfs2_rotate_tree_left(handle, et, orig_credits, path,
				       dealloc, &restart_path);
	if (ret && ret != -EAGAIN) {
		mlog_errno(ret);
		goto out;
	}

	while (ret == -EAGAIN) {
		tmp_path = restart_path;
		restart_path = NULL;

		ret = __ocfs2_rotate_tree_left(handle, et, orig_credits,
					       tmp_path, dealloc,
					       &restart_path);
		if (ret && ret != -EAGAIN) {
			mlog_errno(ret);
			goto out;
		}

		ocfs2_free_path(tmp_path);
		tmp_path = NULL;

		/* Rotation at the restart point succeeded; resume at path. */
		if (ret == 0)
			goto try_rotate;
	}

out:
	ocfs2_free_path(tmp_path);
	ocfs2_free_path(restart_path);
	return ret;
}
  2067. static void ocfs2_cleanup_merge(struct ocfs2_extent_list *el,
  2068. int index)
  2069. {
  2070. struct ocfs2_extent_rec *rec = &el->l_recs[index];
  2071. unsigned int size;
  2072. if (rec->e_leaf_clusters == 0) {
  2073. if (index > 0) {
  2074. BUG_ON(ocfs2_is_empty_extent(&el->l_recs[0]));
  2075. size = index * sizeof(struct ocfs2_extent_rec);
  2076. memmove(&el->l_recs[1], &el->l_recs[0], size);
  2077. }
  2078. memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec));
  2079. }
  2080. }
  2081. static int ocfs2_get_right_path(struct ocfs2_extent_tree *et,
  2082. struct ocfs2_path *left_path,
  2083. struct ocfs2_path **ret_right_path)
  2084. {
  2085. int ret;
  2086. u32 right_cpos;
  2087. struct ocfs2_path *right_path = NULL;
  2088. struct ocfs2_extent_list *left_el;
  2089. *ret_right_path = NULL;
  2090. BUG_ON(left_path->p_tree_depth == 0);
  2091. left_el = path_leaf_el(left_path);
  2092. BUG_ON(left_el->l_next_free_rec != left_el->l_count);
  2093. ret = ocfs2_find_cpos_for_right_leaf(ocfs2_metadata_cache_get_super(et->et_ci),
  2094. left_path, &right_cpos);
  2095. if (ret) {
  2096. mlog_errno(ret);
  2097. goto out;
  2098. }
  2099. BUG_ON(right_cpos == 0);
  2100. right_path = ocfs2_new_path_from_path(left_path);
  2101. if (!right_path) {
  2102. ret = -ENOMEM;
  2103. mlog_errno(ret);
  2104. goto out;
  2105. }
  2106. ret = ocfs2_find_path(et->et_ci, right_path, right_cpos);
  2107. if (ret) {
  2108. mlog_errno(ret);
  2109. goto out;
  2110. }
  2111. *ret_right_path = right_path;
  2112. out:
  2113. if (ret)
  2114. ocfs2_free_path(right_path);
  2115. return ret;
  2116. }
/*
 * Remove split_rec clusters from the record at @index and merge them
 * onto the beginning of the record "next" to it.
 *
 * For index < l_next_free_rec - 1, "next" means the extent rec at
 * index + 1 in the same leaf.  For the last record of a completely
 * full leaf, "next" means the 1st usable extent rec of the next leaf
 * block (a cross extent block merge).
 */
static int ocfs2_merge_rec_right(struct ocfs2_path *left_path,
				 handle_t *handle,
				 struct ocfs2_extent_tree *et,
				 struct ocfs2_extent_rec *split_rec,
				 int index)
{
	int ret, next_free, i;
	unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters);
	struct ocfs2_extent_rec *left_rec;
	struct ocfs2_extent_rec *right_rec;
	struct ocfs2_extent_list *right_el;
	struct ocfs2_path *right_path = NULL;
	int subtree_index = 0;
	struct ocfs2_extent_list *el = path_leaf_el(left_path);
	struct buffer_head *bh = path_leaf_bh(left_path);
	struct buffer_head *root_bh = NULL;

	BUG_ON(index >= le16_to_cpu(el->l_next_free_rec));
	left_rec = &el->l_recs[index];

	if (index == le16_to_cpu(el->l_next_free_rec) - 1 &&
	    le16_to_cpu(el->l_next_free_rec) == le16_to_cpu(el->l_count)) {
		/* we meet with a cross extent block merge. */
		ret = ocfs2_get_right_path(et, left_path, &right_path);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		right_el = path_leaf_el(right_path);
		next_free = le16_to_cpu(right_el->l_next_free_rec);
		BUG_ON(next_free <= 0);
		right_rec = &right_el->l_recs[0];
		if (ocfs2_is_empty_extent(right_rec)) {
			/* Skip the empty 0th record of the right leaf. */
			BUG_ON(next_free <= 1);
			right_rec = &right_el->l_recs[1];
		}

		/* The two records must be logically contiguous. */
		BUG_ON(le32_to_cpu(left_rec->e_cpos) +
		       le16_to_cpu(left_rec->e_leaf_clusters) !=
		       le32_to_cpu(right_rec->e_cpos));

		subtree_index = ocfs2_find_subtree_root(et, left_path,
							right_path);

		ret = ocfs2_extend_rotate_transaction(handle, subtree_index,
						      handle->h_buffer_credits,
						      right_path);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		root_bh = left_path->p_node[subtree_index].bh;
		BUG_ON(root_bh != right_path->p_node[subtree_index].bh);

		ret = ocfs2_path_bh_journal_access(handle, et->et_ci, right_path,
						   subtree_index);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		/* Journal both paths below the shared subtree root. */
		for (i = subtree_index + 1;
		     i < path_num_items(right_path); i++) {
			ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
							   right_path, i);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}

			ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
							   left_path, i);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}
		}

	} else {
		/* In-leaf merge: the right neighbor is simply index + 1. */
		BUG_ON(index == le16_to_cpu(el->l_next_free_rec) - 1);
		right_rec = &el->l_recs[index + 1];
	}

	ret = ocfs2_path_bh_journal_access(handle, et->et_ci, left_path,
					   path_num_items(left_path) - 1);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/* Shrink the left record and grow the right one leftward. */
	le16_add_cpu(&left_rec->e_leaf_clusters, -split_clusters);

	le32_add_cpu(&right_rec->e_cpos, -split_clusters);
	le64_add_cpu(&right_rec->e_blkno,
		     -ocfs2_clusters_to_blocks(ocfs2_metadata_cache_get_super(et->et_ci),
					       split_clusters));
	le16_add_cpu(&right_rec->e_leaf_clusters, split_clusters);

	/* The left record may now be empty; move it back to slot 0. */
	ocfs2_cleanup_merge(el, index);

	ocfs2_journal_dirty(handle, bh);
	if (right_path) {
		ocfs2_journal_dirty(handle, path_leaf_bh(right_path));
		ocfs2_complete_edge_insert(handle, left_path, right_path,
					   subtree_index);
	}
out:
	if (right_path)
		ocfs2_free_path(right_path);
	return ret;
}
  2213. static int ocfs2_get_left_path(struct ocfs2_extent_tree *et,
  2214. struct ocfs2_path *right_path,
  2215. struct ocfs2_path **ret_left_path)
  2216. {
  2217. int ret;
  2218. u32 left_cpos;
  2219. struct ocfs2_path *left_path = NULL;
  2220. *ret_left_path = NULL;
  2221. BUG_ON(right_path->p_tree_depth == 0);
  2222. ret = ocfs2_find_cpos_for_left_leaf(ocfs2_metadata_cache_get_super(et->et_ci),
  2223. right_path, &left_cpos);
  2224. if (ret) {
  2225. mlog_errno(ret);
  2226. goto out;
  2227. }
  2228. BUG_ON(left_cpos == 0);
  2229. left_path = ocfs2_new_path_from_path(right_path);
  2230. if (!left_path) {
  2231. ret = -ENOMEM;
  2232. mlog_errno(ret);
  2233. goto out;
  2234. }
  2235. ret = ocfs2_find_path(et->et_ci, left_path, left_cpos);
  2236. if (ret) {
  2237. mlog_errno(ret);
  2238. goto out;
  2239. }
  2240. *ret_left_path = left_path;
  2241. out:
  2242. if (ret)
  2243. ocfs2_free_path(left_path);
  2244. return ret;
  2245. }
/*
 * Remove split_rec clusters from the record at @index and merge them
 * onto the tail of the record "before" it.
 *
 * For index > 0, "before" means the extent rec at index - 1 in the
 * same leaf.  For index == 0, "before" means the last record of the
 * previous leaf block (a cross extent block merge).  In the latter
 * case, if the right leaf is drained completely and it was the
 * rightmost leaf, it is removed and right_path is switched to the new
 * rightmost path.
 */
static int ocfs2_merge_rec_left(struct ocfs2_path *right_path,
				handle_t *handle,
				struct ocfs2_extent_tree *et,
				struct ocfs2_extent_rec *split_rec,
				struct ocfs2_cached_dealloc_ctxt *dealloc,
				int index)
{
	int ret, i, subtree_index = 0, has_empty_extent = 0;
	unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters);
	struct ocfs2_extent_rec *left_rec;
	struct ocfs2_extent_rec *right_rec;
	struct ocfs2_extent_list *el = path_leaf_el(right_path);
	struct buffer_head *bh = path_leaf_bh(right_path);
	struct buffer_head *root_bh = NULL;
	struct ocfs2_path *left_path = NULL;
	struct ocfs2_extent_list *left_el;

	BUG_ON(index < 0);

	right_rec = &el->l_recs[index];
	if (index == 0) {
		/* we meet with a cross extent block merge. */
		ret = ocfs2_get_left_path(et, right_path, &left_path);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		left_el = path_leaf_el(left_path);
		/* The left leaf must be completely full. */
		BUG_ON(le16_to_cpu(left_el->l_next_free_rec) !=
		       le16_to_cpu(left_el->l_count));

		left_rec = &left_el->l_recs[
				le16_to_cpu(left_el->l_next_free_rec) - 1];
		/* split_rec must be contiguous with the left record. */
		BUG_ON(le32_to_cpu(left_rec->e_cpos) +
		       le16_to_cpu(left_rec->e_leaf_clusters) !=
		       le32_to_cpu(split_rec->e_cpos));

		subtree_index = ocfs2_find_subtree_root(et, left_path,
							right_path);

		ret = ocfs2_extend_rotate_transaction(handle, subtree_index,
						      handle->h_buffer_credits,
						      left_path);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		root_bh = left_path->p_node[subtree_index].bh;
		BUG_ON(root_bh != right_path->p_node[subtree_index].bh);

		ret = ocfs2_path_bh_journal_access(handle, et->et_ci, right_path,
						   subtree_index);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		/* Journal both paths below the shared subtree root. */
		for (i = subtree_index + 1;
		     i < path_num_items(right_path); i++) {
			ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
							   right_path, i);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}

			ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
							   left_path, i);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}
		}
	} else {
		left_rec = &el->l_recs[index - 1];
		if (ocfs2_is_empty_extent(&el->l_recs[0]))
			has_empty_extent = 1;
	}

	ret = ocfs2_path_bh_journal_access(handle, et->et_ci, right_path,
					   path_num_items(right_path) - 1);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	if (has_empty_extent && index == 1) {
		/*
		 * The easy case - the "left" record is the empty 0th
		 * record, so we can just plop split_rec right in.
		 */
		*left_rec = *split_rec;

		has_empty_extent = 0;
	} else
		le16_add_cpu(&left_rec->e_leaf_clusters, split_clusters);

	/* Shrink the right record from the front. */
	le32_add_cpu(&right_rec->e_cpos, split_clusters);
	le64_add_cpu(&right_rec->e_blkno,
		     ocfs2_clusters_to_blocks(ocfs2_metadata_cache_get_super(et->et_ci),
					      split_clusters));
	le16_add_cpu(&right_rec->e_leaf_clusters, -split_clusters);

	/* The right record may now be empty; move it back to slot 0. */
	ocfs2_cleanup_merge(el, index);

	ocfs2_journal_dirty(handle, bh);
	if (left_path) {
		ocfs2_journal_dirty(handle, path_leaf_bh(left_path));

		/*
		 * In the situation that the right_rec is empty and the extent
		 * block is empty also, ocfs2_complete_edge_insert can't handle
		 * it and we need to delete the right extent block.
		 */
		if (le16_to_cpu(right_rec->e_leaf_clusters) == 0 &&
		    le16_to_cpu(el->l_next_free_rec) == 1) {

			ret = ocfs2_remove_rightmost_path(handle, et,
							  right_path,
							  dealloc);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}

			/* Now the rightmost extent block has been deleted.
			 * So we use the new rightmost path.
			 */
			ocfs2_mv_path(right_path, left_path);
			left_path = NULL;
		} else
			ocfs2_complete_edge_insert(handle, left_path,
						   right_path, subtree_index);
	}
out:
	if (left_path)
		ocfs2_free_path(left_path);
	return ret;
}
/*
 * Try to merge the record being split at @split_index with its
 * neighbor(s), as directed by @ctxt (computed earlier by the contig
 * type detection).  Handles single-sided merges as well as the
 * left+right "collapse" case, rotating left as needed to consume the
 * empty extents that merging creates.
 */
static int ocfs2_try_to_merge_extent(handle_t *handle,
				     struct ocfs2_extent_tree *et,
				     struct ocfs2_path *path,
				     int split_index,
				     struct ocfs2_extent_rec *split_rec,
				     struct ocfs2_cached_dealloc_ctxt *dealloc,
				     struct ocfs2_merge_ctxt *ctxt)
{
	int ret = 0;
	struct ocfs2_extent_list *el = path_leaf_el(path);
	struct ocfs2_extent_rec *rec = &el->l_recs[split_index];

	BUG_ON(ctxt->c_contig_type == CONTIG_NONE);

	if (ctxt->c_split_covers_rec && ctxt->c_has_empty_extent) {
		/*
		 * The merge code will need to create an empty
		 * extent to take the place of the newly
		 * emptied slot. Remove any pre-existing empty
		 * extents - having more than one in a leaf is
		 * illegal.
		 */
		ret = ocfs2_rotate_tree_left(handle, et, path, dealloc);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
		/* The rotation shifted everything down one slot. */
		split_index--;
		rec = &el->l_recs[split_index];
	}

	if (ctxt->c_contig_type == CONTIG_LEFTRIGHT) {
		/*
		 * Left-right contig implies this.
		 */
		BUG_ON(!ctxt->c_split_covers_rec);

		/*
		 * Since the leftright insert always covers the entire
		 * extent, this call will delete the insert record
		 * entirely, resulting in an empty extent record added
		 * to the extent block.  Merging right first is easier
		 * when split_index is zero.
		 */
		ret = ocfs2_merge_rec_right(path, handle, et, split_rec,
					    split_index);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		/*
		 * We can only get this from logic error above.
		 */
		BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0]));

		/* The merge left us with an empty extent, remove it. */
		ret = ocfs2_rotate_tree_left(handle, et, path, dealloc);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		rec = &el->l_recs[split_index];

		/*
		 * Note that we don't pass split_rec here on purpose -
		 * after the merge above, rec is the record to fold
		 * into the left neighbor.
		 */
		ret = ocfs2_merge_rec_left(path, handle, et, rec,
					   dealloc, split_index);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		ret = ocfs2_rotate_tree_left(handle, et, path, dealloc);
		/*
		 * Error from this last rotate is not critical, so
		 * print but don't bubble it up.
		 */
		if (ret)
			mlog_errno(ret);
		ret = 0;
	} else {
		/*
		 * Merge a record to the left or right.
		 *
		 * 'contig_type' is relative to the existing record,
		 * so for example, if we're "right contig", it's to
		 * the record on the left (hence the left merge).
		 */
		if (ctxt->c_contig_type == CONTIG_RIGHT) {
			ret = ocfs2_merge_rec_left(path, handle, et,
						   split_rec, dealloc,
						   split_index);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}
		} else {
			ret = ocfs2_merge_rec_right(path, handle,
						    et, split_rec,
						    split_index);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}
		}

		if (ctxt->c_split_covers_rec) {
			/*
			 * The merge may have left an empty extent in
			 * our leaf. Try to rotate it away; failure
			 * here is non-fatal.
			 */
			ret = ocfs2_rotate_tree_left(handle, et, path,
						     dealloc);
			if (ret)
				mlog_errno(ret);
			ret = 0;
		}
	}

out:
	return ret;
}
  2430. static void ocfs2_subtract_from_rec(struct super_block *sb,
  2431. enum ocfs2_split_type split,
  2432. struct ocfs2_extent_rec *rec,
  2433. struct ocfs2_extent_rec *split_rec)
  2434. {
  2435. u64 len_blocks;
  2436. len_blocks = ocfs2_clusters_to_blocks(sb,
  2437. le16_to_cpu(split_rec->e_leaf_clusters));
  2438. if (split == SPLIT_LEFT) {
  2439. le32_add_cpu(&rec->e_cpos,
  2440. le16_to_cpu(split_rec->e_leaf_clusters));
  2441. le64_add_cpu(&rec->e_blkno, len_blocks);
  2442. le16_add_cpu(&rec->e_leaf_clusters,
  2443. -le16_to_cpu(split_rec->e_leaf_clusters));
  2444. } else {
  2445. le16_add_cpu(&rec->e_leaf_clusters,
  2446. -le16_to_cpu(split_rec->e_leaf_clusters));
  2447. }
  2448. }
/*
 * Place @insert_rec into the leaf extent list @el according to the
 * strategy described by @insert: split an existing record, extend a
 * contiguous neighbor, fill an empty leaf, append at the tail, or
 * rotate the record into position.
 */
static void ocfs2_insert_at_leaf(struct ocfs2_extent_tree *et,
				 struct ocfs2_extent_rec *insert_rec,
				 struct ocfs2_extent_list *el,
				 struct ocfs2_insert_type *insert)
{
	int i = insert->ins_contig_index;
	unsigned int range;
	struct ocfs2_extent_rec *rec;

	BUG_ON(le16_to_cpu(el->l_tree_depth) != 0);

	if (insert->ins_split != SPLIT_NONE) {
		/* Shrink the record being split; the remainder is rotated in. */
		i = ocfs2_search_extent_list(el, le32_to_cpu(insert_rec->e_cpos));
		BUG_ON(i == -1);
		rec = &el->l_recs[i];
		ocfs2_subtract_from_rec(ocfs2_metadata_cache_get_super(et->et_ci),
					insert->ins_split, rec,
					insert_rec);
		goto rotate;
	}

	/*
	 * Contiguous insert - either left or right.
	 */
	if (insert->ins_contig != CONTIG_NONE) {
		rec = &el->l_recs[i];
		if (insert->ins_contig == CONTIG_LEFT) {
			/* Grow the neighbor backwards to cover insert_rec. */
			rec->e_blkno = insert_rec->e_blkno;
			rec->e_cpos = insert_rec->e_cpos;
		}
		le16_add_cpu(&rec->e_leaf_clusters,
			     le16_to_cpu(insert_rec->e_leaf_clusters));
		return;
	}

	/*
	 * Handle insert into an empty leaf.
	 */
	if (le16_to_cpu(el->l_next_free_rec) == 0 ||
	    ((le16_to_cpu(el->l_next_free_rec) == 1) &&
	     ocfs2_is_empty_extent(&el->l_recs[0]))) {
		el->l_recs[0] = *insert_rec;
		el->l_next_free_rec = cpu_to_le16(1);
		return;
	}

	/*
	 * Appending insert.
	 */
	if (insert->ins_appending == APPEND_TAIL) {
		i = le16_to_cpu(el->l_next_free_rec) - 1;
		rec = &el->l_recs[i];
		range = le32_to_cpu(rec->e_cpos)
			+ le16_to_cpu(rec->e_leaf_clusters);
		/* An append must land at or after the current end. */
		BUG_ON(le32_to_cpu(insert_rec->e_cpos) < range);

		mlog_bug_on_msg(le16_to_cpu(el->l_next_free_rec) >=
				le16_to_cpu(el->l_count),
				"owner %llu, depth %u, count %u, next free %u, "
				"rec.cpos %u, rec.clusters %u, "
				"insert.cpos %u, insert.clusters %u\n",
				ocfs2_metadata_cache_owner(et->et_ci),
				le16_to_cpu(el->l_tree_depth),
				le16_to_cpu(el->l_count),
				le16_to_cpu(el->l_next_free_rec),
				le32_to_cpu(el->l_recs[i].e_cpos),
				le16_to_cpu(el->l_recs[i].e_leaf_clusters),
				le32_to_cpu(insert_rec->e_cpos),
				le16_to_cpu(insert_rec->e_leaf_clusters));
		i++;
		el->l_recs[i] = *insert_rec;
		le16_add_cpu(&el->l_next_free_rec, 1);
		return;
	}

rotate:
	/*
	 * Ok, we have to rotate.
	 *
	 * At this point, it is safe to assume that inserting into an
	 * empty leaf and appending to a leaf have both been handled
	 * above.
	 *
	 * This leaf needs to have space, either by the empty 1st
	 * extent record, or by virtue of an l_next_free_rec < l_count.
	 */
	ocfs2_rotate_leaf(el, insert_rec);
}
  2511. static void ocfs2_adjust_rightmost_records(handle_t *handle,
  2512. struct ocfs2_extent_tree *et,
  2513. struct ocfs2_path *path,
  2514. struct ocfs2_extent_rec *insert_rec)
  2515. {
  2516. int ret, i, next_free;
  2517. struct buffer_head *bh;
  2518. struct ocfs2_extent_list *el;
  2519. struct ocfs2_extent_rec *rec;
  2520. for (i = 0; i < path->p_tree_depth; i++) {
  2521. bh = path->p_node[i].bh;
  2522. el = path->p_node[i].el;
  2523. next_free = le16_to_cpu(el->l_next_free_rec);
  2524. if (next_free == 0) {
  2525. ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
  2526. "Owner %llu has a bad extent list",
  2527. (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci));
  2528. ret = -EIO;
  2529. return;
  2530. }
  2531. rec = &el->l_recs[next_free - 1];
  2532. rec->e_int_clusters = insert_rec->e_cpos;
  2533. le32_add_cpu(&rec->e_int_clusters,
  2534. le16_to_cpu(insert_rec->e_leaf_clusters));
  2535. le32_add_cpu(&rec->e_int_clusters,
  2536. -le32_to_cpu(rec->e_cpos));
  2537. ocfs2_journal_dirty(handle, bh);
  2538. }
  2539. }
/*
 * Prepare @right_path for an appending insert of @insert_rec: journal
 * the whole path and widen the rightmost interior records to cover the
 * new clusters.  If the target leaf is empty, a path to the left
 * neighbor is returned in *ret_left_path so the caller can update its
 * edge too.
 */
static int ocfs2_append_rec_to_path(handle_t *handle,
				    struct ocfs2_extent_tree *et,
				    struct ocfs2_extent_rec *insert_rec,
				    struct ocfs2_path *right_path,
				    struct ocfs2_path **ret_left_path)
{
	int ret, next_free;
	struct ocfs2_extent_list *el;
	struct ocfs2_path *left_path = NULL;

	*ret_left_path = NULL;

	/*
	 * This shouldn't happen for non-trees. The extent rec cluster
	 * count manipulation below only works for interior nodes.
	 */
	BUG_ON(right_path->p_tree_depth == 0);

	/*
	 * If our appending insert is at the leftmost edge of a leaf,
	 * then we might need to update the rightmost records of the
	 * neighboring path.
	 */
	el = path_leaf_el(right_path);
	next_free = le16_to_cpu(el->l_next_free_rec);
	if (next_free == 0 ||
	    (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0]))) {
		u32 left_cpos;

		ret = ocfs2_find_cpos_for_left_leaf(ocfs2_metadata_cache_get_super(et->et_ci),
						    right_path, &left_cpos);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		trace_ocfs2_append_rec_to_path(
			(unsigned long long)
			ocfs2_metadata_cache_owner(et->et_ci),
			le32_to_cpu(insert_rec->e_cpos),
			left_cpos);

		/* left_cpos == 0 means there is no left neighbor at all. */
		if (left_cpos) {
			left_path = ocfs2_new_path_from_path(right_path);
			if (!left_path) {
				ret = -ENOMEM;
				mlog_errno(ret);
				goto out;
			}

			ret = ocfs2_find_path(et->et_ci, left_path,
					      left_cpos);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}

			/*
			 * NOTE(review): left_path is only looked up
			 * here - journaling it is left to the caller
			 * (ocfs2_insert_path() extends the transaction
			 * and journals it).
			 */
		}
	}

	ret = ocfs2_journal_access_path(et->et_ci, handle, right_path);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ocfs2_adjust_rightmost_records(handle, et, right_path, insert_rec);

	*ret_left_path = left_path;
	ret = 0;
out:
	if (ret != 0)
		ocfs2_free_path(left_path);

	return ret;
}
/*
 * Perform a split insert at the leaf level: find the record covering
 * @split_rec's cpos (which may have migrated to the left leaf during
 * the preceding rotation), shave the split region off it, and rotate
 * @split_rec into the appropriate leaf.
 */
static void ocfs2_split_record(struct ocfs2_extent_tree *et,
			       struct ocfs2_path *left_path,
			       struct ocfs2_path *right_path,
			       struct ocfs2_extent_rec *split_rec,
			       enum ocfs2_split_type split)
{
	int index;
	u32 cpos = le32_to_cpu(split_rec->e_cpos);
	struct ocfs2_extent_list *left_el = NULL, *right_el, *insert_el, *el;
	struct ocfs2_extent_rec *rec, *tmprec;

	right_el = path_leaf_el(right_path);
	if (left_path)
		left_el = path_leaf_el(left_path);

	el = right_el;
	insert_el = right_el;
	index = ocfs2_search_extent_list(el, cpos);
	if (index != -1) {
		if (index == 0 && left_path) {
			BUG_ON(ocfs2_is_empty_extent(&el->l_recs[0]));

			/*
			 * This typically means that the record
			 * started in the left path but moved to the
			 * right as a result of rotation. We either
			 * move the existing record to the left, or we
			 * do the later insert there.
			 *
			 * In this case, the left path should always
			 * exist as the rotate code will have passed
			 * it back for a post-insert update.
			 */
			if (split == SPLIT_LEFT) {
				/*
				 * It's a left split. Since we know
				 * that the rotate code gave us an
				 * empty extent in the left path, we
				 * can just do the insert there.
				 */
				insert_el = left_el;
			} else {
				/*
				 * Right split - we have to move the
				 * existing record over to the left
				 * leaf. The insert will be into the
				 * newly created empty extent in the
				 * right leaf.
				 */
				tmprec = &right_el->l_recs[index];
				ocfs2_rotate_leaf(left_el, tmprec);
				el = left_el;

				memset(tmprec, 0, sizeof(*tmprec));
				index = ocfs2_search_extent_list(left_el, cpos);
				BUG_ON(index == -1);
			}
		}
	} else {
		BUG_ON(!left_path);
		BUG_ON(!ocfs2_is_empty_extent(&left_el->l_recs[0]));
		/*
		 * Left path is easy - we can just allow the insert to
		 * happen.
		 */
		el = left_el;
		insert_el = left_el;
		index = ocfs2_search_extent_list(el, cpos);
		BUG_ON(index == -1);
	}

	rec = &el->l_recs[index];
	ocfs2_subtract_from_rec(ocfs2_metadata_cache_get_super(et->et_ci),
				split, rec, split_rec);
	ocfs2_rotate_leaf(insert_el, split_rec);
}
/*
 * Do the final insert of @insert_rec into the leaf of @right_path,
 * either as a split or a plain leaf insert.
 *
 * right_path is the path we want to do the actual insert
 * in. left_path should only be passed in if we need to update that
 * portion of the tree after an edge insert.
 */
static int ocfs2_insert_path(handle_t *handle,
			     struct ocfs2_extent_tree *et,
			     struct ocfs2_path *left_path,
			     struct ocfs2_path *right_path,
			     struct ocfs2_extent_rec *insert_rec,
			     struct ocfs2_insert_type *insert)
{
	int ret, subtree_index;
	struct buffer_head *leaf_bh = path_leaf_bh(right_path);

	if (left_path) {
		/*
		 * There's a chance that left_path got passed back to
		 * us without being accounted for in the
		 * journal. Extend our transaction here to be sure we
		 * can change those blocks.
		 */
		ret = ocfs2_extend_trans(handle, left_path->p_tree_depth);
		if (ret < 0) {
			mlog_errno(ret);
			goto out;
		}

		ret = ocfs2_journal_access_path(et->et_ci, handle, left_path);
		if (ret < 0) {
			mlog_errno(ret);
			goto out;
		}
	}

	/*
	 * Pass both paths to the journal. The majority of inserts
	 * will be touching all components anyway.
	 */
	ret = ocfs2_journal_access_path(et->et_ci, handle, right_path);
	if (ret < 0) {
		mlog_errno(ret);
		goto out;
	}

	if (insert->ins_split != SPLIT_NONE) {
		/*
		 * We could call ocfs2_insert_at_leaf() for some types
		 * of splits, but it's easier to just let one separate
		 * function sort it all out.
		 */
		ocfs2_split_record(et, left_path, right_path,
				   insert_rec, insert->ins_split);

		/*
		 * Split might have modified either leaf and we don't
		 * have a guarantee that the later edge insert will
		 * dirty this for us.
		 */
		if (left_path)
			ocfs2_journal_dirty(handle,
					    path_leaf_bh(left_path));
	} else
		ocfs2_insert_at_leaf(et, insert_rec, path_leaf_el(right_path),
				     insert);

	ocfs2_journal_dirty(handle, leaf_bh);

	if (left_path) {
		/*
		 * The rotate code has indicated that we need to fix
		 * up portions of the tree after the insert.
		 */
		subtree_index = ocfs2_find_subtree_root(et, left_path,
							right_path);
		ocfs2_complete_edge_insert(handle, left_path, right_path,
					   subtree_index);
	}

	ret = 0;
out:
	return ret;
}
/*
 * Top-level driver for inserting @insert_rec into the extent tree:
 * picks the target leaf (rotating right or preparing an append first
 * when needed), performs the insert, and updates the root's cluster
 * count for non-split inserts.
 */
static int ocfs2_do_insert_extent(handle_t *handle,
				  struct ocfs2_extent_tree *et,
				  struct ocfs2_extent_rec *insert_rec,
				  struct ocfs2_insert_type *type)
{
	int ret, rotate = 0;
	u32 cpos;
	struct ocfs2_path *right_path = NULL;
	struct ocfs2_path *left_path = NULL;
	struct ocfs2_extent_list *el;

	el = et->et_root_el;

	ret = ocfs2_et_root_journal_access(handle, et,
					   OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/* Depth zero: the root list IS the leaf - insert directly. */
	if (le16_to_cpu(el->l_tree_depth) == 0) {
		ocfs2_insert_at_leaf(et, insert_rec, el, type);
		goto out_update_clusters;
	}

	right_path = ocfs2_new_path_from_et(et);
	if (!right_path) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	/*
	 * Determine the path to start with. Rotations need the
	 * rightmost path, everything else can go directly to the
	 * target leaf.
	 */
	cpos = le32_to_cpu(insert_rec->e_cpos);
	if (type->ins_appending == APPEND_NONE &&
	    type->ins_contig == CONTIG_NONE) {
		rotate = 1;
		cpos = UINT_MAX;
	}

	ret = ocfs2_find_path(et->et_ci, right_path, cpos);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/*
	 * Rotations and appends need special treatment - they modify
	 * parts of the tree's above them.
	 *
	 * Both might pass back a path immediate to the left of the
	 * one being inserted to. This will cause ocfs2_insert_path()
	 * to modify the rightmost records of left_path to account for
	 * an edge insert.
	 */
	if (rotate) {
		ret = ocfs2_rotate_tree_right(handle, et, type->ins_split,
					      le32_to_cpu(insert_rec->e_cpos),
					      right_path, &left_path);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		/*
		 * ocfs2_rotate_tree_right() might have extended the
		 * transaction without re-journaling our tree root.
		 */
		ret = ocfs2_et_root_journal_access(handle, et,
						   OCFS2_JOURNAL_ACCESS_WRITE);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
	} else if (type->ins_appending == APPEND_TAIL
		   && type->ins_contig != CONTIG_LEFT) {
		ret = ocfs2_append_rec_to_path(handle, et, insert_rec,
					       right_path, &left_path);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
	}

	ret = ocfs2_insert_path(handle, et, left_path, right_path,
				insert_rec, type);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

out_update_clusters:
	/* Splits don't add clusters; everything else grows the tree. */
	if (type->ins_split == SPLIT_NONE)
		ocfs2_et_update_clusters(et,
					 le16_to_cpu(insert_rec->e_leaf_clusters));

	ocfs2_journal_dirty(handle, et->et_root_bh);

out:
	ocfs2_free_path(left_path);
	ocfs2_free_path(right_path);

	return ret;
}
  2761. static enum ocfs2_contig_type
  2762. ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et,
  2763. struct ocfs2_path *path,
  2764. struct ocfs2_extent_list *el, int index,
  2765. struct ocfs2_extent_rec *split_rec)
  2766. {
  2767. int status;
  2768. enum ocfs2_contig_type ret = CONTIG_NONE;
  2769. u32 left_cpos, right_cpos;
  2770. struct ocfs2_extent_rec *rec = NULL;
  2771. struct ocfs2_extent_list *new_el;
  2772. struct ocfs2_path *left_path = NULL, *right_path = NULL;
  2773. struct buffer_head *bh;
  2774. struct ocfs2_extent_block *eb;
  2775. struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
  2776. if (index > 0) {
  2777. rec = &el->l_recs[index - 1];
  2778. } else if (path->p_tree_depth > 0) {
  2779. status = ocfs2_find_cpos_for_left_leaf(sb, path, &left_cpos);
  2780. if (status)
  2781. goto out;
  2782. if (left_cpos != 0) {
  2783. left_path = ocfs2_new_path_from_path(path);
  2784. if (!left_path)
  2785. goto out;
  2786. status = ocfs2_find_path(et->et_ci, left_path,
  2787. left_cpos);
  2788. if (status)
  2789. goto out;
  2790. new_el = path_leaf_el(left_path);
  2791. if (le16_to_cpu(new_el->l_next_free_rec) !=
  2792. le16_to_cpu(new_el->l_count)) {
  2793. bh = path_leaf_bh(left_path);
  2794. eb = (struct ocfs2_extent_block *)bh->b_data;
  2795. ocfs2_error(sb,
  2796. "Extent block #%llu has an "
  2797. "invalid l_next_free_rec of "
  2798. "%d. It should have "
  2799. "matched the l_count of %d",
  2800. (unsigned long long)le64_to_cpu(eb->h_blkno),
  2801. le16_to_cpu(new_el->l_next_free_rec),
  2802. le16_to_cpu(new_el->l_count));
  2803. status = -EINVAL;
  2804. goto out;
  2805. }
  2806. rec = &new_el->l_recs[
  2807. le16_to_cpu(new_el->l_next_free_rec) - 1];
  2808. }
  2809. }
  2810. if (rec) {
  2811. if (index == 1 && ocfs2_is_empty_extent(rec)) {
  2812. if (split_rec->e_cpos == el->l_recs[index].e_cpos)
  2813. ret = CONTIG_RIGHT;
  2814. } else {
  2815. ret = ocfs2_et_extent_contig(et, rec, split_rec);
  2816. }
  2817. }
  2818. rec = NULL;
  2819. if (index < (le16_to_cpu(el->l_next_free_rec) - 1))
  2820. rec = &el->l_recs[index + 1];
  2821. else if (le16_to_cpu(el->l_next_free_rec) == le16_to_cpu(el->l_count) &&
  2822. path->p_tree_depth > 0) {
  2823. status = ocfs2_find_cpos_for_right_leaf(sb, path, &right_cpos);
  2824. if (status)
  2825. goto out;
  2826. if (right_cpos == 0)
  2827. goto out;
  2828. right_path = ocfs2_new_path_from_path(path);
  2829. if (!right_path)
  2830. goto out;
  2831. status = ocfs2_find_path(et->et_ci, right_path, right_cpos);
  2832. if (status)
  2833. goto out;
  2834. new_el = path_leaf_el(right_path);
  2835. rec = &new_el->l_recs[0];
  2836. if (ocfs2_is_empty_extent(rec)) {
  2837. if (le16_to_cpu(new_el->l_next_free_rec) <= 1) {
  2838. bh = path_leaf_bh(right_path);
  2839. eb = (struct ocfs2_extent_block *)bh->b_data;
  2840. ocfs2_error(sb,
  2841. "Extent block #%llu has an "
  2842. "invalid l_next_free_rec of %d",
  2843. (unsigned long long)le64_to_cpu(eb->h_blkno),
  2844. le16_to_cpu(new_el->l_next_free_rec));
  2845. status = -EINVAL;
  2846. goto out;
  2847. }
  2848. rec = &new_el->l_recs[1];
  2849. }
  2850. }
  2851. if (rec) {
  2852. enum ocfs2_contig_type contig_type;
  2853. contig_type = ocfs2_et_extent_contig(et, rec, split_rec);
  2854. if (contig_type == CONTIG_LEFT && ret == CONTIG_RIGHT)
  2855. ret = CONTIG_LEFTRIGHT;
  2856. else if (ret == CONTIG_NONE)
  2857. ret = contig_type;
  2858. }
  2859. out:
  2860. if (left_path)
  2861. ocfs2_free_path(left_path);
  2862. if (right_path)
  2863. ocfs2_free_path(right_path);
  2864. return ret;
  2865. }
  2866. static void ocfs2_figure_contig_type(struct ocfs2_extent_tree *et,
  2867. struct ocfs2_insert_type *insert,
  2868. struct ocfs2_extent_list *el,
  2869. struct ocfs2_extent_rec *insert_rec)
  2870. {
  2871. int i;
  2872. enum ocfs2_contig_type contig_type = CONTIG_NONE;
  2873. BUG_ON(le16_to_cpu(el->l_tree_depth) != 0);
  2874. for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
  2875. contig_type = ocfs2_et_extent_contig(et, &el->l_recs[i],
  2876. insert_rec);
  2877. if (contig_type != CONTIG_NONE) {
  2878. insert->ins_contig_index = i;
  2879. break;
  2880. }
  2881. }
  2882. insert->ins_contig = contig_type;
  2883. if (insert->ins_contig != CONTIG_NONE) {
  2884. struct ocfs2_extent_rec *rec =
  2885. &el->l_recs[insert->ins_contig_index];
  2886. unsigned int len = le16_to_cpu(rec->e_leaf_clusters) +
  2887. le16_to_cpu(insert_rec->e_leaf_clusters);
  2888. if (et->et_max_leaf_clusters &&
  2889. (len > et->et_max_leaf_clusters))
  2890. insert->ins_contig = CONTIG_NONE;
  2891. }
  2892. }
  2893. static void ocfs2_figure_appending_type(struct ocfs2_insert_type *insert,
  2894. struct ocfs2_extent_list *el,
  2895. struct ocfs2_extent_rec *insert_rec)
  2896. {
  2897. int i;
  2898. u32 cpos = le32_to_cpu(insert_rec->e_cpos);
  2899. struct ocfs2_extent_rec *rec;
  2900. insert->ins_appending = APPEND_NONE;
  2901. BUG_ON(le16_to_cpu(el->l_tree_depth) != 0);
  2902. if (!el->l_next_free_rec)
  2903. goto set_tail_append;
  2904. if (ocfs2_is_empty_extent(&el->l_recs[0])) {
  2905. if (le16_to_cpu(el->l_next_free_rec) == 1)
  2906. goto set_tail_append;
  2907. }
  2908. i = le16_to_cpu(el->l_next_free_rec) - 1;
  2909. rec = &el->l_recs[i];
  2910. if (cpos >=
  2911. (le32_to_cpu(rec->e_cpos) + le16_to_cpu(rec->e_leaf_clusters)))
  2912. goto set_tail_append;
  2913. return;
  2914. set_tail_append:
  2915. insert->ins_appending = APPEND_TAIL;
  2916. }
/*
 * Work out the insert strategy for @insert_rec: tree depth, contiguity
 * with an existing record, and whether this is a tail append. Also
 * reports the number of free record slots in the rightmost leaf via
 * @free_records.
 *
 * On success (return 0), *last_eb_bh takes ownership of the rightmost
 * extent block's buffer_head (NULL for a depth-zero tree); the caller
 * must brelse() it. On error the buffer is released here.
 */
static int ocfs2_figure_insert_type(struct ocfs2_extent_tree *et,
				    struct buffer_head **last_eb_bh,
				    struct ocfs2_extent_rec *insert_rec,
				    int *free_records,
				    struct ocfs2_insert_type *insert)
{
	int ret;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_list *el;
	struct ocfs2_path *path = NULL;
	struct buffer_head *bh = NULL;

	insert->ins_split = SPLIT_NONE;

	el = et->et_root_el;
	insert->ins_tree_depth = le16_to_cpu(el->l_tree_depth);

	if (el->l_tree_depth) {
		/*
		 * Free-slot accounting is done against the rightmost
		 * leaf, so read the last extent block.
		 */
		ret = ocfs2_read_extent_block(et->et_ci,
					      ocfs2_et_get_last_eb_blk(et),
					      &bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
		eb = (struct ocfs2_extent_block *) bh->b_data;
		el = &eb->h_list;
	}

	*free_records = le16_to_cpu(el->l_count) -
		le16_to_cpu(el->l_next_free_rec);

	/* Depth zero: the root list is the leaf; no path walk needed. */
	if (!insert->ins_tree_depth) {
		ocfs2_figure_contig_type(et, insert, el, insert_rec);
		ocfs2_figure_appending_type(insert, el, insert_rec);
		return 0;
	}

	path = ocfs2_new_path_from_et(et);
	if (!path) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_find_path(et->et_ci, path, le32_to_cpu(insert_rec->e_cpos));
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	el = path_leaf_el(path);

	ocfs2_figure_contig_type(et, insert, el, insert_rec);

	/*
	 * Left-contiguity with record 0 would merge across a leaf
	 * boundary; treat it as non-contiguous here. NOTE(review): this
	 * looks intentionally conservative - the merge would belong to
	 * the previous leaf.
	 */
	if (insert->ins_contig == CONTIG_LEFT &&
	    insert->ins_contig_index == 0)
		insert->ins_contig = CONTIG_NONE;

	/* Only the rightmost leaf can take a tail append. */
	if (ocfs2_et_get_last_eb_blk(et) ==
	    path_leaf_bh(path)->b_blocknr) {
		ocfs2_figure_appending_type(insert, el, insert_rec);
	}

out:
	ocfs2_free_path(path);

	if (ret == 0)
		*last_eb_bh = bh;	/* ownership passes to the caller */
	else
		brelse(bh);
	return ret;
}
/*
 * Insert an extent of @new_clusters clusters starting at block
 * @start_blk / logical cluster @cpos into the extent tree @et.
 *
 * @meta_ac is only consumed if the tree must grow (no free record
 * slots and no contiguous neighbor); the caller is responsible for
 * reserving enough metadata up front.
 *
 * Returns 0 on success, negative error code otherwise.
 */
int ocfs2_insert_extent(handle_t *handle,
			struct ocfs2_extent_tree *et,
			u32 cpos,
			u64 start_blk,
			u32 new_clusters,
			u8 flags,
			struct ocfs2_alloc_context *meta_ac)
{
	int status;
	/* Set by ocfs2_figure_insert_type() before first use. */
	int uninitialized_var(free_records);
	struct buffer_head *last_eb_bh = NULL;
	struct ocfs2_insert_type insert = {0, };
	struct ocfs2_extent_rec rec;

	trace_ocfs2_insert_extent_start(
		(unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
		cpos, new_clusters);

	memset(&rec, 0, sizeof(rec));
	rec.e_cpos = cpu_to_le32(cpos);
	rec.e_blkno = cpu_to_le64(start_blk);
	rec.e_leaf_clusters = cpu_to_le16(new_clusters);
	rec.e_flags = flags;

	/* Let the tree-type specific code veto invalid records. */
	status = ocfs2_et_insert_check(et, &rec);
	if (status) {
		mlog_errno(status);
		goto bail;
	}

	status = ocfs2_figure_insert_type(et, &last_eb_bh, &rec,
					  &free_records, &insert);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	trace_ocfs2_insert_extent(insert.ins_appending, insert.ins_contig,
				  insert.ins_contig_index, free_records,
				  insert.ins_tree_depth);

	/*
	 * A non-contiguous insert into a full rightmost leaf needs a
	 * new level/leaf first.
	 */
	if (insert.ins_contig == CONTIG_NONE && free_records == 0) {
		status = ocfs2_grow_tree(handle, et,
					 &insert.ins_tree_depth, &last_eb_bh,
					 meta_ac);
		if (status) {
			mlog_errno(status);
			goto bail;
		}
	}

	/* Finally, we can add clusters. This might rotate the tree for us. */
	status = ocfs2_do_insert_extent(handle, et, &rec, &insert);
	if (status < 0)
		mlog_errno(status);
	else
		/* Keep the in-memory extent map in sync. */
		ocfs2_et_extent_map_insert(et, &rec);

bail:
	brelse(last_eb_bh);

	return status;
}
/*
 * Allocate up to @clusters_to_add clusters from @data_ac and insert
 * them into the btree at logical offset *@logical_offset.
 *
 * May return -EAGAIN with *@reason_ret set to:
 *   RESTART_META  - more metadata must be reserved before retrying;
 *   RESTART_TRANS - clusters were added, but fewer than requested;
 *                   the caller should extend/restart the transaction
 *                   and call again (*@logical_offset is advanced).
 *
 * On partial success *@logical_offset is advanced past the clusters
 * actually inserted.
 */
int ocfs2_add_clusters_in_btree(handle_t *handle,
				struct ocfs2_extent_tree *et,
				u32 *logical_offset,
				u32 clusters_to_add,
				int mark_unwritten,
				struct ocfs2_alloc_context *data_ac,
				struct ocfs2_alloc_context *meta_ac,
				enum ocfs2_alloc_restarted *reason_ret)
{
	int status = 0, err = 0;	/* err is a trace-only diagnostic */
	int free_extents;
	enum ocfs2_alloc_restarted reason = RESTART_NONE;
	u32 bit_off, num_bits;
	u64 block;
	u8 flags = 0;
	struct ocfs2_super *osb =
		OCFS2_SB(ocfs2_metadata_cache_get_super(et->et_ci));

	BUG_ON(!clusters_to_add);

	if (mark_unwritten)
		flags = OCFS2_EXT_UNWRITTEN;

	free_extents = ocfs2_num_free_extents(osb, et);
	if (free_extents < 0) {
		status = free_extents;
		mlog_errno(status);
		goto leave;
	}

	/*
	 * With no free extent slots we may have to grow the tree, which
	 * needs metadata: bail out for a restart if the caller gave us
	 * no reservation, or too small a one.
	 */
	if (!free_extents && !meta_ac) {
		err = -1;
		status = -EAGAIN;
		reason = RESTART_META;
		goto leave;
	} else if ((!free_extents)
		   && (ocfs2_alloc_context_bits_left(meta_ac)
		       < ocfs2_extend_meta_needed(et->et_root_el))) {
		err = -2;
		status = -EAGAIN;
		reason = RESTART_META;
		goto leave;
	}

	/* Claim at least 1 cluster, up to clusters_to_add, contiguously. */
	status = __ocfs2_claim_clusters(handle, data_ac, 1,
					clusters_to_add, &bit_off, &num_bits);
	if (status < 0) {
		if (status != -ENOSPC)
			mlog_errno(status);
		goto leave;
	}

	BUG_ON(num_bits > clusters_to_add);

	/* reserve our write early -- insert_extent may update the tree root */
	status = ocfs2_et_root_journal_access(handle, et,
					      OCFS2_JOURNAL_ACCESS_WRITE);
	if (status < 0) {
		mlog_errno(status);
		goto leave;
	}

	block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
	trace_ocfs2_add_clusters_in_btree(
	     (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
	     bit_off, num_bits);
	status = ocfs2_insert_extent(handle, et, *logical_offset, block,
				     num_bits, flags, meta_ac);
	if (status < 0) {
		mlog_errno(status);
		goto leave;
	}

	ocfs2_journal_dirty(handle, et->et_root_bh);

	clusters_to_add -= num_bits;
	*logical_offset += num_bits;

	/* A partial claim means the caller must loop with a fresh trans. */
	if (clusters_to_add) {
		err = clusters_to_add;
		status = -EAGAIN;
		reason = RESTART_TRANS;
	}

leave:
	if (reason_ret)
		*reason_ret = reason;
	trace_ocfs2_add_clusters_in_btree_ret(status, reason, err);
	return status;
}
  3107. static void ocfs2_make_right_split_rec(struct super_block *sb,
  3108. struct ocfs2_extent_rec *split_rec,
  3109. u32 cpos,
  3110. struct ocfs2_extent_rec *rec)
  3111. {
  3112. u32 rec_cpos = le32_to_cpu(rec->e_cpos);
  3113. u32 rec_range = rec_cpos + le16_to_cpu(rec->e_leaf_clusters);
  3114. memset(split_rec, 0, sizeof(struct ocfs2_extent_rec));
  3115. split_rec->e_cpos = cpu_to_le32(cpos);
  3116. split_rec->e_leaf_clusters = cpu_to_le16(rec_range - cpos);
  3117. split_rec->e_blkno = rec->e_blkno;
  3118. le64_add_cpu(&split_rec->e_blkno,
  3119. ocfs2_clusters_to_blocks(sb, cpos - rec_cpos));
  3120. split_rec->e_flags = rec->e_flags;
  3121. }
/*
 * Split the record at @split_index in @path's leaf so that
 * @orig_split_rec occupies its own record, inserting via
 * ocfs2_do_insert_extent() with SPLIT_LEFT/SPLIT_RIGHT semantics.
 *
 * A split that lands strictly inside the record (neither edge aligned)
 * is done in two passes: first the right-hand remainder is split off
 * (SPLIT_RIGHT with a synthesized record), then we loop back via the
 * "leftright" label to split the caller's record out of what remains.
 * do_leftright tracks which pass we're on (0 = first, 1 = need second,
 * 2 = done).
 *
 * *last_eb_bh may be swapped by ocfs2_grow_tree() if the rightmost leaf
 * is full.
 */
static int ocfs2_split_and_insert(handle_t *handle,
				  struct ocfs2_extent_tree *et,
				  struct ocfs2_path *path,
				  struct buffer_head **last_eb_bh,
				  int split_index,
				  struct ocfs2_extent_rec *orig_split_rec,
				  struct ocfs2_alloc_context *meta_ac)
{
	int ret = 0, depth;
	unsigned int insert_range, rec_range, do_leftright = 0;
	struct ocfs2_extent_rec tmprec;
	struct ocfs2_extent_list *rightmost_el;
	struct ocfs2_extent_rec rec;
	struct ocfs2_extent_rec split_rec = *orig_split_rec;
	struct ocfs2_insert_type insert;
	struct ocfs2_extent_block *eb;

leftright:
	/* Re-read the record: a prior pass may have changed the leaf. */
	rec = path_leaf_el(path)->l_recs[split_index];

	rightmost_el = et->et_root_el;

	depth = le16_to_cpu(rightmost_el->l_tree_depth);
	if (depth) {
		BUG_ON(!(*last_eb_bh));
		eb = (struct ocfs2_extent_block *) (*last_eb_bh)->b_data;
		rightmost_el = &eb->h_list;
	}

	/* Splitting adds a record; grow the tree if the leaf is full. */
	if (le16_to_cpu(rightmost_el->l_next_free_rec) ==
	    le16_to_cpu(rightmost_el->l_count)) {
		ret = ocfs2_grow_tree(handle, et,
				      &depth, last_eb_bh, meta_ac);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
	}

	memset(&insert, 0, sizeof(struct ocfs2_insert_type));
	insert.ins_appending = APPEND_NONE;
	insert.ins_contig = CONTIG_NONE;
	insert.ins_tree_depth = depth;

	insert_range = le32_to_cpu(split_rec.e_cpos) +
		le16_to_cpu(split_rec.e_leaf_clusters);
	rec_range = le32_to_cpu(rec.e_cpos) +
		le16_to_cpu(rec.e_leaf_clusters);

	if (split_rec.e_cpos == rec.e_cpos) {
		/* Split range is left-aligned with the record. */
		insert.ins_split = SPLIT_LEFT;
	} else if (insert_range == rec_range) {
		/* Split range is right-aligned with the record. */
		insert.ins_split = SPLIT_RIGHT;
	} else {
		/*
		 * Interior split: peel off the right remainder first,
		 * then come back (exactly once) for the caller's record.
		 */
		insert.ins_split = SPLIT_RIGHT;

		ocfs2_make_right_split_rec(ocfs2_metadata_cache_get_super(et->et_ci),
					   &tmprec, insert_range, &rec);

		split_rec = tmprec;

		BUG_ON(do_leftright);
		do_leftright = 1;
	}

	ret = ocfs2_do_insert_extent(handle, et, &split_rec, &insert);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	if (do_leftright == 1) {
		u32 cpos;
		struct ocfs2_extent_list *el;

		do_leftright++;
		split_rec = *orig_split_rec;

		/* The insert may have reshaped the tree; re-find the leaf. */
		ocfs2_reinit_path(path, 1);

		cpos = le32_to_cpu(split_rec.e_cpos);
		ret = ocfs2_find_path(et->et_ci, path, cpos);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		el = path_leaf_el(path);
		split_index = ocfs2_search_extent_list(el, cpos);
		goto leftright;
	}
out:

	return ret;
}
  3200. static int ocfs2_replace_extent_rec(handle_t *handle,
  3201. struct ocfs2_extent_tree *et,
  3202. struct ocfs2_path *path,
  3203. struct ocfs2_extent_list *el,
  3204. int split_index,
  3205. struct ocfs2_extent_rec *split_rec)
  3206. {
  3207. int ret;
  3208. ret = ocfs2_path_bh_journal_access(handle, et->et_ci, path,
  3209. path_num_items(path) - 1);
  3210. if (ret) {
  3211. mlog_errno(ret);
  3212. goto out;
  3213. }
  3214. el->l_recs[split_index] = *split_rec;
  3215. ocfs2_journal_dirty(handle, path_leaf_bh(path));
  3216. out:
  3217. return ret;
  3218. }
/*
 * Split the record at @split_index so that @split_rec becomes its own
 * record. The split range must lie entirely within the existing record.
 *
 * Three outcomes, chosen from the merge context:
 *  - exact cover, no contiguous neighbor: replace the record in place;
 *  - no contiguous neighbor: split and insert the new record;
 *  - contiguous neighbor(s): merge into them (may free extent blocks
 *    into @dealloc).
 */
int ocfs2_split_extent(handle_t *handle,
		       struct ocfs2_extent_tree *et,
		       struct ocfs2_path *path,
		       int split_index,
		       struct ocfs2_extent_rec *split_rec,
		       struct ocfs2_alloc_context *meta_ac,
		       struct ocfs2_cached_dealloc_ctxt *dealloc)
{
	int ret = 0;
	struct ocfs2_extent_list *el = path_leaf_el(path);
	struct buffer_head *last_eb_bh = NULL;
	struct ocfs2_extent_rec *rec = &el->l_recs[split_index];
	struct ocfs2_merge_ctxt ctxt;
	struct ocfs2_extent_list *rightmost_el;

	/* The split range must be contained in the target record. */
	if (le32_to_cpu(rec->e_cpos) > le32_to_cpu(split_rec->e_cpos) ||
	    ((le32_to_cpu(rec->e_cpos) + le16_to_cpu(rec->e_leaf_clusters)) <
	     (le32_to_cpu(split_rec->e_cpos) + le16_to_cpu(split_rec->e_leaf_clusters)))) {
		ret = -EIO;
		mlog_errno(ret);
		goto out;
	}

	ctxt.c_contig_type = ocfs2_figure_merge_contig_type(et, path, el,
							    split_index,
							    split_rec);

	/*
	 * The core merge/split code needs the rightmost extent list:
	 * read the last extent block for deep trees, else use the root.
	 */
	if (path->p_tree_depth) {
		struct ocfs2_extent_block *eb;

		ret = ocfs2_read_extent_block(et->et_ci,
					      ocfs2_et_get_last_eb_blk(et),
					      &last_eb_bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
		rightmost_el = &eb->h_list;
	} else
		rightmost_el = path_root_el(path);

	if (rec->e_cpos == split_rec->e_cpos &&
	    rec->e_leaf_clusters == split_rec->e_leaf_clusters)
		ctxt.c_split_covers_rec = 1;
	else
		ctxt.c_split_covers_rec = 0;

	ctxt.c_has_empty_extent = ocfs2_is_empty_extent(&el->l_recs[0]);

	trace_ocfs2_split_extent(split_index, ctxt.c_contig_type,
				 ctxt.c_has_empty_extent,
				 ctxt.c_split_covers_rec);

	if (ctxt.c_contig_type == CONTIG_NONE) {
		if (ctxt.c_split_covers_rec)
			ret = ocfs2_replace_extent_rec(handle, et, path, el,
						       split_index, split_rec);
		else
			ret = ocfs2_split_and_insert(handle, et, path,
						     &last_eb_bh, split_index,
						     split_rec, meta_ac);
		if (ret)
			mlog_errno(ret);
	} else {
		ret = ocfs2_try_to_merge_extent(handle, et, path,
						split_index, split_rec,
						dealloc, &ctxt);
		if (ret)
			mlog_errno(ret);
	}

out:
	brelse(last_eb_bh);
	return ret;
}
/*
 * Change extent flags on the (cpos, len, phys) portion of an existing
 * extent record: set the bits in @new_flags and clear those in
 * @clear_flags, splitting the record via ocfs2_split_extent() when the
 * range is smaller than the record.
 *
 * Returns -EROFS if the extent can't be found (on-disk inconsistency),
 * -EIO if the current flags contradict the requested change.
 */
int ocfs2_change_extent_flag(handle_t *handle,
			     struct ocfs2_extent_tree *et,
			     u32 cpos, u32 len, u32 phys,
			     struct ocfs2_alloc_context *meta_ac,
			     struct ocfs2_cached_dealloc_ctxt *dealloc,
			     int new_flags, int clear_flags)
{
	int ret, index;
	struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
	u64 start_blkno = ocfs2_clusters_to_blocks(sb, phys);
	struct ocfs2_extent_rec split_rec;
	struct ocfs2_path *left_path = NULL;
	struct ocfs2_extent_list *el;
	struct ocfs2_extent_rec *rec;

	left_path = ocfs2_new_path_from_et(et);
	if (!left_path) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_find_path(et->et_ci, left_path, cpos);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}
	el = path_leaf_el(left_path);

	index = ocfs2_search_extent_list(el, cpos);
	if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
		ocfs2_error(sb,
			    "Owner %llu has an extent at cpos %u which can no "
			    "longer be found.\n",
			    (unsigned long long)
			    ocfs2_metadata_cache_owner(et->et_ci), cpos);
		ret = -EROFS;
		goto out;
	}

	/* Flag mismatches below all report -EIO. */
	ret = -EIO;
	rec = &el->l_recs[index];
	if (new_flags && (rec->e_flags & new_flags)) {
		mlog(ML_ERROR, "Owner %llu tried to set %d flags on an "
		     "extent that already had them",
		     (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
		     new_flags);
		goto out;
	}

	if (clear_flags && !(rec->e_flags & clear_flags)) {
		mlog(ML_ERROR, "Owner %llu tried to clear %d flags on an "
		     "extent that didn't have them",
		     (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
		     clear_flags);
		goto out;
	}

	/* Build the replacement record covering just (cpos, len, phys). */
	memset(&split_rec, 0, sizeof(struct ocfs2_extent_rec));
	split_rec.e_cpos = cpu_to_le32(cpos);
	split_rec.e_leaf_clusters = cpu_to_le16(len);
	split_rec.e_blkno = cpu_to_le64(start_blkno);
	split_rec.e_flags = rec->e_flags;
	if (new_flags)
		split_rec.e_flags |= new_flags;
	if (clear_flags)
		split_rec.e_flags &= ~clear_flags;

	ret = ocfs2_split_extent(handle, et, left_path,
				 index, &split_rec, meta_ac,
				 dealloc);
	if (ret)
		mlog_errno(ret);

out:
	ocfs2_free_path(left_path);
	return ret;

}
  3356. /*
  3357. * Mark the already-existing extent at cpos as written for len clusters.
  3358. * This removes the unwritten extent flag.
  3359. *
  3360. * If the existing extent is larger than the request, initiate a
  3361. * split. An attempt will be made at merging with adjacent extents.
  3362. *
  3363. * The caller is responsible for passing down meta_ac if we'll need it.
  3364. */
  3365. int ocfs2_mark_extent_written(struct inode *inode,
  3366. struct ocfs2_extent_tree *et,
  3367. handle_t *handle, u32 cpos, u32 len, u32 phys,
  3368. struct ocfs2_alloc_context *meta_ac,
  3369. struct ocfs2_cached_dealloc_ctxt *dealloc)
  3370. {
  3371. int ret;
  3372. trace_ocfs2_mark_extent_written(
  3373. (unsigned long long)OCFS2_I(inode)->ip_blkno,
  3374. cpos, len, phys);
  3375. if (!ocfs2_writes_unwritten_extents(OCFS2_SB(inode->i_sb))) {
  3376. ocfs2_error(inode->i_sb, "Inode %llu has unwritten extents "
  3377. "that are being written to, but the feature bit "
  3378. "is not set in the super block.",
  3379. (unsigned long long)OCFS2_I(inode)->ip_blkno);
  3380. ret = -EROFS;
  3381. goto out;
  3382. }
  3383. ocfs2_et_extent_map_truncate(et, 0);
  3384. ret = ocfs2_change_extent_flag(handle, et, cpos,
  3385. len, phys, meta_ac, dealloc,
  3386. 0, OCFS2_EXT_UNWRITTEN);
  3387. if (ret)
  3388. mlog_errno(ret);
  3389. out:
  3390. return ret;
  3391. }
/*
 * Split the record at @index so that a new record begins at cluster
 * @new_range: build the right-hand portion with
 * ocfs2_make_right_split_rec() and insert it as a SPLIT_RIGHT.
 * Grows the tree first if the rightmost leaf is already full.
 */
static int ocfs2_split_tree(handle_t *handle, struct ocfs2_extent_tree *et,
			    struct ocfs2_path *path,
			    int index, u32 new_range,
			    struct ocfs2_alloc_context *meta_ac)
{
	int ret, depth, credits;
	struct buffer_head *last_eb_bh = NULL;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_list *rightmost_el, *el;
	struct ocfs2_extent_rec split_rec;
	struct ocfs2_extent_rec *rec;
	struct ocfs2_insert_type insert;

	/* Build the record covering [new_range, end of rec). */
	el = path_leaf_el(path);
	rec = &el->l_recs[index];
	ocfs2_make_right_split_rec(ocfs2_metadata_cache_get_super(et->et_ci),
				   &split_rec, new_range, rec);

	depth = path->p_tree_depth;
	if (depth > 0) {
		ret = ocfs2_read_extent_block(et->et_ci,
					      ocfs2_et_get_last_eb_blk(et),
					      &last_eb_bh);
		if (ret < 0) {
			mlog_errno(ret);
			goto out;
		}

		eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
		rightmost_el = &eb->h_list;
	} else
		rightmost_el = path_leaf_el(path);

	/* Enough credits for a path-deep update plus a possible grow. */
	credits = path->p_tree_depth +
		  ocfs2_extend_meta_needed(et->et_root_el);
	ret = ocfs2_extend_trans(handle, credits);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/* The new record needs a slot; grow if the rightmost leaf is full. */
	if (le16_to_cpu(rightmost_el->l_next_free_rec) ==
	    le16_to_cpu(rightmost_el->l_count)) {
		ret = ocfs2_grow_tree(handle, et, &depth, &last_eb_bh,
				      meta_ac);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
	}

	memset(&insert, 0, sizeof(struct ocfs2_insert_type));
	insert.ins_appending = APPEND_NONE;
	insert.ins_contig = CONTIG_NONE;
	insert.ins_split = SPLIT_RIGHT;
	insert.ins_tree_depth = depth;

	ret = ocfs2_do_insert_extent(handle, et, &split_rec, &insert);
	if (ret)
		mlog_errno(ret);

out:
	brelse(last_eb_bh);
	return ret;
}
/*
 * Truncate the cluster range (cpos, len) out of the record at @index in
 * @path's leaf. The range must be aligned with at least one edge of the
 * record: whole-record removal empties the record; a left-edge match
 * shifts the start forward; a right-edge match shrinks the length. Any
 * other shape is a caller bug (BUG()).
 *
 * Afterwards the tree is rotated left to push empty records out, and
 * rightmost-record bookkeeping is fixed up when the removed range was
 * at the right edge of the tree.
 */
static int ocfs2_truncate_rec(handle_t *handle,
			      struct ocfs2_extent_tree *et,
			      struct ocfs2_path *path, int index,
			      struct ocfs2_cached_dealloc_ctxt *dealloc,
			      u32 cpos, u32 len)
{
	int ret;
	u32 left_cpos, rec_range, trunc_range;
	int wants_rotate = 0, is_rightmost_tree_rec = 0;
	struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
	struct ocfs2_path *left_path = NULL;
	struct ocfs2_extent_list *el = path_leaf_el(path);
	struct ocfs2_extent_rec *rec;
	struct ocfs2_extent_block *eb;

	/* Rotate an existing empty record away first; index shifts left. */
	if (ocfs2_is_empty_extent(&el->l_recs[0]) && index > 0) {
		ret = ocfs2_rotate_tree_left(handle, et, path, dealloc);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		index--;
	}

	/* Is this the last record of the rightmost leaf in the tree? */
	if (index == (le16_to_cpu(el->l_next_free_rec) - 1) &&
	    path->p_tree_depth) {
		eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data;
		if (eb->h_next_leaf_blk == 0)
			is_rightmost_tree_rec = 1;
	}

	rec = &el->l_recs[index];
	if (index == 0 && path->p_tree_depth &&
	    le32_to_cpu(rec->e_cpos) == cpos) {
		/*
		 * Truncating the leftmost record's start changes this
		 * leaf's key; the left neighbor path is needed for the
		 * edge-insert fixup below.
		 */
		ret = ocfs2_find_cpos_for_left_leaf(sb, path, &left_cpos);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		if (left_cpos && le16_to_cpu(el->l_next_free_rec) > 1) {
			left_path = ocfs2_new_path_from_path(path);
			if (!left_path) {
				ret = -ENOMEM;
				mlog_errno(ret);
				goto out;
			}

			ret = ocfs2_find_path(et->et_ci, left_path,
					      left_cpos);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}
		}
	}

	ret = ocfs2_extend_rotate_transaction(handle, 0,
					      handle->h_buffer_credits,
					      path);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_journal_access_path(et->et_ci, handle, path);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/*
	 * NOTE(review): left_path may still be NULL here; this relies on
	 * ocfs2_journal_access_path() being a no-op for a NULL path -
	 * confirm against its definition.
	 */
	ret = ocfs2_journal_access_path(et->et_ci, handle, left_path);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	rec_range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec);
	trunc_range = cpos + len;

	if (le32_to_cpu(rec->e_cpos) == cpos && rec_range == trunc_range) {
		/* Whole record removed: empty it and compact the list. */
		int next_free;

		memset(rec, 0, sizeof(*rec));
		ocfs2_cleanup_merge(el, index);
		wants_rotate = 1;

		next_free = le16_to_cpu(el->l_next_free_rec);
		if (is_rightmost_tree_rec && next_free > 1) {
			/*
			 * We skip the edge update if this path will
			 * be deleted by the rotate code.
			 */
			rec = &el->l_recs[next_free - 1];
			ocfs2_adjust_rightmost_records(handle, et, path,
						       rec);
		}
	} else if (le32_to_cpu(rec->e_cpos) == cpos) {
		/* Left-edge truncate: advance cpos/blkno, shrink length. */
		le32_add_cpu(&rec->e_cpos, len);
		le64_add_cpu(&rec->e_blkno, ocfs2_clusters_to_blocks(sb, len));
		le16_add_cpu(&rec->e_leaf_clusters, -len);
	} else if (rec_range == trunc_range) {
		/* Right-edge truncate: just shrink the length. */
		le16_add_cpu(&rec->e_leaf_clusters, -len);

		if (is_rightmost_tree_rec)
			ocfs2_adjust_rightmost_records(handle, et, path, rec);
	} else {
		/* Caller should have checked this. */
		mlog(ML_ERROR, "Owner %llu: Invalid record truncate: (%u, %u) "
		     "(%u, %u)\n",
		     (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
		     le32_to_cpu(rec->e_cpos),
		     le16_to_cpu(rec->e_leaf_clusters), cpos, len);
		BUG();
	}

	/* Propagate a changed leading cpos up through the parent nodes. */
	if (left_path) {
		int subtree_index;

		subtree_index = ocfs2_find_subtree_root(et, left_path, path);
		ocfs2_complete_edge_insert(handle, left_path, path,
					   subtree_index);
	}

	ocfs2_journal_dirty(handle, path_leaf_bh(path));

	ret = ocfs2_rotate_tree_left(handle, et, path, dealloc);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

out:
	ocfs2_free_path(left_path);
	return ret;
}
  3562. int ocfs2_remove_extent(handle_t *handle,
  3563. struct ocfs2_extent_tree *et,
  3564. u32 cpos, u32 len,
  3565. struct ocfs2_alloc_context *meta_ac,
  3566. struct ocfs2_cached_dealloc_ctxt *dealloc)
  3567. {
  3568. int ret, index;
  3569. u32 rec_range, trunc_range;
  3570. struct ocfs2_extent_rec *rec;
  3571. struct ocfs2_extent_list *el;
  3572. struct ocfs2_path *path = NULL;
  3573. ocfs2_et_extent_map_truncate(et, 0);
  3574. path = ocfs2_new_path_from_et(et);
  3575. if (!path) {
  3576. ret = -ENOMEM;
  3577. mlog_errno(ret);
  3578. goto out;
  3579. }
  3580. ret = ocfs2_find_path(et->et_ci, path, cpos);
  3581. if (ret) {
  3582. mlog_errno(ret);
  3583. goto out;
  3584. }
  3585. el = path_leaf_el(path);
  3586. index = ocfs2_search_extent_list(el, cpos);
  3587. if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
  3588. ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
  3589. "Owner %llu has an extent at cpos %u which can no "
  3590. "longer be found.\n",
  3591. (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
  3592. cpos);
  3593. ret = -EROFS;
  3594. goto out;
  3595. }
  3596. rec = &el->l_recs[index];
  3597. rec_range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec);
  3598. trunc_range = cpos + len;
  3599. BUG_ON(cpos < le32_to_cpu(rec->e_cpos) || trunc_range > rec_range);
  3600. trace_ocfs2_remove_extent(
  3601. (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
  3602. cpos, len, index, le32_to_cpu(rec->e_cpos),
  3603. ocfs2_rec_clusters(el, rec));
  3604. if (le32_to_cpu(rec->e_cpos) == cpos || rec_range == trunc_range) {
  3605. ret = ocfs2_truncate_rec(handle, et, path, index, dealloc,
  3606. cpos, len);
  3607. if (ret) {
  3608. mlog_errno(ret);
  3609. goto out;
  3610. }
  3611. } else {
  3612. ret = ocfs2_split_tree(handle, et, path, index,
  3613. trunc_range, meta_ac);
  3614. if (ret) {
  3615. mlog_errno(ret);
  3616. goto out;
  3617. }
  3618. ocfs2_reinit_path(path, 1);
  3619. ret = ocfs2_find_path(et->et_ci, path, cpos);
  3620. if (ret) {
  3621. mlog_errno(ret);
  3622. goto out;
  3623. }
  3624. el = path_leaf_el(path);
  3625. index = ocfs2_search_extent_list(el, cpos);
  3626. if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
  3627. ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
  3628. "Owner %llu: split at cpos %u lost record.",
  3629. (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
  3630. cpos);
  3631. ret = -EROFS;
  3632. goto out;
  3633. }
  3634. rec = &el->l_recs[index];
  3635. rec_range = le32_to_cpu(rec->e_cpos) +
  3636. ocfs2_rec_clusters(el, rec);
  3637. if (rec_range != trunc_range) {
  3638. ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
  3639. "Owner %llu: error after split at cpos %u"
  3640. "trunc len %u, existing record is (%u,%u)",
  3641. (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
  3642. cpos, len, le32_to_cpu(rec->e_cpos),
  3643. ocfs2_rec_clusters(el, rec));
  3644. ret = -EROFS;
  3645. goto out;
  3646. }
  3647. ret = ocfs2_truncate_rec(handle, et, path, index, dealloc,
  3648. cpos, len);
  3649. if (ret) {
  3650. mlog_errno(ret);
  3651. goto out;
  3652. }
  3653. }
  3654. out:
  3655. ocfs2_free_path(path);
  3656. return ret;
  3657. }
  3658. static int ocfs2_reserve_blocks_for_rec_trunc(struct inode *inode,
  3659. struct ocfs2_extent_tree *et,
  3660. u32 extents_to_split,
  3661. struct ocfs2_alloc_context **ac,
  3662. int extra_blocks)
  3663. {
  3664. int ret = 0, num_free_extents;
  3665. unsigned int max_recs_needed = 2 * extents_to_split;
  3666. struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
  3667. *ac = NULL;
  3668. num_free_extents = ocfs2_num_free_extents(osb, et);
  3669. if (num_free_extents < 0) {
  3670. ret = num_free_extents;
  3671. mlog_errno(ret);
  3672. goto out;
  3673. }
  3674. if (!num_free_extents ||
  3675. (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed))
  3676. extra_blocks += ocfs2_extend_meta_needed(et->et_root_el);
  3677. if (extra_blocks) {
  3678. ret = ocfs2_reserve_new_metadata_blocks(osb, extra_blocks, ac);
  3679. if (ret < 0) {
  3680. if (ret != -ENOSPC)
  3681. mlog_errno(ret);
  3682. goto out;
  3683. }
  3684. }
  3685. out:
  3686. if (ret) {
  3687. if (*ac) {
  3688. ocfs2_free_alloc_context(*ac);
  3689. *ac = NULL;
  3690. }
  3691. }
  3692. return ret;
  3693. }
  3694. int ocfs2_remove_btree_range(struct inode *inode,
  3695. struct ocfs2_extent_tree *et,
  3696. u32 cpos, u32 phys_cpos, u32 len, int flags,
  3697. struct ocfs2_cached_dealloc_ctxt *dealloc,
  3698. u64 refcount_loc)
  3699. {
  3700. int ret, credits = 0, extra_blocks = 0;
  3701. u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
  3702. struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
  3703. struct inode *tl_inode = osb->osb_tl_inode;
  3704. handle_t *handle;
  3705. struct ocfs2_alloc_context *meta_ac = NULL;
  3706. struct ocfs2_refcount_tree *ref_tree = NULL;
  3707. if ((flags & OCFS2_EXT_REFCOUNTED) && len) {
  3708. BUG_ON(!(OCFS2_I(inode)->ip_dyn_features &
  3709. OCFS2_HAS_REFCOUNT_FL));
  3710. ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1,
  3711. &ref_tree, NULL);
  3712. if (ret) {
  3713. mlog_errno(ret);
  3714. goto out;
  3715. }
  3716. ret = ocfs2_prepare_refcount_change_for_del(inode,
  3717. refcount_loc,
  3718. phys_blkno,
  3719. len,
  3720. &credits,
  3721. &extra_blocks);
  3722. if (ret < 0) {
  3723. mlog_errno(ret);
  3724. goto out;
  3725. }
  3726. }
  3727. ret = ocfs2_reserve_blocks_for_rec_trunc(inode, et, 1, &meta_ac,
  3728. extra_blocks);
  3729. if (ret) {
  3730. mlog_errno(ret);
  3731. return ret;
  3732. }
  3733. mutex_lock(&tl_inode->i_mutex);
  3734. if (ocfs2_truncate_log_needs_flush(osb)) {
  3735. ret = __ocfs2_flush_truncate_log(osb);
  3736. if (ret < 0) {
  3737. mlog_errno(ret);
  3738. goto out;
  3739. }
  3740. }
  3741. handle = ocfs2_start_trans(osb,
  3742. ocfs2_remove_extent_credits(osb->sb) + credits);
  3743. if (IS_ERR(handle)) {
  3744. ret = PTR_ERR(handle);
  3745. mlog_errno(ret);
  3746. goto out;
  3747. }
  3748. ret = ocfs2_et_root_journal_access(handle, et,
  3749. OCFS2_JOURNAL_ACCESS_WRITE);
  3750. if (ret) {
  3751. mlog_errno(ret);
  3752. goto out_commit;
  3753. }
  3754. dquot_free_space_nodirty(inode,
  3755. ocfs2_clusters_to_bytes(inode->i_sb, len));
  3756. ret = ocfs2_remove_extent(handle, et, cpos, len, meta_ac, dealloc);
  3757. if (ret) {
  3758. mlog_errno(ret);
  3759. goto out_commit;
  3760. }
  3761. ocfs2_et_update_clusters(et, -len);
  3762. ocfs2_journal_dirty(handle, et->et_root_bh);
  3763. if (phys_blkno) {
  3764. if (flags & OCFS2_EXT_REFCOUNTED)
  3765. ret = ocfs2_decrease_refcount(inode, handle,
  3766. ocfs2_blocks_to_clusters(osb->sb,
  3767. phys_blkno),
  3768. len, meta_ac,
  3769. dealloc, 1);
  3770. else
  3771. ret = ocfs2_truncate_log_append(osb, handle,
  3772. phys_blkno, len);
  3773. if (ret)
  3774. mlog_errno(ret);
  3775. }
  3776. out_commit:
  3777. ocfs2_commit_trans(osb, handle);
  3778. out:
  3779. mutex_unlock(&tl_inode->i_mutex);
  3780. if (meta_ac)
  3781. ocfs2_free_alloc_context(meta_ac);
  3782. if (ref_tree)
  3783. ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
  3784. return ret;
  3785. }
  3786. int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb)
  3787. {
  3788. struct buffer_head *tl_bh = osb->osb_tl_bh;
  3789. struct ocfs2_dinode *di;
  3790. struct ocfs2_truncate_log *tl;
  3791. di = (struct ocfs2_dinode *) tl_bh->b_data;
  3792. tl = &di->id2.i_dealloc;
  3793. mlog_bug_on_msg(le16_to_cpu(tl->tl_used) > le16_to_cpu(tl->tl_count),
  3794. "slot %d, invalid truncate log parameters: used = "
  3795. "%u, count = %u\n", osb->slot_num,
  3796. le16_to_cpu(tl->tl_used), le16_to_cpu(tl->tl_count));
  3797. return le16_to_cpu(tl->tl_used) == le16_to_cpu(tl->tl_count);
  3798. }
  3799. static int ocfs2_truncate_log_can_coalesce(struct ocfs2_truncate_log *tl,
  3800. unsigned int new_start)
  3801. {
  3802. unsigned int tail_index;
  3803. unsigned int current_tail;
  3804. if (!le16_to_cpu(tl->tl_used))
  3805. return 0;
  3806. tail_index = le16_to_cpu(tl->tl_used) - 1;
  3807. current_tail = le32_to_cpu(tl->tl_recs[tail_index].t_start);
  3808. current_tail += le32_to_cpu(tl->tl_recs[tail_index].t_clusters);
  3809. return current_tail == new_start;
  3810. }
/*
 * Append a freed cluster run to this node's truncate log.
 *
 * @osb:          mounted volume
 * @handle:       running transaction (started by the caller)
 * @start_blk:    first block of the freed run
 * @num_clusters: length of the run in clusters
 *
 * The caller must hold tl_inode->i_mutex and flush the log first if it
 * might be full.  Returns 0 on success, -ENOSPC when the log is full,
 * or a journal error code.
 */
int ocfs2_truncate_log_append(struct ocfs2_super *osb,
			      handle_t *handle,
			      u64 start_blk,
			      unsigned int num_clusters)
{
	int status, index;
	unsigned int start_cluster, tl_count;
	struct inode *tl_inode = osb->osb_tl_inode;
	struct buffer_head *tl_bh = osb->osb_tl_bh;
	struct ocfs2_dinode *di;
	struct ocfs2_truncate_log *tl;

	/* Caller is required to already hold the truncate log mutex. */
	BUG_ON(mutex_trylock(&tl_inode->i_mutex));

	start_cluster = ocfs2_blocks_to_clusters(osb->sb, start_blk);

	di = (struct ocfs2_dinode *) tl_bh->b_data;

	/* tl_bh was validated when loaded, so this should never fire. */
	BUG_ON(!OCFS2_IS_VALID_DINODE(di));

	tl = &di->id2.i_dealloc;
	tl_count = le16_to_cpu(tl->tl_count);
	mlog_bug_on_msg(tl_count > ocfs2_truncate_recs_per_inode(osb->sb) ||
			tl_count == 0,
			"Truncate record count on #%llu invalid "
			"wanted %u, actual %u\n",
			(unsigned long long)OCFS2_I(tl_inode)->ip_blkno,
			ocfs2_truncate_recs_per_inode(osb->sb),
			le16_to_cpu(tl->tl_count));

	/* tl_used doubles as the index of the next free slot. */
	index = le16_to_cpu(tl->tl_used);
	if (index >= tl_count) {
		status = -ENOSPC;
		mlog_errno(status);
		goto bail;
	}

	status = ocfs2_journal_access_di(handle, INODE_CACHE(tl_inode), tl_bh,
					 OCFS2_JOURNAL_ACCESS_WRITE);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	trace_ocfs2_truncate_log_append(
		(unsigned long long)OCFS2_I(tl_inode)->ip_blkno, index,
		start_cluster, num_clusters);
	if (ocfs2_truncate_log_can_coalesce(tl, start_cluster)) {
		/*
		 * Merge into the tail record instead of taking a new
		 * slot.  can_coalesce() guarantees tl_used != 0, so
		 * index - 1 is valid.
		 */
		index--;
		num_clusters += le32_to_cpu(tl->tl_recs[index].t_clusters);
		trace_ocfs2_truncate_log_append(
			(unsigned long long)OCFS2_I(tl_inode)->ip_blkno,
			index, le32_to_cpu(tl->tl_recs[index].t_start),
			num_clusters);
	} else {
		tl->tl_recs[index].t_start = cpu_to_le32(start_cluster);
		tl->tl_used = cpu_to_le16(index + 1);
	}
	/* Written in both branches: new length of the (merged) run. */
	tl->tl_recs[index].t_clusters = cpu_to_le32(num_clusters);
	ocfs2_journal_dirty(handle, tl_bh);

	osb->truncated_clusters += num_clusters;
bail:
	return status;
}
/*
 * Free every cluster run recorded in the truncate log, newest first.
 *
 * For each record, tl_used is decremented and journaled *before* the
 * clusters are returned to the global bitmap, so a crash mid-replay
 * cannot cause the same run to be freed twice.
 */
static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
					 handle_t *handle,
					 struct inode *data_alloc_inode,
					 struct buffer_head *data_alloc_bh)
{
	int status = 0;
	int i;
	unsigned int num_clusters;
	u64 start_blk;
	struct ocfs2_truncate_rec rec;
	struct ocfs2_dinode *di;
	struct ocfs2_truncate_log *tl;
	struct inode *tl_inode = osb->osb_tl_inode;
	struct buffer_head *tl_bh = osb->osb_tl_bh;

	di = (struct ocfs2_dinode *) tl_bh->b_data;
	tl = &di->id2.i_dealloc;
	/* Walk from the last used record down to 0. */
	i = le16_to_cpu(tl->tl_used) - 1;
	while (i >= 0) {
		/* Journal the shrinking of tl_used before freeing. */
		status = ocfs2_journal_access_di(handle, INODE_CACHE(tl_inode), tl_bh,
						 OCFS2_JOURNAL_ACCESS_WRITE);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}

		tl->tl_used = cpu_to_le16(i);
		ocfs2_journal_dirty(handle, tl_bh);

		/* Keep the transaction big enough for this record's free. */
		status = ocfs2_extend_trans(handle,
					    OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}

		rec = tl->tl_recs[i];
		start_blk = ocfs2_clusters_to_blocks(data_alloc_inode->i_sb,
						     le32_to_cpu(rec.t_start));
		num_clusters = le32_to_cpu(rec.t_clusters);

		/* A start of zero marks an empty record; skip the free. */
		if (start_blk) {
			trace_ocfs2_replay_truncate_records(
				(unsigned long long)OCFS2_I(tl_inode)->ip_blkno,
				i, le32_to_cpu(rec.t_start), num_clusters);

			status = ocfs2_free_clusters(handle, data_alloc_inode,
						     data_alloc_bh, start_blk,
						     num_clusters);
			if (status < 0) {
				mlog_errno(status);
				goto bail;
			}
		}
		i--;
	}

	osb->truncated_clusters = 0;
bail:
	return status;
}
/*
 * Flush the truncate log: return every recorded cluster run to the
 * global bitmap and leave the log empty.
 *
 * The caller must hold tl_inode->i_mutex.  Takes the global bitmap
 * inode's i_mutex and cluster lock for the duration of the replay.
 */
int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
{
	int status;
	unsigned int num_to_flush;
	handle_t *handle;
	struct inode *tl_inode = osb->osb_tl_inode;
	struct inode *data_alloc_inode = NULL;
	struct buffer_head *tl_bh = osb->osb_tl_bh;
	struct buffer_head *data_alloc_bh = NULL;
	struct ocfs2_dinode *di;
	struct ocfs2_truncate_log *tl;

	/* Caller must already hold the truncate log mutex. */
	BUG_ON(mutex_trylock(&tl_inode->i_mutex));

	di = (struct ocfs2_dinode *) tl_bh->b_data;

	/* tl_bh was validated when loaded, so this should never fire. */
	BUG_ON(!OCFS2_IS_VALID_DINODE(di));

	tl = &di->id2.i_dealloc;
	num_to_flush = le16_to_cpu(tl->tl_used);
	trace_ocfs2_flush_truncate_log(
		(unsigned long long)OCFS2_I(tl_inode)->ip_blkno,
		num_to_flush);
	if (!num_to_flush) {
		status = 0;
		goto out;
	}

	/* All truncate log frees go back to the global bitmap. */
	data_alloc_inode = ocfs2_get_system_file_inode(osb,
						       GLOBAL_BITMAP_SYSTEM_INODE,
						       OCFS2_INVALID_SLOT);
	if (!data_alloc_inode) {
		status = -EINVAL;
		mlog(ML_ERROR, "Could not get bitmap inode!\n");
		goto out;
	}

	mutex_lock(&data_alloc_inode->i_mutex);

	status = ocfs2_inode_lock(data_alloc_inode, &data_alloc_bh, 1);
	if (status < 0) {
		mlog_errno(status);
		goto out_mutex;
	}

	handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_UPDATE);
	if (IS_ERR(handle)) {
		status = PTR_ERR(handle);
		mlog_errno(status);
		goto out_unlock;
	}

	status = ocfs2_replay_truncate_records(osb, handle, data_alloc_inode,
					       data_alloc_bh);
	if (status < 0)
		mlog_errno(status);

	/* Commit even on error — replay journals progress as it goes. */
	ocfs2_commit_trans(osb, handle);

out_unlock:
	brelse(data_alloc_bh);
	ocfs2_inode_unlock(data_alloc_inode, 1);

out_mutex:
	mutex_unlock(&data_alloc_inode->i_mutex);
	iput(data_alloc_inode);

out:
	return status;
}
  3978. int ocfs2_flush_truncate_log(struct ocfs2_super *osb)
  3979. {
  3980. int status;
  3981. struct inode *tl_inode = osb->osb_tl_inode;
  3982. mutex_lock(&tl_inode->i_mutex);
  3983. status = __ocfs2_flush_truncate_log(osb);
  3984. mutex_unlock(&tl_inode->i_mutex);
  3985. return status;
  3986. }
  3987. static void ocfs2_truncate_log_worker(struct work_struct *work)
  3988. {
  3989. int status;
  3990. struct ocfs2_super *osb =
  3991. container_of(work, struct ocfs2_super,
  3992. osb_truncate_log_wq.work);
  3993. status = ocfs2_flush_truncate_log(osb);
  3994. if (status < 0)
  3995. mlog_errno(status);
  3996. else
  3997. ocfs2_init_steal_slots(osb);
  3998. }
  3999. #define OCFS2_TRUNCATE_LOG_FLUSH_INTERVAL (2 * HZ)
  4000. void ocfs2_schedule_truncate_log_flush(struct ocfs2_super *osb,
  4001. int cancel)
  4002. {
  4003. if (osb->osb_tl_inode) {
  4004. if (cancel)
  4005. cancel_delayed_work(&osb->osb_truncate_log_wq);
  4006. queue_delayed_work(ocfs2_wq, &osb->osb_truncate_log_wq,
  4007. OCFS2_TRUNCATE_LOG_FLUSH_INTERVAL);
  4008. }
  4009. }
  4010. static int ocfs2_get_truncate_log_info(struct ocfs2_super *osb,
  4011. int slot_num,
  4012. struct inode **tl_inode,
  4013. struct buffer_head **tl_bh)
  4014. {
  4015. int status;
  4016. struct inode *inode = NULL;
  4017. struct buffer_head *bh = NULL;
  4018. inode = ocfs2_get_system_file_inode(osb,
  4019. TRUNCATE_LOG_SYSTEM_INODE,
  4020. slot_num);
  4021. if (!inode) {
  4022. status = -EINVAL;
  4023. mlog(ML_ERROR, "Could not get load truncate log inode!\n");
  4024. goto bail;
  4025. }
  4026. status = ocfs2_read_inode_block(inode, &bh);
  4027. if (status < 0) {
  4028. iput(inode);
  4029. mlog_errno(status);
  4030. goto bail;
  4031. }
  4032. *tl_inode = inode;
  4033. *tl_bh = bh;
  4034. bail:
  4035. return status;
  4036. }
/*
 * Begin truncate log recovery for another node's slot.
 *
 * When slot @slot_num has pending records, the whole dinode block is
 * copied into *tl_copy (kmalloc'd, GFP_KERNEL) and tl_used is zeroed
 * on disk so the records cannot be replayed twice.  *tl_copy stays
 * NULL when the log is clean; on error any allocated copy is freed.
 */
int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb,
				      int slot_num,
				      struct ocfs2_dinode **tl_copy)
{
	int status;
	struct inode *tl_inode = NULL;
	struct buffer_head *tl_bh = NULL;
	struct ocfs2_dinode *di;
	struct ocfs2_truncate_log *tl;

	*tl_copy = NULL;

	trace_ocfs2_begin_truncate_log_recovery(slot_num);

	status = ocfs2_get_truncate_log_info(osb, slot_num, &tl_inode, &tl_bh);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	di = (struct ocfs2_dinode *) tl_bh->b_data;

	/* ocfs2_read_inode_block() already validated the dinode. */
	BUG_ON(!OCFS2_IS_VALID_DINODE(di));

	tl = &di->id2.i_dealloc;
	if (le16_to_cpu(tl->tl_used)) {
		trace_ocfs2_truncate_log_recovery_num(le16_to_cpu(tl->tl_used));

		*tl_copy = kmalloc(tl_bh->b_size, GFP_KERNEL);
		if (!(*tl_copy)) {
			status = -ENOMEM;
			mlog_errno(status);
			goto bail;
		}

		memcpy(*tl_copy, tl_bh->b_data, tl_bh->b_size);

		/*
		 * Zero the in-use count on disk so these records are
		 * never replayed twice.
		 */
		tl->tl_used = 0;

		/* Direct block write: recompute the metaecc first. */
		ocfs2_compute_meta_ecc(osb->sb, tl_bh->b_data, &di->i_check);
		status = ocfs2_write_block(osb, tl_bh, INODE_CACHE(tl_inode));
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}
	}

bail:
	if (tl_inode)
		iput(tl_inode);
	brelse(tl_bh);

	if (status < 0 && (*tl_copy)) {
		kfree(*tl_copy);
		*tl_copy = NULL;
		mlog_errno(status);
	}

	return status;
}
/*
 * Replay a truncate log captured by ocfs2_begin_truncate_log_recovery():
 * each record from @tl_copy is appended to this node's own truncate log,
 * flushing our log whenever it fills up.  Refuses to "recover" our own
 * slot — that would replay records we still own.
 */
int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb,
					 struct ocfs2_dinode *tl_copy)
{
	int status = 0;
	int i;
	unsigned int clusters, num_recs, start_cluster;
	u64 start_blk;
	handle_t *handle;
	struct inode *tl_inode = osb->osb_tl_inode;
	struct ocfs2_truncate_log *tl;

	if (OCFS2_I(tl_inode)->ip_blkno == le64_to_cpu(tl_copy->i_blkno)) {
		mlog(ML_ERROR, "Asked to recover my own truncate log!\n");
		return -EINVAL;
	}

	tl = &tl_copy->id2.i_dealloc;
	num_recs = le16_to_cpu(tl->tl_used);
	trace_ocfs2_complete_truncate_log_recovery(
		(unsigned long long)le64_to_cpu(tl_copy->i_blkno),
		num_recs);

	mutex_lock(&tl_inode->i_mutex);
	for(i = 0; i < num_recs; i++) {
		/* Make room in our own log before each append. */
		if (ocfs2_truncate_log_needs_flush(osb)) {
			status = __ocfs2_flush_truncate_log(osb);
			if (status < 0) {
				mlog_errno(status);
				goto bail_up;
			}
		}

		/* One short transaction per record. */
		handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_UPDATE);
		if (IS_ERR(handle)) {
			status = PTR_ERR(handle);
			mlog_errno(status);
			goto bail_up;
		}

		clusters = le32_to_cpu(tl->tl_recs[i].t_clusters);
		start_cluster = le32_to_cpu(tl->tl_recs[i].t_start);
		start_blk = ocfs2_clusters_to_blocks(osb->sb, start_cluster);

		status = ocfs2_truncate_log_append(osb, handle,
						   start_blk, clusters);
		ocfs2_commit_trans(osb, handle);
		if (status < 0) {
			mlog_errno(status);
			goto bail_up;
		}
	}

bail_up:
	mutex_unlock(&tl_inode->i_mutex);

	return status;
}
  4133. void ocfs2_truncate_log_shutdown(struct ocfs2_super *osb)
  4134. {
  4135. int status;
  4136. struct inode *tl_inode = osb->osb_tl_inode;
  4137. if (tl_inode) {
  4138. cancel_delayed_work(&osb->osb_truncate_log_wq);
  4139. flush_workqueue(ocfs2_wq);
  4140. status = ocfs2_flush_truncate_log(osb);
  4141. if (status < 0)
  4142. mlog_errno(status);
  4143. brelse(osb->osb_tl_bh);
  4144. iput(osb->osb_tl_inode);
  4145. }
  4146. }
  4147. int ocfs2_truncate_log_init(struct ocfs2_super *osb)
  4148. {
  4149. int status;
  4150. struct inode *tl_inode = NULL;
  4151. struct buffer_head *tl_bh = NULL;
  4152. status = ocfs2_get_truncate_log_info(osb,
  4153. osb->slot_num,
  4154. &tl_inode,
  4155. &tl_bh);
  4156. if (status < 0)
  4157. mlog_errno(status);
  4158. INIT_DELAYED_WORK(&osb->osb_truncate_log_wq,
  4159. ocfs2_truncate_log_worker);
  4160. osb->osb_tl_bh = tl_bh;
  4161. osb->osb_tl_inode = tl_inode;
  4162. return status;
  4163. }
/*
 * One suballocator bit (or cluster run) queued for deferred freeing.
 * For cluster entries, free_blk/free_bit carry the start block and
 * cluster count (see ocfs2_free_cached_clusters()).
 */
struct ocfs2_cached_block_free {
	/* singly-linked list of pending frees */
	struct ocfs2_cached_block_free *free_next;
	/* suballoc group block; 0 means derive it from free_blk/free_bit */
	u64 free_bg;
	u64 free_blk;
	unsigned int free_bit;
};
/*
 * Pending block frees grouped by (system inode type, slot), so each
 * list can be freed against its own suballocator in one pass.
 */
struct ocfs2_per_slot_free_list {
	/* next (type, slot) bucket */
	struct ocfs2_per_slot_free_list *f_next_suballocator;
	int f_inode_type;
	int f_slot;
	/* head of this bucket's ocfs2_cached_block_free list */
	struct ocfs2_cached_block_free *f_first;
};
/*
 * Free all blocks on @head back to the suballocator file identified by
 * (@sysfile_type, @slot).  List nodes are kfree'd as they are consumed;
 * on error the remainder of the list is still kfree'd (those blocks are
 * then left allocated on disk).
 */
static int ocfs2_free_cached_blocks(struct ocfs2_super *osb,
				    int sysfile_type,
				    int slot,
				    struct ocfs2_cached_block_free *head)
{
	int ret;
	u64 bg_blkno;
	handle_t *handle;
	struct inode *inode;
	struct buffer_head *di_bh = NULL;
	struct ocfs2_cached_block_free *tmp;

	inode = ocfs2_get_system_file_inode(osb, sysfile_type, slot);
	if (!inode) {
		ret = -EINVAL;
		mlog_errno(ret);
		goto out;
	}

	mutex_lock(&inode->i_mutex);

	ret = ocfs2_inode_lock(inode, &di_bh, 1);
	if (ret) {
		mlog_errno(ret);
		goto out_mutex;
	}

	handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out_unlock;
	}

	while (head) {
		/* A zero free_bg means "compute the group from the bit". */
		if (head->free_bg)
			bg_blkno = head->free_bg;
		else
			bg_blkno = ocfs2_which_suballoc_group(head->free_blk,
							      head->free_bit);
		trace_ocfs2_free_cached_blocks(
			(unsigned long long)head->free_blk, head->free_bit);

		ret = ocfs2_free_suballoc_bits(handle, inode, di_bh,
					       head->free_bit, bg_blkno, 1);
		if (ret) {
			mlog_errno(ret);
			goto out_journal;
		}

		/* Keep the transaction sized for the next free. */
		ret = ocfs2_extend_trans(handle, OCFS2_SUBALLOC_FREE);
		if (ret) {
			mlog_errno(ret);
			goto out_journal;
		}

		tmp = head;
		head = head->free_next;
		kfree(tmp);
	}

out_journal:
	ocfs2_commit_trans(osb, handle);

out_unlock:
	ocfs2_inode_unlock(inode, 1);
	brelse(di_bh);
out_mutex:
	mutex_unlock(&inode->i_mutex);
	iput(inode);
out:
	/* Drop whatever is left of the list after an error. */
	while(head) {
		tmp = head;
		head = head->free_next;
		kfree(tmp);
	}

	return ret;
}
  4244. int ocfs2_cache_cluster_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
  4245. u64 blkno, unsigned int bit)
  4246. {
  4247. int ret = 0;
  4248. struct ocfs2_cached_block_free *item;
  4249. item = kzalloc(sizeof(*item), GFP_NOFS);
  4250. if (item == NULL) {
  4251. ret = -ENOMEM;
  4252. mlog_errno(ret);
  4253. return ret;
  4254. }
  4255. trace_ocfs2_cache_cluster_dealloc((unsigned long long)blkno, bit);
  4256. item->free_blk = blkno;
  4257. item->free_bit = bit;
  4258. item->free_next = ctxt->c_global_allocator;
  4259. ctxt->c_global_allocator = item;
  4260. return ret;
  4261. }
/*
 * Push every cached cluster run on @head into the truncate log, which
 * performs the actual free on its next flush.  List nodes are always
 * kfree'd, even when an append fails partway through.
 */
static int ocfs2_free_cached_clusters(struct ocfs2_super *osb,
				      struct ocfs2_cached_block_free *head)
{
	struct ocfs2_cached_block_free *tmp;
	struct inode *tl_inode = osb->osb_tl_inode;
	handle_t *handle;
	int ret = 0;

	mutex_lock(&tl_inode->i_mutex);

	while (head) {
		/* Flush first if the log has no room for another record. */
		if (ocfs2_truncate_log_needs_flush(osb)) {
			ret = __ocfs2_flush_truncate_log(osb);
			if (ret < 0) {
				mlog_errno(ret);
				break;
			}
		}

		handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_UPDATE);
		if (IS_ERR(handle)) {
			ret = PTR_ERR(handle);
			mlog_errno(ret);
			break;
		}

		/* For cluster entries, free_bit is the cluster count. */
		ret = ocfs2_truncate_log_append(osb, handle, head->free_blk,
						head->free_bit);

		ocfs2_commit_trans(osb, handle);

		tmp = head;
		head = head->free_next;
		kfree(tmp);

		if (ret < 0) {
			mlog_errno(ret);
			break;
		}
	}

	mutex_unlock(&tl_inode->i_mutex);

	/* Drop any entries left unprocessed after an error. */
	while (head) {
		tmp = head;
		head = head->free_next;
		kfree(tmp);
	}

	return ret;
}
  4303. int ocfs2_run_deallocs(struct ocfs2_super *osb,
  4304. struct ocfs2_cached_dealloc_ctxt *ctxt)
  4305. {
  4306. int ret = 0, ret2;
  4307. struct ocfs2_per_slot_free_list *fl;
  4308. if (!ctxt)
  4309. return 0;
  4310. while (ctxt->c_first_suballocator) {
  4311. fl = ctxt->c_first_suballocator;
  4312. if (fl->f_first) {
  4313. trace_ocfs2_run_deallocs(fl->f_inode_type,
  4314. fl->f_slot);
  4315. ret2 = ocfs2_free_cached_blocks(osb,
  4316. fl->f_inode_type,
  4317. fl->f_slot,
  4318. fl->f_first);
  4319. if (ret2)
  4320. mlog_errno(ret2);
  4321. if (!ret)
  4322. ret = ret2;
  4323. }
  4324. ctxt->c_first_suballocator = fl->f_next_suballocator;
  4325. kfree(fl);
  4326. }
  4327. if (ctxt->c_global_allocator) {
  4328. ret2 = ocfs2_free_cached_clusters(osb,
  4329. ctxt->c_global_allocator);
  4330. if (ret2)
  4331. mlog_errno(ret2);
  4332. if (!ret)
  4333. ret = ret2;
  4334. ctxt->c_global_allocator = NULL;
  4335. }
  4336. return ret;
  4337. }
  4338. static struct ocfs2_per_slot_free_list *
  4339. ocfs2_find_per_slot_free_list(int type,
  4340. int slot,
  4341. struct ocfs2_cached_dealloc_ctxt *ctxt)
  4342. {
  4343. struct ocfs2_per_slot_free_list *fl = ctxt->c_first_suballocator;
  4344. while (fl) {
  4345. if (fl->f_inode_type == type && fl->f_slot == slot)
  4346. return fl;
  4347. fl = fl->f_next_suballocator;
  4348. }
  4349. fl = kmalloc(sizeof(*fl), GFP_NOFS);
  4350. if (fl) {
  4351. fl->f_inode_type = type;
  4352. fl->f_slot = slot;
  4353. fl->f_first = NULL;
  4354. fl->f_next_suballocator = ctxt->c_first_suballocator;
  4355. ctxt->c_first_suballocator = fl;
  4356. }
  4357. return fl;
  4358. }
  4359. int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
  4360. int type, int slot, u64 suballoc,
  4361. u64 blkno, unsigned int bit)
  4362. {
  4363. int ret;
  4364. struct ocfs2_per_slot_free_list *fl;
  4365. struct ocfs2_cached_block_free *item;
  4366. fl = ocfs2_find_per_slot_free_list(type, slot, ctxt);
  4367. if (fl == NULL) {
  4368. ret = -ENOMEM;
  4369. mlog_errno(ret);
  4370. goto out;
  4371. }
  4372. item = kzalloc(sizeof(*item), GFP_NOFS);
  4373. if (item == NULL) {
  4374. ret = -ENOMEM;
  4375. mlog_errno(ret);
  4376. goto out;
  4377. }
  4378. trace_ocfs2_cache_block_dealloc(type, slot,
  4379. (unsigned long long)suballoc,
  4380. (unsigned long long)blkno, bit);
  4381. item->free_bg = suballoc;
  4382. item->free_blk = blkno;
  4383. item->free_bit = bit;
  4384. item->free_next = fl->f_first;
  4385. fl->f_first = item;
  4386. ret = 0;
  4387. out:
  4388. return ret;
  4389. }
  4390. static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt,
  4391. struct ocfs2_extent_block *eb)
  4392. {
  4393. return ocfs2_cache_block_dealloc(ctxt, EXTENT_ALLOC_SYSTEM_INODE,
  4394. le16_to_cpu(eb->h_suballoc_slot),
  4395. le64_to_cpu(eb->h_suballoc_loc),
  4396. le64_to_cpu(eb->h_blkno),
  4397. le16_to_cpu(eb->h_suballoc_bit));
  4398. }
/*
 * walk_page_buffers() callback: mark a buffer uptodate and dirty.
 * @handle is unused here.
 */
static int ocfs2_zero_func(handle_t *handle, struct buffer_head *bh)
{
	set_buffer_uptodate(bh);
	mark_buffer_dirty(bh);
	return 0;
}
/*
 * Map the buffers of @page over [from, to), optionally zero that byte
 * range, and mark the buffers uptodate and dirty.
 *
 * @phys is the physical block backing the range — see
 * ocfs2_map_page_blocks() for its exact semantics.  On data=ordered
 * mounts the inode is also attached to the journal's ordered list.
 * Errors are logged but not returned; the page is left uptodate only
 * if every buffer was processed.
 */
void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle,
			      unsigned int from, unsigned int to,
			      struct page *page, int zero, u64 *phys)
{
	int ret, partial = 0;

	ret = ocfs2_map_page_blocks(page, phys, inode, from, to, 0);
	if (ret)
		mlog_errno(ret);

	if (zero)
		zero_user_segment(page, from, to);

	/*
	 * Dirty the buffers directly; ocfs2_zero_func ignores the
	 * handle, so this works with or without a transaction.
	 */
	ret = walk_page_buffers(handle, page_buffers(page),
				from, to, &partial,
				ocfs2_zero_func);
	if (ret < 0)
		mlog_errno(ret);
	else if (ocfs2_should_order_data(inode)) {
		ret = ocfs2_jbd2_file_inode(handle, inode);
		if (ret < 0)
			mlog_errno(ret);
	}

	if (!partial)
		SetPageUptodate(page);

	flush_dcache_page(page);
}
  4429. static void ocfs2_zero_cluster_pages(struct inode *inode, loff_t start,
  4430. loff_t end, struct page **pages,
  4431. int numpages, u64 phys, handle_t *handle)
  4432. {
  4433. int i;
  4434. struct page *page;
  4435. unsigned int from, to = PAGE_CACHE_SIZE;
  4436. struct super_block *sb = inode->i_sb;
  4437. BUG_ON(!ocfs2_sparse_alloc(OCFS2_SB(sb)));
  4438. if (numpages == 0)
  4439. goto out;
  4440. to = PAGE_CACHE_SIZE;
  4441. for(i = 0; i < numpages; i++) {
  4442. page = pages[i];
  4443. from = start & (PAGE_CACHE_SIZE - 1);
  4444. if ((end >> PAGE_CACHE_SHIFT) == page->index)
  4445. to = end & (PAGE_CACHE_SIZE - 1);
  4446. BUG_ON(from > PAGE_CACHE_SIZE);
  4447. BUG_ON(to > PAGE_CACHE_SIZE);
  4448. ocfs2_map_and_dirty_page(inode, handle, from, to, page, 1,
  4449. &phys);
  4450. start = (page->index + 1) << PAGE_CACHE_SHIFT;
  4451. }
  4452. out:
  4453. if (pages)
  4454. ocfs2_unlock_and_free_pages(pages, numpages);
  4455. }
/*
 * Find or create the page cache pages covering [start, end).
 *
 * The pages are returned locked in @pages and *num is set to how many
 * were grabbed.  On -ENOMEM, all pages grabbed so far are unlocked and
 * released, and *num is 0.
 */
int ocfs2_grab_pages(struct inode *inode, loff_t start, loff_t end,
		     struct page **pages, int *num)
{
	int numpages, ret = 0;
	struct address_space *mapping = inode->i_mapping;
	unsigned long index;
	loff_t last_page_bytes;

	BUG_ON(start > end);

	numpages = 0;
	last_page_bytes = PAGE_ALIGN(end);
	index = start >> PAGE_CACHE_SHIFT;
	/*
	 * do/while: at least one page is grabbed even when start and
	 * end fall in the same page.
	 */
	do {
		pages[numpages] = find_or_create_page(mapping, index, GFP_NOFS);
		if (!pages[numpages]) {
			ret = -ENOMEM;
			mlog_errno(ret);
			goto out;
		}

		numpages++;
		index++;
	} while (index < (last_page_bytes >> PAGE_CACHE_SHIFT));

out:
	if (ret != 0) {
		/* Back out everything grabbed before the failure. */
		if (pages)
			ocfs2_unlock_and_free_pages(pages, numpages);
		numpages = 0;
	}

	*num = numpages;

	return ret;
}
/*
 * Like ocfs2_grab_pages(), but asserts the range [start, end) lies
 * entirely within a single cluster — callers use it for the cluster
 * at end-of-file.
 */
static int ocfs2_grab_eof_pages(struct inode *inode, loff_t start, loff_t end,
				struct page **pages, int *num)
{
	struct super_block *sb = inode->i_sb;

	BUG_ON(start >> OCFS2_SB(sb)->s_clustersize_bits !=
	       (end - 1) >> OCFS2_SB(sb)->s_clustersize_bits);

	return ocfs2_grab_pages(inode, start, end, pages, num);
}
  4494. int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle,
  4495. u64 range_start, u64 range_end)
  4496. {
  4497. int ret = 0, numpages;
  4498. struct page **pages = NULL;
  4499. u64 phys;
  4500. unsigned int ext_flags;
  4501. struct super_block *sb = inode->i_sb;
  4502. if (!ocfs2_sparse_alloc(OCFS2_SB(sb)))
  4503. return 0;
  4504. pages = kcalloc(ocfs2_pages_per_cluster(sb),
  4505. sizeof(struct page *), GFP_NOFS);
  4506. if (pages == NULL) {
  4507. ret = -ENOMEM;
  4508. mlog_errno(ret);
  4509. goto out;
  4510. }
  4511. if (range_start == range_end)
  4512. goto out;
  4513. ret = ocfs2_extent_map_get_blocks(inode,
  4514. range_start >> sb->s_blocksize_bits,
  4515. &phys, NULL, &ext_flags);
  4516. if (ret) {
  4517. mlog_errno(ret);
  4518. goto out;
  4519. }
  4520. /*
  4521. * Tail is a hole, or is marked unwritten. In either case, we
  4522. * can count on read and write to return/push zero's.
  4523. */
  4524. if (phys == 0 || ext_flags & OCFS2_EXT_UNWRITTEN)
  4525. goto out;
  4526. ret = ocfs2_grab_eof_pages(inode, range_start, range_end, pages,
  4527. &numpages);
  4528. if (ret) {
  4529. mlog_errno(ret);
  4530. goto out;
  4531. }
  4532. ocfs2_zero_cluster_pages(inode, range_start, range_end, pages,
  4533. numpages, phys, handle);
  4534. ret = filemap_fdatawrite_range(inode->i_mapping, range_start,
  4535. range_end - 1);
  4536. if (ret)
  4537. mlog_errno(ret);
  4538. out:
  4539. if (pages)
  4540. kfree(pages);
  4541. return ret;
  4542. }
  4543. static void ocfs2_zero_dinode_id2_with_xattr(struct inode *inode,
  4544. struct ocfs2_dinode *di)
  4545. {
  4546. unsigned int blocksize = 1 << inode->i_sb->s_blocksize_bits;
  4547. unsigned int xattrsize = le16_to_cpu(di->i_xattr_inline_size);
  4548. if (le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_XATTR_FL)
  4549. memset(&di->id2, 0, blocksize -
  4550. offsetof(struct ocfs2_dinode, id2) -
  4551. xattrsize);
  4552. else
  4553. memset(&di->id2, 0, blocksize -
  4554. offsetof(struct ocfs2_dinode, id2));
  4555. }
/*
 * Reinitialize the dinode's id2 area as an empty extent list, sized
 * to leave room for any inline xattrs at the end of the block.
 */
void ocfs2_dinode_new_extent_list(struct inode *inode,
				  struct ocfs2_dinode *di)
{
	ocfs2_zero_dinode_id2_with_xattr(inode, di);
	di->id2.i_list.l_tree_depth = 0;
	di->id2.i_list.l_next_free_rec = 0;
	di->id2.i_list.l_count = cpu_to_le16(
		ocfs2_extent_recs_per_inode_with_xattr(inode->i_sb, di));
}
/*
 * Switch the inode to inline-data storage: set OCFS2_INLINE_DATA_FL in
 * both the in-memory and on-disk dynamic features (under ip_lock) and
 * reinitialize id2 as an empty inline-data area sized around any
 * inline xattrs.
 */
void ocfs2_set_inode_data_inline(struct inode *inode, struct ocfs2_dinode *di)
{
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_inline_data *idata = &di->id2.i_data;

	spin_lock(&oi->ip_lock);
	oi->ip_dyn_features |= OCFS2_INLINE_DATA_FL;
	/* Mirror the in-memory feature bits into the dinode. */
	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
	spin_unlock(&oi->ip_lock);

	ocfs2_zero_dinode_id2_with_xattr(inode, di);
	idata->id_count = cpu_to_le16(
		ocfs2_max_inline_data_with_xattr(inode->i_sb, di));
}
/*
 * Convert an inline-data inode to the standard extent-list format.
 *
 * If the inode holds data (nonzero i_size), one cluster is reserved and
 * claimed, the inline bytes are copied through the page cache onto that
 * cluster, and the cluster is inserted as the first extent of the newly
 * initialized extent list.  OCFS2_INLINE_DATA_FL is cleared in memory
 * and on disk within the same transaction.
 *
 * Returns 0 on success or a negative error code; quota charged for the
 * cluster is released again on failure.
 */
int ocfs2_convert_inline_data_to_extents(struct inode *inode,
					 struct buffer_head *di_bh)
{
	int ret, i, has_data, num_pages = 0;
	handle_t *handle;
	u64 uninitialized_var(block);
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
	struct ocfs2_alloc_context *data_ac = NULL;
	struct page **pages = NULL;
	loff_t end = osb->s_clustersize;
	struct ocfs2_extent_tree et;
	int did_quota = 0;

	has_data = i_size_read(inode) ? 1 : 0;

	if (has_data) {
		/* One page pointer per page of the cluster we will fill. */
		pages = kcalloc(ocfs2_pages_per_cluster(osb->sb),
				sizeof(struct page *), GFP_NOFS);
		if (pages == NULL) {
			ret = -ENOMEM;
			mlog_errno(ret);
			goto out;
		}

		ret = ocfs2_reserve_clusters(osb, 1, &data_ac);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
	}

	handle = ocfs2_start_trans(osb,
				   ocfs2_inline_to_extents_credits(osb->sb));
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out_unlock;
	}

	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	if (has_data) {
		u32 bit_off, num;
		unsigned int page_end;
		u64 phys;

		/* Charge quota for the single cluster before claiming it. */
		ret = dquot_alloc_space_nodirty(inode,
				ocfs2_clusters_to_bytes(osb->sb, 1));
		if (ret)
			goto out_commit;
		did_quota = 1;

		data_ac->ac_resv = &OCFS2_I(inode)->ip_la_data_resv;

		ret = ocfs2_claim_clusters(handle, data_ac, 1, &bit_off,
					   &num);
		if (ret) {
			mlog_errno(ret);
			goto out_commit;
		}

		/* 'block' is kept for the extent insert below; 'phys' is
		 * advanced page by page while dirtying. */
		block = phys = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);

		/* Without sparse-file support and with pages smaller than
		 * a cluster, only grab the first page of the cluster. */
		if (!ocfs2_sparse_alloc(osb) &&
		    PAGE_CACHE_SIZE < osb->s_clustersize)
			end = PAGE_CACHE_SIZE;

		ret = ocfs2_grab_eof_pages(inode, 0, end, pages, &num_pages);
		if (ret) {
			mlog_errno(ret);
			goto out_commit;
		}

		/* Copy the inline bytes into the first page. */
		ret = ocfs2_read_inline_data(inode, pages[0], di_bh);
		if (ret) {
			mlog_errno(ret);
			goto out_commit;
		}

		page_end = PAGE_CACHE_SIZE;
		if (PAGE_CACHE_SIZE > osb->s_clustersize)
			page_end = osb->s_clustersize;

		/* Map each page to the new cluster and dirty it; pages
		 * after the first (i > 0) get zeroed by the helper. */
		for (i = 0; i < num_pages; i++)
			ocfs2_map_and_dirty_page(inode, handle, 0, page_end,
						 pages[i], i > 0, &phys);
	}

	/* Drop the inline flag in memory and on disk under ip_lock. */
	spin_lock(&oi->ip_lock);
	oi->ip_dyn_features &= ~OCFS2_INLINE_DATA_FL;
	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
	spin_unlock(&oi->ip_lock);

	ocfs2_dinode_new_extent_list(inode, di);

	ocfs2_journal_dirty(handle, di_bh);

	if (has_data) {
		/* Record the freshly written cluster as extent 0. */
		ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
		ret = ocfs2_insert_extent(handle, &et, 0, block, 1, 0, NULL);
		if (ret) {
			mlog_errno(ret);
			goto out_commit;
		}

		inode->i_blocks = ocfs2_inode_sector_count(inode);
	}

out_commit:
	/* Undo the quota charge if anything failed after it was taken. */
	if (ret < 0 && did_quota)
		dquot_free_space_nodirty(inode,
					 ocfs2_clusters_to_bytes(osb->sb, 1));

	ocfs2_commit_trans(osb, handle);

out_unlock:
	if (data_ac)
		ocfs2_free_alloc_context(data_ac);

out:
	if (pages) {
		ocfs2_unlock_and_free_pages(pages, num_pages);
		kfree(pages);
	}

	return ret;
}
/*
 * Shrink an inode's extent allocation down to the size currently held
 * in i_size.
 *
 * Works from the rightmost leaf inward: each pass re-finds the
 * rightmost path, removes the last record (or the tail of it) that lies
 * past the new cluster count, and jumps back to 'start' until either
 * ip_clusters hits zero or the remaining records all fit.  Freed
 * extents are collected in a local dealloc context; a truncate log
 * flush is scheduled and the dealloc run on every exit path.
 */
int ocfs2_commit_truncate(struct ocfs2_super *osb,
			  struct inode *inode,
			  struct buffer_head *di_bh)
{
	int status = 0, i, flags = 0;
	u32 new_highest_cpos, range, trunc_cpos, trunc_len, phys_cpos, coff;
	u64 blkno = 0;
	struct ocfs2_extent_list *el;
	struct ocfs2_extent_rec *rec;
	struct ocfs2_path *path = NULL;
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
	struct ocfs2_extent_list *root_el = &(di->id2.i_list);
	u64 refcount_loc = le64_to_cpu(di->i_refcount_loc);
	struct ocfs2_extent_tree et;
	struct ocfs2_cached_dealloc_ctxt dealloc;

	ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
	ocfs2_init_dealloc_ctxt(&dealloc);

	/* First cluster offset we must keep; everything at or past it goes. */
	new_highest_cpos = ocfs2_clusters_for_bytes(osb->sb,
						    i_size_read(inode));

	path = ocfs2_new_path(di_bh, &di->id2.i_list,
			      ocfs2_journal_access_di);
	if (!path) {
		status = -ENOMEM;
		mlog_errno(status);
		goto bail;
	}

	/* Invalidate cached extent map entries past the new size. */
	ocfs2_extent_map_trunc(inode, new_highest_cpos);

start:
	/* Nothing allocated anymore - we are done. */
	if (OCFS2_I(inode)->ip_clusters == 0) {
		status = 0;
		goto bail;
	}

	/* UINT_MAX finds the path to the rightmost leaf. */
	status = ocfs2_find_path(INODE_CACHE(inode), path, UINT_MAX);
	if (status) {
		mlog_errno(status);
		goto bail;
	}

	trace_ocfs2_commit_truncate(
		(unsigned long long)OCFS2_I(inode)->ip_blkno,
		new_highest_cpos,
		OCFS2_I(inode)->ip_clusters,
		path->p_tree_depth);

	el = path_leaf_el(path);
	if (le16_to_cpu(el->l_next_free_rec) == 0) {
		/* A leaf with no records is on-disk corruption. */
		ocfs2_error(inode->i_sb,
			    "Inode %llu has empty extent block at %llu\n",
			    (unsigned long long)OCFS2_I(inode)->ip_blkno,
			    (unsigned long long)path_leaf_bh(path)->b_blocknr);
		status = -EROFS;
		goto bail;
	}

	/* Work on the last (rightmost) record of the rightmost leaf. */
	i = le16_to_cpu(el->l_next_free_rec) - 1;
	rec = &el->l_recs[i];
	flags = rec->e_flags;
	range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec);

	if (i == 0 && ocfs2_is_empty_extent(rec)) {
		/*
		 * A lone empty record: remove the record itself with no
		 * clusters attached.  An empty record in a non-leaf-only
		 * tree with zero interior clusters is corruption.
		 */
		if (root_el->l_tree_depth && rec->e_int_clusters == 0) {
			ocfs2_error(inode->i_sb, "Inode %lu has an empty "
				    "extent record, depth %u\n", inode->i_ino,
				    le16_to_cpu(root_el->l_tree_depth));
			status = -EROFS;
			goto bail;
		}
		trunc_cpos = le32_to_cpu(rec->e_cpos);
		trunc_len = 0;
		blkno = 0;
	} else if (le32_to_cpu(rec->e_cpos) >= new_highest_cpos) {
		/* Record lies entirely past the new size: remove it all. */
		trunc_cpos = le32_to_cpu(rec->e_cpos);
		trunc_len = ocfs2_rec_clusters(el, rec);
		blkno = le64_to_cpu(rec->e_blkno);
	} else if (range > new_highest_cpos) {
		/* Record straddles the boundary: remove only the tail. */
		trunc_cpos = new_highest_cpos;
		trunc_len = range - new_highest_cpos;
		coff = new_highest_cpos - le32_to_cpu(rec->e_cpos);
		blkno = le64_to_cpu(rec->e_blkno) +
			ocfs2_clusters_to_blocks(inode->i_sb, coff);
	} else {
		/* Rightmost record fits within the new size: finished. */
		status = 0;
		goto bail;
	}

	phys_cpos = ocfs2_blocks_to_clusters(inode->i_sb, blkno);

	status = ocfs2_remove_btree_range(inode, &et, trunc_cpos,
					  phys_cpos, trunc_len, flags, &dealloc,
					  refcount_loc);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	/* The tree changed shape - re-find the rightmost path. */
	ocfs2_reinit_path(path, 1);

	goto start;

bail:
	ocfs2_schedule_truncate_log_flush(osb, 1);

	ocfs2_run_deallocs(osb, &dealloc);

	ocfs2_free_path(path);

	return status;
}
/*
 * Zero bytes [start, end) of an inline-data inode's data area; @end is
 * first clamped to i_size.  When @trunc is set, i_size is also pulled
 * back to @start (inline truncate); otherwise this acts as a hole
 * punch within the inline region.  ctime/mtime are updated and the
 * dinode is journaled in a single transaction.
 *
 * Returns 0 on success or a negative error code (-EROFS if the inline
 * flags on disk, in memory and in the superblock disagree).
 */
int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
			  unsigned int start, unsigned int end, int trunc)
{
	int ret;
	unsigned int numbytes;
	handle_t *handle;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
	struct ocfs2_inline_data *idata = &di->id2.i_data;

	if (end > i_size_read(inode))
		end = i_size_read(inode);

	/* NOTE(review): if the clamped end is <= start this BUG_ONs;
	 * callers are expected to guarantee start < i_size - confirm. */
	BUG_ON(start >= end);

	if (!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) ||
	    !(le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL) ||
	    !ocfs2_supports_inline_data(osb)) {
		ocfs2_error(inode->i_sb,
			    "Inline data flags for inode %llu don't agree! "
			    "Disk: 0x%x, Memory: 0x%x, Superblock: 0x%x\n",
			    (unsigned long long)OCFS2_I(inode)->ip_blkno,
			    le16_to_cpu(di->i_dyn_features),
			    OCFS2_I(inode)->ip_dyn_features,
			    osb->s_feature_incompat);
		ret = -EROFS;
		goto out;
	}

	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	numbytes = end - start;
	memset(idata->id_data + start, 0, numbytes);

	/* For a true truncate, shrink i_size in memory and on disk. */
	if (trunc) {
		i_size_write(inode, start);
		di->i_size = cpu_to_le64(start);
	}

	inode->i_blocks = ocfs2_inode_sector_count(inode);
	inode->i_ctime = inode->i_mtime = CURRENT_TIME;

	di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec);
	di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);

	ocfs2_journal_dirty(handle, di_bh);

out_commit:
	ocfs2_commit_trans(osb, handle);

out:
	return ret;
}
  4835. static int ocfs2_trim_extent(struct super_block *sb,
  4836. struct ocfs2_group_desc *gd,
  4837. u32 start, u32 count)
  4838. {
  4839. u64 discard, bcount;
  4840. bcount = ocfs2_clusters_to_blocks(sb, count);
  4841. discard = le64_to_cpu(gd->bg_blkno) +
  4842. ocfs2_clusters_to_blocks(sb, start);
  4843. trace_ocfs2_trim_extent(sb, (unsigned long long)discard, bcount);
  4844. return sb_issue_discard(sb, discard, bcount, GFP_NOFS, 0);
  4845. }
  4846. static int ocfs2_trim_group(struct super_block *sb,
  4847. struct ocfs2_group_desc *gd,
  4848. u32 start, u32 max, u32 minbits)
  4849. {
  4850. int ret = 0, count = 0, next;
  4851. void *bitmap = gd->bg_bitmap;
  4852. if (le16_to_cpu(gd->bg_free_bits_count) < minbits)
  4853. return 0;
  4854. trace_ocfs2_trim_group((unsigned long long)le64_to_cpu(gd->bg_blkno),
  4855. start, max, minbits);
  4856. while (start < max) {
  4857. start = ocfs2_find_next_zero_bit(bitmap, max, start);
  4858. if (start >= max)
  4859. break;
  4860. next = ocfs2_find_next_bit(bitmap, max, start);
  4861. if ((next - start) >= minbits) {
  4862. ret = ocfs2_trim_extent(sb, gd,
  4863. start, next - start);
  4864. if (ret < 0) {
  4865. mlog_errno(ret);
  4866. break;
  4867. }
  4868. count += next - start;
  4869. }
  4870. start = next + 1;
  4871. if (fatal_signal_pending(current)) {
  4872. count = -ERESTARTSYS;
  4873. break;
  4874. }
  4875. if ((le16_to_cpu(gd->bg_free_bits_count) - count) < minbits)
  4876. break;
  4877. }
  4878. if (ret < 0)
  4879. count = ret;
  4880. return count;
  4881. }
  4882. int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range)
  4883. {
  4884. struct ocfs2_super *osb = OCFS2_SB(sb);
  4885. u64 start, len, trimmed, first_group, last_group, group;
  4886. int ret, cnt;
  4887. u32 first_bit, last_bit, minlen;
  4888. struct buffer_head *main_bm_bh = NULL;
  4889. struct inode *main_bm_inode = NULL;
  4890. struct buffer_head *gd_bh = NULL;
  4891. struct ocfs2_dinode *main_bm;
  4892. struct ocfs2_group_desc *gd = NULL;
  4893. start = range->start >> osb->s_clustersize_bits;
  4894. len = range->len >> osb->s_clustersize_bits;
  4895. minlen = range->minlen >> osb->s_clustersize_bits;
  4896. trimmed = 0;
  4897. if (!len) {
  4898. range->len = 0;
  4899. return 0;
  4900. }
  4901. if (minlen >= osb->bitmap_cpg)
  4902. return -EINVAL;
  4903. main_bm_inode = ocfs2_get_system_file_inode(osb,
  4904. GLOBAL_BITMAP_SYSTEM_INODE,
  4905. OCFS2_INVALID_SLOT);
  4906. if (!main_bm_inode) {
  4907. ret = -EIO;
  4908. mlog_errno(ret);
  4909. goto out;
  4910. }
  4911. mutex_lock(&main_bm_inode->i_mutex);
  4912. ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 0);
  4913. if (ret < 0) {
  4914. mlog_errno(ret);
  4915. goto out_mutex;
  4916. }
  4917. main_bm = (struct ocfs2_dinode *)main_bm_bh->b_data;
  4918. if (start >= le32_to_cpu(main_bm->i_clusters)) {
  4919. ret = -EINVAL;
  4920. goto out_unlock;
  4921. }
  4922. if (start + len > le32_to_cpu(main_bm->i_clusters))
  4923. len = le32_to_cpu(main_bm->i_clusters) - start;
  4924. trace_ocfs2_trim_fs(start, len, minlen);
  4925. first_group = ocfs2_which_cluster_group(main_bm_inode, start);
  4926. if (first_group == osb->first_cluster_group_blkno)
  4927. first_bit = start;
  4928. else
  4929. first_bit = start - ocfs2_blocks_to_clusters(sb, first_group);
  4930. last_group = ocfs2_which_cluster_group(main_bm_inode, start + len - 1);
  4931. last_bit = osb->bitmap_cpg;
  4932. for (group = first_group; group <= last_group;) {
  4933. if (first_bit + len >= osb->bitmap_cpg)
  4934. last_bit = osb->bitmap_cpg;
  4935. else
  4936. last_bit = first_bit + len;
  4937. ret = ocfs2_read_group_descriptor(main_bm_inode,
  4938. main_bm, group,
  4939. &gd_bh);
  4940. if (ret < 0) {
  4941. mlog_errno(ret);
  4942. break;
  4943. }
  4944. gd = (struct ocfs2_group_desc *)gd_bh->b_data;
  4945. cnt = ocfs2_trim_group(sb, gd, first_bit, last_bit, minlen);
  4946. brelse(gd_bh);
  4947. gd_bh = NULL;
  4948. if (cnt < 0) {
  4949. ret = cnt;
  4950. mlog_errno(ret);
  4951. break;
  4952. }
  4953. trimmed += cnt;
  4954. len -= osb->bitmap_cpg - first_bit;
  4955. first_bit = 0;
  4956. if (group == osb->first_cluster_group_blkno)
  4957. group = ocfs2_clusters_to_blocks(sb, osb->bitmap_cpg);
  4958. else
  4959. group += ocfs2_clusters_to_blocks(sb, osb->bitmap_cpg);
  4960. }
  4961. range->len = trimmed * sb->s_blocksize;
  4962. out_unlock:
  4963. ocfs2_inode_unlock(main_bm_inode, 0);
  4964. brelse(main_bm_bh);
  4965. out_mutex:
  4966. mutex_unlock(&main_bm_inode->i_mutex);
  4967. iput(main_bm_inode);
  4968. out:
  4969. return ret;
  4970. }