summaryrefslogblamecommitdiffstats
path: root/fs/ocfs2/xattr.c
blob: b1f2a164e7dce23c707b42e7888ee906308b99c2 (plain) (tree)
1
2
3
4
5
6
7
8
9
10
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
2854
2855
2856
2857
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897
2898
2899
2900
2901
2902
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946
2947
2948
2949
2950
2951
2952
2953
2954
2955
2956
2957
2958
2959
2960
2961
2962
2963
2964
2965
2966
2967
2968
2969
2970
2971
2972
2973
2974
2975
2976
2977
2978
2979
2980
2981
2982
2983
2984
2985
2986
2987
2988
2989
2990
2991
2992
2993
2994
2995
2996
2997
2998
2999
3000
3001
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031
3032
3033
3034
3035
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
3047
3048
3049
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063
3064
3065
3066
3067
3068
3069
3070
3071
3072
3073
3074
3075
3076
3077
3078
3079
3080
3081
3082
3083
3084
3085
3086
3087
3088
3089
3090
3091
3092
3093
3094
3095
3096
3097
3098
3099
3100
3101
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111
3112
3113
3114
3115
3116
3117
3118
3119
3120
3121
3122
3123
3124
3125
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
3151
3152
3153
3154
3155
3156
3157
3158
3159
3160
3161
3162
3163
3164
3165
3166
3167
3168
3169
3170
3171
3172
3173
3174
3175
3176
3177
3178
3179
3180
3181
3182
3183
3184
3185
3186
3187
3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211
3212
3213
3214
3215
3216
3217
3218
3219
3220
3221
3222
3223
3224
3225
3226
3227
3228
3229
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250
3251
3252
3253
3254
3255
3256
3257
3258
3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275
3276
3277
3278
3279
3282
3283
3284
3285
3286
3287
3288
3289
3290
3291
3292
3293
3294
3295
3296
3297
3298
3299
3300
3301
3302
3303
3304
3305
3306
3307
3308
3309
3310
3311
3312
3313
3314
3315
3316
3317
3318
3319
3320
3321
3322
3323
3324
3325
3326
3327
3328
3329
3330
3331
3332
3333
3334
3335
3336
3337
3338
3339
3340
3341
3342
3343
3344
3345
3346
3347
3348
3349
3350
3351
3352
3353
3354
3355
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
3392
3393
3394
3395
3396
3397
3398
3399
3400
3401
3402
3403
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415
3416
3417
3418
3419
3420
3421
3422
3423
3424
3425
3426
3427
3428
3429
3430
3431
3432
3433
3434
3435
3436
3437
3438
3439
3440
3441
3442
3443
3444
3445
3446
3447
3448
3449
3450
3451
3452
3453
3454
3455
3456
3457
3458
3459
3460
3461
3462
3463
3464
3465
3466
3467
3468
3469
3470
3471
3472
3473
3474
3475
3476
3477
3478
3479
3480
3481
3482
3483
3484
3485
3486
3487
3488
3489
3490
3491
3492
3493
3494
3495
3496
3497
3498
3499
3500
3501
3502
3503
3504
3505
3506
3507
3508
3509
3510
3511
3512
3513
3514
3515
3516
3517
3518
3519
3520
3521
3522
3523
3524
3525
3526
3527
3528
3529
3530
3531
3532
3533
3534
3535
3536
3537
3538
3539
3540
3541
3542
3543
3544
3545
3546
3547
3548
3549
3550
3551
3552
3553
3554
3555
3556
3557
3558
3559
3560
3561
3562
3563
3564
3565
3566
3567
3568
3569
3570
3571
3572
3573
3574
3575
3576
3577
3578
3579
3580
3581
3582
3583
3584
3585
3586
3587
3588
3589
3590
3591
3592
3593
3594
3595
3596
3597
3598
3599
3600
3601
3602
3603
3604
3605
3606
3607
3608
3609
3610
3611
3612
3613
3614
3615
3616
3617
3618
3619
3620
3621
3622
3623
3624
3625
3626
3627
3628
3629
3630
3631
3632
3633
3634
3635
3636
3637
4403
4404
4405
4406
4407
4408
4409
4410
4411
4412
4413
4414
4415
4416
4417
4418
4419
4420
4421
4422
4423
4424
4425
4426
4427
4428
4429
4430
4431
4432
4433
4434
4435
4436
4437
4438
4439
4440
4441
4442
4443
4444
4445
4446
4447
4448
4449
4450
4451
4452
4453
4454
4455
4456
4457
4458
4459
4460
4461
4462
4463
4464
4465
4466
4467
4468
4469
4470
4471
4472
4473
4474
4475
4476
4477
4478
4479
4480
4481
4482
4483
4484
4485
4486
4487
4488
4489
4490
4491
4492
4493
4494
4495
4496
4497
4498
4499
4500
4501
4502
4503
4504
4505
4506
4507
4508
4509
4510
4511
4512
4513
4514
4515
4516
4517
4518
4519
4520
4521
4522
4523
4524
4525
4526
4527
4528
4529
4530
4531
4532
4533
4534
4535
4536
4537
4538
4539
4540
4541
4542
4543
4544
4545
4546
4547
4548
4549
4550
4551
4552
4553
4554
4555
4556
4557
4558
4559
4560
4561
4562
4563
4564
4565
4566
4567
4568
4569
4570
4571
4572
4573
4574
4575
4576
4577
4578
4579
4580
4581
4582
4583
4584
4585
4586
4587
4588
4589
4590
4591
4592
4593
4594
4595
4596
4597
4598
4599
4600
4601
4602
4603
4604
4605
4606
4607
4608
4609
4610
4611
4612
4613
4614
4615
4616
4617
4618
4619
4620
4621
4622
4623
4624
4625
4626
4627
4628
4629
4630
4631
4632
4633
4634
4635
4636
4637
4638
4639
4640
4641
4642
4643
4644
4645
4646
4647
4648
4649
4650
4651
4652
4653
4654
4655
4656
4657
4658
4659
4660
4661
4662
4663
4664
4665
4666
4667
4668
4669
4670
4671
4672
4673
4674
4675
4676
4677
4678
4679
4680
4681
4682
4683
4684
4685
4686
4687
4688
4689
4690
4691
4692
4693
4694
4695
4696
4697
4698
4699
4700
4701
4702
4703
4704
4705
4706
4707
4708
4709
4710
4711
4712
4713
4714
4715
4716
4717
4718
4719
4720
4721
4722
4723
4724
4725
4726
4727
4728
4729
4730
4731
4732
4733
4734
4735
4736
4737
4738
4739
4740
4741
4742
4743
4744
4745
4746
4747
4748
4749
4750
4751
4752
4753
4754
4755
4756
4757
4758
4759
4760
4761
4762
4763
4764
4765
4766
4767
4768
4769
4770
4771
4772
4773
4774
4775
4776
4777
4778
4779
4780
4781






                                                   


                                                















                                                                    











                             
                       
 






                                 

                    





                           
                  







                                           




                                                                   
































                                                                               
                                         





                                       











                                                                           




                                                                         






                                                                            


                                                                     

























































                                                                         













                                                                           
                                                                        
                                    


                                                                      
                                                                     
 

            

                                                                      




                                   

                                                                   





















                                                                  
                                                 


                                                     
                                                   























                                                                        
                                                                          













































                                                                                

                                    
                                                                    
 
                                                                      




























                                                                     

                                                                         






































































































                                                                                
























































                                                                            
                                     


















                                                                       
                                                        
 








                                                                            














                                                               


                                                          































































                                                                          
                                                                           







                                                         

                                     
















































                                                                              
                                          























                                                                              




                                                                         




















                                                                              
                                          
                    
                                                                



                             

                                                   















                                                                       
 











                                                                    






                                                    












                                                                              

                                                         
                                                             
                        


                                                                      








                                                                          


                                                               
 
                       
























                                                         


                                                         






































































































































































































































































































































































































































































































                                                                                
                                                               
                                                
                                                                     























































































                                                                           
















































                                                                             


                                                        




                                                                              



                   













































































                                                                           






                                                                      




                                                                       


                                                         









                                                             
 


                                                                           



















                                                               
                            











                                                                             





























































































































                                                                               
                                     



















                                                                       












                                                                    
 





                                     






                       













































                                                                          





























































































                                                                               











                                                                               

         




                                                                        




















                                                                      
                                                                           















                                         


                                                         

















































































                                                                               

                                            



                   











                                                                               




































































                                                                            




















































































































































































































































                                                                                































































































































































                                                                                




























































































































































































































































































































































































































































































































































































































































































































                                                                                

                                                         



































































































































































































































































































































































                                                                               
                                    





                                                                            
                                                               


                                                                   




                                

                                                                   




























































                                                                               











                                                                    

                                                                     

                                                                          
















































































































































































































































































                                                                               

                                          
                                      
                         


























































































                                                                            

                                                                        






























































































































































































































































                                                                                

                                                   

                                    
                                                               







                                                                        
                                                                      




























                                                                     

                                                                         


























































































































































































































































































































































































                                                                                







































































                                                                               
/* -*- mode: c; c-basic-offset: 8; -*-
 * vim: noexpandtab sw=8 ts=8 sts=0:
 *
 * xattr.c
 *
 * Copyright (C) 2008 Oracle.  All rights reserved.
 *
 * CREDITS:
 * Lots of code in this file is taken from ext3.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/capability.h>
#include <linux/fs.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/uio.h>
#include <linux/sched.h>
#include <linux/splice.h>
#include <linux/mount.h>
#include <linux/writeback.h>
#include <linux/falloc.h>
#include <linux/sort.h>

#define MLOG_MASK_PREFIX ML_XATTR
#include <cluster/masklog.h>

#include "ocfs2.h"
#include "alloc.h"
#include "dlmglue.h"
#include "file.h"
#include "symlink.h"
#include "sysfile.h"
#include "inode.h"
#include "journal.h"
#include "ocfs2_fs.h"
#include "suballoc.h"
#include "uptodate.h"
#include "buffer_head_io.h"
#include "super.h"
#include "xattr.h"


struct ocfs2_xattr_def_value_root {
	struct ocfs2_xattr_value_root	xv;
	struct ocfs2_extent_rec		er;
};

struct ocfs2_xattr_bucket {
	struct buffer_head *bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET];
	struct ocfs2_xattr_header *xh;
};

#define OCFS2_XATTR_ROOT_SIZE	(sizeof(struct ocfs2_xattr_def_value_root))
#define OCFS2_XATTR_INLINE_SIZE	80

static struct ocfs2_xattr_def_value_root def_xv = {
	.xv.xr_list.l_count = cpu_to_le16(1),
};

struct xattr_handler *ocfs2_xattr_handlers[] = {
	&ocfs2_xattr_user_handler,
	&ocfs2_xattr_trusted_handler,
	NULL
};

static struct xattr_handler *ocfs2_xattr_handler_map[] = {
	[OCFS2_XATTR_INDEX_USER]	= &ocfs2_xattr_user_handler,
	[OCFS2_XATTR_INDEX_TRUSTED]	= &ocfs2_xattr_trusted_handler,
};

struct ocfs2_xattr_info {
	int name_index;
	const char *name;
	const void *value;
	size_t value_len;
};

struct ocfs2_xattr_search {
	struct buffer_head *inode_bh;
	/*
	 * xattr_bh point to the block buffer head which has extended attribute
	 * when extended attribute in inode, xattr_bh is equal to inode_bh.
	 */
	struct buffer_head *xattr_bh;
	struct ocfs2_xattr_header *header;
	struct ocfs2_xattr_bucket bucket;
	void *base;
	void *end;
	struct ocfs2_xattr_entry *here;
	int not_found;
};

static int ocfs2_xattr_bucket_get_name_value(struct inode *inode,
					     struct ocfs2_xattr_header *xh,
					     int index,
					     int *block_off,
					     int *new_offset);

static int ocfs2_xattr_index_block_find(struct inode *inode,
					struct buffer_head *root_bh,
					int name_index,
					const char *name,
					struct ocfs2_xattr_search *xs);

static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
					struct ocfs2_xattr_tree_root *xt,
					char *buffer,
					size_t buffer_size);

static int ocfs2_xattr_create_index_block(struct inode *inode,
					  struct ocfs2_xattr_search *xs);

static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
					     struct ocfs2_xattr_info *xi,
					     struct ocfs2_xattr_search *xs);

static int ocfs2_delete_xattr_index_block(struct inode *inode,
					  struct buffer_head *xb_bh);

static inline struct xattr_handler *ocfs2_xattr_handler(int name_index)
{
	struct xattr_handler *handler = NULL;

	if (name_index > 0 && name_index < OCFS2_XATTR_MAX)
		handler = ocfs2_xattr_handler_map[name_index];

	return handler;
}

static inline u32 ocfs2_xattr_name_hash(struct inode *inode,
					char *prefix,
					int prefix_len,
					char *name,
					int name_len)
{
	/* Get hash value of uuid from super block */
	u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash;
	int i;

	/* hash extended attribute prefix */
	for (i = 0; i < prefix_len; i++) {
		hash = (hash << OCFS2_HASH_SHIFT) ^
		       (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^
		       *prefix++;
	}
	/* hash extended attribute name */
	for (i = 0; i < name_len; i++) {
		hash = (hash << OCFS2_HASH_SHIFT) ^
		       (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^
		       *name++;
	}

	return hash;
}

/*
 * ocfs2_xattr_hash_entry()
 *
 * Compute the hash of an extended attribute.
 */
static void ocfs2_xattr_hash_entry(struct inode *inode,
				   struct ocfs2_xattr_header *header,
				   struct ocfs2_xattr_entry *entry)
{
	u32 hash = 0;
	struct xattr_handler *handler =
			ocfs2_xattr_handler(ocfs2_xattr_get_type(entry));
	char *prefix = handler->prefix;
	char *name = (char *)header + le16_to_cpu(entry->xe_name_offset);
	int prefix_len = strlen(handler->prefix);

	hash = ocfs2_xattr_name_hash(inode, prefix, prefix_len, name,
				     entry->xe_name_len);
	entry->xe_name_hash = cpu_to_le32(hash);

	return;
}

static int ocfs2_xattr_extend_allocation(struct inode *inode,
					 u32 clusters_to_add,
					 struct buffer_head *xattr_bh,
					 struct ocfs2_xattr_value_root *xv)
{
	int status = 0;
	int restart_func = 0;
	int credits = 0;
	handle_t *handle = NULL;
	struct ocfs2_alloc_context *data_ac = NULL;
	struct ocfs2_alloc_context *meta_ac = NULL;
	enum ocfs2_alloc_restarted why;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	u32 prev_clusters, logical_start = le32_to_cpu(xv->xr_clusters);
	struct ocfs2_extent_tree et;

	mlog(0, "(clusters_to_add for xattr= %u)\n", clusters_to_add);

	ocfs2_init_xattr_value_extent_tree(&et, inode, xattr_bh, xv);

restart_all:

	status = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0,
				       &data_ac, &meta_ac);
	if (status) {
		mlog_errno(status);
		goto leave;
	}

	credits = ocfs2_calc_extend_credits(osb->sb, et.et_root_el,
					    clusters_to_add);
	handle = ocfs2_start_trans(osb, credits);
	if (IS_ERR(handle)) {
		status = PTR_ERR(handle);
		handle = NULL;
		mlog_errno(status);
		goto leave;
	}

restarted_transaction:
	status = ocfs2_journal_access(handle, inode, xattr_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (status < 0) {
		mlog_errno(status);
		goto leave;
	}

	prev_clusters = le32_to_cpu(xv->xr_clusters);
	status = ocfs2_add_clusters_in_btree(osb,
					     inode,
					     &logical_start,
					     clusters_to_add,
					     0,
					     &et,
					     handle,
					     data_ac,
					     meta_ac,
					     &why);
	if ((status < 0) && (status != -EAGAIN)) {
		if (status != -ENOSPC)
			mlog_errno(status);
		goto leave;
	}

	status = ocfs2_journal_dirty(handle, xattr_bh);
	if (status < 0) {
		mlog_errno(status);
		goto leave;
	}

	clusters_to_add -= le32_to_cpu(xv->xr_clusters) - prev_clusters;

	if (why != RESTART_NONE && clusters_to_add) {
		if (why == RESTART_META) {
			mlog(0, "restarting function.\n");
			restart_func = 1;
		} else {
			BUG_ON(why != RESTART_TRANS);

			mlog(0, "restarting transaction.\n");
			/* TODO: This can be more intelligent. */
			credits = ocfs2_calc_extend_credits(osb->sb,
							    et.et_root_el,
							    clusters_to_add);
			status = ocfs2_extend_trans(handle, credits);
			if (status < 0) {
				/* handle still has to be committed at
				 * this point. */
				status = -ENOMEM;
				mlog_errno(status);
				goto leave;
			}
			goto restarted_transaction;
		}
	}

leave:
	if (handle) {
		ocfs2_commit_trans(osb, handle);
		handle = NULL;
	}
	if (data_ac) {
		ocfs2_free_alloc_context(data_ac);
		data_ac = NULL;
	}
	if (meta_ac) {
		ocfs2_free_alloc_context(meta_ac);
		meta_ac = NULL;
	}
	if ((!status) && restart_func) {
		restart_func = 0;
		goto restart_all;
	}

	return status;
}

static int __ocfs2_remove_xattr_range(struct inode *inode,
				      struct buffer_head *root_bh,
				      struct ocfs2_xattr_value_root *xv,
				      u32 cpos, u32 phys_cpos, u32 len,
				      struct ocfs2_cached_dealloc_ctxt *dealloc)
{
	int ret;
	u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct inode *tl_inode = osb->osb_tl_inode;
	handle_t *handle;
	struct ocfs2_alloc_context *meta_ac = NULL;
	struct ocfs2_extent_tree et;

	ocfs2_init_xattr_value_extent_tree(&et, inode, root_bh, xv);

	ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
	if (ret) {
		mlog_errno(ret);
		return ret;
	}

	mutex_lock(&tl_inode->i_mutex);

	if (ocfs2_truncate_log_needs_flush(osb)) {
		ret = __ocfs2_flush_truncate_log(osb);
		if (ret < 0) {
			mlog_errno(ret);
			goto out;
		}
	}

	handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_journal_access(handle, inode, root_bh,
				   OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, meta_ac,
				  dealloc);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	le32_add_cpu(&xv->xr_clusters, -len);

	ret = ocfs2_journal_dirty(handle, root_bh);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len);
	if (ret)
		mlog_errno(ret);

out_commit:
	ocfs2_commit_trans(osb, handle);
out:
	mutex_unlock(&tl_inode->i_mutex);

	if (meta_ac)
		ocfs2_free_alloc_context(meta_ac);

	return ret;
}

static int ocfs2_xattr_shrink_size(struct inode *inode,
				   u32 old_clusters,
				   u32 new_clusters,
				   struct buffer_head *root_bh,
				   struct ocfs2_xattr_value_root *xv)
{
	int ret = 0;
	u32 trunc_len, cpos, phys_cpos, alloc_size;
	u64 block;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_cached_dealloc_ctxt dealloc;

	ocfs2_init_dealloc_ctxt(&dealloc);

	if (old_clusters <= new_clusters)
		return 0;

	cpos = new_clusters;
	trunc_len = old_clusters - new_clusters;
	while (trunc_len) {
		ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos,
					       &alloc_size, &xv->xr_list);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		if (alloc_size > trunc_len)
			alloc_size = trunc_len;

		ret = __ocfs2_remove_xattr_range(inode, root_bh, xv, cpos,
						 phys_cpos, alloc_size,
						 &dealloc);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
		ocfs2_remove_xattr_clusters_from_cache(inode, block,
						       alloc_size);
		cpos += alloc_size;
		trunc_len -= alloc_size;
	}

out:
	ocfs2_schedule_truncate_log_flush(osb, 1);
	ocfs2_run_deallocs(osb, &dealloc);

	return ret;
}

static int ocfs2_xattr_value_truncate(struct inode *inode,
				      struct buffer_head *root_bh,
				      struct ocfs2_xattr_value_root *xv,
				      int len)
{
	int ret;
	u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len);
	u32 old_clusters = le32_to_cpu(xv->xr_clusters);

	if (new_clusters == old_clusters)
		return 0;

	if (new_clusters > old_clusters)
		ret = ocfs2_xattr_extend_allocation(inode,
						    new_clusters - old_clusters,
						    root_bh, xv);
	else
		ret = ocfs2_xattr_shrink_size(inode,
					      old_clusters, new_clusters,
					      root_bh, xv);

	return ret;
}

static int ocfs2_xattr_list_entries(struct inode *inode,
				    struct ocfs2_xattr_header *header,
				    char *buffer, size_t buffer_size)
{
	size_t rest = buffer_size;
	int i;

	for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) {
		struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
		struct xattr_handler *handler =
			ocfs2_xattr_handler(ocfs2_xattr_get_type(entry));

		if (handler) {
			size_t size = handler->list(inode, buffer, rest,
					((char *)header +
					le16_to_cpu(entry->xe_name_offset)),
					entry->xe_name_len);
			if (buffer) {
				if (size > rest)
					return -ERANGE;
				buffer += size;
			}
			rest -= size;
		}
	}

	return buffer_size - rest;
}

static int ocfs2_xattr_ibody_list(struct inode *inode,
				  struct ocfs2_dinode *di,
				  char *buffer,
				  size_t buffer_size)
{
	struct ocfs2_xattr_header *header = NULL;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	int ret = 0;

	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
		return ret;

	header = (struct ocfs2_xattr_header *)
		 ((void *)di + inode->i_sb->s_blocksize -
		 le16_to_cpu(di->i_xattr_inline_size));

	ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size);

	return ret;
}

static int ocfs2_xattr_block_list(struct inode *inode,
				  struct ocfs2_dinode *di,
				  char *buffer,
				  size_t buffer_size)
{
	struct buffer_head *blk_bh = NULL;
	struct ocfs2_xattr_block *xb;
	int ret = 0;

	if (!di->i_xattr_loc)
		return ret;

	ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
			       le64_to_cpu(di->i_xattr_loc),
			       &blk_bh, OCFS2_BH_CACHED, inode);
	if (ret < 0) {
		mlog_errno(ret);
		return ret;
	}
	/*Verify the signature of xattr block*/
	if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE,
		   strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) {
		ret = -EFAULT;
		goto cleanup;
	}

	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;

	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
		struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
		ret = ocfs2_xattr_list_entries(inode, header,
					       buffer, buffer_size);
	} else {
		struct ocfs2_xattr_tree_root *xt = &xb->xb_attrs.xb_root;
		ret = ocfs2_xattr_tree_list_index_block(inode, xt,
						   buffer, buffer_size);
	}
cleanup:
	brelse(blk_bh);

	return ret;
}

ssize_t ocfs2_listxattr(struct dentry *dentry,
			char *buffer,
			size_t size)
{
	int ret = 0, i_ret = 0, b_ret = 0;
	struct buffer_head *di_bh = NULL;
	struct ocfs2_dinode *di = NULL;
	struct ocfs2_inode_info *oi = OCFS2_I(dentry->d_inode);

	if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb)))
		return -EOPNOTSUPP;

	if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
		return ret;

	ret = ocfs2_inode_lock(dentry->d_inode, &di_bh, 0);
	if (ret < 0) {
		mlog_errno(ret);
		return ret;
	}

	di = (struct ocfs2_dinode *)di_bh->b_data;

	down_read(&oi->ip_xattr_sem);
	i_ret = ocfs2_xattr_ibody_list(dentry->d_inode, di, buffer, size);
	if (i_ret < 0)
		b_ret = 0;
	else {
		if (buffer) {
			buffer += i_ret;
			size -= i_ret;
		}
		b_ret = ocfs2_xattr_block_list(dentry->d_inode, di,
					       buffer, size);
		if (b_ret < 0)
			i_ret = 0;
	}
	up_read(&oi->ip_xattr_sem);
	ocfs2_inode_unlock(dentry->d_inode, 0);

	brelse(di_bh);

	return i_ret + b_ret;
}

static int ocfs2_xattr_find_entry(int name_index,
				  const char *name,
				  struct ocfs2_xattr_search *xs)
{
	struct ocfs2_xattr_entry *entry;
	size_t name_len;
	int i, cmp = 1;

	if (name == NULL)
		return -EINVAL;

	name_len = strlen(name);
	entry = xs->here;
	for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
		cmp = name_index - ocfs2_xattr_get_type(entry);
		if (!cmp)
			cmp = name_len - entry->xe_name_len;
		if (!cmp)
			cmp = memcmp(name, (xs->base +
				     le16_to_cpu(entry->xe_name_offset)),
				     name_len);
		if (cmp == 0)
			break;
		entry += 1;
	}
	xs->here = entry;

	return cmp ? -ENODATA : 0;
}

static int ocfs2_xattr_get_value_outside(struct inode *inode,
					 struct ocfs2_xattr_value_root *xv,
					 void *buffer,
					 size_t len)
{
	u32 cpos, p_cluster, num_clusters, bpc, clusters;
	u64 blkno;
	int i, ret = 0;
	size_t cplen, blocksize;
	struct buffer_head *bh = NULL;
	struct ocfs2_extent_list *el;

	el = &xv->xr_list;
	clusters = le32_to_cpu(xv->xr_clusters);
	bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
	blocksize = inode->i_sb->s_blocksize;

	cpos = 0;
	while (cpos < clusters) {
		ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
					       &num_clusters, el);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
		/* Copy ocfs2_xattr_value */
		for (i = 0; i < num_clusters * bpc; i++, blkno++) {
			ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), blkno,
					       &bh, OCFS2_BH_CACHED, inode);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}

			cplen = len >= blocksize ? blocksize : len;
			memcpy(buffer, bh->b_data, cplen);
			len -= cplen;
			buffer += cplen;

			brelse(bh);
			bh = NULL;
			if (len == 0)
				break;
		}
		cpos += num_clusters;
	}
out:
	return ret;
}

static int ocfs2_xattr_ibody_get(struct inode *inode,
				 int name_index,
				 const char *name,
				 void *buffer,
				 size_t buffer_size,
				 struct ocfs2_xattr_search *xs)
{
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
	struct ocfs2_xattr_value_root *xv;
	size_t size;
	int ret = 0;

	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
		return -ENODATA;

	xs->end = (void *)di + inode->i_sb->s_blocksize;
	xs->header = (struct ocfs2_xattr_header *)
			(xs->end - le16_to_cpu(di->i_xattr_inline_size));
	xs->base = (void *)xs->header;
	xs->here = xs->header->xh_entries;

	ret = ocfs2_xattr_find_entry(name_index, name, xs);
	if (ret)
		return ret;
	size = le64_to_cpu(xs->here->xe_value_size);
	if (buffer) {
		if (size > buffer_size)
			return -ERANGE;
		if (ocfs2_xattr_is_local(xs->here)) {
			memcpy(buffer, (void *)xs->base +
			       le16_to_cpu(xs->here->xe_name_offset) +
			       OCFS2_XATTR_SIZE(xs->here->xe_name_len), size);
		} else {
			xv = (struct ocfs2_xattr_value_root *)
				(xs->base + le16_to_cpu(
				 xs->here->xe_name_offset) +
				OCFS2_XATTR_SIZE(xs->here->xe_name_len));
			ret = ocfs2_xattr_get_value_outside(inode, xv,
							    buffer, size);
			if (ret < 0) {
				mlog_errno(ret);
				return ret;
			}
		}
	}

	return size;
}

static int ocfs2_xattr_block_get(struct inode *inode,
				 int name_index,
				 const char *name,
				 void *buffer,
				 size_t buffer_size,
				 struct ocfs2_xattr_search *xs)
{
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
	struct buffer_head *blk_bh = NULL;
	struct ocfs2_xattr_block *xb;
	struct ocfs2_xattr_value_root *xv;
	size_t size;
	int ret = -ENODATA, name_offset, name_len, block_off, i;

	if (!di->i_xattr_loc)
		return ret;

	memset(&xs->bucket, 0, sizeof(xs->bucket));

	ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
			       le64_to_cpu(di->i_xattr_loc),
			       &blk_bh, OCFS2_BH_CACHED, inode);
	if (ret < 0) {
		mlog_errno(ret);
		return ret;
	}
	/*Verify the signature of xattr block*/
	if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE,
		   strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) {
		ret = -EFAULT;
		goto cleanup;
	}

	xs->xattr_bh = blk_bh;
	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;

	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
		xs->header = &xb->xb_attrs.xb_header;
		xs->base = (void *)xs->header;
		xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
		xs->here = xs->header->xh_entries;

		ret = ocfs2_xattr_find_entry(name_index, name, xs);
	} else
		ret = ocfs2_xattr_index_block_find(inode, blk_bh,
						   name_index,
						   name, xs);

	if (ret)
		goto cleanup;
	size = le64_to_cpu(xs->here->xe_value_size);
	if (buffer) {
		ret = -ERANGE;
		if (size > buffer_size)
			goto cleanup;

		name_offset = le16_to_cpu(xs->here->xe_name_offset);
		name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len);
		i = xs->here - xs->header->xh_entries;

		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
			ret = ocfs2_xattr_bucket_get_name_value(inode,
								xs->bucket.xh,
								i,
								&block_off,
								&name_offset);
			xs->base = xs->bucket.bhs[block_off]->b_data;
		}
		if (ocfs2_xattr_is_local(xs->here)) {
			memcpy(buffer, (void *)xs->base +
			       name_offset + name_len, size);
		} else {
			xv = (struct ocfs2_xattr_value_root *)
				(xs->base + name_offset + name_len);
			ret = ocfs2_xattr_get_value_outside(inode, xv,
							    buffer, size);
			if (ret < 0) {
				mlog_errno(ret);
				goto cleanup;
			}
		}
	}
	ret = size;
cleanup:
	for (i = 0; i < OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET; i++)
		brelse(xs->bucket.bhs[i]);
	memset(&xs->bucket, 0, sizeof(xs->bucket));

	brelse(blk_bh);
	return ret;
}

/* ocfs2_xattr_get()
 *
 * Copy an extended attribute into the buffer provided.
 * Buffer is NULL to compute the size of buffer required.
 */
int ocfs2_xattr_get(struct inode *inode,
		    int name_index,
		    const char *name,
		    void *buffer,
		    size_t buffer_size)
{
	int ret;
	struct ocfs2_dinode *di = NULL;
	struct buffer_head *di_bh = NULL;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_xattr_search xis = {
		.not_found = -ENODATA,
	};
	struct ocfs2_xattr_search xbs = {
		.not_found = -ENODATA,
	};

	if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
		return -EOPNOTSUPP;

	if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
		ret = -ENODATA;

	ret = ocfs2_inode_lock(inode, &di_bh, 0);
	if (ret < 0) {
		mlog_errno(ret);
		return ret;
	}
	xis.inode_bh = xbs.inode_bh = di_bh;
	di = (struct ocfs2_dinode *)di_bh->b_data;

	down_read(&oi->ip_xattr_sem);
	ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer,
				    buffer_size, &xis);
	if (ret == -ENODATA)
		ret = ocfs2_xattr_block_get(inode, name_index, name, buffer,
					    buffer_size, &xbs);
	up_read(&oi->ip_xattr_sem);
	ocfs2_inode_unlock(inode, 0);

	brelse(di_bh);

	return ret;
}

static int __ocfs2_xattr_set_value_outside(struct inode *inode,
					   struct ocfs2_xattr_value_root *xv,
					   const void *value,
					   int value_len)
{
	int ret = 0, i, cp_len, credits;
	u16 blocksize = inode->i_sb->s_blocksize;
	u32 p_cluster, num_clusters;
	u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
	u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len);
	u64 blkno;
	struct buffer_head *bh = NULL;
	handle_t *handle;

	BUG_ON(clusters > le32_to_cpu(xv->xr_clusters));

	credits = clusters * bpc;
	handle = ocfs2_start_trans(OCFS2_SB(inode->i_sb), credits);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out;
	}

	while (cpos < clusters) {
		ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
					       &num_clusters, &xv->xr_list);
		if (ret) {
			mlog_errno(ret);
			goto out_commit;
		}

		blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);

		for (i = 0; i < num_clusters * bpc; i++, blkno++) {
			ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), blkno,
					       &bh, OCFS2_BH_CACHED, inode);
			if (ret) {
				mlog_errno(ret);
				goto out_commit;
			}

			ret = ocfs2_journal_access(handle,
						   inode,
						   bh,
						   OCFS2_JOURNAL_ACCESS_WRITE);
			if (ret < 0) {
				mlog_errno(ret);
				goto out_commit;
			}

			cp_len = value_len > blocksize ? blocksize : value_len;
			memcpy(bh->b_data, value, cp_len);
			value_len -= cp_len;
			value += cp_len;
			if (cp_len < blocksize)
				memset(bh->b_data + cp_len, 0,
				       blocksize - cp_len);

			ret = ocfs2_journal_dirty(handle, bh);
			if (ret < 0) {
				mlog_errno(ret);
				goto out_commit;
			}
			brelse(bh);
			bh = NULL;

			/*
			 * XXX: do we need to empty all the following
			 * blocks in this cluster?
			 */
			if (!value_len)
				break;
		}
		cpos += num_clusters;
	}
out_commit:
	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
out:
	brelse(bh);

	return ret;
}

static int ocfs2_xattr_cleanup(struct inode *inode,
			       struct ocfs2_xattr_info *xi,
			       struct ocfs2_xattr_search *xs,
			       size_t offs)
{
	handle_t *handle = NULL;
	int ret = 0;
	size_t name_len = strlen(xi->name);
	void *val = xs->base + offs;
	size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;

	handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
				   OCFS2_XATTR_BLOCK_UPDATE_CREDITS);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out;
	}
	ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
				   OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}
	/* Decrease xattr count */
	le16_add_cpu(&xs->header->xh_count, -1);
	/* Remove the xattr entry and tree root which has already be set*/
	memset((void *)xs->here, 0, sizeof(struct ocfs2_xattr_entry));
	memset(val, 0, size);

	ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
	if (ret < 0)
		mlog_errno(ret);
out_commit:
	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
out:
	return ret;
}

static int ocfs2_xattr_update_entry(struct inode *inode,
				    struct ocfs2_xattr_info *xi,
				    struct ocfs2_xattr_search *xs,
				    size_t offs)
{
	handle_t *handle = NULL;
	int ret = 0;

	handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
				   OCFS2_XATTR_BLOCK_UPDATE_CREDITS);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out;
	}
	ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
				   OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	xs->here->xe_name_offset = cpu_to_le16(offs);
	xs->here->xe_value_size = cpu_to_le64(xi->value_len);
	if (xi->value_len <= OCFS2_XATTR_INLINE_SIZE)
		ocfs2_xattr_set_local(xs->here, 1);
	else
		ocfs2_xattr_set_local(xs->here, 0);
	ocfs2_xattr_hash_entry(inode, xs->header, xs->here);

	ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
	if (ret < 0)
		mlog_errno(ret);
out_commit:
	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
out:
	return ret;
}

/*
 * ocfs2_xattr_set_value_outside()
 *
 * Set large size value in B tree.
 */
static int ocfs2_xattr_set_value_outside(struct inode *inode,
					 struct ocfs2_xattr_info *xi,
					 struct ocfs2_xattr_search *xs,
					 size_t offs)
{
	size_t name_len = strlen(xi->name);
	void *val = xs->base + offs;
	struct ocfs2_xattr_value_root *xv = NULL;
	size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
	int ret = 0;

	memset(val, 0, size);
	memcpy(val, xi->name, name_len);
	xv = (struct ocfs2_xattr_value_root *)
		(val + OCFS2_XATTR_SIZE(name_len));
	xv->xr_clusters = 0;
	xv->xr_last_eb_blk = 0;
	xv->xr_list.l_tree_depth = 0;
	xv->xr_list.l_count = cpu_to_le16(1);
	xv->xr_list.l_next_free_rec = 0;

	ret = ocfs2_xattr_value_truncate(inode, xs->xattr_bh, xv,
					 xi->value_len);
	if (ret < 0) {
		mlog_errno(ret);
		return ret;
	}
	ret = __ocfs2_xattr_set_value_outside(inode, xv, xi->value,
					      xi->value_len);
	if (ret < 0) {
		mlog_errno(ret);
		return ret;
	}
	ret = ocfs2_xattr_update_entry(inode, xi, xs, offs);
	if (ret < 0)
		mlog_errno(ret);

	return ret;
}

/*
 * ocfs2_xattr_set_entry_local()
 *
 * Set, replace or remove extended attribute in local.
 */
static void ocfs2_xattr_set_entry_local(struct inode *inode,
					struct ocfs2_xattr_info *xi,
					struct ocfs2_xattr_search *xs,
					struct ocfs2_xattr_entry *last,
					size_t min_offs)
{
	size_t name_len = strlen(xi->name);
	int i;

	if (xi->value && xs->not_found) {
		/* Insert the new xattr entry. */
		le16_add_cpu(&xs->header->xh_count, 1);
		ocfs2_xattr_set_type(last, xi->name_index);
		ocfs2_xattr_set_local(last, 1);
		last->xe_name_len = name_len;
	} else {
		void *first_val;
		void *val;
		size_t offs, size;

		first_val = xs->base + min_offs;
		offs = le16_to_cpu(xs->here->xe_name_offset);
		val = xs->base + offs;

		if (le64_to_cpu(xs->here->xe_value_size) >
		    OCFS2_XATTR_INLINE_SIZE)
			size = OCFS2_XATTR_SIZE(name_len) +
				OCFS2_XATTR_ROOT_SIZE;
		else
			size = OCFS2_XATTR_SIZE(name_len) +
			OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));

		if (xi->value && size == OCFS2_XATTR_SIZE(name_len) +
				OCFS2_XATTR_SIZE(xi->value_len)) {
			/* The old and the new value have the
			   same size. Just replace the value. */
			ocfs2_xattr_set_local(xs->here, 1);
			xs->here->xe_value_size = cpu_to_le64(xi->value_len);
			/* Clear value bytes. */
			memset(val + OCFS2_XATTR_SIZE(name_len),
			       0,
			       OCFS2_XATTR_SIZE(xi->value_len));
			memcpy(val + OCFS2_XATTR_SIZE(name_len),
			       xi->value,
			       xi->value_len);
			return;
		}
		/* Remove the old name+value. */
		memmove(first_val + size, first_val, val - first_val);
		memset(first_val, 0, size);
		xs->here->xe_name_hash = 0;
		xs->here->xe_name_offset = 0;
		ocfs2_xattr_set_local(xs->here, 1);
		xs->here->xe_value_size = 0;

		min_offs += size;

		/* Adjust all value offsets. */
		last = xs->header->xh_entries;
		for (i = 0 ; i < le16_to_cpu(xs->header->xh_count); i++) {
			size_t o = le16_to_cpu(last->xe_name_offset);

			if (o < offs)
				last->xe_name_offset = cpu_to_le16(o + size);
			last += 1;
		}

		if (!xi->value) {
			/* Remove the old entry. */
			last -= 1;
			memmove(xs->here, xs->here + 1,
				(void *)last - (void *)xs->here);
			memset(last, 0, sizeof(struct ocfs2_xattr_entry));
			le16_add_cpu(&xs->header->xh_count, -1);
		}
	}
	if (xi->value) {
		/* Insert the new name+value. */
		size_t size = OCFS2_XATTR_SIZE(name_len) +
				OCFS2_XATTR_SIZE(xi->value_len);
		void *val = xs->base + min_offs - size;

		xs->here->xe_name_offset = cpu_to_le16(min_offs - size);
		memset(val, 0, size);
		memcpy(val, xi->name, name_len);
		memcpy(val + OCFS2_XATTR_SIZE(name_len),
		       xi->value,
		       xi->value_len);
		xs->here->xe_value_size = cpu_to_le64(xi->value_len);
		ocfs2_xattr_set_local(xs->here, 1);
		ocfs2_xattr_hash_entry(inode, xs->header, xs->here);
	}

	return;
}

/*
 * ocfs2_xattr_set_entry()
 *
 * Set extended attribute entry into inode or block.
 *
 * If extended attribute value size > OCFS2_XATTR_INLINE_SIZE,
 * We first insert tree root(ocfs2_xattr_value_root) with set_entry_local(),
 * then set value in B tree with set_value_outside().
 */
static int ocfs2_xattr_set_entry(struct inode *inode,
				 struct ocfs2_xattr_info *xi,
				 struct ocfs2_xattr_search *xs,
				 int flag)
{
	struct ocfs2_xattr_entry *last;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
	size_t min_offs = xs->end - xs->base, name_len = strlen(xi->name);
	size_t size_l = 0;
	handle_t *handle = NULL;
	int free, i, ret;
	struct ocfs2_xattr_info xi_l = {
		.name_index = xi->name_index,
		.name = xi->name,
		.value = xi->value,
		.value_len = xi->value_len,
	};

	/* Compute min_offs, last and free space. */
	last = xs->header->xh_entries;

	for (i = 0 ; i < le16_to_cpu(xs->header->xh_count); i++) {
		size_t offs = le16_to_cpu(last->xe_name_offset);
		if (offs < min_offs)
			min_offs = offs;
		last += 1;
	}

	free = min_offs - ((void *)last - xs->base) - sizeof(__u32);
	if (free < 0)
		return -EFAULT;

	if (!xs->not_found) {
		size_t size = 0;
		if (ocfs2_xattr_is_local(xs->here))
			size = OCFS2_XATTR_SIZE(name_len) +
			OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
		else
			size = OCFS2_XATTR_SIZE(name_len) +
				OCFS2_XATTR_ROOT_SIZE;
		free += (size + sizeof(struct ocfs2_xattr_entry));
	}
	/* Check free space in inode or block */
	if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
		if (free < sizeof(struct ocfs2_xattr_entry) +
			   OCFS2_XATTR_SIZE(name_len) +
			   OCFS2_XATTR_ROOT_SIZE) {
			ret = -ENOSPC;
			goto out;
		}
		size_l = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
		xi_l.value = (void *)&def_xv;
		xi_l.value_len = OCFS2_XATTR_ROOT_SIZE;
	} else if (xi->value) {
		if (free < sizeof(struct ocfs2_xattr_entry) +
			   OCFS2_XATTR_SIZE(name_len) +
			   OCFS2_XATTR_SIZE(xi->value_len)) {
			ret = -ENOSPC;
			goto out;
		}
	}

	if (!xs->not_found) {
		/* For existing extended attribute */
		size_t size = OCFS2_XATTR_SIZE(name_len) +
			OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
		size_t offs = le16_to_cpu(xs->here->xe_name_offset);
		void *val = xs->base + offs;

		if (ocfs2_xattr_is_local(xs->here) && size == size_l) {
			/* Replace existing local xattr with tree root */
			ret = ocfs2_xattr_set_value_outside(inode, xi, xs,
							    offs);
			if (ret < 0)
				mlog_errno(ret);
			goto out;
		} else if (!ocfs2_xattr_is_local(xs->here)) {
			/* For existing xattr which has value outside */
			struct ocfs2_xattr_value_root *xv = NULL;
			xv = (struct ocfs2_xattr_value_root *)(val +
				OCFS2_XATTR_SIZE(name_len));

			if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
				/*
				 * If new value need set outside also,
				 * first truncate old value to new value,
				 * then set new value with set_value_outside().
				 */
				ret = ocfs2_xattr_value_truncate(inode,
								 xs->xattr_bh,
								 xv,
								 xi->value_len);
				if (ret < 0) {
					mlog_errno(ret);
					goto out;
				}

				ret = __ocfs2_xattr_set_value_outside(inode,
								xv,
								xi->value,
								xi->value_len);
				if (ret < 0) {
					mlog_errno(ret);
					goto out;
				}

				ret = ocfs2_xattr_update_entry(inode,
							       xi,
							       xs,
							       offs);
				if (ret < 0)
					mlog_errno(ret);
				goto out;
			} else {
				/*
				 * If new value need set in local,
				 * just trucate old value to zero.
				 */
				 ret = ocfs2_xattr_value_truncate(inode,
								 xs->xattr_bh,
								 xv,
								 0);
				if (ret < 0)
					mlog_errno(ret);
			}
		}
	}

	handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
				   OCFS2_INODE_UPDATE_CREDITS);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_journal_access(handle, inode, xs->inode_bh,
				   OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	if (!(flag & OCFS2_INLINE_XATTR_FL)) {
		/* set extended attribute in external block. */
		ret = ocfs2_extend_trans(handle,
					 OCFS2_INODE_UPDATE_CREDITS +
					 OCFS2_XATTR_BLOCK_UPDATE_CREDITS);
		if (ret) {
			mlog_errno(ret);
			goto out_commit;
		}
		ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
					   OCFS2_JOURNAL_ACCESS_WRITE);
		if (ret) {
			mlog_errno(ret);
			goto out_commit;
		}
	}

	/*
	 * Set value in local, include set tree root in local.
	 * This is the first step for value size >INLINE_SIZE.
	 */
	ocfs2_xattr_set_entry_local(inode, &xi_l, xs, last, min_offs);

	if (!(flag & OCFS2_INLINE_XATTR_FL)) {
		ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
		if (ret < 0) {
			mlog_errno(ret);
			goto out_commit;
		}
	}

	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) &&
	    (flag & OCFS2_INLINE_XATTR_FL)) {
		struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
		unsigned int xattrsize = osb->s_xattr_inline_size;

		/*
		 * Adjust extent record count or inline data size
		 * to reserve space for extended attribute.
		 */
		if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
			struct ocfs2_inline_data *idata = &di->id2.i_data;
			le16_add_cpu(&idata->id_count, -xattrsize);
		} else if (!(ocfs2_inode_is_fast_symlink(inode))) {
			struct ocfs2_extent_list *el = &di->id2.i_list;
			le16_add_cpu(&el->l_count, -(xattrsize /
					sizeof(struct ocfs2_extent_rec)));
		}
		di->i_xattr_inline_size = cpu_to_le16(xattrsize);
	}
	/* Update xattr flag */
	spin_lock(&oi->ip_lock);
	oi->ip_dyn_features |= flag;
	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
	spin_unlock(&oi->ip_lock);
	/* Update inode ctime */
	inode->i_ctime = CURRENT_TIME;
	di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
	di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);

	ret = ocfs2_journal_dirty(handle, xs->inode_bh);
	if (ret < 0)
		mlog_errno(ret);

out_commit:
	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);

	if (!ret && xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
		/*
		 * Set value outside in B tree.
		 * This is the second step for value size > INLINE_SIZE.
		 */
		size_t offs = le16_to_cpu(xs->here->xe_name_offset);
		ret = ocfs2_xattr_set_value_outside(inode, xi, xs, offs);
		if (ret < 0) {
			int ret2;

			mlog_errno(ret);
			/*
			 * If set value outside failed, we have to clean
			 * the junk tree root we have already set in local.
			 */
			ret2 = ocfs2_xattr_cleanup(inode, xi, xs, offs);
			if (ret2 < 0)
				mlog_errno(ret2);
		}
	}
out:
	return ret;

}

static int ocfs2_remove_value_outside(struct inode*inode,
				      struct buffer_head *bh,
				      struct ocfs2_xattr_header *header)
{
	int ret = 0, i;

	for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
		struct ocfs2_xattr_entry *entry = &header->xh_entries[i];

		if (!ocfs2_xattr_is_local(entry)) {
			struct ocfs2_xattr_value_root *xv;
			void *val;

			val = (void *)header +
				le16_to_cpu(entry->xe_name_offset);
			xv = (struct ocfs2_xattr_value_root *)
				(val + OCFS2_XATTR_SIZE(entry->xe_name_len));
			ret = ocfs2_xattr_value_truncate(inode, bh, xv, 0);
			if (ret < 0) {
				mlog_errno(ret);
				return ret;
			}
		}
	}

	return ret;
}

static int ocfs2_xattr_ibody_remove(struct inode *inode,
				    struct buffer_head *di_bh)
{

	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
	struct ocfs2_xattr_header *header;
	int ret;

	header = (struct ocfs2_xattr_header *)
		 ((void *)di + inode->i_sb->s_blocksize -
		 le16_to_cpu(di->i_xattr_inline_size));

	ret = ocfs2_remove_value_outside(inode, di_bh, header);

	return ret;
}

static int ocfs2_xattr_block_remove(struct inode *inode,
				    struct buffer_head *blk_bh)
{
	struct ocfs2_xattr_block *xb;
	int ret = 0;

	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
		struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header);
		ret = ocfs2_remove_value_outside(inode, blk_bh, header);
	} else
		ret = ocfs2_delete_xattr_index_block(inode, blk_bh);

	return ret;
}

static int ocfs2_xattr_free_block(struct inode *inode,
				  u64 block)
{
	struct inode *xb_alloc_inode;
	struct buffer_head *xb_alloc_bh = NULL;
	struct buffer_head *blk_bh = NULL;
	struct ocfs2_xattr_block *xb;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	handle_t *handle;
	int ret = 0;
	u64 blk, bg_blkno;
	u16 bit;

	ret = ocfs2_read_block(osb, block, &blk_bh,
			       OCFS2_BH_CACHED, inode);
	if (ret < 0) {
		mlog_errno(ret);
		goto out;
	}

	/*Verify the signature of xattr block*/
	if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE,
		   strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) {
		ret = -EFAULT;
		goto out;
	}

	ret = ocfs2_xattr_block_remove(inode, blk_bh);
	if (ret < 0) {
		mlog_errno(ret);
		goto out;
	}

	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
	blk = le64_to_cpu(xb->xb_blkno);
	bit = le16_to_cpu(xb->xb_suballoc_bit);
	bg_blkno = ocfs2_which_suballoc_group(blk, bit);

	xb_alloc_inode = ocfs2_get_system_file_inode(osb,
				EXTENT_ALLOC_SYSTEM_INODE,
				le16_to_cpu(xb->xb_suballoc_slot));
	if (!xb_alloc_inode) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}
	mutex_lock(&xb_alloc_inode->i_mutex);

	ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 1);
	if (ret < 0) {
		mlog_errno(ret);
		goto out_mutex;
	}

	handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out_unlock;
	}

	ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh,
				       bit, bg_blkno, 1);
	if (ret < 0)
		mlog_errno(ret);

	ocfs2_commit_trans(osb, handle);
out_unlock:
	ocfs2_inode_unlock(xb_alloc_inode, 1);
	brelse(xb_alloc_bh);
out_mutex:
	mutex_unlock(&xb_alloc_inode->i_mutex);
	iput(xb_alloc_inode);
out:
	brelse(blk_bh);
	return ret;
}

/*
 * ocfs2_xattr_remove()
 *
 * Free extended attribute resources associated with this inode.
 */
int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
{
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
	handle_t *handle;
	int ret;

	if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
		return 0;

	if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
		return 0;

	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
		ret = ocfs2_xattr_ibody_remove(inode, di_bh);
		if (ret < 0) {
			mlog_errno(ret);
			goto out;
		}
	}

	if (di->i_xattr_loc) {
		ret = ocfs2_xattr_free_block(inode,
					     le64_to_cpu(di->i_xattr_loc));
		if (ret < 0) {
			mlog_errno(ret);
			goto out;
		}
	}

	handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
				   OCFS2_INODE_UPDATE_CREDITS);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out;
	}
	ret = ocfs2_journal_access(handle, inode, di_bh,
				   OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	di->i_xattr_loc = 0;

	spin_lock(&oi->ip_lock);
	oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL);
	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
	spin_unlock(&oi->ip_lock);

	ret = ocfs2_journal_dirty(handle, di_bh);
	if (ret < 0)
		mlog_errno(ret);
out_commit:
	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
out:
	return ret;
}

static int ocfs2_xattr_has_space_inline(struct inode *inode,
					struct ocfs2_dinode *di)
{
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size;
	int free;

	if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE)
		return 0;

	if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		struct ocfs2_inline_data *idata = &di->id2.i_data;
		free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size);
	} else if (ocfs2_inode_is_fast_symlink(inode)) {
		free = ocfs2_fast_symlink_chars(inode->i_sb) -
			le64_to_cpu(di->i_size);
	} else {
		struct ocfs2_extent_list *el = &di->id2.i_list;
		free = (le16_to_cpu(el->l_count) -
			le16_to_cpu(el->l_next_free_rec)) *
			sizeof(struct ocfs2_extent_rec);
	}
	if (free >= xattrsize)
		return 1;

	return 0;
}

/*
 * ocfs2_xattr_ibody_find()
 *
 * Find extended attribute in inode block and
 * fill search info into struct ocfs2_xattr_search.
 */
static int ocfs2_xattr_ibody_find(struct inode *inode,
				  int name_index,
				  const char *name,
				  struct ocfs2_xattr_search *xs)
{
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
	int ret;
	int has_space = 0;

	if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
		return 0;

	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
		down_read(&oi->ip_alloc_sem);
		has_space = ocfs2_xattr_has_space_inline(inode, di);
		up_read(&oi->ip_alloc_sem);
		if (!has_space)
			return 0;
	}

	xs->xattr_bh = xs->inode_bh;
	xs->end = (void *)di + inode->i_sb->s_blocksize;
	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)
		xs->header = (struct ocfs2_xattr_header *)
			(xs->end - le16_to_cpu(di->i_xattr_inline_size));
	else
		xs->header = (struct ocfs2_xattr_header *)
			(xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size);
	xs->base = (void *)xs->header;
	xs->here = xs->header->xh_entries;

	/* Find the named attribute. */
	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
		ret = ocfs2_xattr_find_entry(name_index, name, xs);
		if (ret && ret != -ENODATA)
			return ret;
		xs->not_found = ret;
	}

	return 0;
}

/*
 * ocfs2_xattr_ibody_set()
 *
 * Set, replace or remove an extended attribute into inode block.
 *
 */
static int ocfs2_xattr_ibody_set(struct inode *inode,
				 struct ocfs2_xattr_info *xi,
				 struct ocfs2_xattr_search *xs)
{
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
	int ret;

	if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
		return -ENOSPC;

	down_write(&oi->ip_alloc_sem);
	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
		if (!ocfs2_xattr_has_space_inline(inode, di)) {
			ret = -ENOSPC;
			goto out;
		}
	}

	ret = ocfs2_xattr_set_entry(inode, xi, xs,
				(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL));
out:
	up_write(&oi->ip_alloc_sem);

	return ret;
}

/*
 * ocfs2_xattr_block_find()
 *
 * Find extended attribute in external block and
 * fill search info into struct ocfs2_xattr_search.
 */
static int ocfs2_xattr_block_find(struct inode *inode,
				  int name_index,
				  const char *name,
				  struct ocfs2_xattr_search *xs)
{
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
	struct buffer_head *blk_bh = NULL;
	struct ocfs2_xattr_block *xb;
	int ret = 0;

	if (!di->i_xattr_loc)
		return ret;

	ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
			       le64_to_cpu(di->i_xattr_loc),
			       &blk_bh, OCFS2_BH_CACHED, inode);
	if (ret < 0) {
		mlog_errno(ret);
		return ret;
	}
	/*Verify the signature of xattr block*/
	if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE,
		   strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) {
			ret = -EFAULT;
			goto cleanup;
	}

	xs->xattr_bh = blk_bh;
	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;

	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
		xs->header = &xb->xb_attrs.xb_header;
		xs->base = (void *)xs->header;
		xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
		xs->here = xs->header->xh_entries;

		ret = ocfs2_xattr_find_entry(name_index, name, xs);
	} else
		ret = ocfs2_xattr_index_block_find(inode, blk_bh,
						   name_index,
						   name, xs);

	if (ret && ret != -ENODATA) {
		xs->xattr_bh = NULL;
		goto cleanup;
	}
	xs->not_found = ret;
	return 0;
cleanup:
	brelse(blk_bh);

	return ret;
}

/*
 * When all the xattrs are deleted from index btree, the ocfs2_xattr_tree
 * will be erased and ocfs2_xattr_block will have its ocfs2_xattr_header
 * re-initialized.
 */
static int ocfs2_restore_xattr_block(struct inode *inode,
				     struct ocfs2_xattr_search *xs)
{
	int ret;
	handle_t *handle;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_xattr_block *xb =
		(struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
	struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
	u16 xb_flags = le16_to_cpu(xb->xb_flags);

	BUG_ON(!(xb_flags & OCFS2_XATTR_INDEXED) ||
		le16_to_cpu(el->l_next_free_rec) != 0);

	handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_UPDATE_CREDITS);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		handle = NULL;
		goto out;
	}

	ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
				   OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret < 0) {
		mlog_errno(ret);
		goto out_commit;
	}

	memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
	       offsetof(struct ocfs2_xattr_block, xb_attrs));

	xb->xb_flags = cpu_to_le16(xb_flags & ~OCFS2_XATTR_INDEXED);

	ocfs2_journal_dirty(handle, xs->xattr_bh);

out_commit:
	ocfs2_commit_trans(osb, handle);
out:
	return ret;
}

/*
 * ocfs2_xattr_block_set()
 *
 * Set, replace or remove an extended attribute into external block.
 *
 */
static int ocfs2_xattr_block_set(struct inode *inode,
				 struct ocfs2_xattr_info *xi,
				 struct ocfs2_xattr_search *xs)
{
	struct buffer_head *new_bh = NULL;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_dinode *di =  (struct ocfs2_dinode *)xs->inode_bh->b_data;
	struct ocfs2_alloc_context *meta_ac = NULL;
	handle_t *handle = NULL;
	struct ocfs2_xattr_block *xblk = NULL;
	u16 suballoc_bit_start;
	u32 num_got;
	u64 first_blkno;
	int ret;

	if (!xs->xattr_bh) {
		/*
		 * Alloc one external block for extended attribute
		 * outside of inode.
		 */
		ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac);
		if (ret < 0) {
			mlog_errno(ret);
			goto out;
		}
		handle = ocfs2_start_trans(osb,
					   OCFS2_XATTR_BLOCK_CREATE_CREDITS);
		if (IS_ERR(handle)) {
			ret = PTR_ERR(handle);
			mlog_errno(ret);
			goto out;
		}
		ret = ocfs2_journal_access(handle, inode, xs->inode_bh,
					   OCFS2_JOURNAL_ACCESS_CREATE);
		if (ret < 0) {
			mlog_errno(ret);
			goto out_commit;
		}

		ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1,
					   &suballoc_bit_start, &num_got,
					   &first_blkno);
		if (ret < 0) {
			mlog_errno(ret);
			goto out_commit;
		}

		new_bh = sb_getblk(inode->i_sb, first_blkno);
		ocfs2_set_new_buffer_uptodate(inode, new_bh);

		ret = ocfs2_journal_access(handle, inode, new_bh,
					   OCFS2_JOURNAL_ACCESS_CREATE);
		if (ret < 0) {
			mlog_errno(ret);
			goto out_commit;
		}

		/* Initialize ocfs2_xattr_block */
		xs->xattr_bh = new_bh;
		xblk = (struct ocfs2_xattr_block *)new_bh->b_data;
		memset(xblk, 0, inode->i_sb->s_blocksize);
		strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE);
		xblk->xb_suballoc_slot = cpu_to_le16(osb->slot_num);
		xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
		xblk->xb_fs_generation = cpu_to_le32(osb->fs_generation);
		xblk->xb_blkno = cpu_to_le64(first_blkno);

		xs->header = &xblk->xb_attrs.xb_header;
		xs->base = (void *)xs->header;
		xs->end = (void *)xblk + inode->i_sb->s_blocksize;
		xs->here = xs->header->xh_entries;


		ret = ocfs2_journal_dirty(handle, new_bh);
		if (ret < 0) {
			mlog_errno(ret);
			goto out_commit;
		}
		di->i_xattr_loc = cpu_to_le64(first_blkno);
		ret = ocfs2_journal_dirty(handle, xs->inode_bh);
		if (ret < 0)
			mlog_errno(ret);
out_commit:
		ocfs2_commit_trans(osb, handle);
out:
		if (meta_ac)
			ocfs2_free_alloc_context(meta_ac);
		if (ret < 0)
			return ret;
	} else
		xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;

	if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) {
		/* Set extended attribute into external block */
		ret = ocfs2_xattr_set_entry(inode, xi, xs, OCFS2_HAS_XATTR_FL);
		if (!ret || ret != -ENOSPC)
			goto end;

		ret = ocfs2_xattr_create_index_block(inode, xs);
		if (ret)
			goto end;
	}

	ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs);
	if (!ret && xblk->xb_attrs.xb_root.xt_list.l_next_free_rec == 0)
		ret = ocfs2_restore_xattr_block(inode, xs);

end:

	return ret;
}

/*
 * ocfs2_xattr_set()
 *
 * Set, replace or remove an extended attribute for this inode.
 * value is NULL to remove an existing extended attribute, else either
 * create or replace an extended attribute.
 */
int ocfs2_xattr_set(struct inode *inode,
		    int name_index,
		    const char *name,
		    const void *value,
		    size_t value_len,
		    int flags)
{
	struct buffer_head *di_bh = NULL;
	struct ocfs2_dinode *di;
	int ret;
	u16 i, blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);

	struct ocfs2_xattr_info xi = {
		.name_index = name_index,
		.name = name,
		.value = value,
		.value_len = value_len,
	};

	struct ocfs2_xattr_search xis = {
		.not_found = -ENODATA,
	};

	struct ocfs2_xattr_search xbs = {
		.not_found = -ENODATA,
	};

	if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
		return -EOPNOTSUPP;

	ret = ocfs2_inode_lock(inode, &di_bh, 1);
	if (ret < 0) {
		mlog_errno(ret);
		return ret;
	}
	xis.inode_bh = xbs.inode_bh = di_bh;
	di = (struct ocfs2_dinode *)di_bh->b_data;

	down_write(&OCFS2_I(inode)->ip_xattr_sem);
	/*
	 * Scan inode and external block to find the same name
	 * extended attribute and collect search infomation.
	 */
	ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
	if (ret)
		goto cleanup;
	if (xis.not_found) {
		ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
		if (ret)
			goto cleanup;
	}

	if (xis.not_found && xbs.not_found) {
		ret = -ENODATA;
		if (flags & XATTR_REPLACE)
			goto cleanup;
		ret = 0;
		if (!value)
			goto cleanup;
	} else {
		ret = -EEXIST;
		if (flags & XATTR_CREATE)
			goto cleanup;
	}

	if (!value) {
		/* Remove existing extended attribute */
		if (!xis.not_found)
			ret = ocfs2_xattr_ibody_set(inode, &xi, &xis);
		else if (!xbs.not_found)
			ret = ocfs2_xattr_block_set(inode, &xi, &xbs);
	} else {
		/* We always try to set extended attribute into inode first*/
		ret = ocfs2_xattr_ibody_set(inode, &xi, &xis);
		if (!ret && !xbs.not_found) {
			/*
			 * If succeed and that extended attribute existing in
			 * external block, then we will remove it.
			 */
			xi.value = NULL;
			xi.value_len = 0;
			ret = ocfs2_xattr_block_set(inode, &xi, &xbs);
		} else if (ret == -ENOSPC) {
			if (di->i_xattr_loc && !xbs.xattr_bh) {
				ret = ocfs2_xattr_block_find(inode, name_index,
							     name, &xbs);
				if (ret)
					goto cleanup;
			}
			/*
			 * If no space in inode, we will set extended attribute
			 * into external block.
			 */
			ret = ocfs2_xattr_block_set(inode, &xi, &xbs);
			if (ret)
				goto cleanup;
			if (!xis.not_found) {
				/*
				 * If succeed and that extended attribute
				 * existing in inode, we will remove it.
				 */
				xi.value = NULL;
				xi.value_len = 0;
				ret = ocfs2_xattr_ibody_set(inode, &xi, &xis);
			}
		}
	}
cleanup:
	up_write(&OCFS2_I(inode)->ip_xattr_sem);
	ocfs2_inode_unlock(inode, 1);
	brelse(di_bh);
	brelse(xbs.xattr_bh);
	for (i = 0; i < blk_per_bucket; i++)
		brelse(xbs.bucket.bhs[i]);

	return ret;
}

static inline u32 ocfs2_xattr_hash_by_name(struct inode *inode,
					   int name_index,
					   const char *suffix_name)
{
	struct xattr_handler *handler = ocfs2_xattr_handler(name_index);
	char *prefix = handler->prefix;
	int prefix_len = strlen(handler->prefix);

	return ocfs2_xattr_name_hash(inode, prefix, prefix_len,
				     (char *)suffix_name, strlen(suffix_name));
}

/*
 * Find the xattr extent rec which may contains name_hash.
 * e_cpos will be the first name hash of the xattr rec.
 * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list.
 */
static int ocfs2_xattr_get_rec(struct inode *inode,
			       u32 name_hash,
			       u64 *p_blkno,
			       u32 *e_cpos,
			       u32 *num_clusters,
			       struct ocfs2_extent_list *el)
{
	int ret = 0, i;
	struct buffer_head *eb_bh = NULL;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_rec *rec = NULL;
	u64 e_blkno = 0;

	if (el->l_tree_depth) {
		ret = ocfs2_find_leaf(inode, el, name_hash, &eb_bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
		el = &eb->h_list;

		if (el->l_tree_depth) {
			ocfs2_error(inode->i_sb,
				    "Inode %lu has non zero tree depth in "
				    "xattr tree block %llu\n", inode->i_ino,
				    (unsigned long long)eb_bh->b_blocknr);
			ret = -EROFS;
			goto out;
		}
	}

	for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
		rec = &el->l_recs[i];

		if (le32_to_cpu(rec->e_cpos) <= name_hash) {
			e_blkno = le64_to_cpu(rec->e_blkno);
			break;
		}
	}

	if (!e_blkno) {
		ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
			    "record (%u, %u, 0) in xattr", inode->i_ino,
			    le32_to_cpu(rec->e_cpos),
			    ocfs2_rec_clusters(el, rec));
		ret = -EROFS;
		goto out;
	}

	*p_blkno = le64_to_cpu(rec->e_blkno);
	*num_clusters = le16_to_cpu(rec->e_leaf_clusters);
	if (e_cpos)
		*e_cpos = le32_to_cpu(rec->e_cpos);
out:
	brelse(eb_bh);
	return ret;
}

typedef int (xattr_bucket_func)(struct inode *inode,
				struct ocfs2_xattr_bucket *bucket,
				void *para);

static int ocfs2_find_xe_in_bucket(struct inode *inode,
				   struct buffer_head *header_bh,
				   int name_index,
				   const char *name,
				   u32 name_hash,
				   u16 *xe_index,
				   int *found)
{
	int i, ret = 0, cmp = 1, block_off, new_offset;
	struct ocfs2_xattr_header *xh =
			(struct ocfs2_xattr_header *)header_bh->b_data;
	size_t name_len = strlen(name);
	struct ocfs2_xattr_entry *xe = NULL;
	struct buffer_head *name_bh = NULL;
	char *xe_name;

	/*
	 * We don't use binary search in the bucket because there
	 * may be multiple entries with the same name hash.
	 */
	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
		xe = &xh->xh_entries[i];

		if (name_hash > le32_to_cpu(xe->xe_name_hash))
			continue;
		else if (name_hash < le32_to_cpu(xe->xe_name_hash))
			break;

		cmp = name_index - ocfs2_xattr_get_type(xe);
		if (!cmp)
			cmp = name_len - xe->xe_name_len;
		if (cmp)
			continue;

		ret = ocfs2_xattr_bucket_get_name_value(inode,
							xh,
							i,
							&block_off,
							&new_offset);
		if (ret) {
			mlog_errno(ret);
			break;
		}

		ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
				       header_bh->b_blocknr + block_off,
				       &name_bh, OCFS2_BH_CACHED, inode);
		if (ret) {
			mlog_errno(ret);
			break;
		}
		xe_name = name_bh->b_data + new_offset;

		cmp = memcmp(name, xe_name, name_len);
		brelse(name_bh);
		name_bh = NULL;

		if (cmp == 0) {
			*xe_index = i;
			*found = 1;
			ret = 0;
			break;
		}
	}

	return ret;
}

/*
 * Find the specified xattr entry in a series of buckets.
 * This series start from p_blkno and last for num_clusters.
 * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains
 * the num of the valid buckets.
 *
 * Return the buffer_head this xattr should reside in. And if the xattr's
 * hash is in the gap of 2 buckets, return the lower bucket.
 */
static int ocfs2_xattr_bucket_find(struct inode *inode,
				   int name_index,
				   const char *name,
				   u32 name_hash,
				   u64 p_blkno,
				   u32 first_hash,
				   u32 num_clusters,
				   struct ocfs2_xattr_search *xs)
{
	int ret, found = 0;
	struct buffer_head *bh = NULL;
	struct buffer_head *lower_bh = NULL;
	struct ocfs2_xattr_header *xh = NULL;
	struct ocfs2_xattr_entry *xe = NULL;
	u16 index = 0;
	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
	int low_bucket = 0, bucket, high_bucket;
	u32 last_hash;
	u64 blkno;

	ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), p_blkno,
			       &bh, OCFS2_BH_CACHED, inode);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	xh = (struct ocfs2_xattr_header *)bh->b_data;
	high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1;

	while (low_bucket <= high_bucket) {
		brelse(bh);
		bh = NULL;
		bucket = (low_bucket + high_bucket) / 2;

		blkno = p_blkno + bucket * blk_per_bucket;

		ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), blkno,
				       &bh, OCFS2_BH_CACHED, inode);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		xh = (struct ocfs2_xattr_header *)bh->b_data;
		xe = &xh->xh_entries[0];
		if (name_hash < le32_to_cpu(xe->xe_name_hash)) {
			high_bucket = bucket - 1;
			continue;
		}

		/*
		 * Check whether the hash of the last entry in our
		 * bucket is larger than the search one.
		 */
		xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1];
		last_hash = le32_to_cpu(xe->xe_name_hash);

		/* record lower_bh which may be the insert place. */
		brelse(lower_bh);
		lower_bh = bh;
		bh = NULL;

		if (name_hash > le32_to_cpu(xe->xe_name_hash)) {
			low_bucket = bucket + 1;
			continue;
		}

		/* the searched xattr should reside in this bucket if exists. */
		ret = ocfs2_find_xe_in_bucket(inode, lower_bh,
					      name_index, name, name_hash,
					      &index, &found);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
		break;
	}

	/*
	 * Record the bucket we have found.
	 * When the xattr's hash value is in the gap of 2 buckets, we will
	 * always set it to the previous bucket.
	 */
	if (!lower_bh) {
		/*
		 * We can't find any bucket whose first name_hash is less
		 * than the find name_hash.
		 */
		BUG_ON(bh->b_blocknr != p_blkno);
		lower_bh = bh;
		bh = NULL;
	}
	xs->bucket.bhs[0] = lower_bh;
	xs->bucket.xh = (struct ocfs2_xattr_header *)
					xs->bucket.bhs[0]->b_data;
	lower_bh = NULL;

	xs->header = xs->bucket.xh;
	xs->base = xs->bucket.bhs[0]->b_data;
	xs->end = xs->base + inode->i_sb->s_blocksize;

	if (found) {
		/*
		 * If we have found the xattr enty, read all the blocks in
		 * this bucket.
		 */
		ret = ocfs2_read_blocks(OCFS2_SB(inode->i_sb),
					xs->bucket.bhs[0]->b_blocknr + 1,
					blk_per_bucket - 1, &xs->bucket.bhs[1],
					OCFS2_BH_CACHED, inode);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		xs->here = &xs->header->xh_entries[index];
		mlog(0, "find xattr %s in bucket %llu, entry = %u\n", name,
		     (unsigned long long)xs->bucket.bhs[0]->b_blocknr, index);
	} else
		ret = -ENODATA;

out:
	brelse(bh);
	brelse(lower_bh);
	return ret;
}

static int ocfs2_xattr_index_block_find(struct inode *inode,
					struct buffer_head *root_bh,
					int name_index,
					const char *name,
					struct ocfs2_xattr_search *xs)
{
	int ret;
	struct ocfs2_xattr_block *xb =
			(struct ocfs2_xattr_block *)root_bh->b_data;
	struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
	struct ocfs2_extent_list *el = &xb_root->xt_list;
	u64 p_blkno = 0;
	u32 first_hash, num_clusters = 0;
	u32 name_hash = ocfs2_xattr_hash_by_name(inode, name_index, name);

	if (le16_to_cpu(el->l_next_free_rec) == 0)
		return -ENODATA;

	mlog(0, "find xattr %s, hash = %u, index = %d in xattr tree\n",
	     name, name_hash, name_index);

	ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash,
				  &num_clusters, el);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash);

	mlog(0, "find xattr extent rec %u clusters from %llu, the first hash "
	     "in the rec is %u\n", num_clusters, p_blkno, first_hash);

	ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash,
				      p_blkno, first_hash, num_clusters, xs);

out:
	return ret;
}

static int ocfs2_iterate_xattr_buckets(struct inode *inode,
				       u64 blkno,
				       u32 clusters,
				       xattr_bucket_func *func,
				       void *para)
{
	int i, j, ret = 0;
	int blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
	u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
	u32 num_buckets = clusters * bpc;
	struct ocfs2_xattr_bucket bucket;

	memset(&bucket, 0, sizeof(bucket));

	mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n",
	     clusters, blkno);

	for (i = 0; i < num_buckets; i++, blkno += blk_per_bucket) {
		ret = ocfs2_read_blocks(OCFS2_SB(inode->i_sb),
					blkno, blk_per_bucket,
					bucket.bhs, OCFS2_BH_CACHED, inode);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		bucket.xh = (struct ocfs2_xattr_header *)bucket.bhs[0]->b_data;
		/*
		 * The real bucket num in this series of blocks is stored
		 * in the 1st bucket.
		 */
		if (i == 0)
			num_buckets = le16_to_cpu(bucket.xh->xh_num_buckets);

		mlog(0, "iterating xattr bucket %llu\n", blkno);
		if (func) {
			ret = func(inode, &bucket, para);
			if (ret) {
				mlog_errno(ret);
				break;
			}
		}

		for (j = 0; j < blk_per_bucket; j++)
			brelse(bucket.bhs[j]);
		memset(&bucket, 0, sizeof(bucket));
	}

out:
	for (j = 0; j < blk_per_bucket; j++)
		brelse(bucket.bhs[j]);

	return ret;
}

struct ocfs2_xattr_tree_list {
	char *buffer;
	size_t buffer_size;
};

static int ocfs2_xattr_bucket_get_name_value(struct inode *inode,
					     struct ocfs2_xattr_header *xh,
					     int index,
					     int *block_off,
					     int *new_offset)
{
	u16 name_offset;

	if (index < 0 || index >= le16_to_cpu(xh->xh_count))
		return -EINVAL;

	name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset);

	*block_off = name_offset >> inode->i_sb->s_blocksize_bits;
	*new_offset = name_offset % inode->i_sb->s_blocksize;

	return 0;
}

static int ocfs2_list_xattr_bucket(struct inode *inode,
				   struct ocfs2_xattr_bucket *bucket,
				   void *para)
{
	int ret = 0;
	struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para;
	size_t size;
	int i, block_off, new_offset;

	for (i = 0 ; i < le16_to_cpu(bucket->xh->xh_count); i++) {
		struct ocfs2_xattr_entry *entry = &bucket->xh->xh_entries[i];
		struct xattr_handler *handler =
			ocfs2_xattr_handler(ocfs2_xattr_get_type(entry));

		if (handler) {
			ret = ocfs2_xattr_bucket_get_name_value(inode,
								bucket->xh,
								i,
								&block_off,
								&new_offset);
			if (ret)
				break;
			size = handler->list(inode, xl->buffer, xl->buffer_size,
					     bucket->bhs[block_off]->b_data +
					     new_offset,
					     entry->xe_name_len);
			if (xl->buffer) {
				if (size > xl->buffer_size)
					return -ERANGE;
				xl->buffer += size;
			}
			xl->buffer_size -= size;
		}
	}

	return ret;
}

static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
					     struct ocfs2_xattr_tree_root *xt,
					     char *buffer,
					     size_t buffer_size)
{
	struct ocfs2_extent_list *el = &xt->xt_list;
	int ret = 0;
	u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0;
	u64 p_blkno = 0;
	struct ocfs2_xattr_tree_list xl = {
		.buffer = buffer,
		.buffer_size = buffer_size,
	};

	if (le16_to_cpu(el->l_next_free_rec) == 0)
		return 0;

	while (name_hash > 0) {
		ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
					  &e_cpos, &num_clusters, el);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		ret = ocfs2_iterate_xattr_buckets(inode, p_blkno, num_clusters,
						  ocfs2_list_xattr_bucket,
						  &xl);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		if (e_cpos == 0)
			break;

		name_hash = e_cpos - 1;
	}

	ret = buffer_size - xl.buffer_size;
out:
	return ret;
}

static int cmp_xe(const void *a, const void *b)
{
	const struct ocfs2_xattr_entry *l = a, *r = b;
	u32 l_hash = le32_to_cpu(l->xe_name_hash);
	u32 r_hash = le32_to_cpu(r->xe_name_hash);

	if (l_hash > r_hash)
		return 1;
	if (l_hash < r_hash)
		return -1;
	return 0;
}

static void swap_xe(void *a, void *b, int size)
{
	struct ocfs2_xattr_entry *l = a, *r = b, tmp;

	tmp = *l;
	memcpy(l, r, sizeof(struct ocfs2_xattr_entry));
	memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry));
}

/*
 * When the ocfs2_xattr_block is filled up, new bucket will be created
 * and all the xattr entries will be moved to the new bucket.
 * Note: we need to sort the entries since they are not saved in order
 * in the ocfs2_xattr_block.
 */
static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
					   struct buffer_head *xb_bh,
					   struct buffer_head *xh_bh,
					   struct buffer_head *data_bh)
{
	int i, blocksize = inode->i_sb->s_blocksize;
	u16 offset, size, off_change;
	struct ocfs2_xattr_entry *xe;
	struct ocfs2_xattr_block *xb =
				(struct ocfs2_xattr_block *)xb_bh->b_data;
	struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header;
	struct ocfs2_xattr_header *xh =
				(struct ocfs2_xattr_header *)xh_bh->b_data;
	u16 count = le16_to_cpu(xb_xh->xh_count);
	char *target = xh_bh->b_data, *src = xb_bh->b_data;

	mlog(0, "cp xattr from block %llu to bucket %llu\n",
	     (unsigned long long)xb_bh->b_blocknr,
	     (unsigned long long)xh_bh->b_blocknr);

	memset(xh_bh->b_data, 0, blocksize);
	if (data_bh)
		memset(data_bh->b_data, 0, blocksize);
	/*
	 * Since the xe_name_offset is based on ocfs2_xattr_header,
	 * there is a offset change corresponding to the change of
	 * ocfs2_xattr_header's position.
	 */
	off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
	xe = &xb_xh->xh_entries[count - 1];
	offset = le16_to_cpu(xe->xe_name_offset) + off_change;
	size = blocksize - offset;

	/* copy all the names and values. */
	if (data_bh)
		target = data_bh->b_data;
	memcpy(target + offset, src + offset, size);

	/* Init new header now. */
	xh->xh_count = xb_xh->xh_count;
	xh->xh_num_buckets = cpu_to_le16(1);
	xh->xh_name_value_len = cpu_to_le16(size);
	xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size);

	/* copy all the entries. */
	target = xh_bh->b_data;
	offset = offsetof(struct ocfs2_xattr_header, xh_entries);
	size = count * sizeof(struct ocfs2_xattr_entry);
	memcpy(target + offset, (char *)xb_xh + offset, size);

	/* Change the xe offset for all the xe because of the move. */
	off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize +
		 offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
	for (i = 0; i < count; i++)
		le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change);

	mlog(0, "copy entry: start = %u, size = %u, offset_change = %u\n",
	     offset, size, off_change);

	sort(target + offset, count, sizeof(struct ocfs2_xattr_entry),
	     cmp_xe, swap_xe);
}

/*
 * After we move xattr from block to index btree, we have to
 * update ocfs2_xattr_search to the new xe and base.
 *
 * When the entry is in xattr block, xattr_bh indicates the storage place.
 * While if the entry is in index b-tree, "bucket" indicates the
 * real place of the xattr.
 */
static int ocfs2_xattr_update_xattr_search(struct inode *inode,
					   struct ocfs2_xattr_search *xs,
					   struct buffer_head *old_bh,
					   struct buffer_head *new_bh)
{
	int ret = 0;
	char *buf = old_bh->b_data;
	struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf;
	struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header;
	int i, blocksize = inode->i_sb->s_blocksize;
	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);

	xs->bucket.bhs[0] = new_bh;
	get_bh(new_bh);
	xs->bucket.xh = (struct ocfs2_xattr_header *)xs->bucket.bhs[0]->b_data;
	xs->header = xs->bucket.xh;

	xs->base = new_bh->b_data;
	xs->end = xs->base + inode->i_sb->s_blocksize;

	if (!xs->not_found) {
		if (OCFS2_XATTR_BUCKET_SIZE != blocksize) {
			ret = ocfs2_read_blocks(OCFS2_SB(inode->i_sb),
					xs->bucket.bhs[0]->b_blocknr + 1,
					blk_per_bucket - 1, &xs->bucket.bhs[1],
					OCFS2_BH_CACHED, inode);
			if (ret) {
				mlog_errno(ret);
				return ret;
			}

			i = xs->here - old_xh->xh_entries;
			xs->here = &xs->header->xh_entries[i];
		}
	}

	return ret;
}

static int ocfs2_xattr_create_index_block(struct inode *inode,
					  struct ocfs2_xattr_search *xs)
{
	int ret, credits = OCFS2_SUBALLOC_ALLOC;
	u32 bit_off, len;
	u64 blkno;
	handle_t *handle;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_alloc_context *data_ac;
	struct buffer_head *xh_bh = NULL, *data_bh = NULL;
	struct buffer_head *xb_bh = xs->xattr_bh;
	struct ocfs2_xattr_block *xb =
			(struct ocfs2_xattr_block *)xb_bh->b_data;
	struct ocfs2_xattr_tree_root *xr;
	u16 xb_flags = le16_to_cpu(xb->xb_flags);
	u16 bpb = ocfs2_blocks_per_xattr_bucket(inode->i_sb);

	mlog(0, "create xattr index block for %llu\n",
	     (unsigned long long)xb_bh->b_blocknr);

	BUG_ON(xb_flags & OCFS2_XATTR_INDEXED);

	ret = ocfs2_reserve_clusters(osb, 1, &data_ac);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/*
	 * XXX:
	 * We can use this lock for now, and maybe move to a dedicated mutex
	 * if performance becomes a problem later.
	 */
	down_write(&oi->ip_alloc_sem);

	/*
	 * 3 more credits, one for xattr block update, one for the 1st block
	 * of the new xattr bucket and one for the value/data.
	 */
	credits += 3;
	handle = ocfs2_start_trans(osb, credits);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out_sem;
	}

	ret = ocfs2_journal_access(handle, inode, xb_bh,
				   OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off, &len);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	/*
	 * The bucket may spread in many blocks, and
	 * we will only touch the 1st block and the last block
	 * in the whole bucket(one for entry and one for data).
	 */
	blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);

	mlog(0, "allocate 1 cluster from %llu to xattr block\n", blkno);

	xh_bh = sb_getblk(inode->i_sb, blkno);
	if (!xh_bh) {
		ret = -EIO;
		mlog_errno(ret);
		goto out_commit;
	}

	ocfs2_set_new_buffer_uptodate(inode, xh_bh);

	ret = ocfs2_journal_access(handle, inode, xh_bh,
				   OCFS2_JOURNAL_ACCESS_CREATE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	if (bpb > 1) {
		data_bh = sb_getblk(inode->i_sb, blkno + bpb - 1);
		if (!data_bh) {
			ret = -EIO;
			mlog_errno(ret);
			goto out_commit;
		}

		ocfs2_set_new_buffer_uptodate(inode, data_bh);

		ret = ocfs2_journal_access(handle, inode, data_bh,
					   OCFS2_JOURNAL_ACCESS_CREATE);
		if (ret) {
			mlog_errno(ret);
			goto out_commit;
		}
	}

	ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xh_bh, data_bh);

	ocfs2_journal_dirty(handle, xh_bh);
	if (data_bh)
		ocfs2_journal_dirty(handle, data_bh);

	ocfs2_xattr_update_xattr_search(inode, xs, xb_bh, xh_bh);

	/* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */
	memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
	       offsetof(struct ocfs2_xattr_block, xb_attrs));

	xr = &xb->xb_attrs.xb_root;
	xr->xt_clusters = cpu_to_le32(1);
	xr->xt_last_eb_blk = 0;
	xr->xt_list.l_tree_depth = 0;
	xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb));
	xr->xt_list.l_next_free_rec = cpu_to_le16(1);

	xr->xt_list.l_recs[0].e_cpos = 0;
	xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno);
	xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1);

	xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED);

	ret = ocfs2_journal_dirty(handle, xb_bh);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

out_commit:
	ocfs2_commit_trans(osb, handle);

out_sem:
	up_write(&oi->ip_alloc_sem);

out:
	if (data_ac)
		ocfs2_free_alloc_context(data_ac);

	brelse(xh_bh);
	brelse(data_bh);

	return ret;
}

static int cmp_xe_offset(const void *a, const void *b)
{
	const struct ocfs2_xattr_entry *l = a, *r = b;
	u32 l_name_offset = le16_to_cpu(l->xe_name_offset);
	u32 r_name_offset = le16_to_cpu(r->xe_name_offset);

	if (l_name_offset < r_name_offset)
		return 1;
	if (l_name_offset > r_name_offset)
		return -1;
	return 0;
}

/*
 * defrag a xattr bucket if we find that the bucket has some
 * holes beteen name/value pairs.
 * We will move all the name/value pairs to the end of the bucket
 * so that we can spare some space for insertion.
 */
static int ocfs2_defrag_xattr_bucket(struct inode *inode,
				     struct ocfs2_xattr_bucket *bucket)
{
	int ret, i;
	size_t end, offset, len, value_len;
	struct ocfs2_xattr_header *xh;
	char *entries, *buf, *bucket_buf = NULL;
	u64 blkno = bucket->bhs[0]->b_blocknr;
	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
	u16 xh_free_start;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	size_t blocksize = inode->i_sb->s_blocksize;
	handle_t *handle;
	struct buffer_head **bhs;
	struct ocfs2_xattr_entry *xe;

	bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket,
			GFP_NOFS);
	if (!bhs)
		return -ENOMEM;

	ret = ocfs2_read_blocks(osb, blkno, blk_per_bucket, bhs,
				OCFS2_BH_CACHED, inode);
	if (ret)
		goto out;

	/*
	 * In order to make the operation more efficient and generic,
	 * we copy all the blocks into a contiguous memory and do the
	 * defragment there, so if anything is error, we will not touch
	 * the real block.
	 */
	bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS);
	if (!bucket_buf) {
		ret = -EIO;
		goto out;
	}

	buf = bucket_buf;
	for (i = 0; i < blk_per_bucket; i++, buf += blocksize)
		memcpy(buf, bhs[i]->b_data, blocksize);

	handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), blk_per_bucket);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		handle = NULL;
		mlog_errno(ret);
		goto out;
	}

	for (i = 0; i < blk_per_bucket; i++) {
		ret = ocfs2_journal_access(handle, inode, bhs[i],
					   OCFS2_JOURNAL_ACCESS_WRITE);
		if (ret < 0) {
			mlog_errno(ret);
			goto commit;
		}
	}

	xh = (struct ocfs2_xattr_header *)bucket_buf;
	entries = (char *)xh->xh_entries;
	xh_free_start = le16_to_cpu(xh->xh_free_start);

	mlog(0, "adjust xattr bucket in %llu, count = %u, "
	     "xh_free_start = %u, xh_name_value_len = %u.\n",
	     blkno, le16_to_cpu(xh->xh_count), xh_free_start,
	     le16_to_cpu(xh->xh_name_value_len));

	/*
	 * sort all the entries by their offset.
	 * the largest will be the first, so that we can
	 * move them to the end one by one.
	 */
	sort(entries, le16_to_cpu(xh->xh_count),
	     sizeof(struct ocfs2_xattr_entry),
	     cmp_xe_offset, swap_xe);

	/* Move all name/values to the end of the bucket. */
	xe = xh->xh_entries;
	end = OCFS2_XATTR_BUCKET_SIZE;
	for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) {
		offset = le16_to_cpu(xe->xe_name_offset);
		if (ocfs2_xattr_is_local(xe))
			value_len = OCFS2_XATTR_SIZE(
					le64_to_cpu(xe->xe_value_size));
		else
			value_len = OCFS2_XATTR_ROOT_SIZE;
		len = OCFS2_XATTR_SIZE(xe->xe_name_len) + value_len;

		/*
		 * We must make sure that the name/value pair
		 * exist in the same block. So adjust end to
		 * the previous block end if needed.
		 */
		if (((end - len) / blocksize !=
			(end - 1) / blocksize))
			end = end - end % blocksize;

		if (end > offset + len) {
			memmove(bucket_buf + end - len,
				bucket_buf + offset, len);
			xe->xe_name_offset = cpu_to_le16(end - len);
		}

		mlog_bug_on_msg(end < offset + len, "Defrag check failed for "
				"bucket %llu\n", (unsigned long long)blkno);

		end -= len;
	}

	mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for "
			"bucket %llu\n", (unsigned long long)blkno);

	if (xh_free_start == end)
		goto commit;

	memset(bucket_buf + xh_free_start, 0, end - xh_free_start);
	xh->xh_free_start = cpu_to_le16(end);

	/* sort the entries by their name_hash. */
	sort(entries, le16_to_cpu(xh->xh_count),
	     sizeof(struct ocfs2_xattr_entry),
	     cmp_xe, swap_xe);

	buf = bucket_buf;
	for (i = 0; i < blk_per_bucket; i++, buf += blocksize) {
		memcpy(bhs[i]->b_data, buf, blocksize);
		ocfs2_journal_dirty(handle, bhs[i]);
	}

commit:
	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
out:

	if (bhs) {
		for (i = 0; i < blk_per_bucket; i++)
			brelse(bhs[i]);
	}
	kfree(bhs);

	kfree(bucket_buf);
	return ret;
}

/*
 * Move half nums of the xattr bucket in the previous cluster to this new
 * cluster. We only touch the last cluster of the previous extend record.
 *
 * first_bh is the first buffer_head of a series of bucket in the same
 * extent rec and header_bh is the header of one bucket in this cluster.
 * They will be updated if we move the data header_bh contains to the new
 * cluster. first_hash will be set as the 1st xe's name_hash of the new cluster.
 */
static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
					       handle_t *handle,
					       struct buffer_head **first_bh,
					       struct buffer_head **header_bh,
					       u64 new_blkno,
					       u64 prev_blkno,
					       u32 num_clusters,
					       u32 *first_hash)
{
	int i, ret, credits;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
	int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
	int blocksize = inode->i_sb->s_blocksize;
	struct buffer_head *old_bh, *new_bh, *prev_bh, *new_first_bh = NULL;
	struct ocfs2_xattr_header *new_xh;
	struct ocfs2_xattr_header *xh =
			(struct ocfs2_xattr_header *)((*first_bh)->b_data);

	BUG_ON(le16_to_cpu(xh->xh_num_buckets) < num_buckets);
	BUG_ON(OCFS2_XATTR_BUCKET_SIZE == osb->s_clustersize);

	prev_bh = *first_bh;
	get_bh(prev_bh);
	xh = (struct ocfs2_xattr_header *)prev_bh->b_data;

	prev_blkno += (num_clusters - 1) * bpc + bpc / 2;

	mlog(0, "move half of xattrs in cluster %llu to %llu\n",
	     prev_blkno, new_blkno);

	/*
	 * We need to update the 1st half of the new cluster and
	 * 1 more for the update of the 1st bucket of the previous
	 * extent record.
	 */
	credits = bpc / 2 + 1;
	ret = ocfs2_extend_trans(handle, credits);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_journal_access(handle, inode, prev_bh,
				   OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	for (i = 0; i < bpc / 2; i++, prev_blkno++, new_blkno++) {
		old_bh = new_bh = NULL;
		new_bh = sb_getblk(inode->i_sb, new_blkno);
		if (!new_bh) {
			ret = -EIO;
			mlog_errno(ret);
			goto out;
		}

		ocfs2_set_new_buffer_uptodate(inode, new_bh);

		ret = ocfs2_journal_access(handle, inode, new_bh,
					   OCFS2_JOURNAL_ACCESS_CREATE);
		if (ret < 0) {
			mlog_errno(ret);
			brelse(new_bh);
			goto out;
		}

		ret = ocfs2_read_block(osb, prev_blkno,
				       &old_bh, OCFS2_BH_CACHED, inode);
		if (ret < 0) {
			mlog_errno(ret);
			brelse(new_bh);
			goto out;
		}

		memcpy(new_bh->b_data, old_bh->b_data, blocksize);

		if (i == 0) {
			new_xh = (struct ocfs2_xattr_header *)new_bh->b_data;
			new_xh->xh_num_buckets = cpu_to_le16(num_buckets / 2);

			if (first_hash)
				*first_hash = le32_to_cpu(
					new_xh->xh_entries[0].xe_name_hash);
			new_first_bh = new_bh;
			get_bh(new_first_bh);
		}

		ocfs2_journal_dirty(handle, new_bh);

		if (*header_bh == old_bh) {
			brelse(*header_bh);
			*header_bh = new_bh;
			get_bh(*header_bh);

			brelse(*first_bh);
			*first_bh = new_first_bh;
			get_bh(*first_bh);
		}
		brelse(new_bh);
		brelse(old_bh);
	}

	le16_add_cpu(&xh->xh_num_buckets, -(num_buckets / 2));

	ocfs2_journal_dirty(handle, prev_bh);
out:
	brelse(prev_bh);
	brelse(new_first_bh);
	return ret;
}

static int ocfs2_read_xattr_bucket(struct inode *inode,
				   u64 blkno,
				   struct buffer_head **bhs,
				   int new)
{
	int ret = 0;
	u16 i, blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);

	if (!new)
		return ocfs2_read_blocks(OCFS2_SB(inode->i_sb), blkno,
					 blk_per_bucket, bhs,
					 OCFS2_BH_CACHED, inode);

	for (i = 0; i < blk_per_bucket; i++) {
		bhs[i] = sb_getblk(inode->i_sb, blkno + i);
		if (bhs[i] == NULL) {
			ret = -EIO;
			mlog_errno(ret);
			break;
		}
		ocfs2_set_new_buffer_uptodate(inode, bhs[i]);
	}

	return ret;
}

/*
 * Move half num of the xattrs in old bucket(blk) to new bucket(new_blk).
 * first_hash will record the 1st hash of the new bucket.
 */
static int ocfs2_half_xattr_bucket(struct inode *inode,
				   handle_t *handle,
				   u64 blk,
				   u64 new_blk,
				   u32 *first_hash,
				   int new_bucket_head)
{
	int ret, i;
	u16 count, start, len, name_value_len, xe_len, name_offset;
	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
	struct buffer_head **s_bhs, **t_bhs = NULL;
	struct ocfs2_xattr_header *xh;
	struct ocfs2_xattr_entry *xe;
	int blocksize = inode->i_sb->s_blocksize;

	mlog(0, "move half of xattrs from bucket %llu to %llu\n",
	     blk, new_blk);

	s_bhs = kcalloc(blk_per_bucket, sizeof(struct buffer_head *), GFP_NOFS);
	if (!s_bhs)
		return -ENOMEM;

	ret = ocfs2_read_xattr_bucket(inode, blk, s_bhs, 0);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_journal_access(handle, inode, s_bhs[0],
				   OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	t_bhs = kcalloc(blk_per_bucket, sizeof(struct buffer_head *), GFP_NOFS);
	if (!t_bhs) {
		ret = -ENOMEM;
		goto out;
	}

	ret = ocfs2_read_xattr_bucket(inode, new_blk, t_bhs, new_bucket_head);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	for (i = 0; i < blk_per_bucket; i++) {
		ret = ocfs2_journal_access(handle, inode, t_bhs[i],
					   OCFS2_JOURNAL_ACCESS_CREATE);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
	}

	/* copy the whole bucket to the new first. */
	for (i = 0; i < blk_per_bucket; i++)
		memcpy(t_bhs[i]->b_data, s_bhs[i]->b_data, blocksize);

	/* update the new bucket. */
	xh = (struct ocfs2_xattr_header *)t_bhs[0]->b_data;
	count = le16_to_cpu(xh->xh_count);
	start = count / 2;

	/*
	 * Calculate the total name/value len and xh_free_start for
	 * the old bucket first.
	 */
	name_offset = OCFS2_XATTR_BUCKET_SIZE;
	name_value_len = 0;
	for (i = 0; i < start; i++) {
		xe = &xh->xh_entries[i];
		xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
		if (ocfs2_xattr_is_local(xe))
			xe_len +=
			   OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
		else
			xe_len += OCFS2_XATTR_ROOT_SIZE;
		name_value_len += xe_len;
		if (le16_to_cpu(xe->xe_name_offset) < name_offset)
			name_offset = le16_to_cpu(xe->xe_name_offset);
	}

	/*
	 * Now begin the modification to the new bucket.
	 *
	 * In the new bucket, We just move the xattr entry to the beginning
	 * and don't touch the name/value. So there will be some holes in the
	 * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is
	 * called.
	 */
	xe = &xh->xh_entries[start];
	len = sizeof(struct ocfs2_xattr_entry) * (count - start);
	mlog(0, "mv xattr entry len %d from %d to %d\n", len,
	     (int)((char *)xe - (char *)xh),
	     (int)((char *)xh->xh_entries - (char *)xh));
	memmove((char *)xh->xh_entries, (char *)xe, len);
	xe = &xh->xh_entries[count - start];
	len = sizeof(struct ocfs2_xattr_entry) * start;
	memset((char *)xe, 0, len);

	le16_add_cpu(&xh->xh_count, -start);
	le16_add_cpu(&xh->xh_name_value_len, -name_value_len);

	/* Calculate xh_free_start for the new bucket. */
	xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
		xe = &xh->xh_entries[i];
		xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
		if (ocfs2_xattr_is_local(xe))
			xe_len +=
			   OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
		else
			xe_len += OCFS2_XATTR_ROOT_SIZE;
		if (le16_to_cpu(xe->xe_name_offset) <
		    le16_to_cpu(xh->xh_free_start))
			xh->xh_free_start = xe->xe_name_offset;
	}

	/* set xh->xh_num_buckets for the new xh. */
	if (new_bucket_head)
		xh->xh_num_buckets = cpu_to_le16(1);
	else
		xh->xh_num_buckets = 0;

	for (i = 0; i < blk_per_bucket; i++) {
		ocfs2_journal_dirty(handle, t_bhs[i]);
		if (ret)
			mlog_errno(ret);
	}

	/* store the first_hash of the new bucket. */
	if (first_hash)
		*first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);

	/*
	 * Now only update the 1st block of the old bucket.
	 * Please note that the entry has been sorted already above.
	 */
	xh = (struct ocfs2_xattr_header *)s_bhs[0]->b_data;
	memset(&xh->xh_entries[start], 0,
	       sizeof(struct ocfs2_xattr_entry) * (count - start));
	xh->xh_count = cpu_to_le16(start);
	xh->xh_free_start = cpu_to_le16(name_offset);
	xh->xh_name_value_len = cpu_to_le16(name_value_len);

	ocfs2_journal_dirty(handle, s_bhs[0]);
	if (ret)
		mlog_errno(ret);

out:
	if (s_bhs) {
		for (i = 0; i < blk_per_bucket; i++)
			brelse(s_bhs[i]);
	}
	kfree(s_bhs);

	if (t_bhs) {
		for (i = 0; i < blk_per_bucket; i++)
			brelse(t_bhs[i]);
	}
	kfree(t_bhs);

	return ret;
}

/*
 * Copy xattr from one bucket to another bucket.
 *
 * The caller must make sure that the journal transaction
 * has enough space for journaling.
 */
static int ocfs2_cp_xattr_bucket(struct inode *inode,
				 handle_t *handle,
				 u64 s_blkno,
				 u64 t_blkno,
				 int t_is_new)
{
	int ret, i;
	int blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
	int blocksize = inode->i_sb->s_blocksize;
	struct buffer_head **s_bhs, **t_bhs = NULL;

	BUG_ON(s_blkno == t_blkno);

	mlog(0, "cp bucket %llu to %llu, target is %d\n",
	     s_blkno, t_blkno, t_is_new);

	s_bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket,
			GFP_NOFS);
	if (!s_bhs)
		return -ENOMEM;

	ret = ocfs2_read_xattr_bucket(inode, s_blkno, s_bhs, 0);
	if (ret)
		goto out;

	t_bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket,
			GFP_NOFS);
	if (!t_bhs) {
		ret = -ENOMEM;
		goto out;
	}

	ret = ocfs2_read_xattr_bucket(inode, t_blkno, t_bhs, t_is_new);
	if (ret)
		goto out;

	for (i = 0; i < blk_per_bucket; i++) {
		ret = ocfs2_journal_access(handle, inode, t_bhs[i],
					   OCFS2_JOURNAL_ACCESS_WRITE);
		if (ret)
			goto out;
	}

	for (i = 0; i < blk_per_bucket; i++) {
		memcpy(t_bhs[i]->b_data, s_bhs[i]->b_data, blocksize);
		ocfs2_journal_dirty(handle, t_bhs[i]);
	}

out:
	if (s_bhs) {
		for (i = 0; i < blk_per_bucket; i++)
			brelse(s_bhs[i]);
	}
	kfree(s_bhs);

	if (t_bhs) {
		for (i = 0; i < blk_per_bucket; i++)
			brelse(t_bhs[i]);
	}
	kfree(t_bhs);

	return ret;
}

/*
 * Copy one xattr cluster from src_blk to to_blk.
 * The to_blk will become the first bucket header of the cluster, so its
 * xh_num_buckets will be initialized as the bucket num in the cluster.
 */
static int ocfs2_cp_xattr_cluster(struct inode *inode,
				  handle_t *handle,
				  struct buffer_head *first_bh,
				  u64 src_blk,
				  u64 to_blk,
				  u32 *first_hash)
{
	int i, ret, credits;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
	int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
	struct buffer_head *bh = NULL;
	struct ocfs2_xattr_header *xh;
	u64 to_blk_start = to_blk;

	mlog(0, "cp xattrs from cluster %llu to %llu\n", src_blk, to_blk);

	/*
	 * We need to update the new cluster and 1 more for the update of
	 * the 1st bucket of the previous extent rec.
	 */
	credits = bpc + 1;
	ret = ocfs2_extend_trans(handle, credits);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_journal_access(handle, inode, first_bh,
				   OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	for (i = 0; i < num_buckets; i++) {
		ret = ocfs2_cp_xattr_bucket(inode, handle,
					    src_blk, to_blk, 1);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		src_blk += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
		to_blk += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
	}

	/* update the old bucket header. */
	xh = (struct ocfs2_xattr_header *)first_bh->b_data;
	le16_add_cpu(&xh->xh_num_buckets, -num_buckets);

	ocfs2_journal_dirty(handle, first_bh);

	/* update the new bucket header. */
	ret = ocfs2_read_block(osb, to_blk_start, &bh, OCFS2_BH_CACHED, inode);
	if (ret < 0) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_journal_access(handle, inode, bh,
				   OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	xh = (struct ocfs2_xattr_header *)bh->b_data;
	xh->xh_num_buckets = cpu_to_le16(num_buckets);

	ocfs2_journal_dirty(handle, bh);

	if (first_hash)
		*first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
out:
	brelse(bh);
	return ret;
}

/*
 * Move half of the xattrs in this cluster to the new cluster.
 * This function should only be called when bucket size == cluster size.
 * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead.
 */
static int ocfs2_half_xattr_cluster(struct inode *inode,
				    handle_t *handle,
				    u64 prev_blk,
				    u64 new_blk,
				    u32 *first_hash)
{
	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
	int ret, credits = 2 * blk_per_bucket;

	BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize);

	ret = ocfs2_extend_trans(handle, credits);
	if (ret) {
		mlog_errno(ret);
		return ret;
	}

	/* Move half of the xattr in start_blk to the next bucket. */
	return  ocfs2_half_xattr_bucket(inode, handle, prev_blk,
					new_blk, first_hash, 1);
}

/*
 * Move some xattrs from the old cluster to the new one since they are not
 * contiguous in ocfs2 xattr tree.
 *
 * new_blk starts a new separate cluster, and we will move some xattrs from
 * prev_blk to it. v_start will be set as the first name hash value in this
 * new cluster so that it can be used as e_cpos during tree insertion and
 * don't collide with our original b-tree operations. first_bh and header_bh
 * will also be updated since they will be used in ocfs2_extend_xattr_bucket
 * to extend the insert bucket.
 *
 * The problem is how much xattr should we move to the new one and when should
 * we update first_bh and header_bh?
 * 1. If cluster size > bucket size, that means the previous cluster has more
 *    than 1 bucket, so just move half nums of bucket into the new cluster and
 *    update the first_bh and header_bh if the insert bucket has been moved
 *    to the new cluster.
 * 2. If cluster_size == bucket_size:
 *    a) If the previous extent rec has more than one cluster and the insert
 *       place isn't in the last cluster, copy the entire last cluster to the
 *       new one. This time, we don't need to upate the first_bh and header_bh
 *       since they will not be moved into the new cluster.
 *    b) Otherwise, move the bottom half of the xattrs in the last cluster into
 *       the new one. And we set the extend flag to zero if the insert place is
 *       moved into the new allocated cluster since no extend is needed.
 */
static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
					    handle_t *handle,
					    struct buffer_head **first_bh,
					    struct buffer_head **header_bh,
					    u64 new_blk,
					    u64 prev_blk,
					    u32 prev_clusters,
					    u32 *v_start,
					    int *extend)
{
	int ret = 0;
	int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);

	mlog(0, "adjust xattrs from cluster %llu len %u to %llu\n",
	     prev_blk, prev_clusters, new_blk);

	if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1)
		ret = ocfs2_mv_xattr_bucket_cross_cluster(inode,
							  handle,
							  first_bh,
							  header_bh,
							  new_blk,
							  prev_blk,
							  prev_clusters,
							  v_start);
	else {
		u64 last_blk = prev_blk + bpc * (prev_clusters - 1);

		if (prev_clusters > 1 && (*header_bh)->b_blocknr != last_blk)
			ret = ocfs2_cp_xattr_cluster(inode, handle, *first_bh,
						     last_blk, new_blk,
						     v_start);
		else {
			ret = ocfs2_half_xattr_cluster(inode, handle,
						       last_blk, new_blk,
						       v_start);

			if ((*header_bh)->b_blocknr == last_blk && extend)
				*extend = 0;
		}
	}

	return ret;
}

/*
 * Add a new cluster for xattr storage.
 *
 * If the new cluster is contiguous with the previous one, it will be
 * appended to the same extent record, and num_clusters will be updated.
 * If not, we will insert a new extent for it and move some xattrs in
 * the last cluster into the new allocated one.
 * We also need to limit the maximum size of a btree leaf, otherwise we'll
 * lose the benefits of hashing because we'll have to search large leaves.
 * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE(or clustersize,
 * if it's bigger).
 *
 * first_bh is the first block of the previous extent rec and header_bh
 * indicates the bucket we will insert the new xattrs. They will be updated
 * when the header_bh is moved into the new cluster.
 */
static int ocfs2_add_new_xattr_cluster(struct inode *inode,
				       struct buffer_head *root_bh,
				       struct buffer_head **first_bh,
				       struct buffer_head **header_bh,
				       u32 *num_clusters,
				       u32 prev_cpos,
				       u64 prev_blkno,
				       int *extend)
{
	int ret, credits;
	u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
	u32 prev_clusters = *num_clusters;
	u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0;
	u64 block;
	handle_t *handle = NULL;
	struct ocfs2_alloc_context *data_ac = NULL;
	struct ocfs2_alloc_context *meta_ac = NULL;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_extent_tree et;

	mlog(0, "Add new xattr cluster for %llu, previous xattr hash = %u, "
	     "previous xattr blkno = %llu\n",
	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
	     prev_cpos, prev_blkno);

	ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh);

	ret = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0,
				    &data_ac, &meta_ac);
	if (ret) {
		mlog_errno(ret);
		goto leave;
	}

	credits = ocfs2_calc_extend_credits(osb->sb, et.et_root_el,
					    clusters_to_add);
	handle = ocfs2_start_trans(osb, credits);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		handle = NULL;
		mlog_errno(ret);
		goto leave;
	}

	ret = ocfs2_journal_access(handle, inode, root_bh,
				   OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret < 0) {
		mlog_errno(ret);
		goto leave;
	}

	ret = __ocfs2_claim_clusters(osb, handle, data_ac, 1,
				     clusters_to_add, &bit_off, &num_bits);
	if (ret < 0) {
		if (ret != -ENOSPC)
			mlog_errno(ret);
		goto leave;
	}

	BUG_ON(num_bits > clusters_to_add);

	block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
	mlog(0, "Allocating %u clusters at block %u for xattr in inode %llu\n",
	     num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);

	if (prev_blkno + prev_clusters * bpc == block &&
	    (prev_clusters + num_bits) << osb->s_clustersize_bits <=
	     OCFS2_MAX_XATTR_TREE_LEAF_SIZE) {
		/*
		 * If this cluster is contiguous with the old one and
		 * adding this new cluster, we don't surpass the limit of
		 * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool. We will let it be
		 * initialized and used like other buckets in the previous
		 * cluster.
		 * So add it as a contiguous one. The caller will handle
		 * its init process.
		 */
		v_start = prev_cpos + prev_clusters;
		*num_clusters = prev_clusters + num_bits;
		mlog(0, "Add contiguous %u clusters to previous extent rec.\n",
		     num_bits);
	} else {
		ret = ocfs2_adjust_xattr_cross_cluster(inode,
						       handle,
						       first_bh,
						       header_bh,
						       block,
						       prev_blkno,
						       prev_clusters,
						       &v_start,
						       extend);
		if (ret) {
			mlog_errno(ret);
			goto leave;
		}
	}

	if (handle->h_buffer_credits < credits) {
		/*
		 * The journal has been restarted before, and don't
		 * have enough space for the insertion, so extend it
		 * here.
		 */
		ret = ocfs2_extend_trans(handle, credits);
		if (ret) {
			mlog_errno(ret);
			goto leave;
		}
	}
	mlog(0, "Insert %u clusters at block %llu for xattr at %u\n",
	     num_bits, block, v_start);
	ret = ocfs2_insert_extent(osb, handle, inode, &et, v_start, block,
				  num_bits, 0, meta_ac);
	if (ret < 0) {
		mlog_errno(ret);
		goto leave;
	}

	ret = ocfs2_journal_dirty(handle, root_bh);
	if (ret < 0) {
		mlog_errno(ret);
		goto leave;
	}

leave:
	if (handle)
		ocfs2_commit_trans(osb, handle);
	if (data_ac)
		ocfs2_free_alloc_context(data_ac);
	if (meta_ac)
		ocfs2_free_alloc_context(meta_ac);

	return ret;
}

/*
 * Extend a new xattr bucket and move xattrs to the end one by one until
 * We meet with start_bh. Only move half of the xattrs to the bucket after it.
 */
static int ocfs2_extend_xattr_bucket(struct inode *inode,
				     struct buffer_head *first_bh,
				     struct buffer_head *start_bh,
				     u32 num_clusters)
{
	int ret, credits;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
	u64 start_blk = start_bh->b_blocknr, end_blk;
	u32 num_buckets = num_clusters * ocfs2_xattr_buckets_per_cluster(osb);
	handle_t *handle;
	struct ocfs2_xattr_header *first_xh =
				(struct ocfs2_xattr_header *)first_bh->b_data;
	u16 bucket = le16_to_cpu(first_xh->xh_num_buckets);

	mlog(0, "extend xattr bucket in %llu, xattr extend rec starting "
	     "from %llu, len = %u\n", start_blk,
	     (unsigned long long)first_bh->b_blocknr, num_clusters);

	BUG_ON(bucket >= num_buckets);

	end_blk = first_bh->b_blocknr + (bucket - 1) * blk_per_bucket;

	/*
	 * We will touch all the buckets after the start_bh(include it).
	 * Add one more bucket and modify the first_bh.
	 */
	credits = end_blk - start_blk + 2 * blk_per_bucket + 1;
	handle = ocfs2_start_trans(osb, credits);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		handle = NULL;
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_journal_access(handle, inode, first_bh,
				   OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto commit;
	}

	while (end_blk != start_blk) {
		ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk,
					    end_blk + blk_per_bucket, 0);
		if (ret)
			goto commit;
		end_blk -= blk_per_bucket;
	}

	/* Move half of the xattr in start_blk to the next bucket. */
	ret = ocfs2_half_xattr_bucket(inode, handle, start_blk,
				      start_blk + blk_per_bucket, NULL, 0);

	le16_add_cpu(&first_xh->xh_num_buckets, 1);
	ocfs2_journal_dirty(handle, first_bh);

commit:
	ocfs2_commit_trans(osb, handle);
out:
	return ret;
}

/*
 * Add new xattr bucket in an extent record and adjust the buckets accordingly.
 * xb_bh is the ocfs2_xattr_block.
 * We will move all the buckets starting from header_bh to the next place. As
 * for this one, half num of its xattrs will be moved to the next one.
 *
 * We will allocate a new cluster if current cluster is full and adjust
 * header_bh and first_bh if the insert place is moved to the new cluster.
 */
static int ocfs2_add_new_xattr_bucket(struct inode *inode,
				      struct buffer_head *xb_bh,
				      struct buffer_head *header_bh)
{
	struct ocfs2_xattr_header *first_xh = NULL;
	struct buffer_head *first_bh = NULL;
	struct ocfs2_xattr_block *xb =
			(struct ocfs2_xattr_block *)xb_bh->b_data;
	struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
	struct ocfs2_extent_list *el = &xb_root->xt_list;
	struct ocfs2_xattr_header *xh =
			(struct ocfs2_xattr_header *)header_bh->b_data;
	u32 name_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
	struct super_block *sb = inode->i_sb;
	struct ocfs2_super *osb = OCFS2_SB(sb);
	int ret, num_buckets, extend = 1;
	u64 p_blkno;
	u32 e_cpos, num_clusters;

	mlog(0, "Add new xattr bucket starting form %llu\n",
	     (unsigned long long)header_bh->b_blocknr);

	/*
	 * Add refrence for header_bh here because it may be
	 * changed in ocfs2_add_new_xattr_cluster and we need
	 * to free it in the end.
	 */
	get_bh(header_bh);

	ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos,
				  &num_clusters, el);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_read_block(osb, p_blkno,
			       &first_bh, OCFS2_BH_CACHED, inode);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters;
	first_xh = (struct ocfs2_xattr_header *)first_bh->b_data;

	if (num_buckets == le16_to_cpu(first_xh->xh_num_buckets)) {
		ret = ocfs2_add_new_xattr_cluster(inode,
						  xb_bh,
						  &first_bh,
						  &header_bh,
						  &num_clusters,
						  e_cpos,
						  p_blkno,
						  &extend);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
	}

	if (extend)
		ret = ocfs2_extend_xattr_bucket(inode,
						first_bh,
						header_bh,
						num_clusters);
	if (ret)
		mlog_errno(ret);
out:
	brelse(first_bh);
	brelse(header_bh);
	return ret;
}

static inline char *ocfs2_xattr_bucket_get_val(struct inode *inode,
					struct ocfs2_xattr_bucket *bucket,
					int offs)
{
	int block_off = offs >> inode->i_sb->s_blocksize_bits;

	offs = offs % inode->i_sb->s_blocksize;
	return bucket->bhs[block_off]->b_data + offs;
}

/*
 * Handle the normal xattr set, including replace, delete and new.
 * When the bucket is empty, "is_empty" is set and the caller can
 * free this bucket.
 *
 * Note: "local" indicates the real data's locality. So we can't
 * just its bucket locality by its length.
 */
static void ocfs2_xattr_set_entry_normal(struct inode *inode,
					 struct ocfs2_xattr_info *xi,
					 struct ocfs2_xattr_search *xs,
					 u32 name_hash,
					 int local,
					 int *is_empty)
{
	struct ocfs2_xattr_entry *last, *xe;
	int name_len = strlen(xi->name);
	struct ocfs2_xattr_header *xh = xs->header;
	u16 count = le16_to_cpu(xh->xh_count), start;
	size_t blocksize = inode->i_sb->s_blocksize;
	char *val;
	size_t offs, size, new_size;

	last = &xh->xh_entries[count];
	if (!xs->not_found) {
		xe = xs->here;
		offs = le16_to_cpu(xe->xe_name_offset);
		if (ocfs2_xattr_is_local(xe))
			size = OCFS2_XATTR_SIZE(name_len) +
			OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
		else
			size = OCFS2_XATTR_SIZE(name_len) +
			OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE);

		/*
		 * If the new value will be stored outside, xi->value has been
		 * initalized as an empty ocfs2_xattr_value_root, and the same
		 * goes with xi->value_len, so we can set new_size safely here.
		 * See ocfs2_xattr_set_in_bucket.
		 */
		new_size = OCFS2_XATTR_SIZE(name_len) +
			   OCFS2_XATTR_SIZE(xi->value_len);

		le16_add_cpu(&xh->xh_name_value_len, -size);
		if (xi->value) {
			if (new_size > size)
				goto set_new_name_value;

			/* Now replace the old value with new one. */
			if (local)
				xe->xe_value_size = cpu_to_le64(xi->value_len);
			else
				xe->xe_value_size = 0;

			val = ocfs2_xattr_bucket_get_val(inode,
							 &xs->bucket, offs);
			memset(val + OCFS2_XATTR_SIZE(name_len), 0,
			       size - OCFS2_XATTR_SIZE(name_len));
			if (OCFS2_XATTR_SIZE(xi->value_len) > 0)
				memcpy(val + OCFS2_XATTR_SIZE(name_len),
				       xi->value, xi->value_len);

			le16_add_cpu(&xh->xh_name_value_len, new_size);
			ocfs2_xattr_set_local(xe, local);
			return;
		} else {
			/* Remove the old entry. */
			last -= 1;
			memmove(xe, xe + 1,
				(void *)last - (void *)xe);
			memset(last, 0, sizeof(struct ocfs2_xattr_entry));
			le16_add_cpu(&xh->xh_count, -1);
			if (xh->xh_count == 0 && is_empty)
				*is_empty = 1;
			return;
		}
	} else {
		/* find a new entry for insert. */
		int low = 0, high = count - 1, tmp;
		struct ocfs2_xattr_entry *tmp_xe;

		while (low <= high) {
			tmp = (low + high) / 2;
			tmp_xe = &xh->xh_entries[tmp];

			if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash))
				low = tmp + 1;
			else if (name_hash <
				 le32_to_cpu(tmp_xe->xe_name_hash))
				high = tmp - 1;
			else {
				low = tmp;
				break;
			}
		}

		xe = &xh->xh_entries[low];
		if (low != count)
			memmove(xe + 1, xe, (void *)last - (void *)xe);

		le16_add_cpu(&xh->xh_count, 1);
		memset(xe, 0, sizeof(struct ocfs2_xattr_entry));
		xe->xe_name_hash = cpu_to_le32(name_hash);
		xe->xe_name_len = name_len;
		ocfs2_xattr_set_type(xe, xi->name_index);
	}

set_new_name_value:
	/* Insert the new name+value. */
	size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(xi->value_len);

	/*
	 * We must make sure that the name/value pair
	 * exists in the same block.
	 */
	offs = le16_to_cpu(xh->xh_free_start);
	start = offs - size;

	if (start >> inode->i_sb->s_blocksize_bits !=
	    (offs - 1) >> inode->i_sb->s_blocksize_bits) {
		offs = offs - offs % blocksize;
		xh->xh_free_start = cpu_to_le16(offs);
	}

	val = ocfs2_xattr_bucket_get_val(inode,
					 &xs->bucket, offs - size);
	xe->xe_name_offset = cpu_to_le16(offs - size);

	memset(val, 0, size);
	memcpy(val, xi->name, name_len);
	memcpy(val + OCFS2_XATTR_SIZE(name_len), xi->value, xi->value_len);

	xe->xe_value_size = cpu_to_le64(xi->value_len);
	ocfs2_xattr_set_local(xe, local);
	xs->here = xe;
	le16_add_cpu(&xh->xh_free_start, -size);
	le16_add_cpu(&xh->xh_name_value_len, size);

	return;
}

static int ocfs2_xattr_bucket_handle_journal(struct inode *inode,
					     handle_t *handle,
					     struct ocfs2_xattr_search *xs,
					     struct buffer_head **bhs,
					     u16 bh_num)
{
	int ret = 0, off, block_off;
	struct ocfs2_xattr_entry *xe = xs->here;

	/*
	 * First calculate all the blocks we should journal_access
	 * and journal_dirty. The first block should always be touched.
	 */
	ret = ocfs2_journal_dirty(handle, bhs[0]);
	if (ret)
		mlog_errno(ret);

	/* calc the data. */
	off = le16_to_cpu(xe->xe_name_offset);
	block_off = off >> inode->i_sb->s_blocksize_bits;
	ret = ocfs2_journal_dirty(handle, bhs[block_off]);
	if (ret)
		mlog_errno(ret);

	return ret;
}

/*
 * Set the xattr entry in the specified bucket.
 * The bucket is indicated by xs->bucket and it should have the enough
 * space for the xattr insertion.
 */
static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode,
					   struct ocfs2_xattr_info *xi,
					   struct ocfs2_xattr_search *xs,
					   u32 name_hash,
					   int local,
					   int *bucket_empty)
{
	int i, ret;
	handle_t *handle = NULL;
	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

	mlog(0, "Set xattr entry len = %lu index = %d in bucket %llu\n",
	     (unsigned long)xi->value_len, xi->name_index,
	     (unsigned long long)xs->bucket.bhs[0]->b_blocknr);

	if (!xs->bucket.bhs[1]) {
		ret = ocfs2_read_blocks(osb,
					xs->bucket.bhs[0]->b_blocknr + 1,
					blk_per_bucket - 1, &xs->bucket.bhs[1],
					OCFS2_BH_CACHED, inode);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
	}

	handle = ocfs2_start_trans(osb, blk_per_bucket);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		handle = NULL;
		mlog_errno(ret);
		goto out;
	}

	for (i = 0; i < blk_per_bucket; i++) {
		ret = ocfs2_journal_access(handle, inode, xs->bucket.bhs[i],
					   OCFS2_JOURNAL_ACCESS_WRITE);
		if (ret < 0) {
			mlog_errno(ret);
			goto out;
		}
	}

	ocfs2_xattr_set_entry_normal(inode, xi, xs, name_hash,
				     local, bucket_empty);

	/*Only dirty the blocks we have touched in set xattr. */
	ret = ocfs2_xattr_bucket_handle_journal(inode, handle, xs,
						xs->bucket.bhs, blk_per_bucket);
	if (ret)
		mlog_errno(ret);
out:
	ocfs2_commit_trans(osb, handle);

	return ret;
}

static int ocfs2_xattr_value_update_size(struct inode *inode,
					 struct buffer_head *xe_bh,
					 struct ocfs2_xattr_entry *xe,
					 u64 new_size)
{
	int ret;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	handle_t *handle = NULL;

	handle = ocfs2_start_trans(osb, 1);
	if (handle == NULL) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_journal_access(handle, inode, xe_bh,
				   OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret < 0) {
		mlog_errno(ret);
		goto out_commit;
	}

	xe->xe_value_size = cpu_to_le64(new_size);

	ret = ocfs2_journal_dirty(handle, xe_bh);
	if (ret < 0)
		mlog_errno(ret);

out_commit:
	ocfs2_commit_trans(osb, handle);
out:
	return ret;
}

/*
 * Truncate the specified xe_off entry in xattr bucket.
 * bucket is indicated by header_bh and len is the new length.
 * Both the ocfs2_xattr_value_root and the entry will be updated here.
 *
 * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed.
 */
static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
					     struct buffer_head *header_bh,
					     int xe_off,
					     int len)
{
	int ret, offset;
	u64 value_blk;
	struct buffer_head *value_bh = NULL;
	struct ocfs2_xattr_value_root *xv;
	struct ocfs2_xattr_entry *xe;
	struct ocfs2_xattr_header *xh =
			(struct ocfs2_xattr_header *)header_bh->b_data;
	size_t blocksize = inode->i_sb->s_blocksize;

	xe = &xh->xh_entries[xe_off];

	BUG_ON(!xe || ocfs2_xattr_is_local(xe));

	offset = le16_to_cpu(xe->xe_name_offset) +
		 OCFS2_XATTR_SIZE(xe->xe_name_len);

	value_blk = offset / blocksize;

	/* We don't allow ocfs2_xattr_value to be stored in different block. */
	BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize);
	value_blk += header_bh->b_blocknr;

	ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), value_blk,
			       &value_bh, OCFS2_BH_CACHED, inode);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	xv = (struct ocfs2_xattr_value_root *)
		(value_bh->b_data + offset % blocksize);

	mlog(0, "truncate %u in xattr bucket %llu to %d bytes.\n",
	     xe_off, (unsigned long long)header_bh->b_blocknr, len);
	ret = ocfs2_xattr_value_truncate(inode, value_bh, xv, len);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_xattr_value_update_size(inode, header_bh, xe, len);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

out:
	brelse(value_bh);
	return ret;
}

static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode,
						struct ocfs2_xattr_search *xs,
						int len)
{
	int ret, offset;
	struct ocfs2_xattr_entry *xe = xs->here;
	struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)xs->base;

	BUG_ON(!xs->bucket.bhs[0] || !xe || ocfs2_xattr_is_local(xe));

	offset = xe - xh->xh_entries;
	ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket.bhs[0],
						offset, len);
	if (ret)
		mlog_errno(ret);

	return ret;
}

static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode,
						struct ocfs2_xattr_search *xs,
						char *val,
						int value_len)
{
	int offset;
	struct ocfs2_xattr_value_root *xv;
	struct ocfs2_xattr_entry *xe = xs->here;

	BUG_ON(!xs->base || !xe || ocfs2_xattr_is_local(xe));

	offset = le16_to_cpu(xe->xe_name_offset) +
		 OCFS2_XATTR_SIZE(xe->xe_name_len);

	xv = (struct ocfs2_xattr_value_root *)(xs->base + offset);

	return __ocfs2_xattr_set_value_outside(inode, xv, val, value_len);
}

/*
 * Remove the xattr bucket pointed by bucket_bh.
 * All the buckets after it in the same xattr extent rec will be
 * move forward one by one.
 */
static int ocfs2_rm_xattr_bucket(struct inode *inode,
				 struct buffer_head *first_bh,
				 struct ocfs2_xattr_bucket *bucket)
{
	int ret = 0, credits;
	struct ocfs2_xattr_header *xh =
				(struct ocfs2_xattr_header *)first_bh->b_data;
	u16 bucket_num = le16_to_cpu(xh->xh_num_buckets);
	u64 end, start = bucket->bhs[0]->b_blocknr;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	handle_t *handle;
	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);

	end = first_bh->b_blocknr + (bucket_num - 1) * blk_per_bucket;

	mlog(0, "rm xattr bucket %llu\n", start);
	/*
	 * We need to update the first xattr_header and all the buckets starting
	 * from start in this xattr rec.
	 *
	 * XXX: Should we empty the old last bucket here?
	 */
	credits = 1 + end - start;
	handle = ocfs2_start_trans(osb, credits);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		return ret;
	}

	ret = ocfs2_journal_access(handle, inode, first_bh,
				   OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}


	while (start < end) {
		ret = ocfs2_cp_xattr_bucket(inode, handle,
					    start + blk_per_bucket,
					    start, 0);
		if (ret) {
			mlog_errno(ret);
			goto out_commit;
		}
		start += blk_per_bucket;
	}

	/* update the first_bh. */
	xh->xh_num_buckets = cpu_to_le16(bucket_num - 1);
	ocfs2_journal_dirty(handle, first_bh);

out_commit:
	ocfs2_commit_trans(osb, handle);
	return ret;
}

static int ocfs2_rm_xattr_cluster(struct inode *inode,
				  struct buffer_head *root_bh,
				  u64 blkno,
				  u32 cpos,
				  u32 len)
{
	int ret;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct inode *tl_inode = osb->osb_tl_inode;
	handle_t *handle;
	struct ocfs2_xattr_block *xb =
			(struct ocfs2_xattr_block *)root_bh->b_data;
	struct ocfs2_alloc_context *meta_ac = NULL;
	struct ocfs2_cached_dealloc_ctxt dealloc;
	struct ocfs2_extent_tree et;

	ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh);

	ocfs2_init_dealloc_ctxt(&dealloc);

	mlog(0, "rm xattr extent rec at %u len = %u, start from %llu\n",
	     cpos, len, (unsigned long long)blkno);

	ocfs2_remove_xattr_clusters_from_cache(inode, blkno, len);

	ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
	if (ret) {
		mlog_errno(ret);
		return ret;
	}

	mutex_lock(&tl_inode->i_mutex);

	if (ocfs2_truncate_log_needs_flush(osb)) {
		ret = __ocfs2_flush_truncate_log(osb);
		if (ret < 0) {
			mlog_errno(ret);
			goto out;
		}
	}

	handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
	if (handle == NULL) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_journal_access(handle, inode, root_bh,
				   OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, meta_ac,
				  &dealloc);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len);

	ret = ocfs2_journal_dirty(handle, root_bh);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	ret = ocfs2_truncate_log_append(osb, handle, blkno, len);
	if (ret)
		mlog_errno(ret);

out_commit:
	ocfs2_commit_trans(osb, handle);
out:
	ocfs2_schedule_truncate_log_flush(osb, 1);

	mutex_unlock(&tl_inode->i_mutex);

	if (meta_ac)
		ocfs2_free_alloc_context(meta_ac);

	ocfs2_run_deallocs(osb, &dealloc);

	return ret;
}

/*
 * Free the xattr bucket indicated by xs->bucket and if all the buckets
 * in the clusters is free, free the clusters also.
 */
static int ocfs2_xattr_bucket_shrink(struct inode *inode,
				     struct ocfs2_xattr_info *xi,
				     struct ocfs2_xattr_search *xs,
				     u32 name_hash)
{
	int ret;
	u32 e_cpos, num_clusters;
	u64 p_blkno;
	struct buffer_head *first_bh = NULL;
	struct ocfs2_xattr_header *first_xh;
	struct ocfs2_xattr_block *xb =
			(struct ocfs2_xattr_block *)xs->xattr_bh->b_data;

	BUG_ON(xs->header->xh_count != 0);

	ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
				  &e_cpos, &num_clusters,
				  &xb->xb_attrs.xb_root.xt_list);
	if (ret) {
		mlog_errno(ret);
		return ret;
	}

	ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), p_blkno,
			       &first_bh, OCFS2_BH_CACHED, inode);
	if (ret) {
		mlog_errno(ret);
		return ret;
	}

	ret = ocfs2_rm_xattr_bucket(inode, first_bh, &xs->bucket);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	first_xh = (struct ocfs2_xattr_header *)first_bh->b_data;
	if (first_xh->xh_num_buckets == 0)
		ret = ocfs2_rm_xattr_cluster(inode, xs->xattr_bh,
					     p_blkno, e_cpos,
					     num_clusters);

out:
	brelse(first_bh);
	return ret;
}

static void ocfs2_xattr_bucket_remove_xs(struct inode *inode,
					 struct ocfs2_xattr_search *xs)
{
	handle_t *handle = NULL;
	struct ocfs2_xattr_header *xh = xs->bucket.xh;
	struct ocfs2_xattr_entry *last = &xh->xh_entries[
						le16_to_cpu(xh->xh_count) - 1];
	int ret = 0;

	handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), 1);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		return;
	}

	ret = ocfs2_journal_access(handle, inode, xs->bucket.bhs[0],
				   OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	/* Remove the old entry. */
	memmove(xs->here, xs->here + 1,
		(void *)last - (void *)xs->here);
	memset(last, 0, sizeof(struct ocfs2_xattr_entry));
	le16_add_cpu(&xh->xh_count, -1);

	ret = ocfs2_journal_dirty(handle, xs->bucket.bhs[0]);
	if (ret < 0)
		mlog_errno(ret);
out_commit:
	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
}

/*
 * Set the xattr name/value in the bucket specified in xs.
 *
 * As the new value in xi may be stored in the bucket or in an outside cluster,
 * we divide the whole process into 3 steps:
 * 1. insert name/value in the bucket(ocfs2_xattr_set_entry_in_bucket)
 * 2. truncate of the outside cluster(ocfs2_xattr_bucket_value_truncate_xs)
 * 3. Set the value to the outside cluster(ocfs2_xattr_bucket_set_value_outside)
 * 4. If the clusters for the new outside value can't be allocated, we need
 *    to free the xattr we allocated in set.
 */
static int ocfs2_xattr_set_in_bucket(struct inode *inode,
				     struct ocfs2_xattr_info *xi,
				     struct ocfs2_xattr_search *xs)
{
	int ret, local = 1, bucket_empty = 0;
	size_t value_len;
	char *val = (char *)xi->value;
	struct ocfs2_xattr_entry *xe = xs->here;
	u32 name_hash = ocfs2_xattr_hash_by_name(inode,
						 xi->name_index, xi->name);

	if (!xs->not_found && !ocfs2_xattr_is_local(xe)) {
		/*
		 * We need to truncate the xattr storage first.
		 *
		 * If both the old and new value are stored to
		 * outside block, we only need to truncate
		 * the storage and then set the value outside.
		 *
		 * If the new value should be stored within block,
		 * we should free all the outside block first and
		 * the modification to the xattr block will be done
		 * by following steps.
		 */
		if (xi->value_len > OCFS2_XATTR_INLINE_SIZE)
			value_len = xi->value_len;
		else
			value_len = 0;

		ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
							   value_len);
		if (ret)
			goto out;

		if (value_len)
			goto set_value_outside;
	}

	value_len = xi->value_len;
	/* So we have to handle the inside block change now. */
	if (value_len > OCFS2_XATTR_INLINE_SIZE) {
		/*
		 * If the new value will be stored outside of block,
		 * initalize a new empty value root and insert it first.
		 */
		local = 0;
		xi->value = &def_xv;
		xi->value_len = OCFS2_XATTR_ROOT_SIZE;
	}

	ret = ocfs2_xattr_set_entry_in_bucket(inode, xi, xs, name_hash,
					      local, &bucket_empty);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	if (value_len > OCFS2_XATTR_INLINE_SIZE) {
		/* allocate the space now for the outside block storage. */
		ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
							   value_len);
		if (ret) {
			mlog_errno(ret);

			if (xs->not_found) {
				/*
				 * We can't allocate enough clusters for outside
				 * storage and we have allocated xattr already,
				 * so need to remove it.
				 */
				ocfs2_xattr_bucket_remove_xs(inode, xs);
			}
			goto out;
		}
	} else {
		if (bucket_empty)
			ret = ocfs2_xattr_bucket_shrink(inode, xi,
							xs, name_hash);
		goto out;
	}

set_value_outside:
	ret = ocfs2_xattr_bucket_set_value_outside(inode, xs, val, value_len);
out:
	return ret;
}

/* check whether the xattr bucket is filled up with the same hash value. */
static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
					      struct ocfs2_xattr_bucket *bucket)
{
	struct ocfs2_xattr_header *xh = bucket->xh;

	if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash ==
	    xh->xh_entries[0].xe_name_hash) {
		mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, "
		     "hash = %u\n",
		     (unsigned long long)bucket->bhs[0]->b_blocknr,
		     le32_to_cpu(xh->xh_entries[0].xe_name_hash));
		return -ENOSPC;
	}

	return 0;
}

static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
					     struct ocfs2_xattr_info *xi,
					     struct ocfs2_xattr_search *xs)
{
	struct ocfs2_xattr_header *xh;
	struct ocfs2_xattr_entry *xe;
	u16 count, header_size, xh_free_start;
	int i, free, max_free, need, old;
	size_t value_size = 0, name_len = strlen(xi->name);
	size_t blocksize = inode->i_sb->s_blocksize;
	int ret, allocation = 0;
	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);

	mlog_entry("Set xattr %s in xattr index block\n", xi->name);

try_again:
	xh = xs->header;
	count = le16_to_cpu(xh->xh_count);
	xh_free_start = le16_to_cpu(xh->xh_free_start);
	header_size = sizeof(struct ocfs2_xattr_header) +
			count * sizeof(struct ocfs2_xattr_entry);
	max_free = OCFS2_XATTR_BUCKET_SIZE -
		le16_to_cpu(xh->xh_name_value_len) - header_size;

	mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size "
			"of %u which exceed block size\n",
			(unsigned long long)xs->bucket.bhs[0]->b_blocknr,
			header_size);

	if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE)
		value_size = OCFS2_XATTR_ROOT_SIZE;
	else if (xi->value)
		value_size = OCFS2_XATTR_SIZE(xi->value_len);

	if (xs->not_found)
		need = sizeof(struct ocfs2_xattr_entry) +
			OCFS2_XATTR_SIZE(name_len) + value_size;
	else {
		need = value_size + OCFS2_XATTR_SIZE(name_len);

		/*
		 * We only replace the old value if the new length is smaller
		 * than the old one. Otherwise we will allocate new space in the
		 * bucket to store it.
		 */
		xe = xs->here;
		if (ocfs2_xattr_is_local(xe))
			old = OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
		else
			old = OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE);

		if (old >= value_size)
			need = 0;
	}

	free = xh_free_start - header_size;
	/*
	 * We need to make sure the new name/value pair
	 * can exist in the same block.
	 */
	if (xh_free_start % blocksize < need)
		free -= xh_free_start % blocksize;

	mlog(0, "xs->not_found = %d, in xattr bucket %llu: free = %d, "
	     "need = %d, max_free = %d, xh_free_start = %u, xh_name_value_len ="
	     " %u\n", xs->not_found,
	     (unsigned long long)xs->bucket.bhs[0]->b_blocknr,
	     free, need, max_free, le16_to_cpu(xh->xh_free_start),
	     le16_to_cpu(xh->xh_name_value_len));

	if (free < need || count == ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) {
		if (need <= max_free &&
		    count < ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) {
			/*
			 * We can create the space by defragment. Since only the
			 * name/value will be moved, the xe shouldn't be changed
			 * in xs.
			 */
			ret = ocfs2_defrag_xattr_bucket(inode, &xs->bucket);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}

			xh_free_start = le16_to_cpu(xh->xh_free_start);
			free = xh_free_start - header_size;
			if (xh_free_start % blocksize < need)
				free -= xh_free_start % blocksize;

			if (free >= need)
				goto xattr_set;

			mlog(0, "Can't get enough space for xattr insert by "
			     "defragment. Need %u bytes, but we have %d, so "
			     "allocate new bucket for it.\n", need, free);
		}

		/*
		 * We have to add new buckets or clusters and one
		 * allocation should leave us enough space for insert.
		 */
		BUG_ON(allocation);

		/*
		 * We do not allow for overlapping ranges between buckets. And
		 * the maximum number of collisions we will allow for then is
		 * one bucket's worth, so check it here whether we need to
		 * add a new bucket for the insert.
		 */
		ret = ocfs2_check_xattr_bucket_collision(inode, &xs->bucket);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		ret = ocfs2_add_new_xattr_bucket(inode,
						 xs->xattr_bh,
						 xs->bucket.bhs[0]);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		for (i = 0; i < blk_per_bucket; i++)
			brelse(xs->bucket.bhs[i]);

		memset(&xs->bucket, 0, sizeof(xs->bucket));

		ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh,
						   xi->name_index,
						   xi->name, xs);
		if (ret && ret != -ENODATA)
			goto out;
		xs->not_found = ret;
		allocation = 1;
		goto try_again;
	}

xattr_set:
	ret = ocfs2_xattr_set_in_bucket(inode, xi, xs);
out:
	mlog_exit(ret);
	return ret;
}

static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
					struct ocfs2_xattr_bucket *bucket,
					void *para)
{
	int ret = 0;
	struct ocfs2_xattr_header *xh = bucket->xh;
	u16 i;
	struct ocfs2_xattr_entry *xe;

	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
		xe = &xh->xh_entries[i];
		if (ocfs2_xattr_is_local(xe))
			continue;

		ret = ocfs2_xattr_bucket_value_truncate(inode,
							bucket->bhs[0],
							i, 0);
		if (ret) {
			mlog_errno(ret);
			break;
		}
	}

	return ret;
}

static int ocfs2_delete_xattr_index_block(struct inode *inode,
					  struct buffer_head *xb_bh)
{
	struct ocfs2_xattr_block *xb =
			(struct ocfs2_xattr_block *)xb_bh->b_data;
	struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
	int ret = 0;
	u32 name_hash = UINT_MAX, e_cpos, num_clusters;
	u64 p_blkno;

	if (le16_to_cpu(el->l_next_free_rec) == 0)
		return 0;

	while (name_hash > 0) {
		ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
					  &e_cpos, &num_clusters, el);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		ret = ocfs2_iterate_xattr_buckets(inode, p_blkno, num_clusters,
						  ocfs2_delete_xattr_in_bucket,
						  NULL);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		ret = ocfs2_rm_xattr_cluster(inode, xb_bh,
					     p_blkno, e_cpos, num_clusters);
		if (ret) {
			mlog_errno(ret);
			break;
		}

		if (e_cpos == 0)
			break;

		name_hash = e_cpos - 1;
	}

out:
	return ret;
}