Skip to content

Commit 78579dd

Browse files
committed
DAOS-17535 chk: misc improvements for CR logic
Includes the following: 1. When creating the CHK IV namespace, make the secondary group the same as the primary group. Otherwise, the CHK logic may hit DER_NONEXIST errors when communicating via IV. 2. Integrate the CHK IV namespace create and destroy APIs, clean up the related logic, and redefine the version. 3. Get the rank list and the IV namespace version from the CHK leader when rejoining. Adjust the CHK_REJOIN RPC for the related changes. 4. Remove the unsupported functionality for checking a specified 'phase'. 5. Add a new test for the case of losing some engine(s) before starting the checker. Test-tag: recovery Signed-off-by: Fan Yong <fan.yong@hpe.com>
1 parent d9ae7df commit 78579dd

File tree

13 files changed

+317
-236
lines changed

13 files changed

+317
-236
lines changed

src/chk/chk_common.c

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1073,8 +1073,7 @@ chk_policy_refresh(uint32_t policy_nr, struct chk_policy *policies, struct chk_p
10731073
}
10741074

10751075
int
1076-
chk_prop_prepare(d_rank_t leader, uint32_t flags, int phase,
1077-
uint32_t policy_nr, struct chk_policy *policies,
1076+
chk_prop_prepare(d_rank_t leader, uint32_t flags, uint32_t policy_nr, struct chk_policy *policies,
10781077
d_rank_list_t *ranks, struct chk_property *prop)
10791078
{
10801079
int rc = 0;
@@ -1086,11 +1085,8 @@ chk_prop_prepare(d_rank_t leader, uint32_t flags, int phase,
10861085
prop->cp_flags &= ~CHK__CHECK_FLAG__CF_FAILOUT;
10871086
if (flags & CHK__CHECK_FLAG__CF_NO_AUTO)
10881087
prop->cp_flags &= ~CHK__CHECK_FLAG__CF_AUTO;
1089-
prop->cp_flags |= flags & ~(CHK__CHECK_FLAG__CF_RESET |
1090-
CHK__CHECK_FLAG__CF_ORPHAN_POOL |
1091-
CHK__CHECK_FLAG__CF_NO_FAILOUT |
1092-
CHK__CHECK_FLAG__CF_NO_AUTO);
1093-
prop->cp_phase = phase;
1088+
prop->cp_flags |= flags & ~(CHK__CHECK_FLAG__CF_RESET | CHK__CHECK_FLAG__CF_ORPHAN_POOL |
1089+
CHK__CHECK_FLAG__CF_NO_FAILOUT | CHK__CHECK_FLAG__CF_NO_AUTO);
10941090
if (ranks != NULL)
10951091
prop->cp_rank_nr = ranks->rl_nr;
10961092

@@ -1301,10 +1297,7 @@ chk_ins_fini(struct chk_instance **p_ins)
13011297
return;
13021298

13031299
ins->ci_inited = 0;
1304-
chk_iv_ns_cleanup(&ins->ci_iv_ns);
1305-
1306-
if (ins->ci_iv_group != NULL)
1307-
crt_group_secondary_destroy(ins->ci_iv_group);
1300+
chk_iv_ns_destroy(ins);
13081301

13091302
d_rank_list_free(ins->ci_ranks);
13101303
D_ASSERT(d_list_empty(&ins->ci_dead_ranks));

src/chk/chk_engine.c

Lines changed: 81 additions & 110 deletions
Original file line numberDiff line numberDiff line change
@@ -2045,9 +2045,8 @@ chk_engine_sched(void *args)
20452045

20462046
static int
20472047
chk_engine_start_prep(struct chk_instance *ins, uint32_t rank_nr, d_rank_t *ranks,
2048-
uint32_t policy_nr, struct chk_policy *policies, int pool_nr,
2049-
uuid_t pools[], uint64_t gen, int phase, uint32_t api_flags,
2050-
d_rank_t leader, uint32_t flags)
2048+
uint32_t policy_nr, struct chk_policy *policies, int pool_nr, uuid_t pools[],
2049+
uint64_t gen, uint32_t api_flags, d_rank_t leader, uint32_t flags)
20512050
{
20522051
struct chk_traverse_pools_args ctpa = { 0 };
20532052
struct chk_bookmark *cbk = &ins->ci_bk;
@@ -2134,8 +2133,7 @@ chk_engine_start_prep(struct chk_instance *ins, uint32_t rank_nr, d_rank_t *rank
21342133

21352134
init:
21362135
if (!chk_is_on_leader(gen, leader, true)) {
2137-
rc = chk_prop_prepare(leader, api_flags, phase, policy_nr, policies, rank_list,
2138-
prop);
2136+
rc = chk_prop_prepare(leader, api_flags, policy_nr, policies, rank_list, prop);
21392137
if (rc != 0)
21402138
goto out;
21412139

@@ -2263,16 +2261,15 @@ chk_engine_pool_filter(uuid_t uuid, void *arg, int *phase)
22632261
int
22642262
chk_engine_start(uint64_t gen, uint32_t rank_nr, d_rank_t *ranks, uint32_t policy_nr,
22652263
struct chk_policy *policies, int pool_nr, uuid_t pools[], uint32_t api_flags,
2266-
int phase, d_rank_t leader, uint32_t flags, uuid_t iv_uuid,
2264+
uint32_t ns_ver, d_rank_t leader, uint32_t flags, uuid_t iv_uuid,
22672265
struct ds_pool_clues *clues)
22682266
{
2269-
struct chk_instance *ins = chk_engine;
2270-
struct chk_bookmark *cbk = &ins->ci_bk;
2271-
struct umem_attr uma = { 0 };
2272-
char uuid_str[DAOS_UUID_STR_SIZE];
2273-
d_rank_t myrank = dss_self_rank();
2274-
int rc;
2275-
int rc1;
2267+
struct chk_instance *ins = chk_engine;
2268+
struct chk_bookmark *cbk = &ins->ci_bk;
2269+
struct umem_attr uma = {0};
2270+
d_rank_t myrank = dss_self_rank();
2271+
int rc;
2272+
int rc1;
22762273

22772274
rc = chk_ins_can_start(ins);
22782275
if (rc != 0)
@@ -2294,12 +2291,7 @@ chk_engine_start(uint64_t gen, uint32_t rank_nr, d_rank_t *ranks, uint32_t polic
22942291
if (ins->ci_sched != ABT_THREAD_NULL)
22952292
ABT_thread_free(&ins->ci_sched);
22962293

2297-
chk_iv_ns_cleanup(&ins->ci_iv_ns);
2298-
2299-
if (ins->ci_iv_group != NULL) {
2300-
crt_group_secondary_destroy(ins->ci_iv_group);
2301-
ins->ci_iv_group = NULL;
2302-
}
2294+
chk_iv_ns_destroy(ins);
23032295

23042296
uma.uma_id = UMEM_CLASS_VMEM;
23052297

@@ -2313,27 +2305,20 @@ chk_engine_start(uint64_t gen, uint32_t rank_nr, d_rank_t *ranks, uint32_t polic
23132305
if (rc != 0)
23142306
goto out_tree;
23152307

2316-
rc = chk_engine_start_prep(ins, rank_nr, ranks, policy_nr, policies,
2317-
pool_nr, pools, gen, phase, api_flags, leader, flags);
2308+
rc = chk_engine_start_prep(ins, rank_nr, ranks, policy_nr, policies, pool_nr, pools, gen,
2309+
api_flags, leader, flags);
23182310
if (rc != 0)
23192311
goto out_tree;
23202312

23212313
if (chk_is_on_leader(gen, leader, true)) {
23222314
ins->ci_iv_ns = chk_leader_get_iv_ns();
2323-
if (unlikely(ins->ci_iv_ns == NULL))
2324-
goto out_tree;
2315+
D_ASSERT(ins->ci_iv_ns != NULL);
2316+
2317+
ins->ci_ns_ver = ns_ver;
23252318
} else {
2326-
uuid_unparse_lower(iv_uuid, uuid_str);
2327-
rc = crt_group_secondary_create(uuid_str, NULL, ins->ci_ranks, &ins->ci_iv_group);
2319+
rc = chk_iv_ns_create(ins, iv_uuid, leader, ns_ver);
23282320
if (rc != 0)
23292321
goto out_tree;
2330-
2331-
rc = ds_iv_ns_create(dss_get_module_info()->dmi_ctx, iv_uuid, ins->ci_iv_group,
2332-
&ins->ci_iv_id, &ins->ci_iv_ns);
2333-
if (rc != 0)
2334-
goto out_group;
2335-
2336-
ds_iv_ns_update(ins->ci_iv_ns, leader, ins->ci_iv_ns->iv_master_term + 1);
23372322
}
23382323

23392324
uuid_copy(cbk->cb_iv_uuid, iv_uuid);
@@ -2366,30 +2351,26 @@ chk_engine_start(uint64_t gen, uint32_t rank_nr, d_rank_t *ranks, uint32_t polic
23662351
D_WARN(DF_ENGINE" failed to update engine bookmark: "DF_RC"\n",
23672352
DP_ENGINE(ins), DP_RC(rc1));
23682353
}
2369-
chk_iv_ns_cleanup(&ins->ci_iv_ns);
2370-
out_group:
2371-
if (ins->ci_iv_group != NULL) {
2372-
crt_group_secondary_destroy(ins->ci_iv_group);
2373-
ins->ci_iv_group = NULL;
2374-
}
2354+
chk_iv_ns_destroy(ins);
23752355
out_tree:
23762356
chk_destroy_pending_tree(ins);
23772357
chk_destroy_pool_tree(ins);
23782358
out_done:
23792359
ins->ci_starting = 0;
23802360
out_log:
23812361
if (rc >= 0) {
2382-
D_INFO(DF_ENGINE " %s on rank %u with api_flags %x, phase %d, leader %u, "
2383-
"flags %x, iv "DF_UUIDF": rc %d\n",
2362+
D_INFO(DF_ENGINE " %s on rank %u with api_flags %x, ns_ver %d, leader %u, "
2363+
"flags %x, iv " DF_UUIDF ": rc %d\n",
23842364
DP_ENGINE(ins), chk_is_ins_reset(ins, api_flags) ? "start" : "resume",
2385-
myrank, api_flags, phase, leader, flags, DP_UUID(iv_uuid), rc);
2365+
myrank, api_flags, ns_ver, leader, flags, DP_UUID(iv_uuid), rc);
23862366

23872367
chk_ranks_dump(ins->ci_ranks->rl_nr, ins->ci_ranks->rl_ranks);
23882368
chk_pools_dump(&ins->ci_pool_list, pool_nr, pools);
23892369
} else {
2390-
D_ERROR(DF_ENGINE" failed to start on rank %u with %d pools, api_flags %x, "
2391-
"phase %d, leader %u, flags %x, gen "DF_X64", iv "DF_UUIDF": "DF_RC"\n",
2392-
DP_ENGINE(ins), myrank, pool_nr, api_flags, phase, leader, flags, gen,
2370+
D_ERROR(DF_ENGINE " failed to start on rank %u with %d pools, api_flags %x, "
2371+
"ns_ver %d, leader %u, flags %x, gen " DF_X64 ", iv " DF_UUIDF
2372+
": " DF_RC "\n",
2373+
DP_ENGINE(ins), myrank, pool_nr, api_flags, ns_ver, leader, flags, gen,
23932374
DP_UUID(iv_uuid), DP_RC(rc));
23942375
}
23952376

@@ -2413,7 +2394,7 @@ chk_engine_stop(uint64_t gen, int pool_nr, uuid_t pools[], uint32_t *flags)
24132394
if (cbk->cb_magic != CHK_BK_MAGIC_ENGINE)
24142395
D_GOTO(log, rc = -DER_NOTAPPLICABLE);
24152396

2416-
if (ins->ci_starting)
2397+
if (ins->ci_starting || ins->ci_rejoining)
24172398
D_GOTO(log, rc = -DER_BUSY);
24182399

24192400
if (ins->ci_stopping || ins->ci_sched_exiting)
@@ -2642,32 +2623,42 @@ chk_engine_query(uint64_t gen, int pool_nr, uuid_t pools[], uint32_t *ins_status
26422623
int
26432624
chk_engine_mark_rank_dead(uint64_t gen, d_rank_t rank, uint32_t version)
26442625
{
2645-
struct chk_instance *ins = chk_engine;
2646-
struct chk_property *prop = &ins->ci_prop;
2647-
struct chk_bookmark *cbk = &ins->ci_bk;
2648-
d_rank_list_t *rank_list = NULL;
2649-
int rc = 0;
2626+
struct chk_instance *ins = chk_engine;
2627+
struct chk_property *prop = &ins->ci_prop;
2628+
struct chk_bookmark *cbk = &ins->ci_bk;
2629+
int rc = 0;
26502630

26512631
if (cbk->cb_gen != gen)
26522632
D_GOTO(out, rc = -DER_NOTAPPLICABLE);
26532633

2654-
rc = chk_prop_fetch(prop, &rank_list);
2655-
if (rc != 0)
2656-
goto out;
2634+
/* For check engine on the leader, reload rank list that has been refreshed by leader. */
2635+
if (chk_is_on_leader(cbk->cb_gen, prop->cp_leader, true)) {
2636+
d_rank_list_free(ins->ci_ranks);
2637+
ins->ci_ranks = NULL;
2638+
}
26572639

2658-
D_ASSERT(rank_list != NULL);
2640+
if (ins->ci_ranks == NULL) {
2641+
rc = chk_prop_fetch(prop, &ins->ci_ranks);
2642+
if (rc != 0)
2643+
goto out;
26592644

2660-
/* For check engine on the leader, related rank has already been marked as "dead". */
2661-
if (chk_is_on_leader(cbk->cb_gen, prop->cp_leader, true))
2662-
goto group;
2645+
/* For check engine on the leader, it's done. */
2646+
if (chk_is_on_leader(cbk->cb_gen, prop->cp_leader, true)) {
2647+
ins->ci_ns_ver = version;
2648+
goto out;
2649+
}
2650+
}
2651+
2652+
if (unlikely(ins->ci_ranks == NULL))
2653+
D_GOTO(out, rc = -DER_NOTAPPLICABLE);
26632654

2664-
if (!chk_remove_rank_from_list(rank_list, rank))
2655+
if (!chk_remove_rank_from_list(ins->ci_ranks, rank))
26652656
D_GOTO(out, rc = -DER_NOTAPPLICABLE);
26662657

26672658
prop->cp_rank_nr--;
2668-
rc = chk_prop_update(prop, rank_list);
2669-
if (rc != 0)
2670-
goto out;
2659+
rc = chk_prop_update(prop, ins->ci_ranks);
2660+
if (rc == 0)
2661+
rc = chk_iv_ns_update(ins, version);
26712662

26722663
/*
26732664
* NOTE: If the rank dead before DAOS check start, then subsequent check start will
@@ -2688,19 +2679,7 @@ chk_engine_mark_rank_dead(uint64_t gen, d_rank_t rank, uint32_t version)
26882679
* sometime later as the DAOS check going.
26892680
*/
26902681

2691-
group:
2692-
if (ins->ci_iv_group != NULL)
2693-
rc = crt_group_secondary_modify(ins->ci_iv_group, rank_list, rank_list,
2694-
CRT_GROUP_MOD_OP_REPLACE, version);
2695-
26962682
out:
2697-
if (rc == 0) {
2698-
d_rank_list_free(ins->ci_ranks);
2699-
ins->ci_ranks = rank_list;
2700-
rank_list = NULL;
2701-
}
2702-
2703-
d_rank_list_free(rank_list);
27042683
if (rc != -DER_NOTAPPLICABLE)
27052684
D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO,
27062685
DF_ENGINE" on rank %u mark rank %u as dead with gen "
@@ -3364,19 +3343,19 @@ chk_engine_notify(struct chk_iv *iv)
33643343
void
33653344
chk_engine_rejoin(void *args)
33663345
{
3367-
struct chk_instance *ins = chk_engine;
3368-
struct chk_property *prop = &ins->ci_prop;
3369-
struct chk_bookmark *cbk = &ins->ci_bk;
3370-
uuid_t *pools = NULL;
3371-
struct chk_iv iv = { 0 };
3372-
struct umem_attr uma = { 0 };
3373-
char uuid_str[DAOS_UUID_STR_SIZE];
3374-
d_rank_t myrank = dss_self_rank();
3375-
uint32_t pool_nr = 0;
3376-
uint32_t flags = 0;
3377-
int rc = 0;
3378-
int rc1;
3379-
bool need_join = false;
3346+
struct chk_instance *ins = chk_engine;
3347+
struct chk_property *prop = &ins->ci_prop;
3348+
struct chk_bookmark *cbk = &ins->ci_bk;
3349+
d_rank_list_t *ranks = NULL;
3350+
uuid_t *pools = NULL;
3351+
struct chk_iv iv = {0};
3352+
struct umem_attr uma = {0};
3353+
d_rank_t myrank = dss_self_rank();
3354+
uint32_t pool_nr = 0;
3355+
uint32_t flags = 0;
3356+
int rc = 0;
3357+
int rc1;
3358+
bool need_join = false;
33803359

33813360
if (cbk->cb_magic != CHK_BK_MAGIC_ENGINE)
33823361
goto out_log;
@@ -3385,7 +3364,7 @@ chk_engine_rejoin(void *args)
33853364
cbk->cb_ins_status != CHK__CHECK_INST_STATUS__CIS_PAUSED)
33863365
goto out_log;
33873366

3388-
/* We do NOT support leader (and its associated engine ) to rejoin former check instance. */
3367+
/* We do NOT support leader (and its associated engine) to rejoin former check instance. */
33893368
if (chk_is_on_leader(cbk->cb_gen, prop->cp_leader, true))
33903369
goto out_log;
33913370

@@ -3420,22 +3399,10 @@ chk_engine_rejoin(void *args)
34203399
if (rc != 0)
34213400
goto out_tree;
34223401

3423-
uuid_unparse_lower(cbk->cb_iv_uuid, uuid_str);
3424-
rc = crt_group_secondary_create(uuid_str, NULL, ins->ci_ranks, &ins->ci_iv_group);
3425-
if (rc != 0)
3426-
goto out_tree;
3427-
3428-
rc = ds_iv_ns_create(dss_get_module_info()->dmi_ctx, cbk->cb_iv_uuid, ins->ci_iv_group,
3429-
&ins->ci_iv_id, &ins->ci_iv_ns);
3430-
if (rc != 0)
3431-
goto out_group;
3432-
3433-
ds_iv_ns_update(ins->ci_iv_ns, prop->cp_leader, ins->ci_iv_ns->iv_master_term + 1);
3434-
34353402
again:
34363403
/* Ask leader whether this engine can rejoin or not. */
34373404
rc = chk_rejoin_remote(prop->cp_leader, cbk->cb_gen, myrank, cbk->cb_iv_uuid, &flags,
3438-
&pool_nr, &pools);
3405+
&ins->ci_ns_ver, &pool_nr, &pools, &ranks);
34393406
if (rc != 0) {
34403407
if ((rc == -DER_OOG || rc == -DER_GRPVER) && !ins->ci_pause) {
34413408
D_INFO(DF_ENGINE" Someone is not ready %d, let's rejoin after 1 sec\n",
@@ -3445,14 +3412,22 @@ chk_engine_rejoin(void *args)
34453412
goto again;
34463413
}
34473414

3448-
goto out_iv;
3415+
goto out_tree;
34493416
}
34503417

3451-
if (pool_nr == 0) {
3418+
if (ranks == NULL || pool_nr == 0) {
34523419
need_join = false;
3453-
D_GOTO(out_iv, rc = 1);
3420+
D_GOTO(out_tree, rc = 1);
34543421
}
34553422

3423+
d_rank_list_free(ins->ci_ranks);
3424+
ins->ci_ranks = ranks;
3425+
ranks = NULL;
3426+
3427+
rc = chk_iv_ns_create(ins, cbk->cb_iv_uuid, prop->cp_leader, ins->ci_ns_ver);
3428+
if (rc != 0)
3429+
goto out_tree;
3430+
34563431
rc = chk_pools_load_list(ins, cbk->cb_gen, 0, pool_nr, pools, NULL);
34573432
if (rc != 0)
34583433
goto out_notify;
@@ -3496,17 +3471,13 @@ chk_engine_rejoin(void *args)
34963471
D_CDEBUG(rc1 != 0, DLOG_ERR, DLOG_INFO,
34973472
DF_ENGINE" on rank %u notify leader for its exit, status %u: rc1 = %d\n",
34983473
DP_ENGINE(ins), myrank, cbk->cb_ins_status, rc1);
3499-
out_iv:
3500-
chk_iv_ns_cleanup(&ins->ci_iv_ns);
3501-
out_group:
3502-
if (ins->ci_iv_group != NULL) {
3503-
crt_group_secondary_destroy(ins->ci_iv_group);
3504-
ins->ci_iv_group = NULL;
3505-
}
3474+
chk_iv_ns_destroy(ins);
35063475
out_tree:
35073476
chk_destroy_pending_tree(ins);
35083477
chk_destroy_pool_tree(ins);
35093478
out_log:
3479+
d_rank_list_free(ranks);
3480+
D_FREE(pools);
35103481
if (need_join)
35113482
D_CDEBUG(rc < 0, DLOG_ERR, DLOG_INFO,
35123483
DF_ENGINE" rejoin on rank %u with iv "DF_UUIDF": "DF_RC"\n",

0 commit comments

Comments
 (0)