@@ -422,7 +422,7 @@ impl AllRuns {
422
422
runner_test_command_differs,
423
423
} )
424
424
}
425
- Some ( ( old_entity, old_finished_state ) ) => {
425
+ Some ( ( old_entity, old_finished_time ) ) => {
426
426
if old_entity. id == entity. id {
427
427
// The same worker entity is connecting twice - this implies that the
428
428
// same worker process is asking to find a run more than once, which
@@ -453,13 +453,13 @@ impl AllRuns {
453
453
// itself, and if this is done we could hand out the manifest right here,
454
454
// rather than asking the runner to reconnect to retrieve the manifest.
455
455
tracing:: info!(
456
- ?old_finished_state ,
456
+ ?old_finished_time ,
457
457
?entity,
458
458
"worker reconnecting for out-of-process retry manifest during active run"
459
459
) ;
460
460
461
461
AssignedRunStatus :: Run ( AssignedRun {
462
- kind : AssignedRunKind :: Retry ,
462
+ kind : AssignedRunKind :: RetryAndContinue ,
463
463
runner_test_command_differs,
464
464
} )
465
465
}
@@ -488,7 +488,7 @@ impl AllRuns {
488
488
}
489
489
490
490
AssignedRunStatus :: Run ( AssignedRun {
491
- kind : AssignedRunKind :: Retry ,
491
+ kind : AssignedRunKind :: RetryAndContinue ,
492
492
runner_test_command_differs,
493
493
} )
494
494
} else {
@@ -1305,21 +1305,11 @@ impl AllRuns {
1305
1305
// legal cancellation states
1306
1306
}
1307
1307
RunState :: InitialManifestDone { seen_workers, .. } => {
1308
- // Since we already have issued the full manifest out, don't mark this run as
1309
- // cancelled; this might be a stragling worker or a worker that cancelled an
1310
- // out-of-process retry.
1311
1308
tracing:: info!(
1312
1309
?run_id,
1313
1310
"refusing to cancel run whose manifest has already been exhausted"
1314
1311
) ;
1315
- // Mark the worker as now-inactive.
1316
- let old_tag = seen_workers. write ( ) . insert_by_tag ( entity, false ) ;
1317
- log_assert ! (
1318
- old_tag. is_some( ) ,
1319
- ?entity,
1320
- ?run_id,
1321
- "entity was not seen before it marked cancellation"
1322
- ) ;
1312
+ seen_workers. write ( ) . insert_by_tag ( entity, false ) ;
1323
1313
return ;
1324
1314
}
1325
1315
}
@@ -3995,7 +3985,7 @@ mod test {
3995
3985
assert_eq ! (
3996
3986
assigned,
3997
3987
AssignedRunStatus :: Run ( AssignedRun {
3998
- kind: AssignedRunKind :: Retry ,
3988
+ kind: AssignedRunKind :: RetryAndContinue ,
3999
3989
runner_test_command_differs: false
4000
3990
} )
4001
3991
) ;
@@ -4331,7 +4321,7 @@ mod persistence_on_end_of_manifest {
4331
4321
4332
4322
#[ tokio:: test]
4333
4323
#[ with_protocol_version]
4334
- async fn worker_told_to_pull_retry_manifest ( ) {
4324
+ async fn worker_told_to_pull_retry_manifest_and_continue ( ) {
4335
4325
let queues = SharedRuns :: default ( ) ;
4336
4326
let remote = remote:: NoopPersister :: new ( ) . into ( ) ;
4337
4327
@@ -4388,10 +4378,75 @@ mod persistence_on_end_of_manifest {
4388
4378
. build ( ) ,
4389
4379
)
4390
4380
. await ;
4381
+
4382
+ assert_eq ! (
4383
+ assigned,
4384
+ AssignedRunStatus :: Run ( AssignedRun {
4385
+ kind: AssignedRunKind :: RetryAndContinue ,
4386
+ runner_test_command_differs: false
4387
+ } )
4388
+ ) ;
4389
+ }
4390
+
4391
+ #[ tokio:: test]
4392
+ #[ with_protocol_version]
4393
+ async fn worker_told_to_pull_retry_manifest_no_continue ( ) {
4394
+ let queues = SharedRuns :: default ( ) ;
4395
+ let remote = remote:: NoopPersister :: new ( ) . into ( ) ;
4396
+
4397
+ let run_id = RunId :: unique ( ) ;
4398
+
4399
+ let worker0 = Entity :: runner ( 1 , 1 ) ;
4400
+ let worker0_shadow = Entity :: runner ( 1 , 1 ) ;
4401
+ assert_ne ! ( worker0. id, worker0_shadow. id) ;
4402
+ assert_eq ! ( worker0. tag, worker0_shadow. tag) ;
4403
+
4404
+ let test1 = fake_test_spec ( proto) ;
4405
+ let test2 = fake_test_spec ( proto) ;
4406
+ let test3 = fake_test_spec ( proto) ;
4407
+
4408
+ let test_command_hash = TestCommandHash :: random ( ) ;
4409
+
4410
+ // Create run, add manifest by worker0
4411
+ {
4412
+ let run_params = RunParamsBuilder :: new ( & run_id, & remote)
4413
+ . entity ( worker0)
4414
+ . runner_test_command_hash ( test_command_hash)
4415
+ . build ( ) ;
4416
+ let manifest = vec ! [
4417
+ ( test1. clone( ) , GroupId :: new( ) ) ,
4418
+ ( test2, GroupId :: new( ) ) ,
4419
+ ( test3, GroupId :: new( ) ) ,
4420
+ ] ;
4421
+ let _ = queues. find_or_create_run ( run_params) . await ;
4422
+ let _ = queues. add_manifest ( & run_id, manifest, Default :: default ( ) ) ;
4423
+ }
4424
+
4425
+ // worker0 pulls tests
4426
+ {
4427
+ let NextWorkResult { bundle, .. } = queues. next_work ( worker0, & run_id) ;
4428
+ assert_eq ! (
4429
+ bundle. work,
4430
+ vec![ WorkerTest :: new( test1. clone( ) , INIT_RUN_NUMBER ) ]
4431
+ ) ;
4432
+ }
4433
+
4434
+ queues. mark_worker_complete ( & run_id, worker0, std:: time:: Instant :: now ( ) ) ;
4435
+
4436
+ // Suppose worker0 re-runs.
4437
+ let assigned = queues
4438
+ . find_or_create_run (
4439
+ RunParamsBuilder :: new ( & run_id, & remote)
4440
+ . entity ( worker0_shadow)
4441
+ . runner_test_command_hash ( test_command_hash)
4442
+ . build ( ) ,
4443
+ )
4444
+ . await ;
4445
+
4391
4446
assert_eq ! (
4392
4447
assigned,
4393
4448
AssignedRunStatus :: Run ( AssignedRun {
4394
- kind: AssignedRunKind :: Retry ,
4449
+ kind: AssignedRunKind :: RetryAndContinue ,
4395
4450
runner_test_command_differs: false
4396
4451
} )
4397
4452
) ;
0 commit comments