From b58165762a9f37b5a59ca9725b8a45b03ceafb39 Mon Sep 17 00:00:00 2001 From: Brennan Kinney <5098581+polarathene@users.noreply.github.com> Date: Sat, 24 Dec 2022 01:57:24 +1300 Subject: [PATCH] fix(changedetector): Use service `reload` commands instead of `supervisorctl restart ` (#2947) With `reload` a change detection event during local testing can be processed in less than a second according to logs. Previously this was 5+ seconds (_plus additional downtime for Postfix/Dovecot to become available again_). In the past it was apparently an issue to use ` reload` due to a concern with the PID for wrapper scripts that `supervisorctl` managed, thus `supervisorctl restart` had been used. Past discussions with maintainers suggest this is not likely an issue anymore, and `reload` should be fine to switch to now :+1: --- **NOTE:** It may not be an issue in the CI, but on _**local systems running tests may risk failure in `setup-cli.bats` from a false positive**_ due to 1 second polling window of the test helper method, and a change event being possible to occur entirely between the two checks undetected by the current approach. If this is a problem, we may need to think of a better way to catch the change. The `letsencrypt` test counts how many change events are expected to have been processed, and this could technically be leveraged by the test helper too. --- **NOTE:** These two lines (_with regex pattern for postfix_) are output in the terminal when using the services respective `reload` commands: ``` postfix/master.*: reload -- version .*, configuration /etc/postfix dovecot: master: Warning: SIGHUP received - reloading configuration ``` I wasn't sure how to match them as they did not appear in the `changedetector` log (_**EDIT:** they appear in the main log output, eg `docker logs `_). Instead I've just monitored the `changedetector` log messages, which should be ok for logic that previously needed to ensure Dovecot / Postfix was back up after the `restart` was issued. --- Commit history: * chore: Change events `reload` Dovecot and Postfix instead of `restart` Reloading is faster than restarting the processes. Restarting is a bit heavy handed here and may no longer be necessary for general usage? * tests: Adapt tests to support service `reload` instead of `restart` * chore: Additional logging for debugging change event logs * fix: Wait on change detection, then verify directory created Change detection is too fast now (0-1 seconds vs 5+). Directory being waited on here was created near the end of a change event, reducing that time to detect a change by the utility method further. We can instead check that the directory exists after the change detection event is completed. * chore: Keep using the maildir polling check We don't presently use remote storage in tests, but it might be relevant in future when testing NFS. This at least avoids any confusing failure happening when that scenario is tested. --- target/scripts/check-for-changes.sh | 11 ++++++----- test/helper/common.bash | 9 +++------ test/test_helper/common.bash | 9 +++------ test/tests/serial/mail_changedetector.bats | 20 ++++++++++---------- test/tests/serial/mail_ssl_letsencrypt.bats | 21 +++++++++------------ test/tests/serial/setup-cli.bats | 13 +++++++------ 6 files changed, 38 insertions(+), 45 deletions(-) diff --git a/target/scripts/check-for-changes.sh b/target/scripts/check-for-changes.sh index 5c3d34c6..11d4bcc0 100755 --- a/target/scripts/check-for-changes.sh +++ b/target/scripts/check-for-changes.sh @@ -56,13 +56,11 @@ function _check_for_changes _ssl_changes _postfix_dovecot_changes - # While some config changes may be properly applied by Postfix or Dovecot - # via their 'reload' commands; some may require restarting?: - _log_with_date 'debug' 'Restarting services due to detected changes' + _log_with_date 'debug' 'Reloading services due to detected changes' [[ ${ENABLE_AMAVIS} -eq 1 ]] && _reload_amavis - supervisorctl restart postfix - [[ ${SMTP_ONLY} -ne 1 ]] && supervisorctl restart dovecot + postfix reload + [[ ${SMTP_ONLY} -ne 1 ]] && dovecot reload _remove_lock _log_with_date 'debug' 'Completed handling of detected change' @@ -113,6 +111,7 @@ function _postfix_dovecot_changes || [[ ${CHANGED} =~ ${DMS_DIR}/dovecot-quotas.cf ]] \ || [[ ${CHANGED} =~ ${DMS_DIR}/dovecot-masters.cf ]] then + _log_with_date 'trace' 'Regenerating accounts (Dovecot + Postfix)' [[ ${SMTP_ONLY} -ne 1 ]] && _create_accounts fi @@ -126,6 +125,7 @@ function _postfix_dovecot_changes || [[ ${CHANGED} =~ ${DMS_DIR}/postfix-relaymap.cf ]] \ || [[ ${CHANGED} =~ ${DMS_DIR}/postfix-sasl-password.cf ]] then + _log_with_date 'trace' 'Regenerating relay config (Postfix)' _rebuild_relayhost fi @@ -138,6 +138,7 @@ function _postfix_dovecot_changes if [[ ${CHANGED} =~ ${DMS_DIR}/postfix-accounts.cf ]] \ || [[ ${CHANGED} =~ ${DMS_DIR}/postfix-virtual.cf ]] then + _log_with_date 'trace' 'Regenerating vhosts (Postfix)' _create_postfix_vhost fi diff --git a/test/helper/common.bash b/test/helper/common.bash index 6df8a2de..f9914aa3 100644 --- a/test/helper/common.bash +++ b/test/helper/common.bash @@ -181,18 +181,15 @@ function wait_until_change_detection_event_completes() { [[ $(__change_event_status) == "${CHANGE_EVENT_END}" ]] } - if [[ ! $(__is_changedetector_processing) ]] + # A new change event is expected, + # If the last event status is not yet `CHANGE_EVENT_START`, wait until it is: + if ! __is_changedetector_processing then - # A new change event is expected, wait for it: repeat_until_success_or_timeout 60 __is_changedetector_processing fi # Change event is in progress, wait until it finishes: repeat_until_success_or_timeout 60 __is_changedetector_finished - - # NOTE: Although the change event has completed, services like Postfix and Dovecot - # may still be in the process of restarting. - # You may still want to wait longer if depending on those to be ready. } # An account added to `postfix-accounts.cf` must wait for the `changedetector` service diff --git a/test/test_helper/common.bash b/test/test_helper/common.bash index 1ce9d587..234b01b2 100644 --- a/test/test_helper/common.bash +++ b/test/test_helper/common.bash @@ -201,18 +201,15 @@ function wait_until_change_detection_event_completes() { [[ $(__change_event_status) == "${CHANGE_EVENT_END}" ]] } - if [[ ! $(__is_changedetector_processing) ]] + # A new change event is expected, + # If the last event status is not yet `CHANGE_EVENT_START`, wait until it is: + if ! __is_changedetector_processing then - # A new change event is expected, wait for it: repeat_until_success_or_timeout 60 __is_changedetector_processing fi # Change event is in progress, wait until it finishes: repeat_until_success_or_timeout 60 __is_changedetector_finished - - # NOTE: Although the change event has completed, services like Postfix and Dovecot - # may still be in the process of restarting. - # You may still want to wait longer if depending on those to be ready. } # An account added to `postfix-accounts.cf` must wait for the `changedetector` service diff --git a/test/tests/serial/mail_changedetector.bats b/test/tests/serial/mail_changedetector.bats index 2a48bd07..b6700a35 100644 --- a/test/tests/serial/mail_changedetector.bats +++ b/test/tests/serial/mail_changedetector.bats @@ -38,17 +38,17 @@ function teardown_file() { echo "" >> "$(private_config_path mail_changedetector_one)/postfix-accounts.cf" sleep 25 - run docker exec mail_changedetector_one /bin/bash -c "supervisorctl tail -3000 changedetector" - assert_output --partial "postfix: stopped" - assert_output --partial "postfix: started" - assert_output --partial "Change detected" - assert_output --partial "Removed lock" + run docker exec mail_changedetector_one /bin/bash -c 'supervisorctl tail -3000 changedetector' + assert_output --partial 'Change detected' + assert_output --partial 'Reloading services due to detected changes' + assert_output --partial 'Removed lock' + assert_output --partial 'Completed handling of detected change' - run docker exec mail_changedetector_two /bin/bash -c "supervisorctl tail -3000 changedetector" - assert_output --partial "postfix: stopped" - assert_output --partial "postfix: started" - assert_output --partial "Change detected" - assert_output --partial "Removed lock" + run docker exec mail_changedetector_two /bin/bash -c 'supervisorctl tail -3000 changedetector' + assert_output --partial 'Change detected' + assert_output --partial 'Reloading services due to detected changes' + assert_output --partial 'Removed lock' + assert_output --partial 'Completed handling of detected change' } @test "checking changedetector: lock file found, blocks, and doesn't get prematurely removed" { diff --git a/test/tests/serial/mail_ssl_letsencrypt.bats b/test/tests/serial/mail_ssl_letsencrypt.bats index c16b9625..e764527b 100644 --- a/test/tests/serial/mail_ssl_letsencrypt.bats +++ b/test/tests/serial/mail_ssl_letsencrypt.bats @@ -139,7 +139,7 @@ function teardown() { # It should replace the cert files in the existing `letsencrypt/live/mail.example.test/` folder. function _acme_rsa() { _should_extract_on_changes 'mail.example.test' "${LOCAL_BASE_PATH}/rsa.acme.json" - _should_have_service_restart_count '1' + _should_have_service_reload_count '1' local RSA_KEY_PATH="${LOCAL_BASE_PATH}/key.rsa.pem" local RSA_CERT_PATH="${LOCAL_BASE_PATH}/cert.rsa.pem" @@ -151,7 +151,7 @@ function teardown() { # Wildcard `*.example.test` should extract to `example.test/` in `letsencrypt/live/`: function _acme_wildcard() { _should_extract_on_changes 'example.test' "${LOCAL_BASE_PATH}/wildcard/rsa.acme.json" - _should_have_service_restart_count '2' + _should_have_service_reload_count '2' # As the FQDN has changed since startup, the Postfix + Dovecot configs should be updated: _should_have_valid_config 'example.test' 'key.pem' 'fullchain.pem' @@ -240,20 +240,17 @@ function _should_extract_on_changes() { assert_output --partial 'Change detected' assert_output --partial "'/etc/letsencrypt/acme.json' has changed - extracting certificates" assert_output --partial "_extract_certs_from_acme | Certificate successfully extracted for '${EXPECTED_DOMAIN}'" - assert_output --partial 'Restarting services due to detected changes' - assert_output --partial 'postfix: stopped' - assert_output --partial 'postfix: started' - assert_output --partial 'dovecot: stopped' - assert_output --partial 'dovecot: started' + assert_output --partial 'Reloading services due to detected changes' + assert_output --partial 'Completed handling of detected change' } # Ensure change detection is not mistakenly validating against previous change events: -function _should_have_service_restart_count() { - local NUM_RESTARTS=${1} +function _should_have_service_reload_count() { + local NUM_RELOADS=${1} - # Count how many times postfix was restarted by the `changedetector` service: - run docker exec "${TEST_NAME}" sh -c "grep -c 'postfix: started' /var/log/supervisor/changedetector.log" - assert_output "${NUM_RESTARTS}" + # Count how many times processes (like Postfix and Dovecot) have been reloaded by the `changedetector` service: + run docker exec "${TEST_NAME}" sh -c "grep -c 'Completed handling of detected change' /var/log/supervisor/changedetector.log" + assert_output "${NUM_RELOADS}" } # Extracted cert files from `acme.json` have content matching the expected reference files: diff --git a/test/tests/serial/setup-cli.bats b/test/tests/serial/setup-cli.bats index 083a3754..d1cb78d5 100644 --- a/test/tests/serial/setup-cli.bats +++ b/test/tests/serial/setup-cli.bats @@ -51,14 +51,15 @@ function teardown_file() { ACCOUNT=$(grep "${MAIL_ACCOUNT}" "${DATABASE_ACCOUNTS}" | awk -F '|' '{print $1}') assert_equal "${ACCOUNT}" "${MAIL_ACCOUNT}" - # Wait for change detection event to complete (create maildir and add account to Dovecot UserDB+PassDB) - wait_until_account_maildir_exists "${TEST_NAME}" "${MAIL_ACCOUNT}" - # Dovecot is stopped briefly at the end of processing a change event (should change to reload in future), - # to more accurately use `wait_for_service` ensure you wait until `changedetector` is done. + # Ensure you wait until `changedetector` is finished. + # Mail account and storage directory should now be valid wait_until_change_detection_event_completes "${TEST_NAME}" - wait_for_service "${TEST_NAME}" dovecot - # Verify account authentication is successful: + # Verify mail storage directory exists (polls if storage is slow, eg remote mount): + wait_until_account_maildir_exists "${TEST_NAME}" "${MAIL_ACCOUNT}" + + # Verify account authentication is successful (account added to Dovecot UserDB+PassDB): + wait_for_service "${TEST_NAME}" dovecot local RESPONSE RESPONSE=$(docker exec "${TEST_NAME}" doveadm auth test "${MAIL_ACCOUNT}" "${MAIL_PASS}" | grep 'passdb') assert_equal "${RESPONSE}" "passdb: ${MAIL_ACCOUNT} auth succeeded"