diff --git a/Dockerfile b/Dockerfile index 52472ce..cb6a098 100644 --- a/Dockerfile +++ b/Dockerfile @@ -83,7 +83,9 @@ ENV PGROOT=/var/lib/postgresql \ OR_REINDEX_COUNTER=${OR_REINDEX_COUNTER} \ OR_DISABLE_REINDEX=${OR_DISABLE_REINDEX:-false} \ POSTGRES_MAX_CONNECTIONS=${POSTGRES_MAX_CONNECTIONS:-50} \ - OR_DISABLE_AUTO_UPGRADE=${OR_DISABLE_AUTO_UPGRADE:-false} + OR_DISABLE_AUTO_UPGRADE=${OR_DISABLE_AUTO_UPGRADE:-false} \ + OR_ASSET_DATAPOINT_RETENTION=$OR_ASSET_DATAPOINT_RETENTION \ + OR_ASSET_PREDICTED_DATAPOINT_RETENTION=$OR_ASSET_PREDICTED_DATAPOINT_RETENTION WORKDIR /var/lib/postgresql EXPOSE 5432 8008 8081 diff --git a/README.md b/README.md index cb77985..a809d3a 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,13 @@ NOTE THAT A REINDEX CAN TAKE A LONG TIME DEPENDING ON THE SIZE OF THE DB! And startup will be delayed until completed This functionality is intended to simplify migration for basic users; advanced users with large DBs should take care of this themselves. +- Automatic TimescaleDB retention policy configuration for asset datapoint tables using environment variables: + - `OR_ASSET_DATAPOINT_RETENTION` - Sets retention period for `asset_datapoint` table (disabled by default) + - `OR_ASSET_PREDICTED_DATAPOINT_RETENTION` - Sets retention period for `asset_predicted_datapoint` table (disabled by default) + - Supports flexible time units: hours, days, weeks, months, years (e.g., "48 hours", "2 weeks", "6 months") + - Retention policies automatically delete data older than the specified period + - Policies are applied on both new database initialization and existing database startup + - Can be changed at any time by updating the environment variables and restarting the container `timescale/timescaledb-ha` image is ubuntu based and only currently supports amd64; they are working on ARM64 support in timescaledev/timescaledb-ha see: @@ -29,5 +36,66 @@ https://github.com/postgis/docker-postgis/issues/216 TODO: Switch over to timescale/timescaledb-ha once 
arm64 supported +## TimescaleDB Retention Policies + +This image supports automatic configuration of TimescaleDB retention policies for asset datapoint tables. Retention policies automatically delete data older than a specified period, helping to manage database size and performance. + +### Configuration + +Set the following environment variables to configure retention policies: + +- **`OR_ASSET_DATAPOINT_RETENTION`** - Retention period for the `asset_datapoint` table (disabled by default) +- **`OR_ASSET_PREDICTED_DATAPOINT_RETENTION`** - Retention period for the `asset_predicted_datapoint` table (disabled by default) + +The retention period supports flexible PostgreSQL interval syntax with the following time units: +- **hours** - e.g., "48 hours", "72 hours" +- **days** - e.g., "30 days", "180 days" +- **weeks** - e.g., "2 weeks", "8 weeks" +- **months** - e.g., "3 months", "12 months" +- **years** - e.g., "1 year", "2 years" + +You can also combine units: "1 year 6 months", "2 weeks 3 days" + +### Example Usage + +```yaml +# docker-compose.yml +services: + postgresql: + image: openremote/postgresql:latest + environment: + # Keep asset_datapoint for 6 months + OR_ASSET_DATAPOINT_RETENTION: 6 months + # Keep predicted datapoint for only 2 weeks + OR_ASSET_PREDICTED_DATAPOINT_RETENTION: 2 weeks +``` + +Additional examples: +```yaml +# Short-term retention (48 hours) +OR_ASSET_DATAPOINT_RETENTION: 48 hours + +# Long-term retention (1 year) +OR_ASSET_DATAPOINT_RETENTION: 1 year + +# Combined units +OR_ASSET_DATAPOINT_RETENTION: 1 year 6 months +``` + +### Behavior + +- **New databases**: Retention policies are automatically configured during initialization if the tables exist as hypertables +- **Existing databases**: Retention policies are applied/updated on container startup when environment variables are set +- **Changing retention periods**: Simply update the environment variables and restart the container +- **Disabling retention**: To disable automatic retention 
policy configuration, do not set these environment variables; note that once a retention period has been applied at startup of an existing database, unsetting its variable removes that retention policy from the table on the next container start + +### Notes + +- Retention policies only work on tables that have been converted to TimescaleDB hypertables +- If the tables don't exist or aren't hypertables, the configuration will be skipped (no error) +- Data deletion happens automatically in the background via TimescaleDB's retention policy mechanism +- Existing retention policies are replaced when new values are configured +- The interval format follows PostgreSQL's INTERVAL syntax + ## Upgrading ***NOTE: If you change the version of container you use then make sure you have backed up your DB first as this container will try to auto upgrade your DB and/or TimescaleDB extension; this auto upgrade functionality can be disabled using `OR_DISABLE_AUTO_UPGRADE=true`*** diff --git a/docker-entrypoint-initdb.d/200_or_retention_policy.sh b/docker-entrypoint-initdb.d/200_or_retention_policy.sh new file mode 100644 index 0000000..eca87e2 --- /dev/null +++ b/docker-entrypoint-initdb.d/200_or_retention_policy.sh @@ -0,0 +1,94 @@ +#!/bin/bash + +# THIS SCRIPT CONFIGURES TIMESCALEDB RETENTION POLICIES FOR ASSET DATAPOINT TABLES +# The retention policies will automatically delete data older than the specified interval +# This can be run during database initialization or on existing databases + +set -e + +# Retention periods are taken from the environment and have no default; an empty or unset value means no retention policy (supports flexible time units like "90 days", "6 months", "2 weeks", "48 hours") +OR_ASSET_DATAPOINT_RETENTION=$OR_ASSET_DATAPOINT_RETENTION +OR_ASSET_PREDICTED_DATAPOINT_RETENTION=$OR_ASSET_PREDICTED_DATAPOINT_RETENTION + +echo "-----------------------------------------------------------" +echo "Configuring TimescaleDB retention policies..."
echo "-----------------------------------------------------------"

#######################################
# Run SQL read from stdin against the target database.
# Uses docker_process_sql when that function is defined in the current shell,
# otherwise calls psql directly with POSTGRES_USER / POSTGRES_DB.
# SQL is supplied on stdin (not via -c) because psql only interpolates
# :'name' variables for input it reads itself.
# NOTE(review): assumes docker_process_sql forwards its arguments and stdin
# to psql unchanged - confirm against the entrypoint that defines it.
# Globals:   POSTGRES_USER, POSTGRES_DB (read, psql fallback only)
# Arguments: extra psql options, e.g. -tA or -v name=value
# Returns:   exit status of the underlying command
#######################################
_or_retention_sql() {
  if declare -f docker_process_sql > /dev/null; then
    docker_process_sql "$@"
  else
    psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" "$@"
  fi
}

#######################################
# Append one custom setting to postgresql.conf; does nothing for an empty value.
# Single quotes in the value are doubled so the file stays parseable.
# Globals:   PGDATA (read)
# Arguments: $1 - setting name, $2 - setting value (may be empty)
#######################################
_or_append_retention_setting() {
  local setting_name=$1
  local setting_value=$2
  local sq="'"

  [ -n "$setting_value" ] || return 0
  printf "%s = '%s'\n" "$setting_name" "${setting_value//$sq/$sq$sq}" >> "$PGDATA/postgresql.conf"
}

#######################################
# Configure or remove the TimescaleDB retention policy of one table.
# Arguments: $1 - table name
#            $2 - retention interval in PostgreSQL INTERVAL syntax;
#                 empty means "remove any existing policy"
# Outputs:   progress messages on stdout, warnings/errors on stderr
# Returns:   0 when the policy was set/removed or the table was skipped,
#            1 when the policy statements failed
#######################################
configure_retention_policy() {
  local table_name=$1
  local retention_interval=$2
  local hypertable_count
  local remove_sql="SELECT remove_retention_policy(:'table_name', if_exists => true);"
  local add_sql="SELECT add_retention_policy(:'table_name', CAST(:'retention_interval' AS INTERVAL));"

  echo "Checking if table '$table_name' exists and is a hypertable..."

  # A failed lookup (for example when the timescaledb extension is not installed)
  # is treated as "not a hypertable" instead of aborting the whole script.
  if ! hypertable_count=$(_or_retention_sql -tA -v table_name="$table_name" \
    <<< "SELECT COUNT(*) FROM timescaledb_information.hypertables WHERE hypertable_name = :'table_name';"); then
    echo "Warning: could not query TimescaleDB hypertable information for '$table_name'." >&2
    hypertable_count=0
  fi
  # Guard the numeric test below against empty or unexpected output
  [[ "$hypertable_count" =~ ^[0-9]+$ ]] || hypertable_count=0

  if [ "$hypertable_count" -eq 0 ]; then
    echo "Table '$table_name' does not exist or is not a hypertable. Skipping retention policy setup."
    if [ -n "$retention_interval" ]; then
      echo "Note: Retention policy was not applied because '$table_name' is not a hypertable yet."
    fi
    return 0
  fi

  if [ -n "$retention_interval" ]; then
    # Retention interval is set - replace the policy
    echo "Table '$table_name' is a hypertable. Configuring retention policy of $retention_interval..."

    # Remove + add run in one transaction so that an invalid interval does not
    # leave the table without the policy it had before.
    if ! _or_retention_sql -v table_name="$table_name" -v retention_interval="$retention_interval" \
      <<< "BEGIN; $remove_sql $add_sql COMMIT;"; then
      echo "Error: failed to configure retention policy for '$table_name'." >&2
      return 1
    fi

    echo "Retention policy configured for '$table_name': data older than $retention_interval will be automatically deleted."
  else
    # Retention interval is not set - remove any existing policy
    echo "Table '$table_name' is a hypertable. No retention interval specified, removing any existing retention policy..."

    if ! _or_retention_sql -v table_name="$table_name" <<< "$remove_sql"; then
      echo "Error: failed to remove retention policy for '$table_name'." >&2
      return 1
    fi

    echo "Retention policy removed for '$table_name': data will be retained indefinitely."
  fi
}

# Remove existing settings if they exist (dots escaped so only the exact setting names match)
sed -i -e '/^or\.asset_datapoint_retention/d' -e '/^or\.asset_predicted_datapoint_retention/d' "$PGDATA/postgresql.conf"

# Add new settings
_or_append_retention_setting "or.asset_datapoint_retention" "$OR_ASSET_DATAPOINT_RETENTION"
_or_append_retention_setting "or.asset_predicted_datapoint_retention" "$OR_ASSET_PREDICTED_DATAPOINT_RETENTION"

# Then configure the actual retention policies
configure_retention_policy "asset_datapoint" "$OR_ASSET_DATAPOINT_RETENTION"
configure_retention_policy "asset_predicted_datapoint" "$OR_ASSET_PREDICTED_DATAPOINT_RETENTION"

echo "-----------------------------------------------------------"
echo "TimescaleDB retention policy configuration complete!"
+echo "-----------------------------------------------------------" \ No newline at end of file diff --git a/or-entrypoint.sh b/or-entrypoint.sh index 6ff92a3..c394035 100644 --- a/or-entrypoint.sh +++ b/or-entrypoint.sh @@ -430,7 +430,34 @@ if [ -n "$DATABASE_ALREADY_EXISTS" ]; then fi fi - if [ "$DO_REINDEX" == "true" ] || [ "$DO_TS_UPGRADE" == "true" ]; then + # Check if retention policies need to be configured or removed + # We need to run the script if: + # 1. Environment variables are set (to add/update policies) + # 2. Environment variables changed from previous run (to remove policies) + DO_RETENTION_POLICY=false + RETENTION_CONFIG_FILE="${PGDATA}/OR_RETENTION_CONFIG" + CURRENT_RETENTION_CONFIG="asset_datapoint:${OR_ASSET_DATAPOINT_RETENTION}|asset_predicted_datapoint:${OR_ASSET_PREDICTED_DATAPOINT_RETENTION}" + + if [ -f "$RETENTION_CONFIG_FILE" ]; then + PREVIOUS_RETENTION_CONFIG=$(cat "$RETENTION_CONFIG_FILE") + if [ "$PREVIOUS_RETENTION_CONFIG" != "$CURRENT_RETENTION_CONFIG" ]; then + echo "-------------------------------------------------------" + echo "Retention policy configuration has changed" + echo " Previous: $PREVIOUS_RETENTION_CONFIG" + echo " Current: $CURRENT_RETENTION_CONFIG" + echo "-------------------------------------------------------" + DO_RETENTION_POLICY=true + fi + elif [ -n "$OR_ASSET_DATAPOINT_RETENTION" ] || [ -n "$OR_ASSET_PREDICTED_DATAPOINT_RETENTION" ]; then + echo "-------------------------------------------------------" + echo "Retention policy environment variables detected" + echo " OR_ASSET_DATAPOINT_RETENTION: ${OR_ASSET_DATAPOINT_RETENTION}" + echo " OR_ASSET_PREDICTED_DATAPOINT_RETENTION: ${OR_ASSET_PREDICTED_DATAPOINT_RETENTION}" + echo "-------------------------------------------------------" + DO_RETENTION_POLICY=true + fi + + if [ "$DO_REINDEX" == "true" ] || [ "$DO_TS_UPGRADE" == "true" ] || [ "$DO_RETENTION_POLICY" == "true" ]; then echo "-------------------------" echo "Starting temporary server" echo 
"-------------------------" @@ -449,16 +476,16 @@ if [ -n "$DATABASE_ALREADY_EXISTS" ]; then echo "Target TimescaleDB version: ${TS_VERSION}" # Don't automatically abort on non-0 exit status, just in case timescaledb extension isn't installed on the DB - set +e + set +e docker_process_sql -X -c "ALTER EXTENSION timescaledb UPDATE;" - - if [ $? -eq 0 ]; then + + if [ $? -eq 0 ]; then NEW_TS_VERSION=$(docker_process_sql -X -c "SELECT extversion FROM pg_extension WHERE extname='timescaledb';" | grep -v extversion | grep -v row | tr -d ' ') echo "TimescaleDB upgraded: ${CURRENT_TS_VERSION} -> ${NEW_TS_VERSION}" docker_process_sql -c "CREATE EXTENSION IF NOT EXISTS timescaledb_toolkit; ALTER EXTENSION timescaledb_toolkit UPDATE;" - fi - - # Return the error handling back to automatically aborting on non-0 exit status + fi + + # Return the error handling back to automatically aborting on non-0 exit status set -e echo "=================================================================================" @@ -482,6 +509,25 @@ if [ -n "$DATABASE_ALREADY_EXISTS" ]; then touch "$REINDEX_FILE" fi + # Configure retention policies if environment variables changed + if [ "$DO_RETENTION_POLICY" == "true" ]; then + echo "-----------------------------------------------------------" + echo "Configuring TimescaleDB retention policies for existing DB..." + echo "-----------------------------------------------------------" + + # Don't automatically abort on non-0 exit status + set +e + + # Run the retention policy script + /docker-entrypoint-initdb.d/200_or_retention_policy.sh + + # Save the current configuration to detect future changes + echo "$CURRENT_RETENTION_CONFIG" > "$RETENTION_CONFIG_FILE" + + # Return the error handling back to automatically aborting on non-0 exit status + set -e + fi + docker_temp_server_stop fi fi