From a197bb489d6fd56fc78eefadd59057fb0fed984f Mon Sep 17 00:00:00 2001
From: sreedev <talisreedev@gmail.com>
Date: Mon, 29 Sep 2025 22:31:04 +0530
Subject: [PATCH 01/10] c5-agent commit

---
 agent-c5/build.gradle                         |  146 ++
 agent-c5/gradle.properties                    |    2 +
 agent-c5/src/docker/Dockerfile                |   22 +
 .../src/docker/jmx_prometheus_exporter.yaml   |   83 +
 agent-c5/src/docker/table1.yaml               |   28 +
 .../com/datastax/oss/cdc/agent/Agent.java     |   94 ++
 .../datastax/oss/cdc/agent/CdcMetrics.java    |   54 +
 .../cdc/agent/CommitLogReadHandlerImpl.java   |  485 ++++++
 .../cdc/agent/CommitLogReaderServiceImpl.java |   89 +
 .../com/datastax/oss/cdc/agent/Mutation.java  |   68 +
 .../datastax/oss/cdc/agent/MutationMaker.java |   39 +
 .../oss/cdc/agent/PulsarMutationSender.java   |  161 ++
 .../oss/cdc/agent/PulsarDualNodeC5Tests.java  |   37 +
 .../cdc/agent/PulsarSingleNodeC5Tests.java    |   50 +
 .../test/resources/cassandra/cassandra.yaml   | 1430 +++++++++++++++++
 .../src/test/resources/cassandra/logback.xml  |  103 ++
 agent-c5/src/test/resources/logback-test.xml  |   15 +
 gradle.properties                             |    3 +-
 settings.gradle                               |    2 +
 .../com/datastax/oss/cdc/AgentTestUtil.java   |    1 +
 20 files changed, 2911 insertions(+), 1 deletion(-)
 create mode 100644 agent-c5/build.gradle
 create mode 100644 agent-c5/gradle.properties
 create mode 100644 agent-c5/src/docker/Dockerfile
 create mode 100644 agent-c5/src/docker/jmx_prometheus_exporter.yaml
 create mode 100644 agent-c5/src/docker/table1.yaml
 create mode 100644 agent-c5/src/main/java/com/datastax/oss/cdc/agent/Agent.java
 create mode 100644 agent-c5/src/main/java/com/datastax/oss/cdc/agent/CdcMetrics.java
 create mode 100644 agent-c5/src/main/java/com/datastax/oss/cdc/agent/CommitLogReadHandlerImpl.java
 create mode 100644 agent-c5/src/main/java/com/datastax/oss/cdc/agent/CommitLogReaderServiceImpl.java
 create mode 100644 agent-c5/src/main/java/com/datastax/oss/cdc/agent/Mutation.java
 create mode 100644 agent-c5/src/main/java/com/datastax/oss/cdc/agent/MutationMaker.java
 create mode 100644 agent-c5/src/main/java/com/datastax/oss/cdc/agent/PulsarMutationSender.java
 create mode 100644 agent-c5/src/test/java/com/datastax/oss/cdc/agent/PulsarDualNodeC5Tests.java
 create mode 100644 agent-c5/src/test/java/com/datastax/oss/cdc/agent/PulsarSingleNodeC5Tests.java
 create mode 100644 agent-c5/src/test/resources/cassandra/cassandra.yaml
 create mode 100644 agent-c5/src/test/resources/cassandra/logback.xml
 create mode 100644 agent-c5/src/test/resources/logback-test.xml

diff --git a/agent-c5/build.gradle b/agent-c5/build.gradle
new file mode 100644
index 00000000..63accc37
--- /dev/null
+++ b/agent-c5/build.gradle
@@ -0,0 +1,146 @@
+plugins {
+    id 'java-library'
+    id 'java'
+    id 'application'
+    id "com.github.johnrengelman.shadow"
+    id 'com.palantir.docker'
+    id 'docker-compose'
+}
+
+application {
+    mainClass = "$mainClassName"
+}
+
+jar {
+    duplicatesStrategy = DuplicatesStrategy.INCLUDE
+    manifest {
+        attributes 'Premain-Class': "$mainClassName"
+    }
+    zip64=true
+}
+tasks.jar.dependsOn project(':commons').jar
+tasks.jar.dependsOn project(':agent').jar
+
+compileTestJava {
+    options.compilerArgs += '-parameters'
+}
+
+sourceSets {
+    // Make the compileOnly dependencies available when compiling/running tests
+    test.compileClasspath += configurations.compileClasspath
+    test.runtimeClasspath += configurations.compileClasspath
+}
+
+shadowJar {
+    manifest {
+        inheritFrom project.tasks.jar.manifest
+    }
+}
+
+jar.enabled = true
+assemble.dependsOn(shadowJar)
+
+dependencies {
+    implementation project(':commons')
+    implementation project(':agent')
+
+    implementation("org.apache.avro:avro:${avroVersion}")
+
+    implementation("${pulsarGroup}:pulsar-client:${pulsarVersion}")
+
+    compileOnly("org.apache.cassandra:cassandra-all:${cassandra5Version}")
+    testCompileOnly("org.apache.cassandra:cassandra-all:${cassandra5Version}")
+    testImplementation("org.apache.cassandra:cassandra-all:${cassandra5Version}") {
+        exclude group: 'io.netty'
+    }
+
+    testImplementation "com.datastax.oss:java-driver-core:${ossDriverVersion}"
+    testImplementation "com.datastax.oss:java-driver-query-builder:${ossDriverVersion}"
+
+    testImplementation "org.testcontainers:testcontainers:${testContainersVersion}"
+    testImplementation project(':testcontainers')
+
+    testRuntimeOnly "org.slf4j:slf4j-api:${slf4jVersion}"
+    testRuntimeOnly "ch.qos.logback:logback-classic:${logbackVersion}"
+}
+
+test {
+    // Build the shadow (fat) jar first: it is put on the test classpath below
+    dependsOn shadowJar
+
+    // Test classpath: test classes + test runtime dependencies + the shadow jar (sourceSets.main.output is not added explicitly)
+    classpath = project.sourceSets.test.output + configurations.testRuntimeClasspath + files(shadowJar.archiveFile)
+
+    useJUnitPlatform()
+
+    // JVM flags exporting/opening JDK-internal packages to the unnamed module, for Cassandra 5.0 on Java 17+
+    jvmArgs '--add-exports', 'java.base/jdk.internal.misc=ALL-UNNAMED',
+           '--add-exports', 'java.management.rmi/com.sun.jmx.remote.internal.rmi=ALL-UNNAMED',
+           '--add-exports', 'java.rmi/sun.rmi.registry=ALL-UNNAMED',
+           '--add-exports', 'java.rmi/sun.rmi.server=ALL-UNNAMED',
+           '--add-exports', 'java.sql/java.sql=ALL-UNNAMED',
+           '--add-exports', 'java.base/java.lang.ref=ALL-UNNAMED',
+           '--add-exports', 'jdk.unsupported/sun.misc=ALL-UNNAMED',
+           '--add-opens', 'java.base/java.lang.module=ALL-UNNAMED',
+           '--add-opens', 'java.base/jdk.internal.loader=ALL-UNNAMED',
+           '--add-opens', 'java.base/jdk.internal.ref=ALL-UNNAMED',
+           '--add-opens', 'java.base/jdk.internal.reflect=ALL-UNNAMED',
+           '--add-opens', 'java.base/jdk.internal.math=ALL-UNNAMED',
+           '--add-opens', 'java.base/jdk.internal.module=ALL-UNNAMED',
+           '--add-opens', 'java.base/jdk.internal.util.jar=ALL-UNNAMED',
+           '--add-opens', 'jdk.management/com.sun.management.internal=ALL-UNNAMED',
+           '--add-opens', 'java.base/sun.nio.ch=ALL-UNNAMED',
+           '--add-opens', 'java.base/java.io=ALL-UNNAMED',
+           '--add-opens', 'java.base/java.lang=ALL-UNNAMED',
+           '--add-opens', 'java.base/java.lang.reflect=ALL-UNNAMED',
+           '--add-opens', 'java.base/java.util=ALL-UNNAMED',
+           '--add-opens', 'java.base/java.nio=ALL-UNNAMED'
+
+    environment 'PULSAR_IMAGE', testPulsarImage + ':' + testPulsarImageTag
+    environment 'CASSANDRA_IMAGE', 'cassandra:' + cassandra5Version
+
+    systemProperty "buildDir", buildDir
+    systemProperty "projectVersion", project.version
+}
+
+dockerPrepare.dependsOn(jar, shadowJar)
+dockerPrepare.duplicatesStrategy = DuplicatesStrategy.INCLUDE
+docker { // image definition; the build context is the built jars plus src/docker/, buildArgs feed the Dockerfile ARGs
+    name "${dockerRepo}cassandra:${cassandra5Version}-cdc"
+    files "$buildDir/libs/", "src/docker/"
+    buildArgs([BUILD_VERSION: project.version,
+               CASSANDRA_VERSION: cassandra5Version,
+               COMMITMOG_SYNC_PERIOD_IN_MS: commitlog_sync_period_in_ms, // sic ("COMMITMOG"): src/docker/Dockerfile declares the ARG with this same spelling, rename both together
+               CDC_TOTAL_SPACE_IN_MB: cdc_total_space_in_mb])
+}
+build.dependsOn(project.tasks['docker'])
+
+dockerCompose {
+    projectName = "test1"
+    useComposeFiles = ['../docker-compose.yaml']
+    environment.put 'AGENT_VERSION', 'c5'
+    environment.put 'PROJECT_VERSION', project.version
+    environment.put 'CASSANDRA_IMAGE', "${dockerRepo}cassandra:${cassandra5Version}-cdc"
+    environment.put 'PULSAR_IMAGE', "${dockerRepo}pulsar:${testPulsarImageTag}-csc"
+    waitForTcpPorts = false
+    startedServices = ['cassandra','pulsar']
+
+    stress {
+        projectName = "test1"
+        startedServices = ['cassandra-stress']
+        scale = ['cassandra-stress': 1]
+    }
+    stress2 {
+        projectName = "test1"
+        startedServices = ['cassandra-stress']
+        scale = ['cassandra-stress': 2]
+    }
+    prometheus {
+        projectName = "test1"
+        startedServices = ['prometheus','grafana']
+    }
+    elasticsearch {
+        projectName = "test1"
+        startedServices = ['elasticsearch']
+    }
+}
diff --git a/agent-c5/gradle.properties b/agent-c5/gradle.properties
new file mode 100644
index 00000000..e4ecc1e2
--- /dev/null
+++ b/agent-c5/gradle.properties
@@ -0,0 +1,2 @@
+artifact=agent-c5
+mainClassName=com.datastax.oss.cdc.agent.Agent
diff --git a/agent-c5/src/docker/Dockerfile b/agent-c5/src/docker/Dockerfile
new file mode 100644
index 00000000..d5eadfeb
--- /dev/null
+++ b/agent-c5/src/docker/Dockerfile
@@ -0,0 +1,22 @@
+ARG CASSANDRA_VERSION
+FROM cassandra:${CASSANDRA_VERSION}
+ARG BUILD_VERSION
+ARG COMMITMOG_SYNC_PERIOD_IN_MS
+ARG CDC_TOTAL_SPACE_IN_MB
+# Enable CDC and apply the build args; Cassandra 4.1+ spells the sync key "commitlog_sync_period: <n>ms", so match both spellings
+RUN sed -i 's/cdc_enabled: false/cdc_enabled: true/g' /etc/cassandra/cassandra.yaml
+RUN sed -i -E "s/^commitlog_sync_period(_in_ms)?: 10000(ms)?/commitlog_sync_period: ${COMMITMOG_SYNC_PERIOD_IN_MS}ms/" /etc/cassandra/cassandra.yaml
+RUN echo "cdc_total_space_in_mb: ${CDC_TOTAL_SPACE_IN_MB}" >> /etc/cassandra/cassandra.yaml
+
+COPY agent-c5-${BUILD_VERSION}-all.jar /
+
+# Add cassandra-stress config
+COPY table1.yaml /
+
+# Add the prometheus exporter
+ADD https://repo1.maven.org/maven2/io/prometheus/jmx/jmx_prometheus_javaagent/0.16.1/jmx_prometheus_javaagent-0.16.1.jar /
+RUN chmod a+rx /jmx_prometheus_javaagent-0.16.1.jar
+COPY jmx_prometheus_exporter.yaml /
+
+# Add cassandra tools in the PATH
+ENV PATH="${PATH}:/opt/cassandra/tools/bin"
diff --git a/agent-c5/src/docker/jmx_prometheus_exporter.yaml b/agent-c5/src/docker/jmx_prometheus_exporter.yaml
new file mode 100644
index 00000000..fca93edf
--- /dev/null
+++ b/agent-c5/src/docker/jmx_prometheus_exporter.yaml
@@ -0,0 +1,83 @@
+lowercaseOutputName: true
+lowercaseOutputLabelNames: true
+whitelistObjectNames: [
+    "org.apache.cassandra.metrics:type=ColumnFamily,name=RangeLatency,*",
+    "org.apache.cassandra.metrics:type=ColumnFamily,name=LiveSSTableCount,*",
+    "org.apache.cassandra.metrics:type=ColumnFamily,name=SSTablesPerReadHistogram,*",
+    "org.apache.cassandra.metrics:type=ColumnFamily,name=SpeculativeRetries,*",
+    "org.apache.cassandra.metrics:type=ColumnFamily,name=MemtableOnHeapSize,*",
+    "org.apache.cassandra.metrics:type=ColumnFamily,name=MemtableSwitchCount,*",
+    "org.apache.cassandra.metrics:type=ColumnFamily,name=MemtableLiveDataSize,*",
+    "org.apache.cassandra.metrics:type=ColumnFamily,name=MemtableColumnsCount,*",
+    "org.apache.cassandra.metrics:type=ColumnFamily,name=MemtableOffHeapSize,*",
+    "org.apache.cassandra.metrics:type=ColumnFamily,name=BloomFilterFalsePositives,*",
+    "org.apache.cassandra.metrics:type=ColumnFamily,name=BloomFilterFalseRatio,*",
+    "org.apache.cassandra.metrics:type=ColumnFamily,name=BloomFilterDiskSpaceUsed,*",
+    "org.apache.cassandra.metrics:type=ColumnFamily,name=BloomFilterOffHeapMemoryUsed,*",
+    "org.apache.cassandra.metrics:type=ColumnFamily,name=SnapshotsSize,*",
+    "org.apache.cassandra.metrics:type=ColumnFamily,name=TotalDiskSpaceUsed,*",
+    "org.apache.cassandra.metrics:type=CQL,name=RegularStatementsExecuted,*",
+    "org.apache.cassandra.metrics:type=CQL,name=PreparedStatementsExecuted,*",
+    "org.apache.cassandra.metrics:type=Compaction,name=PendingTasks,*",
+    "org.apache.cassandra.metrics:type=Compaction,name=CompletedTasks,*",
+    "org.apache.cassandra.metrics:type=Compaction,name=BytesCompacted,*",
+    "org.apache.cassandra.metrics:type=Compaction,name=TotalCompactionsCompleted,*",
+    "org.apache.cassandra.metrics:type=ClientRequest,name=Latency,*",
+    "org.apache.cassandra.metrics:type=ClientRequest,name=Unavailables,*",
+    "org.apache.cassandra.metrics:type=ClientRequest,name=Timeouts,*",
+    "org.apache.cassandra.metrics:type=ClientRequest,name=Failures,*",
+    "org.apache.cassandra.metrics:type=ClientRequest,name=Latency,*",
+    "org.apache.cassandra.metrics:type=Storage,name=Exceptions,*",
+    "org.apache.cassandra.metrics:type=Storage,name=TotalHints,*",
+    "org.apache.cassandra.metrics:type=Storage,name=TotalHintsInProgress,*",
+    "org.apache.cassandra.metrics:type=Storage,name=Load,*",
+    "org.apache.cassandra.metrics:type=Connection,name=TotalTimeouts,*",
+    "org.apache.cassandra.metrics:type=ThreadPools,name=CompletedTasks,*",
+    "org.apache.cassandra.metrics:type=ThreadPools,name=PendingTasks,*",
+    "org.apache.cassandra.metrics:type=ThreadPools,name=ActiveTasks,*",
+    "org.apache.cassandra.metrics:type=ThreadPools,name=TotalBlockedTasks,*",
+    "org.apache.cassandra.metrics:type=ThreadPools,name=CurrentlyBlockedTasks,*",
+    "org.apache.cassandra.metrics:type=DroppedMessage,name=Dropped,*",
+    "org.apache.cassandra.metrics:type=Cache,scope=KeyCache,name=HitRate,*",
+    "org.apache.cassandra.metrics:type=Cache,scope=KeyCache,name=Hits,*",
+    "org.apache.cassandra.metrics:type=Cache,scope=KeyCache,name=Requests,*",
+    "org.apache.cassandra.metrics:type=Cache,scope=KeyCache,name=Entries,*",
+    "org.apache.cassandra.metrics:type=Cache,scope=KeyCache,name=Size,*",
+    "org.apache.cassandra.metrics:type=Streaming,name=TotalIncomingBytes,*",
+    "org.apache.cassandra.metrics:type=Streaming,name=TotalOutgoingBytes,*",
+    "org.apache.cassandra.metrics:type=Client,name=connectedNativeClients,*",
+    "org.apache.cassandra.metrics:type=Client,name=connectedThriftClients,*",
+    "org.apache.cassandra.metrics:type=Table,name=WriteLatency,*",
+    "org.apache.cassandra.metrics:type=Table,name=ReadLatency,*",
+    "org.apache.cassandra.metrics:type=CdcAgent,*",
+    "org.apache.cassandra.metrics:type=CommitLog,*",
+    "org.apache.cassandra.net:type=FailureDetector,*",
+]
+#blacklistObjectNames: ["org.apache.cassandra.metrics:type=ColumnFamily,*"]
+rules:
+  - pattern: org.apache.cassandra.metrics<type=(Connection|Streaming), scope=(\S*), name=(\S*)><>(Count|Value)
+    name: cassandra_$1_$3
+    labels:
+      address: "$2"
+  - pattern: org.apache.cassandra.metrics<type=(ColumnFamily), name=(RangeLatency)><>(Mean)
+    name: cassandra_$1_$2_$3
+  - pattern: org.apache.cassandra.net<type=(FailureDetector)><>(DownEndpointCount)
+    name: cassandra_$1_$2
+  - pattern: org.apache.cassandra.metrics<type=(Keyspace), keyspace=(\S*), name=(\S*)><>(Count|Mean|95thPercentile)
+    name: cassandra_$1_$3_$4
+    labels:
+      "$1": "$2"
+  - pattern: org.apache.cassandra.metrics<type=(Table), keyspace=(\S*), scope=(\S*), name=(\S*)><>(Count|Mean|95thPercentile)
+    name: cassandra_$1_$4_$5
+    labels:
+      "keyspace": "$2"
+      "table": "$3"
+  - pattern: org.apache.cassandra.metrics<type=(ClientRequest), scope=(\S*), name=(\S*)><>(Count|Mean|95thPercentile)
+    name: cassandra_$1_$3_$4
+    labels:
+      "type": "$2"
+  - pattern: org.apache.cassandra.metrics<type=(\S*)(?:, ((?!scope)\S*)=(\S*))?(?:, scope=(\S*))?, name=(\S*)><>(Count|Value)
+    name: cassandra_$1_$5
+    labels:
+      "$1": "$4"
+      "$2": "$3"
diff --git a/agent-c5/src/docker/table1.yaml b/agent-c5/src/docker/table1.yaml
new file mode 100644
index 00000000..c79fbea0
--- /dev/null
+++ b/agent-c5/src/docker/table1.yaml
@@ -0,0 +1,28 @@
+keyspace: ks1
+keyspace_definition: |
+  CREATE KEYSPACE ks1 WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'};
+#
+# Table name and create CQL
+#
+table: table1
+table_definition: |
+  CREATE TABLE table1 (a text, b text, PRIMARY KEY (a)) WITH cdc=true;
+
+columnspec:
+  - name: a
+    size: uniform(10..30)
+  - name: b
+    size: fixed(16)
+
+#
+# Specs for insert queries
+#
+insert:
+  partitions: fixed(1) # 1 partition per batch
+  batchtype: UNLOGGED # use unlogged batches
+  select: fixed(10)/10 # no chance of skipping a row when generating inserts
+
+queries:
+  read1:
+    cql: select * from table1 where a = ?
+    fields: samerow     # samerow or multirow (select arguments from the same row, or randomly from all rows in the partition)
diff --git a/agent-c5/src/main/java/com/datastax/oss/cdc/agent/Agent.java b/agent-c5/src/main/java/com/datastax/oss/cdc/agent/Agent.java
new file mode 100644
index 00000000..b769a074
--- /dev/null
+++ b/agent-c5/src/main/java/com/datastax/oss/cdc/agent/Agent.java
@@ -0,0 +1,94 @@
+/**
+ * Copyright DataStax, Inc 2021.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.datastax.oss.cdc.agent;
+
+import lombok.extern.slf4j.Slf4j;
+import org.apache.cassandra.config.DatabaseDescriptor;
+import org.apache.cassandra.service.StorageService;
+
+import java.lang.instrument.Instrumentation;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+
+/** Java instrumentation agent that wires up and starts the CDC commit log pipeline publishing to Pulsar. */
+@Slf4j
+public class Agent {
+    /** Entry point used when the agent is loaded at JVM start-up; logs and exits the JVM if start-up throws. */
+    public static void premain(String agentArgs, Instrumentation inst) {
+        log.info("[Agent] In premain method");
+        try {
+            main(agentArgs, inst);
+        } catch(Exception e) {
+            log.error("error:", e);
+            System.exit(-1);
+        }
+    }
+    /** Entry point used when the agent is attached to an already running JVM; logs and exits the JVM if start-up throws. */
+    public static void agentmain(String agentArgs, Instrumentation inst) {
+        log.info("[Agent] In agentmain method");
+        try {
+            main(agentArgs, inst);
+        } catch(Exception e) {
+            log.error("error:", e);
+            System.exit(-1);
+        }
+    }
+    /** Initializes the Cassandra configuration and starts the agent only when CDC is enabled and a CDC log location is set. */
+    static void main(String agentArgs, Instrumentation inst) throws Exception {
+        DatabaseDescriptor.daemonInitialization();
+        if (!DatabaseDescriptor.isCDCEnabled()) {
+            log.error("cdc_enabled=false in your cassandra configuration, CDC agent not started.");
+        } else if (DatabaseDescriptor.getCDCLogLocation() == null) {
+            log.error("cdc_raw_directory=null in your cassandra configuration, CDC agent not started.");
+        } else {
+            startCdcAgent(agentArgs);
+        }
+    }
+    /** Builds the sender, reader service and processor from {@code agentArgs} and submits them to two single-thread executors. */
+    static void startCdcAgent(String agentArgs) throws Exception {
+        log.info("Starting CDC agent, cdc_raw_directory={}", DatabaseDescriptor.getCDCLogLocation());
+        AgentConfig config = AgentConfig.create(AgentConfig.Platform.PULSAR, agentArgs);
+        SegmentOffsetFileWriter segmentOffsetFileWriter = new SegmentOffsetFileWriter(config.cdcWorkingDir);
+        segmentOffsetFileWriter.loadOffsets();
+        PulsarMutationSender pulsarMutationSender = new PulsarMutationSender(config);
+        CommitLogTransfer commitLogTransfer = new BlackHoleCommitLogTransfer(config);
+        CommitLogReaderServiceImpl commitLogReaderService = new CommitLogReaderServiceImpl(config, pulsarMutationSender, segmentOffsetFileWriter, commitLogTransfer);
+        CommitLogProcessor commitLogProcessor = new CommitLogProcessor(DatabaseDescriptor.getCDCLogLocation(), config, commitLogTransfer, segmentOffsetFileWriter, commitLogReaderService, true);
+        commitLogReaderService.initialize();
+        // detect commitlogs file and submit new/modified files to the commitLogReader
+        ExecutorService commitLogExecutor = Executors.newSingleThreadExecutor();
+        commitLogExecutor.submit(() -> {
+            try {
+                do {
+                    // wait to initialize the hostID before starting
+                    Thread.sleep(1000);
+                } while(StorageService.instance.getLocalHostUUID() == null);
+                commitLogProcessor.initialize();
+                commitLogProcessor.start();
+            } catch (InterruptedException e) {
+                Thread.currentThread().interrupt(); // keep the interrupt visible to the executor thread instead of swallowing it
+                log.warn("commitLogProcessor interrupted, stopping", e);
+            } catch(Exception e) {
+                log.error("commitLogProcessor error:", e);
+            }
+        });
+
+        ExecutorService commitLogServiceExecutor = Executors.newSingleThreadExecutor();
+        commitLogServiceExecutor.submit(commitLogReaderService);
+
+        log.info("CDC agent started");
+    }
+}
diff --git a/agent-c5/src/main/java/com/datastax/oss/cdc/agent/CdcMetrics.java b/agent-c5/src/main/java/com/datastax/oss/cdc/agent/CdcMetrics.java
new file mode 100644
index 00000000..9305ff2f
--- /dev/null
+++ b/agent-c5/src/main/java/com/datastax/oss/cdc/agent/CdcMetrics.java
@@ -0,0 +1,54 @@
+/**
+ * Copyright DataStax, Inc 2021.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.datastax.oss.cdc.agent;
+
+import com.codahale.metrics.Counter;
+import com.codahale.metrics.Gauge;
+import org.apache.cassandra.metrics.DefaultNameFactory;
+import org.apache.cassandra.metrics.MetricNameFactory;
+
+import static org.apache.cassandra.metrics.CassandraMetricsRegistry.Metrics;
+
+public class CdcMetrics {
+    public static final String CDC_AGENT_MBEAN_NAME = "CdcAgent";
+    private static final MetricNameFactory factory = new DefaultNameFactory(CDC_AGENT_MBEAN_NAME);
+    // Every metric below is named through this factory and registered in the Cassandra metrics registry by its static initializer.
+    public static final Counter sentMutations = Metrics.counter(factory.createMetricName("SentMutations"));
+    public static final Counter sentErrors = Metrics.counter(factory.createMetricName("SentErrors"));
+
+    public static final Counter commitLogReadErrors = Metrics.counter(factory.createMetricName("CommitLogReadErrors"));
+    public static final Counter skippedMutations = Metrics.counter(factory.createMetricName("SkippedMutations"));
+
+    public static final Counter executedTasks = Metrics.counter(factory.createMetricName("ExecutedTasks"));
+    // Gauges: each one samples a static member of CommitLogReaderService (a collection's size() or a holder's get()) when read.
+    public static final Gauge<Integer> submittedTasksGauge = Metrics.register(factory.createMetricName("SubmittedTasks"),
+            CommitLogReaderService.submittedTasks::size);
+
+    public static final Gauge<Integer> maxSubmittedTasks = Metrics.register(factory.createMetricName("MaxSubmittedTasks"),
+            CommitLogReaderService.maxSubmittedTasks::get);
+
+    public static final Gauge<Integer> pendingTasksGauge = Metrics.register(factory.createMetricName("PendingTasks"),
+            CommitLogReaderService.pendingTasks::size);
+
+    public static final Gauge<Integer> maxPendingTasks = Metrics.register(factory.createMetricName("MaxPendingTasks"),
+            CommitLogReaderService.maxPendingTasks::get);
+    // NOTE(review): "UncleanedTasks" samples pendingTasks::size, exactly like "PendingTasks" above - looks like a copy/paste slip, confirm the intended source.
+    public static final Gauge<Integer> uncleanedTasksGauge = Metrics.register(factory.createMetricName("UncleanedTasks"),
+            CommitLogReaderService.pendingTasks::size);
+
+    public static final Gauge<Integer> maxUncleanedTasks = Metrics.register(factory.createMetricName("MaxUncleanedTasks"),
+            CommitLogReaderService.maxUncleanedTasks::get);
+}
diff --git a/agent-c5/src/main/java/com/datastax/oss/cdc/agent/CommitLogReadHandlerImpl.java b/agent-c5/src/main/java/com/datastax/oss/cdc/agent/CommitLogReadHandlerImpl.java
new file mode 100644
index 00000000..360b83db
--- /dev/null
+++ b/agent-c5/src/main/java/com/datastax/oss/cdc/agent/CommitLogReadHandlerImpl.java
@@ -0,0 +1,485 @@
+/**
+ * Copyright DataStax, Inc 2021.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.datastax.oss.cdc.agent;
+
+import com.datastax.oss.cdc.agent.exceptions.CassandraConnectorSchemaException;
+import com.datastax.oss.cdc.agent.exceptions.CassandraConnectorTaskException;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.cassandra.cql3.ColumnSpecification;
+import org.apache.cassandra.db.LivenessInfo;
+import org.apache.cassandra.db.commitlog.CommitLogDescriptor;
+import org.apache.cassandra.db.commitlog.CommitLogReadHandler;
+import org.apache.cassandra.db.marshal.AbstractType;
+import org.apache.cassandra.db.marshal.ValueAccessor;
+import org.apache.cassandra.db.partitions.PartitionUpdate;
+import org.apache.cassandra.db.rows.Row;
+import org.apache.cassandra.db.rows.Unfiltered;
+import org.apache.cassandra.db.rows.UnfilteredRowIterator;
+import org.apache.cassandra.io.util.DataOutputBuffer;
+import org.apache.cassandra.schema.ColumnMetadata;
+import org.apache.cassandra.schema.TableMetadata;
+import org.apache.cassandra.service.StorageService;
+import org.apache.cassandra.utils.ByteBufferUtil;
+import org.apache.commons.codec.digest.DigestUtils;
+
+import java.nio.ByteBuffer;
+import java.util.*;
+import java.util.stream.Collectors;
+
+import static com.datastax.oss.cdc.agent.CommitLogReadHandlerImpl.RowType.DELETE;
+
+/**
+ * Handler that implements {@link CommitLogReadHandler} interface provided by Cassandra source code.
+ *
+ * This handler implementation processes each {@link org.apache.cassandra.db.Mutation} and invokes one of the registered partition handler
+ * for each {@link PartitionUpdate} in the {@link org.apache.cassandra.db.Mutation} (a mutation could have multiple partitions if it is a batch update),
+ * which in turn makes one or more record via the {@link AbstractMutationMaker}.
+ */
+@Slf4j
+public class CommitLogReadHandlerImpl implements CommitLogReadHandler {
+
+    private final AbstractMutationMaker<TableMetadata, Mutation> mutationMaker;
+    private final MutationSender<TableMetadata> mutationSender;
+    private final CommitLogReaderService.Task task;
+    private int processedPosition;
+
+    /** Binds the handler to {@code task} and {@code mutationSender}; the processed position starts at {@code currentPosition}. */
+    CommitLogReadHandlerImpl(MutationSender<TableMetadata> mutationSender,
+                             CommitLogReaderService.Task task, int currentPosition) {
+        this.task = task;
+        this.processedPosition = currentPosition;
+        this.mutationMaker = new MutationMaker();
+        this.mutationSender = mutationSender;
+    }
+
+    public int getProcessedPosition() { // current value of processedPosition, which the constructor seeds with currentPosition
+        return this.processedPosition;
+    }
+
+    /**
+     * Classifies a {@link PartitionUpdate} by the kind of table it targets and the scope of the change it carries.
+     */
+    enum PartitionType {
+        /**
+         * a partition-level deletion where partition key = primary key (no clustering key)
+         */
+        PARTITION_KEY_ROW_DELETION,
+
+        /**
+         * a partition-level deletion where partition key + clustering key = primary key
+         */
+        PARTITION_AND_CLUSTERING_KEY_ROW_DELETION,
+
+        /**
+         * a row-level modification
+         */
+        ROW_LEVEL_MODIFICATION,
+
+        /**
+         * an update on a materialized view
+         */
+        MATERIALIZED_VIEW,
+
+        /**
+         * an update on a secondary index
+         */
+        SECONDARY_INDEX,
+
+        /**
+         * an update on a table that contains counter data type
+         */
+        COUNTER,
+
+        /**
+         * a partition-level modification
+         */
+        PARTITION_LEVEL_MODIFICATION;
+        // Types accepted by isValid; view, index and counter updates are deliberately left out.
+        static final Set<PartitionType> supportedPartitionTypes = EnumSet.of(
+                PARTITION_KEY_ROW_DELETION,
+                PARTITION_AND_CLUSTERING_KEY_ROW_DELETION,
+                ROW_LEVEL_MODIFICATION,
+                PARTITION_LEVEL_MODIFICATION);
+        /** Returns the type of {@code pu}: table kind is checked first, then partition deletion, then whether any unfiltered is present. */
+        public static PartitionType getPartitionType(PartitionUpdate pu) {
+            TableMetadata table = pu.metadata();
+            if (table.isCounter()) {
+                return COUNTER;
+            }
+            if (table.isView()) {
+                return MATERIALIZED_VIEW;
+            }
+            if (table.isIndex()) {
+                return SECONDARY_INDEX;
+            }
+            if (isPartitionDeletion(pu)) {
+                return hasClusteringKeys(pu)
+                        ? PARTITION_AND_CLUSTERING_KEY_ROW_DELETION
+                        : PARTITION_KEY_ROW_DELETION;
+            }
+            // No partition-level deletion: the update is row-level only if it carries at least one unfiltered.
+            boolean hasUnfiltered = pu.unfilteredIterator().hasNext();
+            if (hasUnfiltered) {
+                return ROW_LEVEL_MODIFICATION;
+            }
+            return PARTITION_LEVEL_MODIFICATION;
+        }
+        /** Returns whether {@code type} is one of the {@link #supportedPartitionTypes}. */
+        public static boolean isValid(PartitionType type) {
+            return supportedPartitionTypes.contains(type);
+        }
+        /** Returns whether the table targeted by {@code pu} declares at least one clustering column. */
+        public static boolean hasClusteringKeys(PartitionUpdate pu) {
+            return !pu.metadata().clusteringColumns().isEmpty();
+        }
+        /** Returns whether the partition-level deletion time of {@code pu} is greater than {@link LivenessInfo#NO_TIMESTAMP}. */
+        public static boolean isPartitionDeletion(PartitionUpdate pu) {
+            return pu.partitionLevelDeletion().markedForDeleteAt() > LivenessInfo.NO_TIMESTAMP;
+        }
+    }
+
+    /**
+     * A RowType represents the different kinds of {@link Row}-level modifications in a Cassandra table.
+     */
+    enum RowType {
+        /**
+         * Single-row insert
+         */
+        INSERT,
+
+        /**
+         * Single-row update
+         */
+        UPDATE,
+
+        /**
+         * Single-row delete
+         */
+        DELETE,
+
+        /**
+         * A row-level deletion that deletes a range of keys.
+         * For example: DELETE FROM table WHERE partition_key = 1 AND clustering_key > 0;
+         */
+        RANGE_TOMBSTONE,
+
+        /**
+         * Unknown row-level operation
+         */
+        UNKNOWN;
+        // Types accepted by isValid; range tombstones and unknown operations are left out.
+        static final Set<RowType> supportedRowTypes = EnumSet.of(INSERT, UPDATE, DELETE);
+        /** Returns the type of {@code unfiltered}: a range tombstone marker, a classified row, or UNKNOWN for anything else. */
+        public static RowType getRowType(Unfiltered unfiltered) {
+            if (unfiltered.isRangeTombstoneMarker()) {
+                return RANGE_TOMBSTONE;
+            }
+            if (!unfiltered.isRow()) {
+                return UNKNOWN;
+            }
+            // A plain row: classify it from its deletion and primary key liveness information.
+            return getRowType((Row) unfiltered);
+        }
+        /** Returns the type of {@code row}, testing deletion first, then insert, then update. */
+        public static RowType getRowType(Row row) {
+            if (isDelete(row)) {
+                return DELETE;
+            }
+            if (isInsert(row)) {
+                return INSERT;
+            }
+            if (isUpdate(row)) {
+                return UPDATE;
+            }
+            return UNKNOWN;
+        }
+        /** Returns whether {@code rowType} is one of the {@link #supportedRowTypes}. */
+        public static boolean isValid(RowType rowType) {
+            return supportedRowTypes.contains(rowType);
+        }
+        /** Returns whether the row deletion time is greater than {@link LivenessInfo#NO_TIMESTAMP}. */
+        public static boolean isDelete(Row row) {
+            return row.deletion().time().markedForDeleteAt() > LivenessInfo.NO_TIMESTAMP;
+        }
+        /** Returns whether the primary key liveness timestamp is greater than {@link LivenessInfo#NO_TIMESTAMP}. */
+        public static boolean isInsert(Row row) {
+            return row.primaryKeyLivenessInfo().timestamp() > LivenessInfo.NO_TIMESTAMP;
+        }
+        /** Returns whether the primary key liveness timestamp equals {@link LivenessInfo#NO_TIMESTAMP}. */
+        public static boolean isUpdate(Row row) {
+            return row.primaryKeyLivenessInfo().timestamp() == LivenessInfo.NO_TIMESTAMP;
+        }
+    }
+
+    @Override
+    public void handleMutation(org.apache.cassandra.db.Mutation mutation, int size, int entryLocation, CommitLogDescriptor descriptor) {
+        if (!mutation.trackedByCDC()) {
+            return; // mutations not tracked by CDC are ignored
+        }
+        String md5Digest = null; // MD5 of the whole serialized mutation: identical for all its partition updates, so computed once
+        for (PartitionUpdate pu : mutation.getPartitionUpdates()) {
+            try {
+                if (md5Digest == null) { // computed lazily inside the try so a failure is still wrapped with the partition update context
+                    DataOutputBuffer dataOutputBuffer = new DataOutputBuffer();
+                    org.apache.cassandra.db.Mutation.serializer.serialize(mutation, dataOutputBuffer, descriptor.getMessagingVersion());
+                    md5Digest = DigestUtils.md5Hex(dataOutputBuffer.getData());
+                }
+                process(pu, descriptor.id, entryLocation, md5Digest);
+            } catch (Exception e) {
+                throw new RuntimeException(String.format("Failed to process PartitionUpdate %s at %d:%d for table %s.%s.", pu.toString(), descriptor.id, entryLocation, pu.metadata().keyspace, pu.metadata().name), e);
+            }
+        }
+    }
+
+    @Override
+    public void handleUnrecoverableError(CommitLogReadException exception) {
+        log.error("Unrecoverable error when reading commit log", exception);
+        CdcMetrics.commitLogReadErrors.inc();
+    }
+
+    @Override
+    public boolean shouldSkipSegmentOnError(CommitLogReadException exception) {
+        if (exception.permissible) {
+            log.error("Encountered a permissible exception during log replay", exception);
+        }
+        else {
+            log.error("Encountered a non-permissible exception during log replay", exception);
+        }
+        return false;
+    }
+
+    /**
+     * Processes a partition update when its type is supported (either a single-row partition-level
+     * deletion or a row-level modification); unsupported partition types are logged and skipped. Each valid
+     * partition update is then converted into one or more {@link AbstractMutation}s.
+     */
+    private void process(PartitionUpdate pu, long segment, int position, String md5Digest) {
+        PartitionType partitionType = PartitionType.getPartitionType(pu);
+
+        if (!PartitionType.isValid(partitionType)) {
+            log.warn("Encountered an unsupported partition type {}, skipping...", partitionType);
+            return;
+        }
+
+        switch (partitionType) {
+            case PARTITION_AND_CLUSTERING_KEY_ROW_DELETION:
+            case PARTITION_KEY_ROW_DELETION: {
+                handlePartitionDeletion(pu, segment, position, md5Digest);
+            }
+            break;
+
+            case PARTITION_LEVEL_MODIFICATION: {
+                UnfilteredRowIterator it = pu.unfilteredIterator();
+                Row row = it.staticRow();
+                RowType rowType = RowType.getRowType(row);
+                handleRowModifications(row, rowType, pu, segment, position, md5Digest);
+            }
+            break;
+
+            case ROW_LEVEL_MODIFICATION: {
+                UnfilteredRowIterator it = pu.unfilteredIterator();
+                while (it.hasNext()) {
+                    Unfiltered rowOrRangeTombstone = it.next();
+                    RowType rowType = RowType.getRowType(rowOrRangeTombstone);
+                    if (!RowType.isValid(rowType)) {
+                        log.warn("Encountered an unsupported row type {}, skipping...", rowType);
+                        continue;
+                    }
+                    Row row = (Row) rowOrRangeTombstone;
+
+                    handleRowModifications(row, rowType, pu, segment, position, md5Digest);
+                }
+            }
+            break;
+
+            default:
+                throw new CassandraConnectorSchemaException("Unsupported partition type " + partitionType + " should have been skipped");
+        }
+    }
+
+    /**
+     * Handle a partition-level deletion by converting the Cassandra representation of this event into
+     * a {@link AbstractMutation} delete and handing it to {@link #sendAsync}. Only the partition key columns
+     * are populated; clustering slots of the key array stay null. Failures are logged here and not rethrown.
+     */
+    private void handlePartitionDeletion(PartitionUpdate pu, long segment, int position, String md5Digest) {
+        try {
+            Object[] after = new Object[pu.metadata().partitionKeyColumns().size() + pu.metadata().clusteringColumns().size()];
+            populatePartitionColumns(after, pu);
+            mutationMaker.delete(StorageService.instance.getLocalHostUUID(), segment, position,
+                    pu.maxTimestamp(), after, this::sendAsync, md5Digest, pu.metadata(), pu.partitionKey().getToken().getTokenValue());
+        }
+        catch (Exception e) {
+            log.error("Fail to send delete partition at {}:{}", segment, position, e);
+        }
+    }
+
+    /**
+     * Handle a valid event resulting from a row-level modification by converting the Cassandra representation of
+     * this event into a {@link AbstractMutation} object and sending it to pulsar. A valid event
+     * implies this must be an insert, update, or delete.
+     */
+    private void handleRowModifications(Row row, RowType rowType, PartitionUpdate pu,
+                                        long segment, int position, String md5Digest) {
+        Object[] after = new Object[pu.metadata().partitionKeyColumns().size() + pu.metadata().clusteringColumns().size()];
+        populatePartitionColumns(after, pu);
+        populateClusteringColumns(after, row, pu);
+
+        long ts = rowType == DELETE ? row.deletion().time().markedForDeleteAt() : pu.maxTimestamp();
+        switch (rowType) {
+            case INSERT:
+                mutationMaker.insert(StorageService.instance.getLocalHostUUID(), segment, position,
+                        ts, after, this::sendAsync, md5Digest, pu.metadata(), pu.partitionKey().getToken().getTokenValue());
+                break;
+
+            case UPDATE:
+                mutationMaker.update(StorageService.instance.getLocalHostUUID(), segment, position,
+                        ts, after, this::sendAsync, md5Digest, pu.metadata(), pu.partitionKey().getToken().getTokenValue());
+                break;
+
+            case DELETE:
+                mutationMaker.delete(StorageService.instance.getLocalHostUUID(), segment, position,
+                        ts, after, this::sendAsync, md5Digest, pu.metadata(), pu.partitionKey().getToken().getTokenValue());
+                break;
+
+            default:
+                throw new CassandraConnectorTaskException("Unsupported row type " + rowType + " should have been skipped");
+        }
+    }
+
+    private void populatePartitionColumns(Object[] after, PartitionUpdate pu) {
+        List<Object> partitionKeys = getPartitionKeys(pu);
+        int i = 0;
+        for (ColumnMetadata cd : pu.metadata().partitionKeyColumns()) {
+            try {
+                after[i++] = partitionKeys.get(cd.position());
+            }
+            catch (Exception e) {
+                throw new RuntimeException(String.format("Failed to populate Column %s with Type %s of Table %s in KeySpace %s.",
+                        cd.name.toString(), cd.type.toString(), cd.cfName, cd.ksName), e);
+            }
+        }
+    }
+
+    @SuppressWarnings({"unchecked","rawtypes"})
+    private void populateClusteringColumns(Object[] after, Row row, PartitionUpdate pu) {
+        int i = pu.metadata().partitionKeyColumns().size();
+        for (ColumnMetadata cd : pu.metadata().clusteringColumns().stream().limit(row.clustering().size()).collect(Collectors.toList())) {
+            try {
+                ValueAccessor valueAccessor = row.clustering().accessor();
+                after[i++] = cd.type.compose(valueAccessor.toBuffer(row.clustering().get(cd.position())));
+            }
+            catch (Exception e) {
+                throw new RuntimeException(String.format("Failed to populate Column %s with Type %s of Table %s in KeySpace %s.",
+                        cd.name.toString(), cd.type.toString(), cd.cfName, cd.ksName), e);
+            }
+        }
+    }
+
+    /**
+     * Given a PartitionUpdate, deserialize the partition key byte buffer
+     * into a list of partition key values.
+     */
+    @SuppressWarnings("checkstyle:magicnumber")
+    private static List<Object> getPartitionKeys(PartitionUpdate pu) {
+        List<Object> values = new ArrayList<>(pu.metadata().partitionKeyColumns().size());
+        List<ColumnMetadata> columnDefinitions = pu.metadata().partitionKeyColumns();
+
+        // simple partition key
+        if (columnDefinitions.size() == 1) {
+            ByteBuffer bb = pu.partitionKey().getKey();
+            ColumnSpecification cs = columnDefinitions.get(0);
+            AbstractType<?> type = cs.type;
+            try {
+                Object value = type.compose(bb);
+                values.add(value);
+            }
+            catch (Exception e) {
+                throw new RuntimeException(String.format("Failed to deserialize Column %s with Type %s in Table %s and KeySpace %s.",
+                        cs.name.toString(), cs.type.toString(), cs.cfName, cs.ksName), e);
+            }
+        }
+        else {
+            ByteBuffer keyBytes = pu.partitionKey().getKey().duplicate();
+
+            // 0xFFFF is reserved to encode "static column", skip if it exists at the start
+            if (keyBytes.remaining() >= 2) {
+                int header = ByteBufferUtil.getShortLength(keyBytes, keyBytes.position());
+                if ((header & 0xFFFF) == 0xFFFF) {
+                    ByteBufferUtil.readShortLength(keyBytes);
+                }
+            }
+
+            // the encoding of columns in the partition key byte buffer is
+            // <col><col><col>...
+            // where <col> is:
+            // <length of value><value><end-of-component byte>
+            // <length of value> is a 2 bytes unsigned short (excluding 0xFFFF used to encode "static columns")
+            // <end-of-component byte> should always be 0 for columns (1 for query bounds)
+            // this section reads the bytes for each column and deserialize into objects based on each column type
+            int i = 0;
+            while (keyBytes.remaining() > 0 && i < columnDefinitions.size()) {
+                ColumnSpecification cs = columnDefinitions.get(i);
+                AbstractType<?> type = cs.type;
+                ByteBuffer bb = ByteBufferUtil.readBytesWithShortLength(keyBytes);
+                try {
+                    Object value = type.compose(bb);
+                    values.add(value);
+                }
+                catch (Exception e) {
+                    throw new RuntimeException(String.format("Failed to deserialize Column %s with Type %s in Table %s and KeySpace %s",
+                            cs.name.toString(), cs.type.toString(), cs.cfName, cs.ksName), e);
+                }
+                byte b = keyBytes.get();
+                if (b != 0) {
+                    break;
+                }
+                ++i;
+            }
+        }
+
+        return values;
+    }
+
+    /** Sends a mutation asynchronously; holds one in-flight permit until the send completes or fails. */
+    public void sendAsync(Mutation mutation) {
+        log.debug("Sending mutation={}", mutation);
+        try {
+            task.inflightMessagesSemaphore.acquireUninterruptibly(); // may block
+            this.mutationSender.sendMutationAsync(mutation)
+                    .handle((msgId, t)-> {
+                        if (t == null) {
+                            CdcMetrics.sentMutations.inc();
+                            log.debug("Sent mutation={}", mutation);
+                        } else if (t instanceof CassandraConnectorSchemaException) {
+                            log.error("Invalid primary key schema:", t);
+                            CdcMetrics.skippedMutations.inc();
+                        } else {
+                            CdcMetrics.sentErrors.inc();
+                            log.debug("Sent failed mutation=" + mutation, t);
+                            task.lastException = t;
+                        }
+                        task.inflightMessagesSemaphore.release();
+                        return msgId;
+                    });
+            this.processedPosition = Math.max(this.processedPosition, mutation.getPosition());
+        } catch(Exception e) {
+            // No completion callback was registered, so the permit acquired above must be returned here.
+            task.inflightMessagesSemaphore.release();
+            log.error("Send failed:", e);
+            CdcMetrics.sentErrors.inc();
+        }
+    }
+}
diff --git a/agent-c5/src/main/java/com/datastax/oss/cdc/agent/CommitLogReaderServiceImpl.java b/agent-c5/src/main/java/com/datastax/oss/cdc/agent/CommitLogReaderServiceImpl.java
new file mode 100644
index 00000000..4389b013
--- /dev/null
+++ b/agent-c5/src/main/java/com/datastax/oss/cdc/agent/CommitLogReaderServiceImpl.java
@@ -0,0 +1,89 @@
+/**
+ * Copyright DataStax, Inc 2021.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.datastax.oss.cdc.agent;
+
+import lombok.extern.slf4j.Slf4j;
+import org.apache.cassandra.concurrent.ExecutorFactory;
+import org.apache.cassandra.concurrent.ExecutorPlus;
+import org.apache.cassandra.config.DatabaseDescriptor;
+import org.apache.cassandra.db.commitlog.CommitLogPosition;
+import org.apache.cassandra.db.commitlog.CommitLogReader;
+import org.apache.cassandra.schema.TableMetadata;
+
+import java.io.File;
+import java.util.Optional;
+import java.util.concurrent.*;
+import java.util.function.IntBinaryOperator;
+
+/**
+ * Consume a queue of commitlog files to read mutations.
+ */
+@Slf4j
+public class CommitLogReaderServiceImpl extends CommitLogReaderService {
+
+    public CommitLogReaderServiceImpl(AgentConfig config,
+                                      MutationSender<TableMetadata> mutationSender,
+                                      SegmentOffsetWriter segmentOffsetWriter,
+                                      CommitLogTransfer commitLogTransfer) {
+        super(config, mutationSender, segmentOffsetWriter, commitLogTransfer);
+        int threads = config.cdcConcurrentProcessors == -1 ? DatabaseDescriptor.getFlushWriters() : config.cdcConcurrentProcessors;
+        this.tasksExecutor = ExecutorFactory.Global.executorFactory()
+                .configurePooled("CdcCommitlogProcessor", threads)
+                .withKeepAlive(1, TimeUnit.MINUTES)
+                .withQueueLimit(Integer.MAX_VALUE)
+                .build();
+    }
+
+    @SuppressWarnings("unchecked")
+    public Task createTask(String filename, long segment, int syncPosition, boolean completed) {
+        return new Task(filename, segment, syncPosition, completed) {
+
+            public void run() {
+                log.debug("Starting task={} lasSentPosition={}", this, segmentOffsetWriter.position(Optional.empty(), segment));
+                File file = getFile();
+                try {
+                    int lastSentPosition = -1;
+                    if (!file.exists()) {
+                        log.warn("CL file={} does not exist any more, ignoring", file.getName());
+                        finish(TaskStatus.SUCCESS, -1);
+                        return;
+                    }
+                    long seg = CommitLogUtil.extractTimestamp(file.getName());
+
+                    int currentPosition = segmentOffsetWriter.position(Optional.empty(), seg);
+                    if (syncPosition >= currentPosition) {
+                        CommitLogPosition minPosition = new CommitLogPosition(seg, currentPosition);
+                        CommitLogReadHandlerImpl commitLogReadHandler = new CommitLogReadHandlerImpl((MutationSender<TableMetadata>) mutationSender, this, currentPosition);
+                        CommitLogReader commitLogReader = new CommitLogReader();
+                        commitLogReader.readCommitLogSegment(commitLogReadHandler, new org.apache.cassandra.io.util.File(file), minPosition, org.apache.cassandra.db.commitlog.CommitLogReader.ALL_MUTATIONS, false);
+                        lastSentPosition = commitLogReadHandler.getProcessedPosition();
+                    }
+                    finish(TaskStatus.SUCCESS, lastSentPosition);
+                } catch (Exception e) {
+                    log.warn("Task failed {}", this, e);
+                    finish(TaskStatus.ERROR, -1);
+                } finally {
+                    CdcMetrics.executedTasks.inc();
+                }
+            }
+
+            @Override
+            public File getFile() {
+                return new File(DatabaseDescriptor.getCDCLogLocation(), filename);
+            }
+        };
+    }
+}
diff --git a/agent-c5/src/main/java/com/datastax/oss/cdc/agent/Mutation.java b/agent-c5/src/main/java/com/datastax/oss/cdc/agent/Mutation.java
new file mode 100644
index 00000000..41855db4
--- /dev/null
+++ b/agent-c5/src/main/java/com/datastax/oss/cdc/agent/Mutation.java
@@ -0,0 +1,68 @@
+/**
+ * Copyright DataStax, Inc 2021.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.datastax.oss.cdc.agent;
+
+import org.apache.cassandra.schema.ColumnMetadata;
+import org.apache.cassandra.schema.TableMetadata;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.UUID;
+
+public class Mutation extends AbstractMutation<TableMetadata> {
+
+    public Mutation(UUID nodeId, Long segment, int position, Object[] pkValues, long tsMicro, String md5Digest, TableMetadata t, Object token) {
+        super(nodeId, segment, position, pkValues, tsMicro, md5Digest, t, token);
+    }
+
+    @Override
+    public String key() {
+        return metadata.keyspace + "." + metadata.name;
+    }
+
+    @Override
+    public String name() {
+        return metadata.name;
+    }
+
+    @Override
+    public String keyspace() {
+        return metadata.keyspace;
+    }
+
+    @Override
+    public List<ColumnInfo> primaryKeyColumns() {
+        List<ColumnInfo> columnInfos = new ArrayList<>();
+        for(ColumnMetadata cm :metadata.primaryKeyColumns())
+            columnInfos.add(new ColumnInfo() {
+                @Override
+                public String name() {
+                    return cm.name.toString();
+                }
+
+                @Override
+                public String cql3Type() {
+                    return cm.type.asCQL3Type().toString();
+                }
+
+                @Override
+                public boolean isClusteringKey() {
+                    return cm.isClusteringColumn();
+                }
+            });
+        return columnInfos;
+    }
+}
diff --git a/agent-c5/src/main/java/com/datastax/oss/cdc/agent/MutationMaker.java b/agent-c5/src/main/java/com/datastax/oss/cdc/agent/MutationMaker.java
new file mode 100644
index 00000000..bfb90009
--- /dev/null
+++ b/agent-c5/src/main/java/com/datastax/oss/cdc/agent/MutationMaker.java
@@ -0,0 +1,39 @@
+/**
+ * Copyright DataStax, Inc 2021.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.datastax.oss.cdc.agent;
+
+import com.datastax.oss.cdc.agent.exceptions.CassandraConnectorTaskException;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.cassandra.schema.TableMetadata;
+
+import java.util.UUID;
+
+@Slf4j
+public class MutationMaker extends AbstractMutationMaker<TableMetadata, Mutation> {
+
+    /** Builds a {@link Mutation} from the given fields and hands it to {@code consumer}. */
+    public void createRecord(UUID nodeId, long segment, int position, long tsMicro, Object[] pkValues,
+                             BlockingConsumer<Mutation> consumer, String md5Digest, TableMetadata t, Object token) {
+        Mutation record = new Mutation(nodeId, segment, position, pkValues, tsMicro, md5Digest, t, token);
+        try {
+            consumer.accept(record);
+        } catch (InterruptedException e) {
+            Thread.currentThread().interrupt(); // restore the flag so code up the stack can still observe the interruption
+            log.error("Interruption while enqueuing Change Event {}", record);
+            throw new CassandraConnectorTaskException("Enqueuing has been interrupted: ", e);
+        }
+    }
+}
diff --git a/agent-c5/src/main/java/com/datastax/oss/cdc/agent/PulsarMutationSender.java b/agent-c5/src/main/java/com/datastax/oss/cdc/agent/PulsarMutationSender.java
new file mode 100644
index 00000000..6195ab7c
--- /dev/null
+++ b/agent-c5/src/main/java/com/datastax/oss/cdc/agent/PulsarMutationSender.java
@@ -0,0 +1,161 @@
+/**
+ * Copyright DataStax, Inc 2021.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.datastax.oss.cdc.agent;
+
+import com.datastax.oss.cdc.CqlLogicalTypes;
+import com.google.common.collect.ImmutableMap;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.cassandra.config.DatabaseDescriptor;
+import org.apache.cassandra.cql3.ColumnIdentifier;
+import org.apache.cassandra.db.marshal.AbstractType;
+import org.apache.cassandra.db.marshal.AsciiType;
+import org.apache.cassandra.db.marshal.BooleanType;
+import org.apache.cassandra.db.marshal.ByteType;
+import org.apache.cassandra.db.marshal.BytesType;
+import org.apache.cassandra.db.marshal.DecimalType;
+import org.apache.cassandra.db.marshal.DoubleType;
+import org.apache.cassandra.db.marshal.DurationType;
+import org.apache.cassandra.db.marshal.FloatType;
+import org.apache.cassandra.db.marshal.InetAddressType;
+import org.apache.cassandra.db.marshal.Int32Type;
+import org.apache.cassandra.db.marshal.IntegerType;
+import org.apache.cassandra.db.marshal.LongType;
+import org.apache.cassandra.db.marshal.ReversedType;
+import org.apache.cassandra.db.marshal.ShortType;
+import org.apache.cassandra.db.marshal.SimpleDateType;
+import org.apache.cassandra.db.marshal.TimeType;
+import org.apache.cassandra.db.marshal.TimeUUIDType;
+import org.apache.cassandra.db.marshal.TimestampType;
+import org.apache.cassandra.db.marshal.UTF8Type;
+import org.apache.cassandra.db.marshal.UUIDType;
+import org.apache.cassandra.dht.Murmur3Partitioner;
+import org.apache.cassandra.schema.ColumnMetadata;
+import org.apache.cassandra.schema.TableMetadata;
+import org.apache.cassandra.service.StorageService;
+
+import java.net.InetAddress;
+import java.time.Duration;
+import java.time.Instant;
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.time.ZoneOffset;
+import java.util.Date;
+import java.util.UUID;
+
+@Slf4j
+public class PulsarMutationSender extends AbstractPulsarMutationSender<TableMetadata> {
+
+    private static final ImmutableMap<String, org.apache.avro.Schema> avroSchemaTypes = ImmutableMap.<String, org.apache.avro.Schema>builder()
+            .put(UTF8Type.instance.asCQL3Type().toString(), org.apache.avro.Schema.create(org.apache.avro.Schema.Type.STRING))
+            .put(AsciiType.instance.asCQL3Type().toString(), org.apache.avro.Schema.create(org.apache.avro.Schema.Type.STRING))
+            .put(BooleanType.instance.asCQL3Type().toString(), org.apache.avro.Schema.create(org.apache.avro.Schema.Type.BOOLEAN))
+            .put(BytesType.instance.asCQL3Type().toString(), org.apache.avro.Schema.create(org.apache.avro.Schema.Type.BYTES))
+            .put(ByteType.instance.asCQL3Type().toString(), org.apache.avro.Schema.create(org.apache.avro.Schema.Type.INT))   // INT8 not supported by AVRO
+            .put(ShortType.instance.asCQL3Type().toString(), org.apache.avro.Schema.create(org.apache.avro.Schema.Type.INT))  // INT16 not supported by AVRO
+            .put(Int32Type.instance.asCQL3Type().toString(), org.apache.avro.Schema.create(org.apache.avro.Schema.Type.INT))
+            .put(IntegerType.instance.asCQL3Type().toString(), CqlLogicalTypes.varintType)
+            .put(LongType.instance.asCQL3Type().toString(), org.apache.avro.Schema.create(org.apache.avro.Schema.Type.LONG))
+            .put(FloatType.instance.asCQL3Type().toString(), org.apache.avro.Schema.create(org.apache.avro.Schema.Type.FLOAT))
+            .put(DoubleType.instance.asCQL3Type().toString(), org.apache.avro.Schema.create(org.apache.avro.Schema.Type.DOUBLE))
+            .put(DecimalType.instance.asCQL3Type().toString(), CqlLogicalTypes.decimalType)
+            .put(InetAddressType.instance.asCQL3Type().toString(), org.apache.avro.Schema.create(org.apache.avro.Schema.Type.STRING))
+            .put(TimestampType.instance.asCQL3Type().toString(), CqlLogicalTypes.timestampMillisType)
+            .put(SimpleDateType.instance.asCQL3Type().toString(), CqlLogicalTypes.dateType)
+            .put(TimeType.instance.asCQL3Type().toString(), CqlLogicalTypes.timeMicrosType)
+            .put(DurationType.instance.asCQL3Type().toString(), CqlLogicalTypes.durationType)
+            .put(UUIDType.instance.asCQL3Type().toString(), CqlLogicalTypes.uuidType)
+            .put(TimeUUIDType.instance.asCQL3Type().toString(), CqlLogicalTypes.uuidType)
+            .build();
+
+    public PulsarMutationSender(AgentConfig config) {
+        super(config, DatabaseDescriptor.getPartitionerName().equals(Murmur3Partitioner.class.getName()));
+    }
+
+    public PulsarMutationSender(AgentConfig config, boolean useMurmur3Partitioner) {
+        super(config, useMurmur3Partitioner);
+    }
+
+    @Override
+    public void incSkippedMutations() {
+        CdcMetrics.skippedMutations.inc();
+    }
+
+    @Override
+    public UUID getHostId() {
+        return StorageService.instance.getLocalHostUUID();
+    }
+
+    @Override
+    public org.apache.avro.Schema getNativeSchema(String cql3Type) {
+        return avroSchemaTypes.get(cql3Type);
+    }
+
+    /**
+     * Check the primary key has supported columns.
+     * @param mutation
+     * @return false if the primary key has unsupported CQL columns
+     */
+    @Override
+    public boolean isSupported(final AbstractMutation<TableMetadata> mutation) {
+        if (!pkSchemas.containsKey(mutation.key())) {
+            for (ColumnMetadata cm : mutation.metadata.primaryKeyColumns()) {
+                if (!avroSchemaTypes.containsKey(cm.type.asCQL3Type().toString())) {
+                    log.warn("Unsupported primary key column={}.{}.{} type={}, skipping mutation", cm.ksName, cm.cfName, cm.name, cm.type.asCQL3Type().toString());
+                    return false;
+                }
+            }
+        }
+        return true;
+    }
+
+    @Override
+    public Object cqlToAvro(TableMetadata tableMetadata, String columnName, Object value) {
+        ColumnMetadata columnMetadata = tableMetadata.getColumn(ColumnIdentifier.getInterned(columnName, false));
+        AbstractType<?> type = columnMetadata.type.isReversed() ? ((ReversedType) columnMetadata.type).baseType : columnMetadata.type;
+        log.trace("column name={} type={} class={} value={}",
+                columnMetadata.name, type.getClass().getName(),
+                value != null ? value.getClass().getName() : null, value);
+
+        if (value == null)
+            return null;
+
+        if (type instanceof TimestampType) {
+            if (value instanceof Date)
+                return ((Date) value).getTime();
+            if (value instanceof Instant)
+                return ((Instant) value).toEpochMilli();
+        }
+        if (type instanceof SimpleDateType && value instanceof Integer) {
+            long timeInMillis = Duration.ofDays((Integer) value + Integer.MIN_VALUE).toMillis();
+            Instant instant = Instant.ofEpochMilli(timeInMillis);
+            LocalDate localDate = LocalDateTime.ofInstant(instant, ZoneOffset.UTC).toLocalDate();
+            return (int) localDate.toEpochDay(); // Avro date is an int that stores the number of days from the unix epoch
+        }
+        if (type instanceof TimeType && value instanceof Long) {
+            return ((Long) value / 1000); // Avro time is in microseconds
+        }
+        if (type instanceof InetAddressType) {
+            return ((InetAddress) value).getHostAddress();
+        }
+        if (type instanceof ByteType) {
+            return Byte.toUnsignedInt((byte) value); // AVRO does not support INT8
+        }
+        if (type instanceof ShortType) {
+            return Short.toUnsignedInt((short) value); // AVRO does not support INT16
+        }
+        return value;
+    }
+}
diff --git a/agent-c5/src/test/java/com/datastax/oss/cdc/agent/PulsarDualNodeC5Tests.java b/agent-c5/src/test/java/com/datastax/oss/cdc/agent/PulsarDualNodeC5Tests.java
new file mode 100644
index 00000000..6a9e5e10
--- /dev/null
+++ b/agent-c5/src/test/java/com/datastax/oss/cdc/agent/PulsarDualNodeC5Tests.java
@@ -0,0 +1,37 @@
+/**
+ * Copyright DataStax, Inc 2021.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.datastax.oss.cdc.agent;
+
+import com.datastax.oss.cdc.AgentTestUtil;
+import com.datastax.oss.cdc.PulsarDualNodeTests;
+import com.datastax.testcontainers.cassandra.CassandraContainer;
+import lombok.extern.slf4j.Slf4j;
+import org.testcontainers.containers.Network;
+
+@Slf4j
+public class PulsarDualNodeC5Tests extends PulsarDualNodeTests {
+
+    public PulsarDualNodeC5Tests() {
+        super(AgentTestUtil.Version.C5);
+    }
+
+    @Override
+    public CassandraContainer<?> createCassandraContainer(int nodeIndex, String pulsarServiceUrl, Network testNetwork) {
+        return CassandraContainer.createCassandraContainerWithAgent(
+                PulsarSingleNodeC5Tests.CASSANDRA_IMAGE, testNetwork, nodeIndex, "c5", pulsarServiceUrl);
+    }
+
+}
diff --git a/agent-c5/src/test/java/com/datastax/oss/cdc/agent/PulsarSingleNodeC5Tests.java b/agent-c5/src/test/java/com/datastax/oss/cdc/agent/PulsarSingleNodeC5Tests.java
new file mode 100644
index 00000000..6d52ffba
--- /dev/null
+++ b/agent-c5/src/test/java/com/datastax/oss/cdc/agent/PulsarSingleNodeC5Tests.java
@@ -0,0 +1,50 @@
+/**
+ * Copyright DataStax, Inc 2021.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.datastax.oss.cdc.agent;
+
+import com.datastax.oss.cdc.AgentTestUtil;
+import com.datastax.oss.cdc.PulsarSingleNodeTests;
+import com.datastax.testcontainers.cassandra.CassandraContainer;
+import lombok.extern.slf4j.Slf4j;
+import org.testcontainers.containers.Network;
+import org.testcontainers.utility.DockerImageName;
+
+import java.util.Optional;
+
+@Slf4j
+public class PulsarSingleNodeC5Tests extends PulsarSingleNodeTests {
+    /** Image name: the CASSANDRA_IMAGE env var if set, else "cassandra:" + the cassandraVersion system property (tag is "null" if unset). */
+    public static final DockerImageName CASSANDRA_IMAGE = DockerImageName.parse(
+            Optional.ofNullable(System.getenv("CASSANDRA_IMAGE"))
+                    .orElse("cassandra:" + System.getProperty("cassandraVersion"))
+    ).asCompatibleSubstituteFor("cassandra");
+    /** Passes {@code AgentTestUtil.Version.C5} to the {@code PulsarSingleNodeTests} base class. */
+    public PulsarSingleNodeC5Tests() {
+        super(AgentTestUtil.Version.C5);
+    }
+    /** Builds the container for {@code nodeIndex} from {@link #CASSANDRA_IMAGE}, passing "c5" as the agent argument. */
+    @Override
+    public CassandraContainer<?> createCassandraContainer(int nodeIndex, String pulsarServiceUrl, Network testNetwork) {
+        return CassandraContainer.createCassandraContainerWithAgent(
+                CASSANDRA_IMAGE, testNetwork, nodeIndex, "c5", pulsarServiceUrl);
+    }
+    /** Returns 1024 * 1024; presumably bytes, matching commitlog_segment_size_in_mb: 1 in the test cassandra.yaml - TODO confirm. */
+    @Override
+    public int getSegmentSize() {
+        return 1024 * 1024;
+    }
+
+}
diff --git a/agent-c5/src/test/resources/cassandra/cassandra.yaml b/agent-c5/src/test/resources/cassandra/cassandra.yaml
new file mode 100644
index 00000000..bba869a5
--- /dev/null
+++ b/agent-c5/src/test/resources/cassandra/cassandra.yaml
@@ -0,0 +1,1430 @@
+# Cassandra storage config YAML
+
+# NOTE:
+#   See https://cassandra.apache.org/doc/latest/configuration/ for
+#   full explanations of configuration directives
+# /NOTE
+
+# The name of the cluster. This is mainly used to prevent machines in
+# one logical cluster from joining another.
+cluster_name: 'Test Cluster'
+
+# This defines the number of tokens randomly assigned to this node on the ring
+# The more tokens, relative to other nodes, the larger the proportion of data
+# that this node will store. You probably want all nodes to have the same number
+# of tokens assuming they have equal hardware capability.
+#
+# If you leave this unspecified, Cassandra will use the default of 1 token for legacy compatibility,
+# and will use the initial_token as described below.
+#
+# Specifying initial_token will override this setting on the node's initial start,
+# on subsequent starts, this setting will apply even if initial token is set.
+#
+# See https://cassandra.apache.org/doc/latest/getting_started/production.html#tokens for
+# best practice information about num_tokens.
+#
+num_tokens: 16
+
+# Triggers automatic allocation of num_tokens tokens for this node. The allocation
+# algorithm attempts to choose tokens in a way that optimizes replicated load over
+# the nodes in the datacenter for the replica factor.
+#
+# The load assigned to each node will be close to proportional to its number of
+# vnodes.
+#
+# Only supported with the Murmur3Partitioner.
+
+# Replica factor is determined via the replication strategy used by the specified
+# keyspace.
+# allocate_tokens_for_keyspace: KEYSPACE
+
+# Replica factor is explicitly set, regardless of keyspace or datacenter.
+# This is the replica factor within the datacenter, like NTS.
+allocate_tokens_for_local_replication_factor: 3
+
+# initial_token allows you to specify tokens manually.  While you can use it with
+# vnodes (num_tokens > 1, above) -- in which case you should provide a
+# comma-separated list -- it's primarily used when adding nodes to legacy clusters
+# that do not have vnodes enabled.
+# initial_token:
+
+# May either be "true" or "false" to enable globally
+hinted_handoff_enabled: true
+
+# When hinted_handoff_enabled is true, a black list of data centers that will not
+# perform hinted handoff
+# hinted_handoff_disabled_datacenters:
+#    - DC1
+#    - DC2
+
+# this defines the maximum amount of time a dead host will have hints
+# generated.  After it has been dead this long, new hints for it will not be
+# created until it has been seen alive and gone down again.
+max_hint_window_in_ms: 10800000 # 3 hours
+
+# Maximum throttle in KBs per second, per delivery thread.  This will be
+# reduced proportionally to the number of nodes in the cluster.  (If there
+# are two nodes in the cluster, each delivery thread will use the maximum
+# rate; if there are three, each will throttle to half of the maximum,
+# since we expect two nodes to be delivering hints simultaneously.)
+hinted_handoff_throttle_in_kb: 1024
+
+# Number of threads with which to deliver hints;
+# Consider increasing this number when you have multi-dc deployments, since
+# cross-dc handoff tends to be slower
+max_hints_delivery_threads: 2
+
+# Directory where Cassandra should store hints.
+# If not set, the default directory is $CASSANDRA_HOME/data/hints.
+# hints_directory: /var/lib/cassandra/hints
+
+# How often hints should be flushed from the internal buffers to disk.
+# Will *not* trigger fsync.
+hints_flush_period_in_ms: 10000
+
+# Maximum size for a single hints file, in megabytes.
+max_hints_file_size_in_mb: 128
+
+# Compression to apply to the hint files. If omitted, hints files
+# will be written uncompressed. LZ4, Snappy, and Deflate compressors
+# are supported.
+#hints_compression:
+#   - class_name: LZ4Compressor
+#     parameters:
+#         -
+
+# Maximum throttle in KBs per second, total. This will be
+# reduced proportionally to the number of nodes in the cluster.
+batchlog_replay_throttle_in_kb: 1024
+
+# Authentication backend, implementing IAuthenticator; used to identify users
+# Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthenticator,
+# PasswordAuthenticator}.
+#
+# - AllowAllAuthenticator performs no checks - set it to disable authentication.
+# - PasswordAuthenticator relies on username/password pairs to authenticate
+#   users. It keeps usernames and hashed passwords in system_auth.roles table.
+#   Please increase system_auth keyspace replication factor if you use this authenticator.
+#   If using PasswordAuthenticator, CassandraRoleManager must also be used (see below)
+authenticator: AllowAllAuthenticator
+
+# Authorization backend, implementing IAuthorizer; used to limit access/provide permissions
+# Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthorizer,
+# CassandraAuthorizer}.
+#
+# - AllowAllAuthorizer allows any action to any user - set it to disable authorization.
+# - CassandraAuthorizer stores permissions in system_auth.role_permissions table. Please
+#   increase system_auth keyspace replication factor if you use this authorizer.
+authorizer: AllowAllAuthorizer
+
+# Part of the Authentication & Authorization backend, implementing IRoleManager; used
+# to maintain grants and memberships between roles.
+# Out of the box, Cassandra provides org.apache.cassandra.auth.CassandraRoleManager,
+# which stores role information in the system_auth keyspace. Most functions of the
+# IRoleManager require an authenticated login, so unless the configured IAuthenticator
+# actually implements authentication, most of this functionality will be unavailable.
+#
+# - CassandraRoleManager stores role data in the system_auth keyspace. Please
+#   increase system_auth keyspace replication factor if you use this role manager.
+role_manager: CassandraRoleManager
+
+# Network authorization backend, implementing INetworkAuthorizer; used to restrict user
+# access to certain DCs
+# Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllNetworkAuthorizer,
+# CassandraNetworkAuthorizer}.
+#
+# - AllowAllNetworkAuthorizer allows access to any DC to any user - set it to disable authorization.
+# - CassandraNetworkAuthorizer stores permissions in system_auth.network_permissions table. Please
+#   increase system_auth keyspace replication factor if you use this authorizer.
+network_authorizer: AllowAllNetworkAuthorizer
+
+# Validity period for roles cache (fetching granted roles can be an expensive
+# operation depending on the role manager, CassandraRoleManager is one example)
+# Granted roles are cached for authenticated sessions in AuthenticatedUser and
+# after the period specified here, become eligible for (async) reload.
+# Defaults to 2000, set to 0 to disable caching entirely.
+# Will be disabled automatically for AllowAllAuthenticator.
+roles_validity_in_ms: 2000
+
+# Refresh interval for roles cache (if enabled).
+# After this interval, cache entries become eligible for refresh. Upon next
+# access, an async reload is scheduled and the old value returned until it
+# completes. If roles_validity_in_ms is non-zero, then this must be
+# also.
+# Defaults to the same value as roles_validity_in_ms.
+# roles_update_interval_in_ms: 2000
+
+# Validity period for permissions cache (fetching permissions can be an
+# expensive operation depending on the authorizer, CassandraAuthorizer is
+# one example). Defaults to 2000, set to 0 to disable.
+# Will be disabled automatically for AllowAllAuthorizer.
+permissions_validity_in_ms: 2000
+
+# Refresh interval for permissions cache (if enabled).
+# After this interval, cache entries become eligible for refresh. Upon next
+# access, an async reload is scheduled and the old value returned until it
+# completes. If permissions_validity_in_ms is non-zero, then this must be
+# also.
+# Defaults to the same value as permissions_validity_in_ms.
+# permissions_update_interval_in_ms: 2000
+
+# Validity period for credentials cache. This cache is tightly coupled to
+# the provided PasswordAuthenticator implementation of IAuthenticator. If
+# another IAuthenticator implementation is configured, this cache will not
+# be automatically used and so the following settings will have no effect.
+# Please note, credentials are cached in their encrypted form, so while
+# activating this cache may reduce the number of queries made to the
+# underlying table, it may not  bring a significant reduction in the
+# latency of individual authentication attempts.
+# Defaults to 2000, set to 0 to disable credentials caching.
+credentials_validity_in_ms: 2000
+
+# Refresh interval for credentials cache (if enabled).
+# After this interval, cache entries become eligible for refresh. Upon next
+# access, an async reload is scheduled and the old value returned until it
+# completes. If credentials_validity_in_ms is non-zero, then this must be
+# also.
+# Defaults to the same value as credentials_validity_in_ms.
+# credentials_update_interval_in_ms: 2000
+
+# The partitioner is responsible for distributing groups of rows (by
+# partition key) across nodes in the cluster. The partitioner can NOT be
+# changed without reloading all data.  If you are adding nodes or upgrading,
+# you should set this to the same partitioner that you are currently using.
+#
+# The default partitioner is the Murmur3Partitioner. Older partitioners
+# such as the RandomPartitioner, ByteOrderedPartitioner, and
+# OrderPreservingPartitioner have been included for backward compatibility only.
+# For new clusters, you should NOT change this value.
+#
+partitioner: org.apache.cassandra.dht.Murmur3Partitioner
+
+# Directories where Cassandra should store data on disk. If multiple
+# directories are specified, Cassandra will spread data evenly across
+# them by partitioning the token ranges.
+# If not set, the default directory is $CASSANDRA_HOME/data/data.
+# data_file_directories:
+#     - /var/lib/cassandra/data
+
+# commit log.  when running on magnetic HDD, this should be a
+# separate spindle than the data directories.
+# If not set, the default directory is $CASSANDRA_HOME/data/commitlog.
+# commitlog_directory: /var/lib/cassandra/commitlog
+
+# Enable / disable CDC functionality on a per-node basis. This modifies the logic used
+# for write path allocation rejection (standard: never reject. cdc: reject Mutation
+# containing a CDC-enabled table if at space limit in cdc_raw_directory).
+cdc_enabled: true
+
+# CommitLogSegments are moved to this directory on flush if cdc_enabled: true and the
+# segment contains mutations for a CDC-enabled table. This should be placed on a
+# separate spindle than the data directories. If not set, the default directory is
+# $CASSANDRA_HOME/data/cdc_raw.
+cdc_raw_directory: /var/lib/cassandra/cdc_raw
+
+# Policy for data disk failures:
+#
+# die
+#   shut down gossip and client transports and kill the JVM for any fs errors or
+#   single-sstable errors, so the node can be replaced.
+#
+# stop_paranoid
+#   shut down gossip and client transports even for single-sstable errors,
+#   kill the JVM for errors during startup.
+#
+# stop
+#   shut down gossip and client transports, leaving the node effectively dead, but
+#   can still be inspected via JMX, kill the JVM for errors during startup.
+#
+# best_effort
+#    stop using the failed disk and respond to requests based on
+#    remaining available sstables.  This means you WILL see obsolete
+#    data at CL.ONE!
+#
+# ignore
+#    ignore fatal errors and let requests fail, as in pre-1.2 Cassandra
+disk_failure_policy: stop
+
+# Policy for commit disk failures:
+#
+# die
+#   shut down the node and kill the JVM, so the node can be replaced.
+#
+# stop
+#   shut down the node, leaving the node effectively dead, but
+#   can still be inspected via JMX.
+#
+# stop_commit
+#   shutdown the commit log, letting writes collect but
+#   continuing to service reads, as in pre-2.0.5 Cassandra
+#
+# ignore
+#   ignore fatal errors and let the batches fail
+commit_failure_policy: stop
+
+# Maximum size of the native protocol prepared statement cache
+#
+# Valid values are either "auto" (omitting the value) or a value greater 0.
+#
+# Note that specifying a too large value will result in long running GCs and possibly
+# out-of-memory errors. Keep the value at a small fraction of the heap.
+#
+# If you constantly see "prepared statements discarded in the last minute because
+# cache limit reached" messages, the first step is to investigate the root cause
+# of these messages and check whether prepared statements are used correctly -
+# i.e. use bind markers for variable parts.
+#
+# Only change the default value if you really have more prepared statements than
+# fit in the cache. In most cases it is not necessary to change this value.
+# Constantly re-preparing statements is a performance penalty.
+#
+# Default value ("auto") is 1/256th of the heap or 10MB, whichever is greater
+prepared_statements_cache_size_mb:
+
+# Maximum size of the key cache in memory.
+#
+# Each key cache hit saves 1 seek and each row cache hit saves 2 seeks at the
+# minimum, sometimes more. The key cache is fairly tiny for the amount of
+# time it saves, so it's worthwhile to use it at large numbers.
+# The row cache saves even more time, but must contain the entire row,
+# so it is extremely space-intensive. It's best to only use the
+# row cache if you have hot rows or static rows.
+#
+# NOTE: if you reduce the size, you may not get your hottest keys loaded on startup.
+#
+# Default value is empty to make it "auto" (min(5% of Heap (in MB), 100MB)). Set to 0 to disable key cache.
+key_cache_size_in_mb:
+
+# Duration in seconds after which Cassandra should
+# save the key cache. Caches are saved to saved_caches_directory as
+# specified in this configuration file.
+#
+# Saved caches greatly improve cold-start speeds, and is relatively cheap in
+# terms of I/O for the key cache. Row cache saving is much more expensive and
+# has limited use.
+#
+# Default is 14400 or 4 hours.
+key_cache_save_period: 14400
+
+# Number of keys from the key cache to save
+# Disabled by default, meaning all keys are going to be saved
+# key_cache_keys_to_save: 100
+
+# Row cache implementation class name. Available implementations:
+#
+# org.apache.cassandra.cache.OHCProvider
+#   Fully off-heap row cache implementation (default).
+#
+# org.apache.cassandra.cache.SerializingCacheProvider
+#   This is the row cache implementation available
+#   in previous releases of Cassandra.
+# row_cache_class_name: org.apache.cassandra.cache.OHCProvider
+
+# Maximum size of the row cache in memory.
+# Please note that OHC cache implementation requires some additional off-heap memory to manage
+# the map structures and some in-flight memory during operations before/after cache entries can be
+# accounted against the cache capacity. This overhead is usually small compared to the whole capacity.
+# Do not specify more memory than the system can afford in the worst usual situation and leave some
+# headroom for OS block level cache. Never allow your system to swap.
+#
+# Default value is 0, to disable row caching.
+row_cache_size_in_mb: 0
+
+# Duration in seconds after which Cassandra should save the row cache.
+# Caches are saved to saved_caches_directory as specified in this configuration file.
+#
+# Saved caches greatly improve cold-start speeds, and is relatively cheap in
+# terms of I/O for the key cache. Row cache saving is much more expensive and
+# has limited use.
+#
+# Default is 0 to disable saving the row cache.
+row_cache_save_period: 0
+
+# Number of keys from the row cache to save.
+# Specify 0 (which is the default), meaning all keys are going to be saved
+# row_cache_keys_to_save: 100
+
+# Maximum size of the counter cache in memory.
+#
+# Counter cache helps to reduce counter locks' contention for hot counter cells.
+# In case of RF = 1 a counter cache hit will cause Cassandra to skip the read before
+# write entirely. With RF > 1 a counter cache hit will still help to reduce the duration
+# of the lock hold, helping with hot counter cell updates, but will not allow skipping
+# the read entirely. Only the local (clock, count) tuple of a counter cell is kept
+# in memory, not the whole counter, so it's relatively cheap.
+#
+# NOTE: if you reduce the size, you may not get your hottest keys loaded on startup.
+#
+# Default value is empty to make it "auto" (min(2.5% of Heap (in MB), 50MB)). Set to 0 to disable counter cache.
+# NOTE: if you perform counter deletes and rely on low gcgs, you should disable the counter cache.
+counter_cache_size_in_mb:
+
+# Duration in seconds after which Cassandra should
+# save the counter cache (keys only). Caches are saved to saved_caches_directory as
+# specified in this configuration file.
+#
+# Default is 7200 or 2 hours.
+counter_cache_save_period: 7200
+
+# Number of keys from the counter cache to save
+# Disabled by default, meaning all keys are going to be saved
+# counter_cache_keys_to_save: 100
+
+# saved caches
+# If not set, the default directory is $CASSANDRA_HOME/data/saved_caches.
+# saved_caches_directory: /var/lib/cassandra/saved_caches
+
+# commitlog_sync may be either "periodic", "group", or "batch."
+#
+# When in batch mode, Cassandra won't ack writes until the commit log
+# has been flushed to disk.  Each incoming write will trigger the flush task.
+# commitlog_sync_batch_window_in_ms is a deprecated value. Previously it had
+# almost no value, and is being removed.
+#
+# commitlog_sync_batch_window_in_ms: 2
+#
+# group mode is similar to batch mode, where Cassandra will not ack writes
+# until the commit log has been flushed to disk. The difference is group
+# mode will wait up to commitlog_sync_group_window_in_ms between flushes.
+#
+# commitlog_sync_group_window_in_ms: 1000
+#
+# the default option is "periodic" where writes may be acked immediately
+# and the CommitLog is simply synced every commitlog_sync_period_in_ms
+# milliseconds.
+commitlog_sync: periodic
+commitlog_sync_period_in_ms: 10000
+
+# When in periodic commitlog mode, the number of milliseconds to block writes
+# while waiting for a slow disk flush to complete.
+# periodic_commitlog_sync_lag_block_in_ms:
+
+# The size of the individual commitlog file segments.  A commitlog
+# segment may be archived, deleted, or recycled once all the data
+# in it (potentially from each columnfamily in the system) has been
+# flushed to sstables.
+#
+# The default size is 32, which is almost always fine, but if you are
+# archiving commitlog segments (see commitlog_archiving.properties),
+# then you probably want a finer granularity of archiving; 8 or 16 MB
+# is reasonable.
+# Max mutation size is also configurable via max_mutation_size_in_kb setting in
+# cassandra.yaml. The default is half of commitlog_segment_size_in_mb * 1024.
+# This should be positive and less than 2048.
+#
+# NOTE: If max_mutation_size_in_kb is set explicitly then commitlog_segment_size_in_mb must
+# be set to at least twice the size of max_mutation_size_in_kb / 1024
+#
+commitlog_segment_size_in_mb: 1
+
+# Compression to apply to the commit log. If omitted, the commit log
+# will be written uncompressed.  LZ4, Snappy, and Deflate compressors
+# are supported.
+# commitlog_compression:
+#   - class_name: LZ4Compressor
+#     parameters:
+#         -
+
+# Compression to apply to SSTables as they flush for compressed tables.
+# Note that tables without compression enabled do not respect this flag.
+#
+# As high ratio compressors like LZ4HC, Zstd, and Deflate can potentially
+# block flushes for too long, the default is to flush with a known fast
+# compressor in those cases. Options are:
+#
+# none : Flush without compressing blocks but while still doing checksums.
+# fast : Flush with a fast compressor. If the table is already using a
+#        fast compressor that compressor is used.
+# table: Always flush with the same compressor that the table uses. This
+#        was the pre 4.0 behavior.
+#
+# flush_compression: fast
+
+# any class that implements the SeedProvider interface and has a
+# constructor that takes a Map<String, String> of parameters will do.
+seed_provider:
+    # Addresses of hosts that are deemed contact points.
+    # Cassandra nodes use this list of hosts to find each other and learn
+    # the topology of the ring.  You must change this if you are running
+    # multiple nodes!
+    - class_name: org.apache.cassandra.locator.SimpleSeedProvider
+      parameters:
+          # seeds is actually a comma-delimited list of addresses.
+          # Ex: "<ip1>,<ip2>,<ip3>"
+          - seeds: "127.0.0.1:7000"
+
+# For workloads with more data than can fit in memory, Cassandra's
+# bottleneck will be reads that need to fetch data from
+# disk. "concurrent_reads" should be set to (16 * number_of_drives) in
+# order to allow the operations to enqueue low enough in the stack
+# that the OS and drives can reorder them. Same applies to
+# "concurrent_counter_writes", since counter writes read the current
+# values before incrementing and writing them back.
+#
+# On the other hand, since writes are almost never IO bound, the ideal
+# number of "concurrent_writes" is dependent on the number of cores in
+# your system; (8 * number_of_cores) is a good rule of thumb.
+concurrent_reads: 32
+concurrent_writes: 32
+concurrent_counter_writes: 32
+
+# Materialized view writes involve a read, so this should
+# be limited by the lesser of concurrent reads or concurrent writes.
+concurrent_materialized_view_writes: 32
+
+# Maximum memory to use for inter-node and client-server networking buffers.
+#
+# Defaults to the smaller of 1/16 of heap or 128MB. This pool is allocated off-heap,
+# so is in addition to the memory allocated for heap. The cache also has on-heap
+# overhead which is roughly 128 bytes per chunk (i.e. 0.2% of the reserved size
+# if the default 64k chunk size is used).
+# Memory is only allocated when needed.
+# networking_cache_size_in_mb: 128
+
+# Enable the sstable chunk cache.  The chunk cache will store recently accessed
+# sections of the sstable in-memory as uncompressed buffers.
+# file_cache_enabled: false
+
+# Maximum memory to use for sstable chunk cache and buffer pooling.
+# 32MB of this are reserved for pooling buffers, the rest is used for chunk cache
+# that holds uncompressed sstable chunks.
+# Defaults to the smaller of 1/4 of heap or 512MB. This pool is allocated off-heap,
+# so is in addition to the memory allocated for heap. The cache also has on-heap
+# overhead which is roughly 128 bytes per chunk (i.e. 0.2% of the reserved size
+# if the default 64k chunk size is used).
+# Memory is only allocated when needed.
+# file_cache_size_in_mb: 512
+
+# Flag indicating whether to allocate on or off heap when the sstable buffer
+# pool is exhausted, that is when it has exceeded the maximum memory
+# file_cache_size_in_mb, beyond which it will not cache buffers but allocate on request.
+
+# buffer_pool_use_heap_if_exhausted: true
+
+# The strategy for optimizing disk read
+# Possible values are:
+# ssd (for solid state disks, the default)
+# spinning (for spinning disks)
+# disk_optimization_strategy: ssd
+
+# Total permitted memory to use for memtables. Cassandra will stop
+# accepting writes when the limit is exceeded until a flush completes,
+# and will trigger a flush based on memtable_cleanup_threshold
+# If omitted, Cassandra will set both to 1/4 the size of the heap.
+# memtable_heap_space_in_mb: 2048
+# memtable_offheap_space_in_mb: 2048
+
+# memtable_cleanup_threshold is deprecated. The default calculation
+# is the only reasonable choice. See the comments on  memtable_flush_writers
+# for more information.
+#
+# Ratio of occupied non-flushing memtable size to total permitted size
+# that will trigger a flush of the largest memtable. Larger mct will
+# mean larger flushes and hence less compaction, but also less concurrent
+# flush activity which can make it difficult to keep your disks fed
+# under heavy write load.
+#
+# memtable_cleanup_threshold defaults to 1 / (memtable_flush_writers + 1)
+# memtable_cleanup_threshold: 0.11
+
+# Specify the way Cassandra allocates and manages memtable memory.
+# Options are:
+#
+# heap_buffers
+#   on heap nio buffers
+#
+# offheap_buffers
+#   off heap (direct) nio buffers
+#
+# offheap_objects
+#    off heap objects
+memtable_allocation_type: heap_buffers
+
+# Limit memory usage for Merkle tree calculations during repairs. The default
+# is 1/16th of the available heap. The main tradeoff is that smaller trees
+# have less resolution, which can lead to over-streaming data. If you see heap
+# pressure during repairs, consider lowering this, but you cannot go below
+# one megabyte. If you see lots of over-streaming, consider raising
+# this or using subrange repair.
+#
+# For more details see https://issues.apache.org/jira/browse/CASSANDRA-14096.
+#
+# repair_session_space_in_mb:
+
+# Total space to use for commit logs on disk.
+#
+# If space gets above this value, Cassandra will flush every dirty CF
+# in the oldest segment and remove it.  So a small total commitlog space
+# will tend to cause more flush activity on less-active columnfamilies.
+#
+# The default value is the smaller of 8192, and 1/4 of the total space
+# of the commitlog volume.
+#
+# commitlog_total_space_in_mb: 8192
+
+# This sets the number of memtable flush writer threads per disk
+# as well as the total number of memtables that can be flushed concurrently.
+# These are generally a combination of compute and IO bound.
+#
+# Memtable flushing is more CPU efficient than memtable ingest and a single thread
+# can keep up with the ingest rate of a whole server on a single fast disk
+# until it temporarily becomes IO bound under contention typically with compaction.
+# At that point you need multiple flush threads. At some point in the future
+# it may become CPU bound all the time.
+#
+# You can tell if flushing is falling behind using the MemtablePool.BlockedOnAllocation
+# metric which should be 0, but will be non-zero if threads are blocked waiting on flushing
+# to free memory.
+#
+# memtable_flush_writers defaults to two for a single data directory.
+# This means that two  memtables can be flushed concurrently to the single data directory.
+# If you have multiple data directories the default is one memtable flushing at a time
+# but the flush will use a thread per data directory so you will get two or more writers.
+#
+# Two is generally enough to flush on a fast disk [array] mounted as a single data directory.
+# Adding more flush writers will result in smaller more frequent flushes that introduce more
+# compaction overhead.
+#
+# There is a direct tradeoff between number of memtables that can be flushed concurrently
+# and flush size and frequency. More is not better; you just need enough flush writers
+# to never stall waiting for flushing to free memory.
+#
+#memtable_flush_writers: 2
+
+# Total space to use for change-data-capture logs on disk.
+#
+# If space gets above this value, Cassandra will throw WriteTimeoutException
+# on Mutations including tables with CDC enabled. A CDCCompactor is responsible
+# for parsing the raw CDC logs and deleting them when parsing is completed.
+#
+# The default value is the min of 4096 mb and 1/8th of the total space
+# of the drive where cdc_raw_directory resides.
+# cdc_total_space_in_mb: 4096
+
+# When we hit our cdc_raw limit and the CDCCompactor is either running behind
+# or experiencing backpressure, we check at the following interval to see if any
+# new space for cdc-tracked tables has been made available. Default to 250ms
+# cdc_free_space_check_interval_ms: 250
+
+# A fixed memory pool size in MB for SSTable index summaries. If left
+# empty, this will default to 5% of the heap size. If the memory usage of
+# all index summaries exceeds this limit, SSTables with low read rates will
+# shrink their index summaries in order to meet this limit.  However, this
+# is a best-effort process. In extreme conditions Cassandra may need to use
+# more than this amount of memory.
+index_summary_capacity_in_mb:
+
+# How frequently index summaries should be resampled.  This is done
+# periodically to redistribute memory from the fixed-size pool to sstables
+# proportional to their recent read rates.  Setting to -1 will disable this
+# process, leaving existing index summaries at their current sampling level.
+index_summary_resize_interval_in_minutes: 60
+
+# Whether to, when doing sequential writing, fsync() at intervals in
+# order to force the operating system to flush the dirty
+# buffers. Enable this to avoid sudden dirty buffer flushing from
+# impacting read latencies. Almost always a good idea on SSDs; not
+# necessarily on platters.
+trickle_fsync: false
+trickle_fsync_interval_in_kb: 10240
+
+# TCP port, for commands and data
+# For security reasons, you should not expose this port to the internet.  Firewall it if needed.
+storage_port: 7000
+
+# SSL port, for legacy encrypted communication. This property is unused unless enabled in
+# server_encryption_options (see below). As of cassandra 4.0, this property is deprecated
+# as a single port can be used for either/both secure and insecure connections.
+# For security reasons, you should not expose this port to the internet. Firewall it if needed.
+ssl_storage_port: 7001
+
+# Address or interface to bind to and tell other Cassandra nodes to connect to.
+# You _must_ change this if you want multiple nodes to be able to communicate!
+#
+# Set listen_address OR listen_interface, not both.
+#
+# Leaving it blank leaves it up to InetAddress.getLocalHost(). This
+# will always do the Right Thing _if_ the node is properly configured
+# (hostname, name resolution, etc), and the Right Thing is to use the
+# address associated with the hostname (it might not be). If unresolvable
+# it will fall back to InetAddress.getLoopbackAddress(), which is wrong for production systems.
+#
+# Setting listen_address to 0.0.0.0 is always wrong.
+#
+listen_address: localhost
+
+# Set listen_address OR listen_interface, not both. Interfaces must correspond
+# to a single address, IP aliasing is not supported.
+# listen_interface: eth0
+
+# If you choose to specify the interface by name and the interface has an ipv4 and an ipv6 address
+# you can specify which should be chosen using listen_interface_prefer_ipv6. If false the first ipv4
+# address will be used. If true the first ipv6 address will be used. Defaults to false preferring
+# ipv4. If there is only one address it will be selected regardless of ipv4/ipv6.
+# listen_interface_prefer_ipv6: false
+
+# Address to broadcast to other Cassandra nodes
+# Leaving this blank will set it to the same value as listen_address
+# broadcast_address: 1.2.3.4
+
+# When using multiple physical network interfaces, set this
+# to true to listen on broadcast_address in addition to
+# the listen_address, allowing nodes to communicate in both
+# interfaces.
+# Ignore this property if the network configuration automatically
+# routes  between the public and private networks such as EC2.
+# listen_on_broadcast_address: false
+
+# Internode authentication backend, implementing IInternodeAuthenticator;
+# used to allow/disallow connections from peer nodes.
+# internode_authenticator: org.apache.cassandra.auth.AllowAllInternodeAuthenticator
+
+# Whether to start the native transport server.
+# The address on which the native transport is bound is defined by rpc_address.
+start_native_transport: true
+# port for the CQL native transport to listen for clients on
+# For security reasons, you should not expose this port to the internet.  Firewall it if needed.
+native_transport_port: 9042
+# Enabling native transport encryption in client_encryption_options allows you to either use
+# encryption for the standard port or to use a dedicated, additional port along with the unencrypted
+# standard native_transport_port.
+# Enabling client encryption and keeping native_transport_port_ssl disabled will use encryption
+# for native_transport_port. Setting native_transport_port_ssl to a different value
+# from native_transport_port will use encryption for native_transport_port_ssl while
+# keeping native_transport_port unencrypted.
+# native_transport_port_ssl: 9142
+# The maximum threads for handling requests (note that idle threads are stopped
+# after 30 seconds so there is no corresponding minimum setting).
+# native_transport_max_threads: 128
+#
+# The maximum size of allowed frame. Frame (requests) larger than this will
+# be rejected as invalid. The default is 256MB. If you're changing this parameter,
+# you may want to adjust max_value_size_in_mb accordingly. This should be positive and less than 2048.
+# native_transport_max_frame_size_in_mb: 256
+
+# The maximum number of concurrent client connections.
+# The default is -1, which means unlimited.
+# native_transport_max_concurrent_connections: -1
+
+# The maximum number of concurrent client connections per source ip.
+# The default is -1, which means unlimited.
+# native_transport_max_concurrent_connections_per_ip: -1
+
+# Controls whether Cassandra honors older, yet currently supported, protocol versions.
+# The default is true, which means all supported protocols will be honored.
+native_transport_allow_older_protocols: true
+
+# Controls when idle client connections are closed. Idle connections are ones that had neither reads
+# nor writes for a time period.
+#
+# Clients may implement heartbeats by sending OPTIONS native protocol message after a timeout, which
+# will reset idle timeout timer on the server side. To close idle client connections, corresponding
+# values for heartbeat intervals have to be set on the client side.
+#
+# Idle connection timeouts are disabled by default.
+# native_transport_idle_timeout_in_ms: 60000
+
+# The address or interface to bind the native transport server to.
+#
+# Set rpc_address OR rpc_interface, not both.
+#
+# Leaving rpc_address blank has the same effect as on listen_address
+# (i.e. it will be based on the configured hostname of the node).
+#
+# Note that unlike listen_address, you can specify 0.0.0.0, but you must also
+# set broadcast_rpc_address to a value other than 0.0.0.0.
+#
+# For security reasons, you should not expose this port to the internet.  Firewall it if needed.
+rpc_address: localhost
+
+# Set rpc_address OR rpc_interface, not both. Interfaces must correspond
+# to a single address, IP aliasing is not supported.
+# rpc_interface: eth1
+
+# If you choose to specify the interface by name and the interface has an ipv4 and an ipv6 address
+# you can specify which should be chosen using rpc_interface_prefer_ipv6. If false the first ipv4
+# address will be used. If true the first ipv6 address will be used. Defaults to false preferring
+# ipv4. If there is only one address it will be selected regardless of ipv4/ipv6.
+# rpc_interface_prefer_ipv6: false
+
+# RPC address to broadcast to drivers and other Cassandra nodes. This cannot
+# be set to 0.0.0.0. If left blank, this will be set to the value of
+# rpc_address. If rpc_address is set to 0.0.0.0, broadcast_rpc_address must
+# be set.
+# broadcast_rpc_address: 1.2.3.4
+
+# enable or disable keepalive on rpc/native connections
+rpc_keepalive: true
+
+# Uncomment to set socket buffer size for internode communication
+# Note that when setting this, the buffer size is limited by net.core.wmem_max
+# and when not setting it it is defined by net.ipv4.tcp_wmem
+# See also:
+# /proc/sys/net/core/wmem_max
+# /proc/sys/net/core/rmem_max
+# /proc/sys/net/ipv4/tcp_wmem
+# /proc/sys/net/ipv4/tcp_rmem
+# and 'man tcp'
+# internode_send_buff_size_in_bytes:
+
+# Uncomment to set socket buffer size for internode communication
+# Note that when setting this, the buffer size is limited by net.core.rmem_max
+# and when not setting it it is defined by net.ipv4.tcp_rmem
+# internode_recv_buff_size_in_bytes:
+
+# Set to true to have Cassandra create a hard link to each sstable
+# flushed or streamed locally in a backups/ subdirectory of the
+# keyspace data.  Removing these links is the operator's
+# responsibility.
+incremental_backups: false
+
+# Whether or not to take a snapshot before each compaction.  Be
+# careful using this option, since Cassandra won't clean up the
+# snapshots for you.  Mostly useful if you're paranoid when there
+# is a data format change.
+snapshot_before_compaction: false
+
+# Whether or not a snapshot is taken of the data before keyspace truncation
+# or dropping of column families. The STRONGLY advised default of true
+# should be used to provide data safety. If you set this flag to false, you will
+# lose data on truncation or drop.
+auto_snapshot: true
+
+# The act of creating or clearing a snapshot involves creating or removing
+# potentially tens of thousands of links, which can cause significant performance
+# impact, especially on consumer grade SSDs. A non-zero value here can
+# be used to throttle these links to avoid negative performance impact of
+# taking and clearing snapshots
+snapshot_links_per_second: 0
+
+# Granularity of the collation index of rows within a partition.
+# Increase if your rows are large, or if you have a very large
+# number of rows per partition.  The competing goals are these:
+#
+# - a smaller granularity means more index entries are generated
+#   and looking up rows within the partition by collation column
+#   is faster
+# - but, Cassandra will keep the collation index in memory for hot
+#   rows (as part of the key cache), so a larger granularity means
+#   you can cache more hot rows
+column_index_size_in_kb: 64
+
+# Per sstable indexed key cache entries (the collation index in memory
+# mentioned above) exceeding this size will not be held on heap.
+# This means that only partition information is held on heap and the
+# index entries are read from disk.
+#
+# Note that this size refers to the size of the
+# serialized index information and not the size of the partition.
+column_index_cache_size_in_kb: 2
+
+# Number of simultaneous compactions to allow, NOT including
+# validation "compactions" for anti-entropy repair.  Simultaneous
+# compactions can help preserve read performance in a mixed read/write
+# workload, by mitigating the tendency of small sstables to accumulate
+# during a single long running compaction. The default is usually
+# fine and if you experience problems with compaction running too
+# slowly or too fast, you should look at
+# compaction_throughput_mb_per_sec first.
+#
+# concurrent_compactors defaults to the smaller of (number of disks,
+# number of cores), with a minimum of 2 and a maximum of 8.
+#
+# If your data directories are backed by SSD, you should increase this
+# to the number of cores.
+#concurrent_compactors: 1
+
+# Number of simultaneous repair validations to allow. If not set or set to
+# a value less than 1, it defaults to the value of concurrent_compactors.
+# To set a value greater than concurrent_compactors at startup, the system
+# property cassandra.allow_unlimited_concurrent_validations must be set to
+# true. To dynamically resize to a value > concurrent_compactors on a running
+# node, first call the bypassConcurrentValidatorsLimit method on the
+# org.apache.cassandra.db:type=StorageService mbean
+# concurrent_validations: 0
+
+# Number of simultaneous materialized view builder tasks to allow.
+concurrent_materialized_view_builders: 1
+
+# Throttles compaction to the given total throughput across the entire
+# system. The faster you insert data, the faster you need to compact in
+# order to keep the sstable count down, but in general, setting this to
+# 16 to 32 times the rate you are inserting data is more than sufficient.
+# Setting this to 0 disables throttling. Note that this accounts for all types
+# of compaction, including validation compaction (building Merkle trees
+# for repairs).
+compaction_throughput_mb_per_sec: 64
+
+# When compacting, the replacement sstable(s) can be opened before they
+# are completely written, and used in place of the prior sstables for
+# any range that has been written. This helps to smoothly transfer reads
+# between the sstables, reducing page cache churn and keeping hot rows hot
+sstable_preemptive_open_interval_in_mb: 50
+
+# When enabled, permits Cassandra to zero-copy stream entire eligible
+# SSTables between nodes, including every component.
+# This speeds up the network transfer significantly subject to
+# throttling specified by stream_throughput_outbound_megabits_per_sec.
+# Enabling this will reduce the GC pressure on sending and receiving node.
+# When unset, the default is enabled. While this feature tries to keep the
+# disks balanced, it cannot guarantee it. This feature will be automatically
+# disabled if internode encryption is enabled. Currently this can be used with
+# Leveled Compaction. Once CASSANDRA-14586 is fixed other compaction strategies
+# will benefit as well when used in combination with CASSANDRA-6696.
+# stream_entire_sstables: true
+
+# Throttles all outbound streaming file transfers on this node to the
+# given total throughput in Mbps. This is necessary because Cassandra does
+# mostly sequential IO when streaming data during bootstrap or repair, which
+# can lead to saturating the network connection and degrading rpc performance.
+# When unset, the default is 200 Mbps or 25 MB/s.
+# stream_throughput_outbound_megabits_per_sec: 200
+
+# Throttles all streaming file transfer between the datacenters,
+# this setting allows users to throttle inter dc stream throughput in addition
+# to throttling all network stream traffic as configured with
+# stream_throughput_outbound_megabits_per_sec
+# When unset, the default is 200 Mbps or 25 MB/s
+# inter_dc_stream_throughput_outbound_megabits_per_sec: 200
+
+# How long the coordinator should wait for read operations to complete.
+# Lowest acceptable value is 10 ms.
+read_request_timeout_in_ms: 5000
+# How long the coordinator should wait for seq or index scans to complete.
+# Lowest acceptable value is 10 ms.
+range_request_timeout_in_ms: 10000
+# How long the coordinator should wait for writes to complete.
+# Lowest acceptable value is 10 ms.
+write_request_timeout_in_ms: 2000
+# How long the coordinator should wait for counter writes to complete.
+# Lowest acceptable value is 10 ms.
+counter_write_request_timeout_in_ms: 5000
+# How long a coordinator should continue to retry a CAS operation
+# that contends with other proposals for the same row.
+# Lowest acceptable value is 10 ms.
+cas_contention_timeout_in_ms: 1000
+# How long the coordinator should wait for truncates to complete
+# (This can be much longer, because unless auto_snapshot is disabled
+# we need to flush first so we can snapshot before removing the data.)
+# Lowest acceptable value is 10 ms.
+truncate_request_timeout_in_ms: 60000
+# The default timeout for other, miscellaneous operations.
+# Lowest acceptable value is 10 ms.
+request_timeout_in_ms: 10000
+
+# Defensive settings for protecting Cassandra from true network partitions.
+# See (CASSANDRA-14358) for details.
+#
+# The amount of time to wait for internode tcp connections to establish.
+# internode_tcp_connect_timeout_in_ms: 2000
+#
+# The amount of time unacknowledged data is allowed on a connection before we throw out the connection
+# Note this is only supported on Linux + epoll, and it appears to behave oddly above a setting of 30000
+# (it takes much longer than 30s) as of Linux 4.12. If you want something that high set this to 0
+# which picks up the OS default and configure the net.ipv4.tcp_retries2 sysctl to be ~8.
+# internode_tcp_user_timeout_in_ms: 30000
+
+# The amount of time unacknowledged data is allowed on a streaming connection.
+# The default is 5 minutes. Increase it or set it to 0 in order to increase the timeout.
+# internode_streaming_tcp_user_timeout_in_ms: 300000
+
+# The maximum continuous period a connection may be unwritable in application space
+# internode_application_timeout_in_ms: 30000
+
+# Global, per-endpoint and per-connection limits imposed on messages queued for delivery to other nodes
+# and waiting to be processed on arrival from other nodes in the cluster.  These limits are applied to the on-wire
+# size of the message being sent or received.
+#
+# The basic per-link limit is consumed in isolation before any endpoint or global limit is imposed.
+# Each node-pair has three links: urgent, small and large.  So any given node may have a maximum of
+# N*3*(internode_application_send_queue_capacity_in_bytes+internode_application_receive_queue_capacity_in_bytes)
+# messages queued without any coordination between them although in practice, with token-aware routing, only RF*tokens
+# nodes should need to communicate with significant bandwidth.
+#
+# The per-endpoint limit is imposed on all messages exceeding the per-link limit, simultaneously with the global limit,
+# on all links to or from a single node in the cluster.
+# The global limit is imposed on all messages exceeding the per-link limit, simultaneously with the per-endpoint limit,
+# on all links to or from any node in the cluster.
+#
+# internode_application_send_queue_capacity_in_bytes: 4194304                       #4MiB
+# internode_application_send_queue_reserve_endpoint_capacity_in_bytes: 134217728    #128MiB
+# internode_application_send_queue_reserve_global_capacity_in_bytes: 536870912      #512MiB
+# internode_application_receive_queue_capacity_in_bytes: 4194304                    #4MiB
+# internode_application_receive_queue_reserve_endpoint_capacity_in_bytes: 134217728 #128MiB
+# internode_application_receive_queue_reserve_global_capacity_in_bytes: 536870912   #512MiB
+
+
+# How long before a node logs slow queries. Select queries that take longer than
+# this timeout to execute, will generate an aggregated log message, so that slow queries
+# can be identified. Set this value to zero to disable slow query logging.
+slow_query_log_timeout_in_ms: 500
+
+# Enable operation timeout information exchange between nodes to accurately
+# measure request timeouts.  If disabled, replicas will assume that requests
+# were forwarded to them instantly by the coordinator, which means that
+# under overload conditions we will waste that much extra time processing
+# already-timed-out requests.
+#
+# Warning: It is generally assumed that users have setup NTP on their clusters, and that clocks are modestly in sync,
+# since this is a requirement for general correctness of last write wins.
+#cross_node_timeout: true
+
+# Set keep-alive period for streaming
+# This node will send a keep-alive message periodically with this period.
+# If the node does not receive a keep-alive message from the peer for
+# 2 keep-alive cycles the stream session times out and fails
+# Default value is 300s (5 minutes), which means stalled stream
+# times out in 10 minutes by default
+# streaming_keep_alive_period_in_secs: 300
+
+# Limit number of connections per host for streaming
+# Increase this when you notice that joins are CPU-bound rather than network
+# bound (for example a few nodes with big files).
+# streaming_connections_per_host: 1
+
+
+# phi value that must be reached for a host to be marked down.
+# most users should never need to adjust this.
+# phi_convict_threshold: 8
+
+# endpoint_snitch -- Set this to a class that implements
+# IEndpointSnitch.  The snitch has two functions:
+#
+# - it teaches Cassandra enough about your network topology to route
+#   requests efficiently
+# - it allows Cassandra to spread replicas around your cluster to avoid
+#   correlated failures. It does this by grouping machines into
+#   "datacenters" and "racks."  Cassandra will do its best not to have
+#   more than one replica on the same "rack" (which may not actually
+#   be a physical location)
+#
+# CASSANDRA WILL NOT ALLOW YOU TO SWITCH TO AN INCOMPATIBLE SNITCH
+# ONCE DATA IS INSERTED INTO THE CLUSTER.  This would cause data loss.
+# This means that if you start with the default SimpleSnitch, which
+# locates every node on "rack1" in "datacenter1", your only options
+# if you need to add another datacenter are GossipingPropertyFileSnitch
+# (and the older PFS).  From there, if you want to migrate to an
+# incompatible snitch like Ec2Snitch you can do it by adding new nodes
+# under Ec2Snitch (which will locate them in a new "datacenter") and
+# decommissioning the old ones.
+#
+# Out of the box, Cassandra provides:
+#
+# SimpleSnitch:
+#    Treats Strategy order as proximity. This can improve cache
+#    locality when disabling read repair.  Only appropriate for
+#    single-datacenter deployments.
+#
+# GossipingPropertyFileSnitch
+#    This should be your go-to snitch for production use.  The rack
+#    and datacenter for the local node are defined in
+#    cassandra-rackdc.properties and propagated to other nodes via
+#    gossip.  If cassandra-topology.properties exists, it is used as a
+#    fallback, allowing migration from the PropertyFileSnitch.
+#
+# PropertyFileSnitch:
+#    Proximity is determined by rack and data center, which are
+#    explicitly configured in cassandra-topology.properties.
+#
+# Ec2Snitch:
+#    Appropriate for EC2 deployments in a single Region. Loads Region
+#    and Availability Zone information from the EC2 API. The Region is
+#    treated as the datacenter, and the Availability Zone as the rack.
+#    Only private IPs are used, so this will not work across multiple
+#    Regions.
+#
+# Ec2MultiRegionSnitch:
+#    Uses public IPs as broadcast_address to allow cross-region
+#    connectivity.  (Thus, you should set seed addresses to the public
+#    IP as well.) You will need to open the storage_port or
+#    ssl_storage_port on the public IP firewall.  (For intra-Region
+#    traffic, Cassandra will switch to the private IP after
+#    establishing a connection.)
+#
+# RackInferringSnitch:
+#    Proximity is determined by rack and data center, which are
+#    assumed to correspond to the 3rd and 2nd octet of each node's IP
+#    address, respectively.  Unless this happens to match your
+#    deployment conventions, this is best used as an example of
+#    writing a custom Snitch class and is provided in that spirit.
+#
+# You can use a custom Snitch by setting this to the full class name
+# of the snitch, which will be assumed to be on your classpath.
+endpoint_snitch: SimpleSnitch
+
+# controls how often to perform the more expensive part of host score
+# calculation
+dynamic_snitch_update_interval_in_ms: 100
+# controls how often to reset all host scores, allowing a bad host to
+# possibly recover
+dynamic_snitch_reset_interval_in_ms: 600000
+# if set greater than zero, this will allow
+# 'pinning' of replicas to hosts in order to increase cache capacity.
+# The badness threshold will control how much worse the pinned host has to be
+# before the dynamic snitch will prefer other replicas over it.  This is
+# expressed as a double which represents a percentage.  Thus, a value of
+# 0.2 means Cassandra would continue to prefer the static snitch values
+# until the pinned host was 20% worse than the fastest.
+dynamic_snitch_badness_threshold: 1.0
+
+# Configure server-to-server internode encryption
+#
+# JVM and netty defaults for supported SSL socket protocols and cipher suites can
+# be replaced using custom encryption options. This is not recommended
+# unless you have policies in place that dictate certain settings, or
+# need to disable vulnerable ciphers or protocols in case the JVM cannot
+# be updated.
+#
+# FIPS compliant settings can be configured at JVM level and should not
+# involve changing encryption settings here:
+# https://docs.oracle.com/javase/8/docs/technotes/guides/security/jsse/FIPS.html
+#
+# **NOTE** this default configuration is an insecure configuration. If you need to
+# enable server-to-server encryption generate server keystores (and truststores for mutual
+# authentication) per:
+# http://download.oracle.com/javase/8/docs/technotes/guides/security/jsse/JSSERefGuide.html#CreateKeystore
+# Then perform the following configuration changes:
+#
+# Step 1: Set internode_encryption=<dc|rack|all> and explicitly set optional=true. Restart all nodes
+#
+# Step 2: Set optional=false (or remove it) and if you generated truststores and want to use mutual
+# auth set require_client_auth=true. Restart all nodes
+server_encryption_options:
+    # On outbound connections, determine which type of peers to securely connect to.
+    #   The available options are :
+    #     none : Do not encrypt outgoing connections
+    #     dc   : Encrypt connections to peers in other datacenters but not within datacenters
+    #     rack : Encrypt connections to peers in other racks but not within racks
+    #     all  : Always use encrypted connections
+    internode_encryption: none
+    # When set to true, encrypted and unencrypted connections are allowed on the storage_port
+    # This should _only be true_ while in unencrypted or transitional operation
+    # optional defaults to true if internode_encryption is none
+    # optional: true
+    # If enabled, will open up an encrypted listening socket on ssl_storage_port. Should only be used
+    # during upgrade to 4.0; otherwise, set to false.
+    enable_legacy_ssl_storage_port: false
+    # Set to a valid keystore if internode_encryption is dc, rack or all
+    keystore: conf/.keystore
+    keystore_password: cassandra
+    # Verify peer server certificates
+    require_client_auth: false
+    # Set to a valid truststore if require_client_auth is true
+    truststore: conf/.truststore
+    truststore_password: cassandra
+    # Verify that the host name in the certificate matches the connected host
+    require_endpoint_verification: false
+    # More advanced defaults:
+    # protocol: TLS
+    # store_type: JKS
+    # cipher_suites: [
+    #   TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384, TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,
+    #   TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256, TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,
+    #   TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA, TLS_RSA_WITH_AES_128_GCM_SHA256, TLS_RSA_WITH_AES_128_CBC_SHA,
+    #   TLS_RSA_WITH_AES_256_CBC_SHA
+    # ]
+
+# Configure client-to-server encryption.
+#
+# **NOTE** this default configuration is an insecure configuration. If you need to
+# enable client-to-server encryption generate server keystores (and truststores for mutual
+# authentication) per:
+# http://download.oracle.com/javase/8/docs/technotes/guides/security/jsse/JSSERefGuide.html#CreateKeystore
+# Then perform the following configuration changes:
+#
+# Step 1: Set enabled=true and explicitly set optional=true. Restart all nodes
+#
+# Step 2: Set optional=false (or remove it) and if you generated truststores and want to use mutual
+# auth set require_client_auth=true. Restart all nodes
+client_encryption_options:
+    # Enable client-to-server encryption
+    enabled: false
+    # When set to true, encrypted and unencrypted connections are allowed on the native_transport_port
+    # This should _only be true_ while in unencrypted or transitional operation
+    # optional defaults to true when enabled is false, and false when enabled is true.
+    # optional: true
+    # Set keystore and keystore_password to valid keystores if enabled is true
+    keystore: conf/.keystore
+    keystore_password: cassandra
+    # Verify client certificates
+    require_client_auth: false
+    # Set truststore and truststore_password if require_client_auth is true
+    # truststore: conf/.truststore
+    # truststore_password: cassandra
+    # More advanced defaults:
+    # protocol: TLS
+    # store_type: JKS
+    # cipher_suites: [
+    #   TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384, TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,
+    #   TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256, TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,
+    #   TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA, TLS_RSA_WITH_AES_128_GCM_SHA256, TLS_RSA_WITH_AES_128_CBC_SHA,
+    #   TLS_RSA_WITH_AES_256_CBC_SHA
+    # ]
+
+# internode_compression controls whether traffic between nodes is
+# compressed.
+# Can be:
+#
+# all
+#   all traffic is compressed
+#
+# dc
+#   traffic between different datacenters is compressed
+#
+# none
+#   nothing is compressed.
+internode_compression: dc
+
+# Enable or disable tcp_nodelay for inter-dc communication.
+# Disabling it will result in larger (but fewer) network packets being sent,
+# reducing overhead from the TCP protocol itself, at the cost of increasing
+# latency if you block for cross-datacenter responses.
+inter_dc_tcp_nodelay: false
+
+# TTL for different trace types used during logging of the repair process.
+tracetype_query_ttl: 86400
+tracetype_repair_ttl: 604800
+
+# If unset, all GC Pauses greater than gc_log_threshold_in_ms will log at
+# INFO level
+# UDFs (user defined functions) are disabled by default.
+# As of Cassandra 3.0 there is a sandbox in place that should prevent execution of evil code.
+enable_user_defined_functions: false
+
+# Enables scripted UDFs (JavaScript UDFs).
+# Java UDFs are always enabled, if enable_user_defined_functions is true.
+# Enable this option to be able to use UDFs with "language javascript" or any custom JSR-223 provider.
+# This option has no effect, if enable_user_defined_functions is false.
+enable_scripted_user_defined_functions: false
+
+# The default Windows kernel timer and scheduling resolution is 15.6ms for power conservation.
+# Lowering this value on Windows can provide much tighter latency and better throughput, however
+# some virtualized environments may see a negative performance impact from changing this setting
+# below their system default. The sysinternals 'clockres' tool can confirm your system's default
+# setting.
+windows_timer_interval: 1
+
+
+# Enables encrypting data at-rest (on disk). Different key providers can be plugged in, but the default reads from
+# a JCE-style keystore. A single keystore can hold multiple keys, but the one referenced by
+# the "key_alias" is the only key that will be used for encrypt operations; previously used keys
+# can still (and should!) be in the keystore and will be used on decrypt operations
+# (to handle the case of key rotation).
+#
+# It is strongly recommended to download and install Java Cryptography Extension (JCE)
+# Unlimited Strength Jurisdiction Policy Files for your version of the JDK.
+# (current link: http://www.oracle.com/technetwork/java/javase/downloads/jce8-download-2133166.html)
+#
+# Currently, only the following file types are supported for transparent data encryption, although
+# more are coming in future cassandra releases: commitlog, hints
+transparent_data_encryption_options:
+    enabled: false
+    chunk_length_kb: 64
+    cipher: AES/CBC/PKCS5Padding
+    key_alias: testing:1
+    # CBC IV length for AES needs to be 16 bytes (which is also the default size)
+    # iv_length: 16
+    key_provider:
+      - class_name: org.apache.cassandra.security.JKSKeyProvider
+        parameters:
+          - keystore: conf/.keystore
+            keystore_password: cassandra
+            store_type: JCEKS
+            key_password: cassandra
+
+
+#####################
+# SAFETY THRESHOLDS #
+#####################
+
+# When executing a scan, within or across a partition, we need to keep the
+# tombstones seen in memory so we can return them to the coordinator, which
+# will use them to make sure other replicas also know about the deleted rows.
+# With workloads that generate a lot of tombstones, this can cause performance
+# problems and even exhaust the server heap.
+# (http://www.datastax.com/dev/blog/cassandra-anti-patterns-queues-and-queue-like-datasets)
+# Adjust the thresholds here if you understand the dangers and want to
+# scan more tombstones anyway.  These thresholds may also be adjusted at runtime
+# using the StorageService mbean.
+tombstone_warn_threshold: 1000
+tombstone_failure_threshold: 100000
+
+# Filtering and secondary index queries at read consistency levels above ONE/LOCAL_ONE use a
+# mechanism called replica filtering protection to ensure that results from stale replicas do
+# not violate consistency. (See CASSANDRA-8272 and CASSANDRA-15907 for more details.) This
+# mechanism materializes replica results by partition on-heap at the coordinator. The more possibly
+# stale results returned by the replicas, the more rows materialized during the query.
+replica_filtering_protection:
+    # These thresholds exist to limit the damage severely out-of-date replicas can cause during these
+    # queries. They limit the number of rows from all replicas individual index and filtering queries
+    # can materialize on-heap to return correct results at the desired read consistency level.
+    #
+    # "cached_rows_warn_threshold" is the per-query threshold at which a warning will be logged.
+    # "cached_rows_fail_threshold" is the per-query threshold at which the query will fail.
+    #
+    # These thresholds may also be adjusted at runtime using the StorageService mbean.
+    #
+    # If the failure threshold is breached, it is likely that either the current page/fetch size
+    # is too large or one or more replicas is severely out-of-sync and in need of repair.
+    cached_rows_warn_threshold: 2000
+    cached_rows_fail_threshold: 32000
+
+# Log WARN on any multiple-partition batch size exceeding this value. 5kb per batch by default.
+# Caution should be taken on increasing the size of this threshold as it can lead to node instability.
+batch_size_warn_threshold_in_kb: 5
+
+# Fail any multiple-partition batch exceeding this value. 50kb (10x warn threshold) by default.
+batch_size_fail_threshold_in_kb: 50
+
+# Log WARN on any batches not of type LOGGED that span across more partitions than this limit
+unlogged_batch_across_partitions_warn_threshold: 10
+
+# Log a warning when compacting partitions larger than this value
+compaction_large_partition_warning_threshold_mb: 100
+
+# GC Pauses greater than 200 ms will be logged at INFO level
+# This threshold can be adjusted to minimize logging if necessary
+# gc_log_threshold_in_ms: 200
+
+# GC Pauses greater than gc_warn_threshold_in_ms will be logged at WARN level
+# Adjust the threshold based on your application throughput requirement. Setting to 0
+# will deactivate the feature.
+# gc_warn_threshold_in_ms: 1000
+
+# Maximum size of any value in SSTables. Safety measure to detect SSTable corruption
+# early. Any value size larger than this threshold will result in marking an SSTable
+# as corrupted. This should be positive and less than 2048.
+# max_value_size_in_mb: 256
+
+# Coalescing Strategies #
+# Coalescing multiple messages turns out to significantly boost message processing throughput (think doubling or more).
+# On bare metal, the floor for packet processing throughput is high enough that many applications won't notice, but in
+# virtualized environments, the point at which an application can be bound by network packet processing can be
+# surprisingly low compared to the throughput of task processing that is possible inside a VM. It's not that bare metal
+# doesn't benefit from coalescing messages, it's that the number of packets a bare metal network interface can process
+# is sufficient for many applications such that no load starvation is experienced even without coalescing.
+# There are other benefits to coalescing network messages that are harder to isolate with a simple metric like messages
+# per second. By coalescing multiple tasks together, a network thread can process multiple messages for the cost of one
+# trip to read from a socket, and all the task submission work can be done at the same time reducing context switching
+# and increasing cache friendliness of network message processing.
+# See CASSANDRA-8692 for details.
+
+# Strategy to use for coalescing messages in OutboundTcpConnection.
+# Can be fixed, movingaverage, timehorizon, disabled (default).
+# You can also specify a subclass of CoalescingStrategies.CoalescingStrategy by name.
+# otc_coalescing_strategy: DISABLED
+
+# How many microseconds to wait for coalescing. For fixed strategy this is the amount of time after the first
+# message is received before it will be sent with any accompanying messages. For moving average this is the
+# maximum amount of time that will be waited as well as the interval at which messages must arrive on average
+# for coalescing to be enabled.
+# otc_coalescing_window_us: 200
+
+# Do not try to coalesce messages if we already got that many messages. This should be more than 2 and less than 128.
+# otc_coalescing_enough_coalesced_messages: 8
+
+# How many milliseconds to wait between two expiration runs on the backlog (queue) of the OutboundTcpConnection.
+# Expiration is done if messages are piling up in the backlog. Droppable messages are expired to free the memory
+# taken by expired messages. The interval should be between 0 and 1000, and in most installations the default value
+# will be appropriate. A smaller value could potentially expire messages slightly sooner at the expense of more CPU
+# time and queue contention while iterating the backlog of messages.
+# An interval of 0 disables any wait time, which is the behavior of former Cassandra versions.
+#
+# otc_backlog_expiration_interval_ms: 200
+
+# Track a metric per keyspace indicating whether replication achieved the ideal consistency
+# level for writes without timing out. This is different from the consistency level requested by
+# each write which may be lower in order to facilitate availability.
+# ideal_consistency_level: EACH_QUORUM
+
+# Automatically upgrade sstables after upgrade - if there is no ordinary compaction to do, the
+# oldest non-upgraded sstable will get upgraded to the latest version
+# automatic_sstable_upgrade: false
+# Limit the number of concurrent sstable upgrades
+# max_concurrent_automatic_sstable_upgrades: 1
+
+# Audit logging - Logs every incoming CQL command request, authentication to a node. See the docs
+# on audit_logging for full details about the various configuration options.
+audit_logging_options:
+    enabled: false
+    logger:
+      - class_name: BinAuditLogger
+    # audit_logs_dir:
+    # included_keyspaces:
+    # excluded_keyspaces: system, system_schema, system_virtual_schema
+    # included_categories:
+    # excluded_categories:
+    # included_users:
+    # excluded_users:
+    # roll_cycle: HOURLY
+    # block: true
+    # max_queue_weight: 268435456 # 256 MiB
+    # max_log_size: 17179869184 # 16 GiB
+    ## archive command is "/path/to/script.sh %path" where %path is replaced with the file being rolled:
+    # archive_command:
+    # max_archive_retries: 10
+
+
+# default options for full query logging - these can be overridden from command line when executing
+# nodetool enablefullquerylog
+#full_query_logging_options:
+    # log_dir:
+    # roll_cycle: HOURLY
+    # block: true
+    # max_queue_weight: 268435456 # 256 MiB
+    # max_log_size: 17179869184 # 16 GiB
+    ## archive command is "/path/to/script.sh %path" where %path is replaced with the file being rolled:
+    # archive_command:
+    # max_archive_retries: 10
+
+# validate tombstones on reads and compaction
+# can be either "disabled", "warn" or "exception"
+# corrupted_tombstone_strategy: disabled
+
+# Diagnostic Events #
+# If enabled, diagnostic events can be helpful for troubleshooting operational issues. Emitted events contain details
+# on internal state and temporal relationships across events, accessible by clients via JMX.
+diagnostic_events_enabled: false
+
+# Use native transport TCP message coalescing. If on upgrade to 4.0 you found your throughput decreasing, and in
+# particular you run an old kernel or have very few client connections, this option might be worth evaluating.
+#native_transport_flush_in_batches_legacy: false
+
+# Enable tracking of repaired state of data during reads and comparison between replicas
+# Mismatches between the repaired sets of replicas can be characterized as either confirmed
+# or unconfirmed. In this context, unconfirmed indicates that the presence of pending repair
+# sessions, unrepaired partition tombstones, or some other condition means that the disparity
+# cannot be considered conclusive. Confirmed mismatches should be a trigger for investigation
+# as they may be indicative of corruption or data loss.
+# There are separate flags for range vs partition reads as single partition reads are only tracked
+# when CL > 1 and a digest mismatch occurs. Currently, range queries don't use digests so if
+# enabled for range reads, all range reads will include repaired data tracking. As this adds
+# some overhead, operators may wish to disable it whilst still enabling it for partition reads
+repaired_data_tracking_for_range_reads_enabled: false
+repaired_data_tracking_for_partition_reads_enabled: false
+# If false, only confirmed mismatches will be reported. If true, a separate metric for unconfirmed
+# mismatches will also be recorded. This is to avoid potential signal:noise issues as unconfirmed
+# mismatches are less actionable than confirmed ones.
+report_unconfirmed_repaired_data_mismatches: false
+
+# Having many tables and/or keyspaces negatively affects performance of many operations in the
+# cluster. When the number of tables/keyspaces in the cluster exceeds the following thresholds
+# a client warning will be sent back to the user when creating a table or keyspace.
+# table_count_warn_threshold: 150
+# keyspace_count_warn_threshold: 40
+
+#########################
+# EXPERIMENTAL FEATURES #
+#########################
+
+# Enables materialized view creation on this node.
+# Materialized views are considered experimental and are not recommended for production use.
+enable_materialized_views: false
+
+# Enables SASI index creation on this node.
+# SASI indexes are considered experimental and are not recommended for production use.
+enable_sasi_indexes: false
+
+# Enables creation of transiently replicated keyspaces on this node.
+# Transient replication is experimental and is not recommended for production use.
+enable_transient_replication: false
diff --git a/agent-c5/src/test/resources/cassandra/logback.xml b/agent-c5/src/test/resources/cassandra/logback.xml
new file mode 100644
index 00000000..d4261fbe
--- /dev/null
+++ b/agent-c5/src/test/resources/cassandra/logback.xml
@@ -0,0 +1,103 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements.  See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership.  The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied.  See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+<!--
+In order to disable debug.log, comment-out the ASYNCDEBUGLOG
+appender reference in the root level section below.
+-->
+
+<configuration scan="true" scanPeriod="60 seconds">
+    <jmxConfigurator />
+
+    <!-- No shutdown hook; we run it ourselves in StorageService after shutdown -->
+
+    <!-- SYSTEMLOG rolling file appender to system.log (TRACE threshold; effective level comes from the loggers below) -->
+
+    <appender name="SYSTEMLOG" class="ch.qos.logback.core.rolling.RollingFileAppender">
+        <filter class="ch.qos.logback.classic.filter.ThresholdFilter">
+            <level>TRACE</level>
+        </filter>
+        <file>${cassandra.logdir}/system.log</file>
+        <rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
+            <!-- rollover daily -->
+            <fileNamePattern>${cassandra.logdir}/system.log.%d{yyyy-MM-dd}.%i.zip</fileNamePattern>
+            <!-- each file should be at most 50MB, keep 7 days worth of history, but at most 5GB -->
+            <maxFileSize>50MB</maxFileSize>
+            <maxHistory>7</maxHistory>
+            <totalSizeCap>5GB</totalSizeCap>
+        </rollingPolicy>
+        <encoder>
+            <pattern>%-5level [%thread] %date{ISO8601} %F:%L - %msg%n</pattern>
+        </encoder>
+    </appender>
+
+    <!-- DEBUGLOG rolling file appender to debug.log (all levels) -->
+
+    <appender name="DEBUGLOG" class="ch.qos.logback.core.rolling.RollingFileAppender">
+        <file>${cassandra.logdir}/debug.log</file>
+        <rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
+            <!-- rollover daily -->
+            <fileNamePattern>${cassandra.logdir}/debug.log.%d{yyyy-MM-dd}.%i.zip</fileNamePattern>
+            <!-- each file should be at most 50MB, keep 7 days worth of history, but at most 5GB -->
+            <maxFileSize>50MB</maxFileSize>
+            <maxHistory>7</maxHistory>
+            <totalSizeCap>5GB</totalSizeCap>
+        </rollingPolicy>
+        <encoder> <!-- same layout as SYSTEMLOG/STDOUT: the message is emitted once (%m and %msg are aliases) -->
+            <pattern>%-5level [%thread] %date{ISO8601} %F:%L - %msg%n</pattern>
+        </encoder>
+    </appender>
+
+    <!-- ASYNCDEBUGLOG asynchronous appender to debug.log (all levels) -->
+
+    <appender name="ASYNCDEBUGLOG" class="ch.qos.logback.classic.AsyncAppender">
+        <queueSize>1024</queueSize>
+        <discardingThreshold>0</discardingThreshold>
+        <includeCallerData>true</includeCallerData>
+        <appender-ref ref="DEBUGLOG" />
+    </appender>
+
+    <!-- STDOUT console appender to stdout (TRACE threshold; effective level comes from the loggers below) -->
+
+    <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
+        <filter class="ch.qos.logback.classic.filter.ThresholdFilter">
+            <level>TRACE</level>
+        </filter>
+        <encoder>
+            <pattern>%-5level [%thread] %date{ISO8601} %F:%L - %msg%n</pattern>
+        </encoder>
+    </appender>
+
+    <!-- Uncomment below and corresponding appender-ref to activate logback metrics
+    <appender name="LogbackMetrics" class="com.codahale.metrics.logback.InstrumentedAppender" />
+     -->
+
+    <root level="INFO">
+        <appender-ref ref="SYSTEMLOG" />
+        <appender-ref ref="STDOUT" />
+        <!-- <appender-ref ref="DEBUGLOG" /> (uncomment this line to enable debug.log) -->
+        <!--
+        <appender-ref ref="LogbackMetrics" />
+        -->
+    </root>
+
+    <logger name="org.apache.cassandra" level="INFO"/>
+    <logger name="org.apache.pulsar" level="INFO"/>
+    <logger name="com.datastax.oss.cdc" level="TRACE"/>
+</configuration>
diff --git a/agent-c5/src/test/resources/logback-test.xml b/agent-c5/src/test/resources/logback-test.xml
new file mode 100644
index 00000000..6a95771c
--- /dev/null
+++ b/agent-c5/src/test/resources/logback-test.xml
@@ -0,0 +1,15 @@
+<configuration>
+    <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
+        <encoder>
+            <pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger - %msg%n</pattern>
+        </encoder>
+    </appender>
+
+    <root level="info">
+        <appender-ref ref="STDOUT"/>
+    </root>
+
+    <logger name="org.testcontainers" level="INFO"/>
+    <logger name="com.github.dockerjava" level="WARN"/>
+    <logger name="com.datastax.oss.cdc" level="INFO"/>
+</configuration>
diff --git a/gradle.properties b/gradle.properties
index 8762d9d6..4c5c4748 100644
--- a/gradle.properties
+++ b/gradle.properties
@@ -12,6 +12,7 @@ lombokVersion=1.18.20
 ossDriverVersion=4.16.0
 cassandra3Version=3.11.10
 cassandra4Version=4.0.4
+cassandra5Version=5.0.4
 dse4Version=6.8.23
 
 pulsarGroup=org.apache.pulsar
@@ -42,7 +43,7 @@ commitlog_sync_period_in_ms=2000
 cdc_total_space_in_mb=70
 
 # default docker repo + slash
-dockerRepo=myrepo/
+dockerRepo=cassandra-cdc/
 
 # CDC backfilling Client
 dsbulkVersion=1.10.0
diff --git a/settings.gradle b/settings.gradle
index e23f93e5..5bd913ec 100644
--- a/settings.gradle
+++ b/settings.gradle
@@ -22,6 +22,8 @@ include 'agent-c3'
 
 include 'agent-c4'
 
+include 'agent-c5'
+
 if (startParameter.projectProperties.containsKey("dse4")) {
     include 'agent-dse4'
 }
diff --git a/testcontainers/src/main/java/com/datastax/oss/cdc/AgentTestUtil.java b/testcontainers/src/main/java/com/datastax/oss/cdc/AgentTestUtil.java
index 1091dd6a..94fb29ed 100644
--- a/testcontainers/src/main/java/com/datastax/oss/cdc/AgentTestUtil.java
+++ b/testcontainers/src/main/java/com/datastax/oss/cdc/AgentTestUtil.java
@@ -91,6 +91,7 @@ public static ByteBuffer randomizeBuffer(int size) {
     public enum Version {
         C3,     // Cassandra 3.11.x
         C4,     // Cassandra 4.x
+        C5,     // Cassandra 5.x
         DSE4    // Datastax Enterprise Server
     }
 }

From 2d2f8c5a6a52f502c612bf499ba4c9d805fe24d5 Mon Sep 17 00:00:00 2001
From: SreedevT <talisreedev@gmail.com>
Date: Tue, 30 Sep 2025 00:36:11 +0530
Subject: [PATCH 02/10] add c5 to ci workflow

---
 .github/workflows/ci.yaml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index af4fa504..d2d2936c 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -1,6 +1,7 @@
 name: CI
 
 on:
+  workflow_dispatch:
   pull_request:
     branches:
       - master
@@ -37,7 +38,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        module: ['agent', 'agent-c3', 'agent-c4', 'agent-dse4', 'connector']
+        module: ['agent', 'agent-c3', 'agent-c4', 'agent-c5','agent-dse4', 'connector']
         jdk: ['11', '17']
         pulsarImage: ['datastax/lunastreaming:2.10_3.4', 'apachepulsar/pulsar:2.10.3', 'apachepulsar/pulsar:2.11.0']
     steps:

From 7c419517ba722c78cd5604eb6ca747474a8d1ee8 Mon Sep 17 00:00:00 2001
From: SreedevT <talisreedev@gmail.com>
Date: Tue, 30 Sep 2025 00:43:53 +0530
Subject: [PATCH 03/10] declare java build version for c5

---
 agent-c5/build.gradle | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/agent-c5/build.gradle b/agent-c5/build.gradle
index 63accc37..798d4f44 100644
--- a/agent-c5/build.gradle
+++ b/agent-c5/build.gradle
@@ -7,6 +7,21 @@ plugins {
     id 'docker-compose'
 }
 
+// Cassandra 5.0 requires Java 17+ due to dependencies like Caffeine 3.1.8
+java {
+    toolchain {
+        languageVersion = JavaLanguageVersion.of(17)
+    }
+}
+
+sourceCompatibility = 17
+targetCompatibility = 17
+
+compileJava {
+    sourceCompatibility = 17
+    targetCompatibility = 17
+}
+
 application {
     mainClass = "$mainClassName"
 }

From c20b6077d303a497be3057450a174d531c093374 Mon Sep 17 00:00:00 2001
From: SreedevT <talisreedev@gmail.com>
Date: Tue, 30 Sep 2025 00:47:07 +0530
Subject: [PATCH 04/10] revert commits that are not needed

---
 .github/workflows/ci.yaml | 1 -
 gradle.properties         | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index d2d2936c..c47c4753 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -1,7 +1,6 @@
 name: CI
 
 on:
-  workflow_dispatch:
   pull_request:
     branches:
       - master
diff --git a/gradle.properties b/gradle.properties
index 4c5c4748..9c4eda93 100644
--- a/gradle.properties
+++ b/gradle.properties
@@ -43,7 +43,7 @@ commitlog_sync_period_in_ms=2000
 cdc_total_space_in_mb=70
 
 # default docker repo + slash
-dockerRepo=cassandra-cdc/
+dockerRepo=myrepo/
 
 # CDC backfilling Client
 dsbulkVersion=1.10.0

From e76693be22f735f2bd7660acdc83586bb214295c Mon Sep 17 00:00:00 2001
From: SreedevT <talisreedev@gmail.com>
Date: Tue, 30 Sep 2025 00:49:03 +0530
Subject: [PATCH 05/10] workflow updates

---
 .github/workflows/backfill-ci.yaml | 2 +-
 .github/workflows/ci.yaml          | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/backfill-ci.yaml b/.github/workflows/backfill-ci.yaml
index 0fc77f7e..c05ffba0 100644
--- a/.github/workflows/backfill-ci.yaml
+++ b/.github/workflows/backfill-ci.yaml
@@ -39,7 +39,7 @@ jobs:
       matrix:
         jdk: ['11'] # TODO: Enable java 17 tests https://issues.apache.org/jira/browse/CASSANDRA-16895
         pulsarImage: ['datastax/lunastreaming:2.10_3.4', 'apachepulsar/pulsar:2.10.3', 'apachepulsar/pulsar:2.11.0']
-        cassandraFamily: ['c3', 'c4', 'dse4']
+        cassandraFamily: ['c3', 'c4', 'c5']
     steps:
       - uses: actions/checkout@v3
       - name: Set up JDK ${{ matrix.jdk }}
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index c47c4753..0cc7fdfa 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -37,7 +37,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        module: ['agent', 'agent-c3', 'agent-c4', 'agent-c5','agent-dse4', 'connector']
+        module: ['agent', 'agent-c3', 'agent-c4', 'agent-c5', 'connector']
         jdk: ['11', '17']
         pulsarImage: ['datastax/lunastreaming:2.10_3.4', 'apachepulsar/pulsar:2.10.3', 'apachepulsar/pulsar:2.11.0']
     steps:

From 67566f3779fcd5b92a93f0c1d39a854a7bc0aa56 Mon Sep 17 00:00:00 2001
From: SreedevT <talisreedev@gmail.com>
Date: Tue, 30 Sep 2025 01:37:42 +0530
Subject: [PATCH 06/10] only c5 with datastax lunastreaming

---
 .github/workflows/backfill-ci.yaml | 4 ++--
 .github/workflows/ci.yaml          | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/backfill-ci.yaml b/.github/workflows/backfill-ci.yaml
index c05ffba0..450625fd 100644
--- a/.github/workflows/backfill-ci.yaml
+++ b/.github/workflows/backfill-ci.yaml
@@ -38,8 +38,8 @@ jobs:
       fail-fast: false
       matrix:
         jdk: ['11'] # TODO: Enable java 17 tests https://issues.apache.org/jira/browse/CASSANDRA-16895
-        pulsarImage: ['datastax/lunastreaming:2.10_3.4', 'apachepulsar/pulsar:2.10.3', 'apachepulsar/pulsar:2.11.0']
-        cassandraFamily: ['c3', 'c4', 'c5']
+        pulsarImage: ['datastax/lunastreaming:4.0_3.6']
+        cassandraFamily: ['c5']
     steps:
       - uses: actions/checkout@v3
       - name: Set up JDK ${{ matrix.jdk }}
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 0cc7fdfa..7c2493db 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -37,9 +37,9 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        module: ['agent', 'agent-c3', 'agent-c4', 'agent-c5', 'connector']
+        module: ['agent-c5', 'connector']
         jdk: ['11', '17']
-        pulsarImage: ['datastax/lunastreaming:2.10_3.4', 'apachepulsar/pulsar:2.10.3', 'apachepulsar/pulsar:2.11.0']
+        pulsarImage: ['datastax/lunastreaming:4.0_3.6']
     steps:
       - uses: actions/checkout@v3
       - name: Set up JDK ${{ matrix.jdk }}

From dc50743d046334b10de72e80350a2fdad6415e37 Mon Sep 17 00:00:00 2001
From: SreedevT <talisreedev@gmail.com>
Date: Tue, 30 Sep 2025 01:41:26 +0530
Subject: [PATCH 07/10] add c5 to backfill gradle

---
 backfill-cli/build.gradle | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/backfill-cli/build.gradle b/backfill-cli/build.gradle
index 313f4864..d78ba851 100644
--- a/backfill-cli/build.gradle
+++ b/backfill-cli/build.gradle
@@ -155,13 +155,19 @@ task e2eTest(type: Test) {
         systemProperty "cassandraFamily", "c4"
         systemProperty "agentBuildDir", project(':agent-c4').buildDir
         environment 'CASSANDRA_IMAGE', 'cassandra:' + cassandra4Version
+    } else if (cassandraFamily == "c5") {
+        dependsOn project(':agent-c5').shadowJar
+        systemProperty "cassandraFamily", "c5"
+        systemProperty "agentBuildDir", project(':agent-c5').buildDir
+        environment 'CASSANDRA_IMAGE', 'cassandra:' + cassandra5Version
     } else if (cassandraFamily == "dse4") {
         dependsOn project(':agent-dse4').shadowJar
         systemProperty "cassandraFamily", "dse4"
         systemProperty "agentBuildDir", project(':agent-dse4').buildDir
         environment 'CASSANDRA_IMAGE', 'datastax/dse-server:' + dse4Version
+
     } else {
-        throw new GradleException("Unknown Cassandra family. Use -PcassandraFamily=[c3|c4|dse4]")
+        throw new GradleException("Unknown Cassandra family. Use -PcassandraFamily=[c3|c4|c5|dse4]")
     }
 
     systemProperty "connectorBuildDir", project(':connector').buildDir

From f1227c62637ea0ad154dd9abbc94642941c6b640 Mon Sep 17 00:00:00 2001
From: SreedevT <talisreedev@gmail.com>
Date: Tue, 30 Sep 2025 02:23:03 +0530
Subject: [PATCH 08/10] backfill-cli c5 agent

---
 .../src/test/resources/c5/cassandra.yaml      | 1430 +++++++++++++++++
 .../src/test/resources/c5/logback.xml         |  103 ++
 2 files changed, 1533 insertions(+)
 create mode 100644 backfill-cli/src/test/resources/c5/cassandra.yaml
 create mode 100644 backfill-cli/src/test/resources/c5/logback.xml

diff --git a/backfill-cli/src/test/resources/c5/cassandra.yaml b/backfill-cli/src/test/resources/c5/cassandra.yaml
new file mode 100644
index 00000000..bba869a5
--- /dev/null
+++ b/backfill-cli/src/test/resources/c5/cassandra.yaml
@@ -0,0 +1,1430 @@
+# Cassandra storage config YAML
+
+# NOTE:
+#   See https://cassandra.apache.org/doc/latest/configuration/ for
+#   full explanations of configuration directives
+# /NOTE
+
+# The name of the cluster. This is mainly used to prevent machines in
+# one logical cluster from joining another.
+cluster_name: 'Test Cluster'
+
+# This defines the number of tokens randomly assigned to this node on the ring
+# The more tokens, relative to other nodes, the larger the proportion of data
+# that this node will store. You probably want all nodes to have the same number
+# of tokens assuming they have equal hardware capability.
+#
+# If you leave this unspecified, Cassandra will use the default of 1 token for legacy compatibility,
+# and will use the initial_token as described below.
+#
+# Specifying initial_token will override this setting on the node's initial start,
+# on subsequent starts, this setting will apply even if initial token is set.
+#
+# See https://cassandra.apache.org/doc/latest/getting_started/production.html#tokens for
+# best practice information about num_tokens.
+#
+num_tokens: 16
+
+# Triggers automatic allocation of num_tokens tokens for this node. The allocation
+# algorithm attempts to choose tokens in a way that optimizes replicated load over
+# the nodes in the datacenter for the replica factor.
+#
+# The load assigned to each node will be close to proportional to its number of
+# vnodes.
+#
+# Only supported with the Murmur3Partitioner.
+
+# Replica factor is determined via the replication strategy used by the specified
+# keyspace.
+# allocate_tokens_for_keyspace: KEYSPACE
+
+# Replica factor is explicitly set, regardless of keyspace or datacenter.
+# This is the replica factor within the datacenter, like NTS.
+allocate_tokens_for_local_replication_factor: 3
+
+# initial_token allows you to specify tokens manually.  While you can use it with
+# vnodes (num_tokens > 1, above) -- in which case you should provide a
+# comma-separated list -- it's primarily used when adding nodes to legacy clusters
+# that do not have vnodes enabled.
+# initial_token:
+
+# May either be "true" or "false" to enable globally
+hinted_handoff_enabled: true
+
+# When hinted_handoff_enabled is true, a black list of data centers that will not
+# perform hinted handoff
+# hinted_handoff_disabled_datacenters:
+#    - DC1
+#    - DC2
+
+# this defines the maximum amount of time a dead host will have hints
+# generated.  After it has been dead this long, new hints for it will not be
+# created until it has been seen alive and gone down again.
+max_hint_window_in_ms: 10800000 # 3 hours
+
+# Maximum throttle in KBs per second, per delivery thread.  This will be
+# reduced proportionally to the number of nodes in the cluster.  (If there
+# are two nodes in the cluster, each delivery thread will use the maximum
+# rate; if there are three, each will throttle to half of the maximum,
+# since we expect two nodes to be delivering hints simultaneously.)
+hinted_handoff_throttle_in_kb: 1024
+
+# Number of threads with which to deliver hints;
+# Consider increasing this number when you have multi-dc deployments, since
+# cross-dc handoff tends to be slower
+max_hints_delivery_threads: 2
+
+# Directory where Cassandra should store hints.
+# If not set, the default directory is $CASSANDRA_HOME/data/hints.
+# hints_directory: /var/lib/cassandra/hints
+
+# How often hints should be flushed from the internal buffers to disk.
+# Will *not* trigger fsync.
+hints_flush_period_in_ms: 10000
+
+# Maximum size for a single hints file, in megabytes.
+max_hints_file_size_in_mb: 128
+
+# Compression to apply to the hint files. If omitted, hints files
+# will be written uncompressed. LZ4, Snappy, and Deflate compressors
+# are supported.
+#hints_compression:
+#   - class_name: LZ4Compressor
+#     parameters:
+#         -
+
+# Maximum throttle in KBs per second, total. This will be
+# reduced proportionally to the number of nodes in the cluster.
+batchlog_replay_throttle_in_kb: 1024
+
+# Authentication backend, implementing IAuthenticator; used to identify users
+# Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthenticator,
+# PasswordAuthenticator}.
+#
+# - AllowAllAuthenticator performs no checks - set it to disable authentication.
+# - PasswordAuthenticator relies on username/password pairs to authenticate
+#   users. It keeps usernames and hashed passwords in system_auth.roles table.
+#   Please increase system_auth keyspace replication factor if you use this authenticator.
+#   If using PasswordAuthenticator, CassandraRoleManager must also be used (see below)
+authenticator: AllowAllAuthenticator
+
+# Authorization backend, implementing IAuthorizer; used to limit access/provide permissions
+# Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthorizer,
+# CassandraAuthorizer}.
+#
+# - AllowAllAuthorizer allows any action to any user - set it to disable authorization.
+# - CassandraAuthorizer stores permissions in system_auth.role_permissions table. Please
+#   increase system_auth keyspace replication factor if you use this authorizer.
+authorizer: AllowAllAuthorizer
+
+# Part of the Authentication & Authorization backend, implementing IRoleManager; used
+# to maintain grants and memberships between roles.
+# Out of the box, Cassandra provides org.apache.cassandra.auth.CassandraRoleManager,
+# which stores role information in the system_auth keyspace. Most functions of the
+# IRoleManager require an authenticated login, so unless the configured IAuthenticator
+# actually implements authentication, most of this functionality will be unavailable.
+#
+# - CassandraRoleManager stores role data in the system_auth keyspace. Please
+#   increase system_auth keyspace replication factor if you use this role manager.
+role_manager: CassandraRoleManager
+
+# Network authorization backend, implementing INetworkAuthorizer; used to restrict user
+# access to certain DCs
+# Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllNetworkAuthorizer,
+# CassandraNetworkAuthorizer}.
+#
+# - AllowAllNetworkAuthorizer allows access to any DC to any user - set it to disable authorization.
+# - CassandraNetworkAuthorizer stores permissions in system_auth.network_permissions table. Please
+#   increase system_auth keyspace replication factor if you use this authorizer.
+network_authorizer: AllowAllNetworkAuthorizer
+
+# Validity period for roles cache (fetching granted roles can be an expensive
+# operation depending on the role manager, CassandraRoleManager is one example)
+# Granted roles are cached for authenticated sessions in AuthenticatedUser and
+# after the period specified here, become eligible for (async) reload.
+# Defaults to 2000, set to 0 to disable caching entirely.
+# Will be disabled automatically for AllowAllAuthenticator.
+roles_validity_in_ms: 2000
+
+# Refresh interval for roles cache (if enabled).
+# After this interval, cache entries become eligible for refresh. Upon next
+# access, an async reload is scheduled and the old value returned until it
+# completes. If roles_validity_in_ms is non-zero, then this must be
+# also.
+# Defaults to the same value as roles_validity_in_ms.
+# roles_update_interval_in_ms: 2000
+
+# Validity period for permissions cache (fetching permissions can be an
+# expensive operation depending on the authorizer, CassandraAuthorizer is
+# one example). Defaults to 2000, set to 0 to disable.
+# Will be disabled automatically for AllowAllAuthorizer.
+permissions_validity_in_ms: 2000
+
+# Refresh interval for permissions cache (if enabled).
+# After this interval, cache entries become eligible for refresh. Upon next
+# access, an async reload is scheduled and the old value returned until it
+# completes. If permissions_validity_in_ms is non-zero, then this must be
+# also.
+# Defaults to the same value as permissions_validity_in_ms.
+# permissions_update_interval_in_ms: 2000
+
+# Validity period for credentials cache. This cache is tightly coupled to
+# the provided PasswordAuthenticator implementation of IAuthenticator. If
+# another IAuthenticator implementation is configured, this cache will not
+# be automatically used and so the following settings will have no effect.
+# Please note, credentials are cached in their encrypted form, so while
+# activating this cache may reduce the number of queries made to the
+# underlying table, it may not bring a significant reduction in the
+# latency of individual authentication attempts.
+# Defaults to 2000, set to 0 to disable credentials caching.
+credentials_validity_in_ms: 2000
+
+# Refresh interval for credentials cache (if enabled).
+# After this interval, cache entries become eligible for refresh. Upon next
+# access, an async reload is scheduled and the old value returned until it
+# completes. If credentials_validity_in_ms is non-zero, then this must be
+# also.
+# Defaults to the same value as credentials_validity_in_ms.
+# credentials_update_interval_in_ms: 2000
+
+# The partitioner is responsible for distributing groups of rows (by
+# partition key) across nodes in the cluster. The partitioner can NOT be
+# changed without reloading all data.  If you are adding nodes or upgrading,
+# you should set this to the same partitioner that you are currently using.
+#
+# The default partitioner is the Murmur3Partitioner. Older partitioners
+# such as the RandomPartitioner, ByteOrderedPartitioner, and
+# OrderPreservingPartitioner have been included for backward compatibility only.
+# For new clusters, you should NOT change this value.
+#
+partitioner: org.apache.cassandra.dht.Murmur3Partitioner
+
+# Directories where Cassandra should store data on disk. If multiple
+# directories are specified, Cassandra will spread data evenly across
+# them by partitioning the token ranges.
+# If not set, the default directory is $CASSANDRA_HOME/data/data.
+# data_file_directories:
+#     - /var/lib/cassandra/data
+
+# commit log.  when running on magnetic HDD, this should be a
+# separate spindle than the data directories.
+# If not set, the default directory is $CASSANDRA_HOME/data/commitlog.
+# commitlog_directory: /var/lib/cassandra/commitlog
+
+# Enable / disable CDC functionality on a per-node basis. This modifies the logic used
+# for write path allocation rejection (standard: never reject. cdc: reject Mutation
+# containing a CDC-enabled table if at space limit in cdc_raw_directory).
+cdc_enabled: true
+
+# CommitLogSegments are moved to this directory on flush if cdc_enabled: true and the
+# segment contains mutations for a CDC-enabled table. This should be placed on a
+# separate spindle than the data directories. If not set, the default directory is
+# $CASSANDRA_HOME/data/cdc_raw.
+cdc_raw_directory: /var/lib/cassandra/cdc_raw
+
+# Policy for data disk failures:
+#
+# die
+#   shut down gossip and client transports and kill the JVM for any fs errors or
+#   single-sstable errors, so the node can be replaced.
+#
+# stop_paranoid
+#   shut down gossip and client transports even for single-sstable errors,
+#   kill the JVM for errors during startup.
+#
+# stop
+#   shut down gossip and client transports, leaving the node effectively dead, but
+#   can still be inspected via JMX, kill the JVM for errors during startup.
+#
+# best_effort
+#    stop using the failed disk and respond to requests based on
+#    remaining available sstables.  This means you WILL see obsolete
+#    data at CL.ONE!
+#
+# ignore
+#    ignore fatal errors and let requests fail, as in pre-1.2 Cassandra
+disk_failure_policy: stop
+
+# Policy for commit disk failures:
+#
+# die
+#   shut down the node and kill the JVM, so the node can be replaced.
+#
+# stop
+#   shut down the node, leaving the node effectively dead, but
+#   can still be inspected via JMX.
+#
+# stop_commit
+#   shutdown the commit log, letting writes collect but
+#   continuing to service reads, as in pre-2.0.5 Cassandra
+#
+# ignore
+#   ignore fatal errors and let the batches fail
+commit_failure_policy: stop
+
+# Maximum size of the native protocol prepared statement cache
+#
+# Valid values are either "auto" (omitting the value) or a value greater than 0.
+#
+# Note that specifying a too large value will result in long running GCs and possibly
+# out-of-memory errors. Keep the value at a small fraction of the heap.
+#
+# If you constantly see "prepared statements discarded in the last minute because
+# cache limit reached" messages, the first step is to investigate the root cause
+# of these messages and check whether prepared statements are used correctly -
+# i.e. use bind markers for variable parts.
+#
+# Do only change the default value, if you really have more prepared statements than
+# fit in the cache. In most cases it is not necessary to change this value.
+# Constantly re-preparing statements is a performance penalty.
+#
+# Default value ("auto") is 1/256th of the heap or 10MB, whichever is greater
+prepared_statements_cache_size_mb:
+
+# Maximum size of the key cache in memory.
+#
+# Each key cache hit saves 1 seek and each row cache hit saves 2 seeks at the
+# minimum, sometimes more. The key cache is fairly tiny for the amount of
+# time it saves, so it's worthwhile to use it at large numbers.
+# The row cache saves even more time, but must contain the entire row,
+# so it is extremely space-intensive. It's best to only use the
+# row cache if you have hot rows or static rows.
+#
+# NOTE: if you reduce the size, you may not get your hottest keys loaded on startup.
+#
+# Default value is empty to make it "auto" (min(5% of Heap (in MB), 100MB)). Set to 0 to disable key cache.
+key_cache_size_in_mb:
+
+# Duration in seconds after which Cassandra should
+# save the key cache. Caches are saved to saved_caches_directory as
+# specified in this configuration file.
+#
+# Saved caches greatly improve cold-start speeds, and is relatively cheap in
+# terms of I/O for the key cache. Row cache saving is much more expensive and
+# has limited use.
+#
+# Default is 14400 or 4 hours.
+key_cache_save_period: 14400
+
+# Number of keys from the key cache to save
+# Disabled by default, meaning all keys are going to be saved
+# key_cache_keys_to_save: 100
+
+# Row cache implementation class name. Available implementations:
+#
+# org.apache.cassandra.cache.OHCProvider
+#   Fully off-heap row cache implementation (default).
+#
+# org.apache.cassandra.cache.SerializingCacheProvider
+#   This is the row cache implementation available
+#   in previous releases of Cassandra.
+# row_cache_class_name: org.apache.cassandra.cache.OHCProvider
+
+# Maximum size of the row cache in memory.
+# Please note that OHC cache implementation requires some additional off-heap memory to manage
+# the map structures and some in-flight memory during operations before/after cache entries can be
+# accounted against the cache capacity. This overhead is usually small compared to the whole capacity.
+# Do not specify more memory than the system can afford in the worst usual situation and leave some
+# headroom for OS block level cache. Do never allow your system to swap.
+#
+# Default value is 0, to disable row caching.
+row_cache_size_in_mb: 0
+
+# Duration in seconds after which Cassandra should save the row cache.
+# Caches are saved to saved_caches_directory as specified in this configuration file.
+#
+# Saved caches greatly improve cold-start speeds, and is relatively cheap in
+# terms of I/O for the key cache. Row cache saving is much more expensive and
+# has limited use.
+#
+# Default is 0 to disable saving the row cache.
+row_cache_save_period: 0
+
+# Number of keys from the row cache to save.
+# Specify 0 (which is the default), meaning all keys are going to be saved
+# row_cache_keys_to_save: 100
+
+# Maximum size of the counter cache in memory.
+#
+# Counter cache helps to reduce counter locks' contention for hot counter cells.
+# In case of RF = 1 a counter cache hit will cause Cassandra to skip the read before
+# write entirely. With RF > 1 a counter cache hit will still help to reduce the duration
+# of the lock hold, helping with hot counter cell updates, but will not allow skipping
+# the read entirely. Only the local (clock, count) tuple of a counter cell is kept
+# in memory, not the whole counter, so it's relatively cheap.
+#
+# NOTE: if you reduce the size, you may not get your hottest keys loaded on startup.
+#
+# Default value is empty to make it "auto" (min(2.5% of Heap (in MB), 50MB)). Set to 0 to disable counter cache.
+# NOTE: if you perform counter deletes and rely on low gcgs, you should disable the counter cache.
+counter_cache_size_in_mb:
+
+# Duration in seconds after which Cassandra should
+# save the counter cache (keys only). Caches are saved to saved_caches_directory as
+# specified in this configuration file.
+#
+# Default is 7200 or 2 hours.
+counter_cache_save_period: 7200
+
+# Number of keys from the counter cache to save
+# Disabled by default, meaning all keys are going to be saved
+# counter_cache_keys_to_save: 100
+
+# saved caches
+# If not set, the default directory is $CASSANDRA_HOME/data/saved_caches.
+# saved_caches_directory: /var/lib/cassandra/saved_caches
+
+# commitlog_sync may be either "periodic", "group", or "batch."
+#
+# When in batch mode, Cassandra won't ack writes until the commit log
+# has been flushed to disk.  Each incoming write will trigger the flush task.
+# commitlog_sync_batch_window_in_ms is a deprecated value. Previously it had
+# almost no value, and is being removed.
+#
+# commitlog_sync_batch_window_in_ms: 2
+#
+# group mode is similar to batch mode, where Cassandra will not ack writes
+# until the commit log has been flushed to disk. The difference is group
+# mode will wait up to commitlog_sync_group_window_in_ms between flushes.
+#
+# commitlog_sync_group_window_in_ms: 1000
+#
+# the default option is "periodic" where writes may be acked immediately
+# and the CommitLog is simply synced every commitlog_sync_period_in_ms
+# milliseconds.
+commitlog_sync: periodic
+commitlog_sync_period_in_ms: 10000
+
+# When in periodic commitlog mode, the number of milliseconds to block writes
+# while waiting for a slow disk flush to complete.
+# periodic_commitlog_sync_lag_block_in_ms:
+
+# The size of the individual commitlog file segments.  A commitlog
+# segment may be archived, deleted, or recycled once all the data
+# in it (potentially from each columnfamily in the system) has been
+# flushed to sstables.
+#
+# The default size is 32, which is almost always fine, but if you are
+# archiving commitlog segments (see commitlog_archiving.properties),
+# then you probably want a finer granularity of archiving; 8 or 16 MB
+# is reasonable.
+# Max mutation size is also configurable via max_mutation_size_in_kb setting in
+# cassandra.yaml. The default is half the size of commitlog_segment_size_in_mb * 1024.
+# This should be positive and less than 2048.
+#
+# NOTE: If max_mutation_size_in_kb is set explicitly then commitlog_segment_size_in_mb must
+# be set to at least twice the size of max_mutation_size_in_kb / 1024
+#
+commitlog_segment_size_in_mb: 1
+
+# Compression to apply to the commit log. If omitted, the commit log
+# will be written uncompressed.  LZ4, Snappy, and Deflate compressors
+# are supported.
+# commitlog_compression:
+#   - class_name: LZ4Compressor
+#     parameters:
+#         -
+
+# Compression to apply to SSTables as they flush for compressed tables.
+# Note that tables without compression enabled do not respect this flag.
+#
+# As high ratio compressors like LZ4HC, Zstd, and Deflate can potentially
+# block flushes for too long, the default is to flush with a known fast
+# compressor in those cases. Options are:
+#
+# none : Flush without compressing blocks but while still doing checksums.
+# fast : Flush with a fast compressor. If the table is already using a
+#        fast compressor that compressor is used.
+# table: Always flush with the same compressor that the table uses. This
+#        was the pre 4.0 behavior.
+#
+# flush_compression: fast
+
+# any class that implements the SeedProvider interface and has a
+# constructor that takes a Map<String, String> of parameters will do.
+seed_provider:
+    # Addresses of hosts that are deemed contact points.
+    # Cassandra nodes use this list of hosts to find each other and learn
+    # the topology of the ring.  You must change this if you are running
+    # multiple nodes!
+    - class_name: org.apache.cassandra.locator.SimpleSeedProvider
+      parameters:
+          # seeds is actually a comma-delimited list of addresses.
+          # Ex: "<ip1>,<ip2>,<ip3>"
+          - seeds: "127.0.0.1:7000"
+
+# For workloads with more data than can fit in memory, Cassandra's
+# bottleneck will be reads that need to fetch data from
+# disk. "concurrent_reads" should be set to (16 * number_of_drives) in
+# order to allow the operations to enqueue low enough in the stack
+# that the OS and drives can reorder them. Same applies to
+# "concurrent_counter_writes", since counter writes read the current
+# values before incrementing and writing them back.
+#
+# On the other hand, since writes are almost never IO bound, the ideal
+# number of "concurrent_writes" is dependent on the number of cores in
+# your system; (8 * number_of_cores) is a good rule of thumb.
+concurrent_reads: 32
+concurrent_writes: 32
+concurrent_counter_writes: 32
+
+# For materialized view writes, as there is a read involved, so this should
+# be limited by the lesser of concurrent reads or concurrent writes.
+concurrent_materialized_view_writes: 32
+
+# Maximum memory to use for inter-node and client-server networking buffers.
+#
+# Defaults to the smaller of 1/16 of heap or 128MB. This pool is allocated off-heap,
+# so is in addition to the memory allocated for heap. The cache also has on-heap
+# overhead which is roughly 128 bytes per chunk (i.e. 0.2% of the reserved size
+# if the default 64k chunk size is used).
+# Memory is only allocated when needed.
+# networking_cache_size_in_mb: 128
+
+# Enable the sstable chunk cache.  The chunk cache will store recently accessed
+# sections of the sstable in-memory as uncompressed buffers.
+# file_cache_enabled: false
+
+# Maximum memory to use for sstable chunk cache and buffer pooling.
+# 32MB of this are reserved for pooling buffers, the rest is used for chunk cache
+# that holds uncompressed sstable chunks.
+# Defaults to the smaller of 1/4 of heap or 512MB. This pool is allocated off-heap,
+# so is in addition to the memory allocated for heap. The cache also has on-heap
+# overhead which is roughly 128 bytes per chunk (i.e. 0.2% of the reserved size
+# if the default 64k chunk size is used).
+# Memory is only allocated when needed.
+# file_cache_size_in_mb: 512
+
+# Flag indicating whether to allocate on or off heap when the sstable buffer
+# pool is exhausted, that is when it has exceeded the maximum memory
+# file_cache_size_in_mb, beyond which it will not cache buffers but allocate on request.
+
+# buffer_pool_use_heap_if_exhausted: true
+
+# The strategy for optimizing disk read
+# Possible values are:
+# ssd (for solid state disks, the default)
+# spinning (for spinning disks)
+# disk_optimization_strategy: ssd
+
+# Total permitted memory to use for memtables. Cassandra will stop
+# accepting writes when the limit is exceeded until a flush completes,
+# and will trigger a flush based on memtable_cleanup_threshold
+# If omitted, Cassandra will set both to 1/4 the size of the heap.
+# memtable_heap_space_in_mb: 2048
+# memtable_offheap_space_in_mb: 2048
+
+# memtable_cleanup_threshold is deprecated. The default calculation
+# is the only reasonable choice. See the comments on  memtable_flush_writers
+# for more information.
+#
+# Ratio of occupied non-flushing memtable size to total permitted size
+# that will trigger a flush of the largest memtable. Larger mct will
+# mean larger flushes and hence less compaction, but also less concurrent
+# flush activity which can make it difficult to keep your disks fed
+# under heavy write load.
+#
+# memtable_cleanup_threshold defaults to 1 / (memtable_flush_writers + 1)
+# memtable_cleanup_threshold: 0.11
+
+# Specify the way Cassandra allocates and manages memtable memory.
+# Options are:
+#
+# heap_buffers
+#   on heap nio buffers
+#
+# offheap_buffers
+#   off heap (direct) nio buffers
+#
+# offheap_objects
+#    off heap objects
+memtable_allocation_type: heap_buffers
+
+# Limit memory usage for Merkle tree calculations during repairs. The default
+# is 1/16th of the available heap. The main tradeoff is that smaller trees
+# have less resolution, which can lead to over-streaming data. If you see heap
+# pressure during repairs, consider lowering this, but you cannot go below
+# one megabyte. If you see lots of over-streaming, consider raising
+# this or using subrange repair.
+#
+# For more details see https://issues.apache.org/jira/browse/CASSANDRA-14096.
+#
+# repair_session_space_in_mb:
+
+# Total space to use for commit logs on disk.
+#
+# If space gets above this value, Cassandra will flush every dirty CF
+# in the oldest segment and remove it.  So a small total commitlog space
+# will tend to cause more flush activity on less-active columnfamilies.
+#
+# The default value is the smaller of 8192, and 1/4 of the total space
+# of the commitlog volume.
+#
+# commitlog_total_space_in_mb: 8192
+
+# This sets the number of memtable flush writer threads per disk
+# as well as the total number of memtables that can be flushed concurrently.
+# These are generally a combination of compute and IO bound.
+#
+# Memtable flushing is more CPU efficient than memtable ingest and a single thread
+# can keep up with the ingest rate of a whole server on a single fast disk
+# until it temporarily becomes IO bound under contention typically with compaction.
+# At that point you need multiple flush threads. At some point in the future
+# it may become CPU bound all the time.
+#
+# You can tell if flushing is falling behind using the MemtablePool.BlockedOnAllocation
+# metric which should be 0, but will be non-zero if threads are blocked waiting on flushing
+# to free memory.
+#
+# memtable_flush_writers defaults to two for a single data directory.
+# This means that two  memtables can be flushed concurrently to the single data directory.
+# If you have multiple data directories the default is one memtable flushing at a time
+# but the flush will use a thread per data directory so you will get two or more writers.
+#
+# Two is generally enough to flush on a fast disk [array] mounted as a single data directory.
+# Adding more flush writers will result in smaller more frequent flushes that introduce more
+# compaction overhead.
+#
+# There is a direct tradeoff between number of memtables that can be flushed concurrently
+# and flush size and frequency. More is not better; you just need enough flush writers
+# to never stall waiting for flushing to free memory.
+#
+#memtable_flush_writers: 2
+
+# Total space to use for change-data-capture logs on disk.
+#
+# If space gets above this value, Cassandra will throw WriteTimeoutException
+# on Mutations including tables with CDC enabled. A CDCCompactor is responsible
+# for parsing the raw CDC logs and deleting them when parsing is completed.
+#
+# The default value is the min of 4096 mb and 1/8th of the total space
+# of the drive where cdc_raw_directory resides.
+# cdc_total_space_in_mb: 4096
+
+# When we hit our cdc_raw limit and the CDCCompactor is either running behind
+# or experiencing backpressure, we check at the following interval to see if any
+# new space for cdc-tracked tables has been made available. Default to 250ms
+# cdc_free_space_check_interval_ms: 250
+
+# A fixed memory pool size in MB for SSTable index summaries. If left
+# empty, this will default to 5% of the heap size. If the memory usage of
+# all index summaries exceeds this limit, SSTables with low read rates will
+# shrink their index summaries in order to meet this limit.  However, this
+# is a best-effort process. In extreme conditions Cassandra may need to use
+# more than this amount of memory.
+index_summary_capacity_in_mb:
+
+# How frequently index summaries should be resampled.  This is done
+# periodically to redistribute memory from the fixed-size pool to sstables
+# proportional to their recent read rates.  Setting to -1 will disable this
+# process, leaving existing index summaries at their current sampling level.
+index_summary_resize_interval_in_minutes: 60
+
+# Whether to, when doing sequential writing, fsync() at intervals in
+# order to force the operating system to flush the dirty
+# buffers. Enable this to avoid sudden dirty buffer flushing from
+# impacting read latencies. Almost always a good idea on SSDs; not
+# necessarily on platters.
+trickle_fsync: false
+trickle_fsync_interval_in_kb: 10240
+
+# TCP port, for commands and data
+# For security reasons, you should not expose this port to the internet.  Firewall it if needed.
+storage_port: 7000
+
+# SSL port, for legacy encrypted communication. This property is unused unless enabled in
+# server_encryption_options (see below). As of cassandra 4.0, this property is deprecated
+# as a single port can be used for either/both secure and insecure connections.
+# For security reasons, you should not expose this port to the internet. Firewall it if needed.
+ssl_storage_port: 7001
+
+# Address or interface to bind to and tell other Cassandra nodes to connect to.
+# You _must_ change this if you want multiple nodes to be able to communicate!
+#
+# Set listen_address OR listen_interface, not both.
+#
+# Leaving it blank leaves it up to InetAddress.getLocalHost(). This
+# will always do the Right Thing _if_ the node is properly configured
+# (hostname, name resolution, etc), and the Right Thing is to use the
+# address associated with the hostname (it might not be). If unresolvable
+# it will fall back to InetAddress.getLoopbackAddress(), which is wrong for production systems.
+#
+# Setting listen_address to 0.0.0.0 is always wrong.
+#
+listen_address: localhost
+
+# Set listen_address OR listen_interface, not both. Interfaces must correspond
+# to a single address, IP aliasing is not supported.
+# listen_interface: eth0
+
+# If you choose to specify the interface by name and the interface has an ipv4 and an ipv6 address
+# you can specify which should be chosen using listen_interface_prefer_ipv6. If false the first ipv4
+# address will be used. If true the first ipv6 address will be used. Defaults to false preferring
+# ipv4. If there is only one address it will be selected regardless of ipv4/ipv6.
+# listen_interface_prefer_ipv6: false
+
+# Address to broadcast to other Cassandra nodes
+# Leaving this blank will set it to the same value as listen_address
+# broadcast_address: 1.2.3.4
+
+# When using multiple physical network interfaces, set this
+# to true to listen on broadcast_address in addition to
+# the listen_address, allowing nodes to communicate in both
+# interfaces.
+# Ignore this property if the network configuration automatically
+# routes  between the public and private networks such as EC2.
+# listen_on_broadcast_address: false
+
+# Internode authentication backend, implementing IInternodeAuthenticator;
+# used to allow/disallow connections from peer nodes.
+# internode_authenticator: org.apache.cassandra.auth.AllowAllInternodeAuthenticator
+
+# Whether to start the native transport server.
+# The address on which the native transport is bound is defined by rpc_address.
+start_native_transport: true
+# port for the CQL native transport to listen for clients on
+# For security reasons, you should not expose this port to the internet.  Firewall it if needed.
+native_transport_port: 9042
+# Enabling native transport encryption in client_encryption_options allows you to either use
+# encryption for the standard port or to use a dedicated, additional port along with the unencrypted
+# standard native_transport_port.
+# Enabling client encryption and keeping native_transport_port_ssl disabled will use encryption
+# for native_transport_port. Setting native_transport_port_ssl to a different value
+# from native_transport_port will use encryption for native_transport_port_ssl while
+# keeping native_transport_port unencrypted.
+# native_transport_port_ssl: 9142
+# The maximum threads for handling requests (note that idle threads are stopped
+# after 30 seconds so there is no corresponding minimum setting).
+# native_transport_max_threads: 128
+#
+# The maximum size of allowed frame. Frame (requests) larger than this will
+# be rejected as invalid. The default is 256MB. If you're changing this parameter,
+# you may want to adjust max_value_size_in_mb accordingly. This should be positive and less than 2048.
+# native_transport_max_frame_size_in_mb: 256
+
+# The maximum number of concurrent client connections.
+# The default is -1, which means unlimited.
+# native_transport_max_concurrent_connections: -1
+
+# The maximum number of concurrent client connections per source ip.
+# The default is -1, which means unlimited.
+# native_transport_max_concurrent_connections_per_ip: -1
+
+# Controls whether Cassandra honors older, yet currently supported, protocol versions.
+# The default is true, which means all supported protocols will be honored.
+native_transport_allow_older_protocols: true
+
+# Controls when idle client connections are closed. Idle connections are ones that had neither reads
+# nor writes for a time period.
+#
+# Clients may implement heartbeats by sending OPTIONS native protocol message after a timeout, which
+# will reset idle timeout timer on the server side. To close idle client connections, corresponding
+# values for heartbeat intervals have to be set on the client side.
+#
+# Idle connection timeouts are disabled by default.
+# native_transport_idle_timeout_in_ms: 60000
+
+# The address or interface to bind the native transport server to.
+#
+# Set rpc_address OR rpc_interface, not both.
+#
+# Leaving rpc_address blank has the same effect as on listen_address
+# (i.e. it will be based on the configured hostname of the node).
+#
+# Note that unlike listen_address, you can specify 0.0.0.0, but you must also
+# set broadcast_rpc_address to a value other than 0.0.0.0.
+#
+# For security reasons, you should not expose this port to the internet.  Firewall it if needed.
+rpc_address: localhost
+
+# Set rpc_address OR rpc_interface, not both. Interfaces must correspond
+# to a single address, IP aliasing is not supported.
+# rpc_interface: eth1
+
+# If you choose to specify the interface by name and the interface has an ipv4 and an ipv6 address
+# you can specify which should be chosen using rpc_interface_prefer_ipv6. If false the first ipv4
+# address will be used. If true the first ipv6 address will be used. Defaults to false preferring
+# ipv4. If there is only one address it will be selected regardless of ipv4/ipv6.
+# rpc_interface_prefer_ipv6: false
+
+# RPC address to broadcast to drivers and other Cassandra nodes. This cannot
+# be set to 0.0.0.0. If left blank, this will be set to the value of
+# rpc_address. If rpc_address is set to 0.0.0.0, broadcast_rpc_address must
+# be set.
+# broadcast_rpc_address: 1.2.3.4
+
+# enable or disable keepalive on rpc/native connections
+rpc_keepalive: true
+
+# Uncomment to set socket buffer size for internode communication
+# Note that when setting this, the buffer size is limited by net.core.wmem_max
+# and when not setting it it is defined by net.ipv4.tcp_wmem
+# See also:
+# /proc/sys/net/core/wmem_max
+# /proc/sys/net/core/rmem_max
+# /proc/sys/net/ipv4/tcp_wmem
+# /proc/sys/net/ipv4/tcp_rmem
+# and 'man tcp'
+# internode_send_buff_size_in_bytes:
+
+# Uncomment to set socket buffer size for internode communication
+# Note that when setting this, the buffer size is limited by net.core.rmem_max
+# and when not setting it it is defined by net.ipv4.tcp_rmem
+# internode_recv_buff_size_in_bytes:
+
+# Set to true to have Cassandra create a hard link to each sstable
+# flushed or streamed locally in a backups/ subdirectory of the
+# keyspace data.  Removing these links is the operator's
+# responsibility.
+incremental_backups: false
+
+# Whether or not to take a snapshot before each compaction.  Be
+# careful using this option, since Cassandra won't clean up the
+# snapshots for you.  Mostly useful if you're paranoid when there
+# is a data format change.
+snapshot_before_compaction: false
+
+# Whether or not a snapshot is taken of the data before keyspace truncation
+# or dropping of column families. The STRONGLY advised default of true
+# should be used to provide data safety. If you set this flag to false, you will
+# lose data on truncation or drop.
+auto_snapshot: true
+
+# The act of creating or clearing a snapshot involves creating or removing
+# potentially tens of thousands of links, which can cause significant performance
+# impact, especially on consumer grade SSDs. A non-zero value here can
+# be used to throttle these links to avoid negative performance impact of
+# taking and clearing snapshots
+snapshot_links_per_second: 0
+
+# Granularity of the collation index of rows within a partition.
+# Increase if your rows are large, or if you have a very large
+# number of rows per partition.  The competing goals are these:
+#
+# - a smaller granularity means more index entries are generated
+#   and looking up rows within the partition by collation column
+#   is faster
+# - but, Cassandra will keep the collation index in memory for hot
+#   rows (as part of the key cache), so a larger granularity means
+#   you can cache more hot rows
+column_index_size_in_kb: 64
+
+# Per sstable indexed key cache entries (the collation index in memory
+# mentioned above) exceeding this size will not be held on heap.
+# This means that only partition information is held on heap and the
+# index entries are read from disk.
+#
+# Note that this size refers to the size of the
+# serialized index information and not the size of the partition.
+column_index_cache_size_in_kb: 2
+
+# Number of simultaneous compactions to allow, NOT including
+# validation "compactions" for anti-entropy repair.  Simultaneous
+# compactions can help preserve read performance in a mixed read/write
+# workload, by mitigating the tendency of small sstables to accumulate
+# during a single long running compaction. The default is usually
+# fine and if you experience problems with compaction running too
+# slowly or too fast, you should look at
+# compaction_throughput_mb_per_sec first.
+#
+# concurrent_compactors defaults to the smaller of (number of disks,
+# number of cores), with a minimum of 2 and a maximum of 8.
+#
+# If your data directories are backed by SSD, you should increase this
+# to the number of cores.
+#concurrent_compactors: 1
+
+# Number of simultaneous repair validations to allow. If not set or set to
+# a value less than 1, it defaults to the value of concurrent_compactors.
+# To set a value greater than concurrent_compactors at startup, the system
+# property cassandra.allow_unlimited_concurrent_validations must be set to
+# true. To dynamically resize to a value > concurrent_compactors on a running
+# node, first call the bypassConcurrentValidatorsLimit method on the
+# org.apache.cassandra.db:type=StorageService mbean
+# concurrent_validations: 0
+
+# Number of simultaneous materialized view builder tasks to allow.
+concurrent_materialized_view_builders: 1
+
+# Throttles compaction to the given total throughput across the entire
+# system. The faster you insert data, the faster you need to compact in
+# order to keep the sstable count down, but in general, setting this to
+# 16 to 32 times the rate you are inserting data is more than sufficient.
+# Setting this to 0 disables throttling. Note that this accounts for all types
+# of compaction, including validation compaction (building Merkle trees
+# for repairs).
+compaction_throughput_mb_per_sec: 64
+
+# When compacting, the replacement sstable(s) can be opened before they
+# are completely written, and used in place of the prior sstables for
+# any range that has been written. This helps to smoothly transfer reads
+# between the sstables, reducing page cache churn and keeping hot rows hot
+sstable_preemptive_open_interval_in_mb: 50
+
+# When enabled, permits Cassandra to zero-copy stream entire eligible
+# SSTables between nodes, including every component.
+# This speeds up the network transfer significantly subject to
+# throttling specified by stream_throughput_outbound_megabits_per_sec.
+# Enabling this will reduce the GC pressure on sending and receiving node.
+# When unset, the default is enabled. While this feature tries to keep the
+# disks balanced, it cannot guarantee it. This feature will be automatically
+# disabled if internode encryption is enabled. Currently this can be used with
+# Leveled Compaction. Once CASSANDRA-14586 is fixed other compaction strategies
+# will benefit as well when used in combination with CASSANDRA-6696.
+# stream_entire_sstables: true
+
+# Throttles all outbound streaming file transfers on this node to the
+# given total throughput in Mbps. This is necessary because Cassandra does
+# mostly sequential IO when streaming data during bootstrap or repair, which
+# can lead to saturating the network connection and degrading rpc performance.
+# When unset, the default is 200 Mbps or 25 MB/s.
+# stream_throughput_outbound_megabits_per_sec: 200
+
+# Throttles all streaming file transfer between the datacenters,
+# this setting allows users to throttle inter dc stream throughput in addition
+# to throttling all network stream traffic as configured with
+# stream_throughput_outbound_megabits_per_sec
+# When unset, the default is 200 Mbps or 25 MB/s
+# inter_dc_stream_throughput_outbound_megabits_per_sec: 200
+
+# How long the coordinator should wait for read operations to complete.
+# Lowest acceptable value is 10 ms.
+read_request_timeout_in_ms: 5000
+# How long the coordinator should wait for seq or index scans to complete.
+# Lowest acceptable value is 10 ms.
+range_request_timeout_in_ms: 10000
+# How long the coordinator should wait for writes to complete.
+# Lowest acceptable value is 10 ms.
+write_request_timeout_in_ms: 2000
+# How long the coordinator should wait for counter writes to complete.
+# Lowest acceptable value is 10 ms.
+counter_write_request_timeout_in_ms: 5000
+# How long a coordinator should continue to retry a CAS operation
+# that contends with other proposals for the same row.
+# Lowest acceptable value is 10 ms.
+cas_contention_timeout_in_ms: 1000
+# How long the coordinator should wait for truncates to complete
+# (This can be much longer, because unless auto_snapshot is disabled
+# we need to flush first so we can snapshot before removing the data.)
+# Lowest acceptable value is 10 ms.
+truncate_request_timeout_in_ms: 60000
+# The default timeout for other, miscellaneous operations.
+# Lowest acceptable value is 10 ms.
+request_timeout_in_ms: 10000
+
+# Defensive settings for protecting Cassandra from true network partitions.
+# See (CASSANDRA-14358) for details.
+#
+# The amount of time to wait for internode tcp connections to establish.
+# internode_tcp_connect_timeout_in_ms: 2000
+#
+# The amount of time unacknowledged data is allowed on a connection before we throw out the connection
+# Note this is only supported on Linux + epoll, and it appears to behave oddly above a setting of 30000
+# (it takes much longer than 30s) as of Linux 4.12. If you want something that high set this to 0
+# which picks up the OS default and configure the net.ipv4.tcp_retries2 sysctl to be ~8.
+# internode_tcp_user_timeout_in_ms: 30000
+
+# The amount of time unacknowledged data is allowed on a streaming connection.
+# The default is 5 minutes. Increase it or set it to 0 in order to increase the timeout.
+# internode_streaming_tcp_user_timeout_in_ms: 300000
+
+# The maximum continuous period a connection may be unwritable in application space
+# internode_application_timeout_in_ms = 30000
+
+# Global, per-endpoint and per-connection limits imposed on messages queued for delivery to other nodes
+# and waiting to be processed on arrival from other nodes in the cluster.  These limits are applied to the on-wire
+# size of the message being sent or received.
+#
+# The basic per-link limit is consumed in isolation before any endpoint or global limit is imposed.
+# Each node-pair has three links: urgent, small and large.  So any given node may have a maximum of
+# N*3*(internode_application_send_queue_capacity_in_bytes+internode_application_receive_queue_capacity_in_bytes)
+# messages queued without any coordination between them although in practice, with token-aware routing, only RF*tokens
+# nodes should need to communicate with significant bandwidth.
+#
+# The per-endpoint limit is imposed on all messages exceeding the per-link limit, simultaneously with the global limit,
+# on all links to or from a single node in the cluster.
+# The global limit is imposed on all messages exceeding the per-link limit, simultaneously with the per-endpoint limit,
+# on all links to or from any node in the cluster.
+#
+# internode_application_send_queue_capacity_in_bytes: 4194304                       #4MiB
+# internode_application_send_queue_reserve_endpoint_capacity_in_bytes: 134217728    #128MiB
+# internode_application_send_queue_reserve_global_capacity_in_bytes: 536870912      #512MiB
+# internode_application_receive_queue_capacity_in_bytes: 4194304                    #4MiB
+# internode_application_receive_queue_reserve_endpoint_capacity_in_bytes: 134217728 #128MiB
+# internode_application_receive_queue_reserve_global_capacity_in_bytes: 536870912   #512MiB
+
+
+# How long before a node logs slow queries. Select queries that take longer than
+# this timeout to execute, will generate an aggregated log message, so that slow queries
+# can be identified. Set this value to zero to disable slow query logging.
+slow_query_log_timeout_in_ms: 500
+
+# Enable operation timeout information exchange between nodes to accurately
+# measure request timeouts.  If disabled, replicas will assume that requests
+# were forwarded to them instantly by the coordinator, which means that
+# under overload conditions we will waste that much extra time processing
+# already-timed-out requests.
+#
+# Warning: It is generally assumed that users have setup NTP on their clusters, and that clocks are modestly in sync,
+# since this is a requirement for general correctness of last write wins.
+#cross_node_timeout: true
+
+# Set keep-alive period for streaming
+# This node will send a keep-alive message periodically with this period.
+# If the node does not receive a keep-alive message from the peer for
+# 2 keep-alive cycles the stream session times out and fails
+# Default value is 300s (5 minutes), which means stalled stream
+# times out in 10 minutes by default
+# streaming_keep_alive_period_in_secs: 300
+
+# Limit number of connections per host for streaming
+# Increase this when you notice that joins are CPU-bound rather than network
+# bound (for example a few nodes with big files).
+# streaming_connections_per_host: 1
+
+
+# phi value that must be reached for a host to be marked down.
+# most users should never need to adjust this.
+# phi_convict_threshold: 8
+
+# endpoint_snitch -- Set this to a class that implements
+# IEndpointSnitch.  The snitch has two functions:
+#
+# - it teaches Cassandra enough about your network topology to route
+#   requests efficiently
+# - it allows Cassandra to spread replicas around your cluster to avoid
+#   correlated failures. It does this by grouping machines into
+#   "datacenters" and "racks."  Cassandra will do its best not to have
+#   more than one replica on the same "rack" (which may not actually
+#   be a physical location)
+#
+# CASSANDRA WILL NOT ALLOW YOU TO SWITCH TO AN INCOMPATIBLE SNITCH
+# ONCE DATA IS INSERTED INTO THE CLUSTER.  This would cause data loss.
+# This means that if you start with the default SimpleSnitch, which
+# locates every node on "rack1" in "datacenter1", your only options
+# if you need to add another datacenter are GossipingPropertyFileSnitch
+# (and the older PFS).  From there, if you want to migrate to an
+# incompatible snitch like Ec2Snitch you can do it by adding new nodes
+# under Ec2Snitch (which will locate them in a new "datacenter") and
+# decommissioning the old ones.
+#
+# Out of the box, Cassandra provides:
+#
+# SimpleSnitch:
+#    Treats Strategy order as proximity. This can improve cache
+#    locality when disabling read repair.  Only appropriate for
+#    single-datacenter deployments.
+#
+# GossipingPropertyFileSnitch
+#    This should be your go-to snitch for production use.  The rack
+#    and datacenter for the local node are defined in
+#    cassandra-rackdc.properties and propagated to other nodes via
+#    gossip.  If cassandra-topology.properties exists, it is used as a
+#    fallback, allowing migration from the PropertyFileSnitch.
+#
+# PropertyFileSnitch:
+#    Proximity is determined by rack and data center, which are
+#    explicitly configured in cassandra-topology.properties.
+#
+# Ec2Snitch:
+#    Appropriate for EC2 deployments in a single Region. Loads Region
+#    and Availability Zone information from the EC2 API. The Region is
+#    treated as the datacenter, and the Availability Zone as the rack.
+#    Only private IPs are used, so this will not work across multiple
+#    Regions.
+#
+# Ec2MultiRegionSnitch:
+#    Uses public IPs as broadcast_address to allow cross-region
+#    connectivity.  (Thus, you should set seed addresses to the public
+#    IP as well.) You will need to open the storage_port or
+#    ssl_storage_port on the public IP firewall.  (For intra-Region
+#    traffic, Cassandra will switch to the private IP after
+#    establishing a connection.)
+#
+# RackInferringSnitch:
+#    Proximity is determined by rack and data center, which are
+#    assumed to correspond to the 3rd and 2nd octet of each node's IP
+#    address, respectively.  Unless this happens to match your
+#    deployment conventions, this is best used as an example of
+#    writing a custom Snitch class and is provided in that spirit.
+#
+# You can use a custom Snitch by setting this to the full class name
+# of the snitch, which will be assumed to be on your classpath.
+endpoint_snitch: SimpleSnitch
+
+# controls how often to perform the more expensive part of host score
+# calculation
+dynamic_snitch_update_interval_in_ms: 100
+# controls how often to reset all host scores, allowing a bad host to
+# possibly recover
+dynamic_snitch_reset_interval_in_ms: 600000
+# if set greater than zero, this will allow
+# 'pinning' of replicas to hosts in order to increase cache capacity.
+# The badness threshold will control how much worse the pinned host has to be
+# before the dynamic snitch will prefer other replicas over it.  This is
+# expressed as a double which represents a percentage.  Thus, a value of
+# 0.2 means Cassandra would continue to prefer the static snitch values
+# until the pinned host was 20% worse than the fastest.
+dynamic_snitch_badness_threshold: 1.0
+
+# Configure server-to-server internode encryption
+#
+# JVM and netty defaults for supported SSL socket protocols and cipher suites can
+# be replaced using custom encryption options. This is not recommended
+# unless you have policies in place that dictate certain settings, or
+# need to disable vulnerable ciphers or protocols in case the JVM cannot
+# be updated.
+#
+# FIPS compliant settings can be configured at JVM level and should not
+# involve changing encryption settings here:
+# https://docs.oracle.com/javase/8/docs/technotes/guides/security/jsse/FIPS.html
+#
+# **NOTE** this default configuration is an insecure configuration. If you need to
+# enable server-to-server encryption generate server keystores (and truststores for mutual
+# authentication) per:
+# http://download.oracle.com/javase/8/docs/technotes/guides/security/jsse/JSSERefGuide.html#CreateKeystore
+# Then perform the following configuration changes:
+#
+# Step 1: Set internode_encryption=<dc|rack|all> and explicitly set optional=true. Restart all nodes
+#
+# Step 2: Set optional=false (or remove it) and if you generated truststores and want to use mutual
+# auth set require_client_auth=true. Restart all nodes
+server_encryption_options:
+    # On outbound connections, determine which type of peers to securely connect to.
+    #   The available options are :
+    #     none : Do not encrypt outgoing connections
+    #     dc   : Encrypt connections to peers in other datacenters but not within datacenters
+    #     rack : Encrypt connections to peers in other racks but not within racks
+    #     all  : Always use encrypted connections
+    internode_encryption: none
+    # When set to true, encrypted and unencrypted connections are allowed on the storage_port
+    # This should _only be true_ while in unencrypted or transitional operation
+    # optional defaults to true if internode_encryption is none
+    # optional: true
+    # If enabled, will open up an encrypted listening socket on ssl_storage_port. Should only be used
+    # during upgrade to 4.0; otherwise, set to false.
+    enable_legacy_ssl_storage_port: false
+    # Set to a valid keystore if internode_encryption is dc, rack or all
+    keystore: conf/.keystore
+    keystore_password: cassandra
+    # Verify peer server certificates
+    require_client_auth: false
+    # Set to a valid truststore if require_client_auth is true
+    truststore: conf/.truststore
+    truststore_password: cassandra
+    # Verify that the host name in the certificate matches the connected host
+    require_endpoint_verification: false
+    # More advanced defaults:
+    # protocol: TLS
+    # store_type: JKS
+    # cipher_suites: [
+    #   TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384, TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,
+    #   TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256, TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,
+    #   TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA, TLS_RSA_WITH_AES_128_GCM_SHA256, TLS_RSA_WITH_AES_128_CBC_SHA,
+    #   TLS_RSA_WITH_AES_256_CBC_SHA
+    # ]
+
+# Configure client-to-server encryption.
+#
+# **NOTE** this default configuration is an insecure configuration. If you need to
+# enable client-to-server encryption generate server keystores (and truststores for mutual
+# authentication) per:
+# http://download.oracle.com/javase/8/docs/technotes/guides/security/jsse/JSSERefGuide.html#CreateKeystore
+# Then perform the following configuration changes:
+#
+# Step 1: Set enabled=true and explicitly set optional=true. Restart all nodes
+#
+# Step 2: Set optional=false (or remove it) and if you generated truststores and want to use mutual
+# auth set require_client_auth=true. Restart all nodes
+client_encryption_options:
+    # Enable client-to-server encryption
+    enabled: false
+    # When set to true, encrypted and unencrypted connections are allowed on the native_transport_port
+    # This should _only be true_ while in unencrypted or transitional operation
+    # optional defaults to true when enabled is false, and false when enabled is true.
+    # optional: true
+    # Set keystore and keystore_password to valid keystores if enabled is true
+    keystore: conf/.keystore
+    keystore_password: cassandra
+    # Verify client certificates
+    require_client_auth: false
+    # Set truststore and truststore_password if require_client_auth is true
+    # truststore: conf/.truststore
+    # truststore_password: cassandra
+    # More advanced defaults:
+    # protocol: TLS
+    # store_type: JKS
+    # cipher_suites: [
+    #   TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384, TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,
+    #   TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256, TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,
+    #   TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA, TLS_RSA_WITH_AES_128_GCM_SHA256, TLS_RSA_WITH_AES_128_CBC_SHA,
+    #   TLS_RSA_WITH_AES_256_CBC_SHA
+    # ]
+
+# internode_compression controls whether traffic between nodes is
+# compressed.
+# Can be:
+#
+# all
+#   all traffic is compressed
+#
+# dc
+#   traffic between different datacenters is compressed
+#
+# none
+#   nothing is compressed.
+internode_compression: dc
+
+# Enable or disable tcp_nodelay for inter-dc communication.
+# Disabling it will result in larger (but fewer) network packets being sent,
+# reducing overhead from the TCP protocol itself, at the cost of increasing
+# latency if you block for cross-datacenter responses.
+inter_dc_tcp_nodelay: false
+
+# TTL for different trace types used during logging of the repair process.
+tracetype_query_ttl: 86400
+tracetype_repair_ttl: 604800
+
+# If unset, all GC Pauses greater than gc_log_threshold_in_ms will log at
+# INFO level
+# UDFs (user defined functions) are disabled by default.
+# As of Cassandra 3.0 there is a sandbox in place that should prevent execution of evil code.
+enable_user_defined_functions: false
+
+# Enables scripted UDFs (JavaScript UDFs).
+# Java UDFs are always enabled, if enable_user_defined_functions is true.
+# Enable this option to be able to use UDFs with "language javascript" or any custom JSR-223 provider.
+# This option has no effect, if enable_user_defined_functions is false.
+enable_scripted_user_defined_functions: false
+
+# The default Windows kernel timer and scheduling resolution is 15.6ms for power conservation.
+# Lowering this value on Windows can provide much tighter latency and better throughput, however
+# some virtualized environments may see a negative performance impact from changing this setting
+# below their system default. The sysinternals 'clockres' tool can confirm your system's default
+# setting.
+windows_timer_interval: 1
+
+
+# Enables encrypting data at-rest (on disk). Different key providers can be plugged in, but the default reads from
+# a JCE-style keystore. A single keystore can hold multiple keys, but the one referenced by
+# the "key_alias" is the only key that will be used for encrypt operations; previously used keys
+# can still (and should!) be in the keystore and will be used on decrypt operations
+# (to handle the case of key rotation).
+#
+# It is strongly recommended to download and install Java Cryptography Extension (JCE)
+# Unlimited Strength Jurisdiction Policy Files for your version of the JDK.
+# (current link: http://www.oracle.com/technetwork/java/javase/downloads/jce8-download-2133166.html)
+#
+# Currently, only the following file types are supported for transparent data encryption, although
+# more are coming in future cassandra releases: commitlog, hints
+transparent_data_encryption_options:
+    enabled: false
+    chunk_length_kb: 64
+    cipher: AES/CBC/PKCS5Padding
+    key_alias: testing:1
+    # CBC IV length for AES needs to be 16 bytes (which is also the default size)
+    # iv_length: 16
+    key_provider:
+      - class_name: org.apache.cassandra.security.JKSKeyProvider
+        parameters:
+          - keystore: conf/.keystore
+            keystore_password: cassandra
+            store_type: JCEKS
+            key_password: cassandra
+
+
+#####################
+# SAFETY THRESHOLDS #
+#####################
+
+# When executing a scan, within or across a partition, we need to keep the
+# tombstones seen in memory so we can return them to the coordinator, which
+# will use them to make sure other replicas also know about the deleted rows.
+# With workloads that generate a lot of tombstones, this can cause performance
+# problems and even exhaust the server heap.
+# (http://www.datastax.com/dev/blog/cassandra-anti-patterns-queues-and-queue-like-datasets)
+# Adjust the thresholds here if you understand the dangers and want to
+# scan more tombstones anyway.  These thresholds may also be adjusted at runtime
+# using the StorageService mbean.
+tombstone_warn_threshold: 1000
+tombstone_failure_threshold: 100000
+
+# Filtering and secondary index queries at read consistency levels above ONE/LOCAL_ONE use a
+# mechanism called replica filtering protection to ensure that results from stale replicas do
+# not violate consistency. (See CASSANDRA-8272 and CASSANDRA-15907 for more details.) This
+# mechanism materializes replica results by partition on-heap at the coordinator. The more possibly
+# stale results returned by the replicas, the more rows materialized during the query.
+replica_filtering_protection:
+    # These thresholds exist to limit the damage severely out-of-date replicas can cause during these
+    # queries. They limit the number of rows from all replicas individual index and filtering queries
+    # can materialize on-heap to return correct results at the desired read consistency level.
+    #
+    # "cached_rows_warn_threshold" is the per-query threshold at which a warning will be logged.
+    # "cached_rows_fail_threshold" is the per-query threshold at which the query will fail.
+    #
+    # These thresholds may also be adjusted at runtime using the StorageService mbean.
+    #
+    # If the failure threshold is breached, it is likely that either the current page/fetch size
+    # is too large or one or more replicas is severely out-of-sync and in need of repair.
+    cached_rows_warn_threshold: 2000
+    cached_rows_fail_threshold: 32000
+
+# Log WARN on any multiple-partition batch size exceeding this value. 5kb per batch by default.
+# Caution should be taken on increasing the size of this threshold as it can lead to node instability.
+batch_size_warn_threshold_in_kb: 5
+
+# Fail any multiple-partition batch exceeding this value. 50kb (10x warn threshold) by default.
+batch_size_fail_threshold_in_kb: 50
+
+# Log WARN on any batches not of type LOGGED that span across more partitions than this limit
+unlogged_batch_across_partitions_warn_threshold: 10
+
+# Log a warning when compacting partitions larger than this value
+compaction_large_partition_warning_threshold_mb: 100
+
+# GC Pauses greater than 200 ms will be logged at INFO level
+# This threshold can be adjusted to minimize logging if necessary
+# gc_log_threshold_in_ms: 200
+
+# GC Pauses greater than gc_warn_threshold_in_ms will be logged at WARN level
+# Adjust the threshold based on your application throughput requirement. Setting to 0
+# will deactivate the feature.
+# gc_warn_threshold_in_ms: 1000
+
+# Maximum size of any value in SSTables. Safety measure to detect SSTable corruption
+# early. Any value size larger than this threshold will result in marking an SSTable
+# as corrupted. This should be positive and less than 2048.
+# max_value_size_in_mb: 256
+
+# Coalescing Strategies #
+# Coalescing multiple messages turns out to significantly boost message processing throughput (think doubling or more).
+# On bare metal, the floor for packet processing throughput is high enough that many applications won't notice, but in
+# virtualized environments, the point at which an application can be bound by network packet processing can be
+# surprisingly low compared to the throughput of task processing that is possible inside a VM. It's not that bare metal
+# doesn't benefit from coalescing messages, it's that the number of packets a bare metal network interface can process
+# is sufficient for many applications such that no load starvation is experienced even without coalescing.
+# There are other benefits to coalescing network messages that are harder to isolate with a simple metric like messages
+# per second. By coalescing multiple tasks together, a network thread can process multiple messages for the cost of one
+# trip to read from a socket, and all the task submission work can be done at the same time reducing context switching
+# and increasing cache friendliness of network message processing.
+# See CASSANDRA-8692 for details.
+
+# Strategy to use for coalescing messages in OutboundTcpConnection.
+# Can be fixed, movingaverage, timehorizon, disabled (default).
+# You can also specify a subclass of CoalescingStrategies.CoalescingStrategy by name.
+# otc_coalescing_strategy: DISABLED
+
+# How many microseconds to wait for coalescing. For fixed strategy this is the amount of time after the first
+# message is received before it will be sent with any accompanying messages. For moving average this is the
+# maximum amount of time that will be waited as well as the interval at which messages must arrive on average
+# for coalescing to be enabled.
+# otc_coalescing_window_us: 200
+
+# Do not try to coalesce messages if we already got that many messages. This should be more than 2 and less than 128.
+# otc_coalescing_enough_coalesced_messages: 8
+
+# How many milliseconds to wait between two expiration runs on the backlog (queue) of the OutboundTcpConnection.
+# Expiration is done if messages are piling up in the backlog. Droppable messages are expired to free the memory
+# taken by expired messages. The interval should be between 0 and 1000, and in most installations the default value
+# will be appropriate. A smaller value could potentially expire messages slightly sooner at the expense of more CPU
+# time and queue contention while iterating the backlog of messages.
+# An interval of 0 disables any wait time, which is the behavior of former Cassandra versions.
+#
+# otc_backlog_expiration_interval_ms: 200
+
+# Track a metric per keyspace indicating whether replication achieved the ideal consistency
+# level for writes without timing out. This is different from the consistency level requested by
+# each write which may be lower in order to facilitate availability.
+# ideal_consistency_level: EACH_QUORUM
+
+# Automatically upgrade sstables after upgrade - if there is no ordinary compaction to do, the
+# oldest non-upgraded sstable will get upgraded to the latest version
+# automatic_sstable_upgrade: false
+# Limit the number of concurrent sstable upgrades
+# max_concurrent_automatic_sstable_upgrades: 1
+
+# Audit logging - Logs every incoming CQL command request, authentication to a node. See the docs
+# on audit_logging for full details about the various configuration options.
+audit_logging_options:
+    enabled: false
+    logger:
+      - class_name: BinAuditLogger
+    # audit_logs_dir:
+    # included_keyspaces:
+    # excluded_keyspaces: system, system_schema, system_virtual_schema
+    # included_categories:
+    # excluded_categories:
+    # included_users:
+    # excluded_users:
+    # roll_cycle: HOURLY
+    # block: true
+    # max_queue_weight: 268435456 # 256 MiB
+    # max_log_size: 17179869184 # 16 GiB
+    ## archive command is "/path/to/script.sh %path" where %path is replaced with the file being rolled:
+    # archive_command:
+    # max_archive_retries: 10
+
+
+# default options for full query logging - these can be overridden from command line when executing
+# nodetool enablefullquerylog
+#full_query_logging_options:
+    # log_dir:
+    # roll_cycle: HOURLY
+    # block: true
+    # max_queue_weight: 268435456 # 256 MiB
+    # max_log_size: 17179869184 # 16 GiB
+    ## archive command is "/path/to/script.sh %path" where %path is replaced with the file being rolled:
+    # archive_command:
+    # max_archive_retries: 10
+
+# validate tombstones on reads and compaction
+# can be either "disabled", "warn" or "exception"
+# corrupted_tombstone_strategy: disabled
+
+# Diagnostic Events #
+# If enabled, diagnostic events can be helpful for troubleshooting operational issues. Emitted events contain details
+# on internal state and temporal relationships across events, accessible by clients via JMX.
+diagnostic_events_enabled: false
+
+# Use native transport TCP message coalescing. If on upgrade to 4.0 you found your throughput decreasing, and in
+# particular you run an old kernel or have very few client connections, this option might be worth evaluating.
+#native_transport_flush_in_batches_legacy: false
+
+# Enable tracking of repaired state of data during reads and comparison between replicas
+# Mismatches between the repaired sets of replicas can be characterized as either confirmed
+# or unconfirmed. In this context, unconfirmed indicates that the presence of pending repair
+# sessions, unrepaired partition tombstones, or some other condition means that the disparity
+# cannot be considered conclusive. Confirmed mismatches should be a trigger for investigation
+# as they may be indicative of corruption or data loss.
+# There are separate flags for range vs partition reads as single partition reads are only tracked
+# when CL > 1 and a digest mismatch occurs. Currently, range queries don't use digests so if
+# enabled for range reads, all range reads will include repaired data tracking. As this adds
+# some overhead, operators may wish to disable it whilst still enabling it for partition reads
+repaired_data_tracking_for_range_reads_enabled: false
+repaired_data_tracking_for_partition_reads_enabled: false
+# If false, only confirmed mismatches will be reported. If true, a separate metric for unconfirmed
+# mismatches will also be recorded. This is to avoid potential signal:noise issues, as unconfirmed
+# mismatches are less actionable than confirmed ones.
+report_unconfirmed_repaired_data_mismatches: false
+
+# Having many tables and/or keyspaces negatively affects performance of many operations in the
+# cluster. When the number of tables/keyspaces in the cluster exceeds the following thresholds
+# a client warning will be sent back to the user when creating a table or keyspace.
+# table_count_warn_threshold: 150
+# keyspace_count_warn_threshold: 40
+
+#########################
+# EXPERIMENTAL FEATURES #
+#########################
+
+# Enables materialized view creation on this node.
+# Materialized views are considered experimental and are not recommended for production use.
+enable_materialized_views: false
+
+# Enables SASI index creation on this node.
+# SASI indexes are considered experimental and are not recommended for production use.
+enable_sasi_indexes: false
+
+# Enables creation of transiently replicated keyspaces on this node.
+# Transient replication is experimental and is not recommended for production use.
+enable_transient_replication: false
diff --git a/backfill-cli/src/test/resources/c5/logback.xml b/backfill-cli/src/test/resources/c5/logback.xml
new file mode 100644
index 00000000..d4261fbe
--- /dev/null
+++ b/backfill-cli/src/test/resources/c5/logback.xml
@@ -0,0 +1,103 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements.  See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership.  The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied.  See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+<!--
+In order to disable debug.log, comment-out the ASYNCDEBUGLOG
+appender reference in the root level section below.
+-->
+
+<configuration scan="true" scanPeriod="60 seconds">
+    <jmxConfigurator />
+
+    <!-- No shutdown hook; we run it ourselves in StorageService after shutdown -->
+
+    <!-- SYSTEMLOG rolling file appender to system.log (TRACE threshold) -->
+
+    <appender name="SYSTEMLOG" class="ch.qos.logback.core.rolling.RollingFileAppender">
+        <filter class="ch.qos.logback.classic.filter.ThresholdFilter">
+            <level>TRACE</level>
+        </filter>
+        <file>${cassandra.logdir}/system.log</file>
+        <rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
+            <!-- rollover daily -->
+            <fileNamePattern>${cassandra.logdir}/system.log.%d{yyyy-MM-dd}.%i.zip</fileNamePattern>
+            <!-- each file should be at most 50MB, keep 7 days worth of history, but at most 5GB -->
+            <maxFileSize>50MB</maxFileSize>
+            <maxHistory>7</maxHistory>
+            <totalSizeCap>5GB</totalSizeCap>
+        </rollingPolicy>
+        <encoder>
+            <pattern>%-5level [%thread] %date{ISO8601} %F:%L - %msg%n</pattern>
+        </encoder>
+    </appender>
+
+    <!-- DEBUGLOG rolling file appender to debug.log (all levels) -->
+
+    <appender name="DEBUGLOG" class="ch.qos.logback.core.rolling.RollingFileAppender">
+        <file>${cassandra.logdir}/debug.log</file>
+        <rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
+            <!-- roll over daily, and also whenever the active file reaches maxFileSize -->
+            <fileNamePattern>${cassandra.logdir}/debug.log.%d{yyyy-MM-dd}.%i.zip</fileNamePattern>
+            <!-- each file should be at most 50MB, keep 7 days worth of history, but at most 5GB -->
+            <maxFileSize>50MB</maxFileSize>
+            <maxHistory>7</maxHistory>
+            <totalSizeCap>5GB</totalSizeCap>
+        </rollingPolicy>
+        <encoder>
+            <pattern>%-5level [%thread] %date{ISO8601} %F:%L - %msg%n</pattern>
+        </encoder>
+    </appender>
+
+    <!-- ASYNCDEBUGLOG asynchronous appender to debug.log (all levels) -->
+
+    <appender name="ASYNCDEBUGLOG" class="ch.qos.logback.classic.AsyncAppender">
+        <queueSize>1024</queueSize>
+        <discardingThreshold>0</discardingThreshold>
+        <includeCallerData>true</includeCallerData>
+        <appender-ref ref="DEBUGLOG" />
+    </appender>
+
+    <!-- STDOUT console appender to stdout (TRACE threshold) -->
+
+    <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
+        <filter class="ch.qos.logback.classic.filter.ThresholdFilter">
+            <level>TRACE</level>
+        </filter>
+        <encoder>
+            <pattern>%-5level [%thread] %date{ISO8601} %F:%L - %msg%n</pattern>
+        </encoder>
+    </appender>
+
+    <!-- Uncomment below and corresponding appender-ref to activate logback metrics
+    <appender name="LogbackMetrics" class="com.codahale.metrics.logback.InstrumentedAppender" />
+     -->
+
+    <root level="INFO">
+        <appender-ref ref="SYSTEMLOG" />
+        <appender-ref ref="STDOUT" />
+<!--        <appender-ref ref="DEBUGLOG" /> &lt;!&ndash; Comment this line to disable debug.log &ndash;&gt;-->
+        <!--
+        <appender-ref ref="LogbackMetrics" />
+        -->
+    </root>
+
+    <logger name="org.apache.cassandra" level="INFO"/>
+    <logger name="org.apache.pulsar" level="INFO"/>
+    <logger name="com.datastax.oss.cdc" level="TRACE"/>
+</configuration>

From dfb2ed1c712e8ffdd0b53b55c4087143b478e21b Mon Sep 17 00:00:00 2001
From: SreedevT <talisreedev@gmail.com>
Date: Tue, 30 Sep 2025 02:52:15 +0530
Subject: [PATCH 09/10] handle cassandra timeuuid class

---
 .../java/com/datastax/oss/cdc/agent/PulsarMutationSender.java  | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/agent-c5/src/main/java/com/datastax/oss/cdc/agent/PulsarMutationSender.java b/agent-c5/src/main/java/com/datastax/oss/cdc/agent/PulsarMutationSender.java
index 6195ab7c..b4e73aeb 100644
--- a/agent-c5/src/main/java/com/datastax/oss/cdc/agent/PulsarMutationSender.java
+++ b/agent-c5/src/main/java/com/datastax/oss/cdc/agent/PulsarMutationSender.java
@@ -156,6 +156,9 @@ public Object cqlToAvro(TableMetadata tableMetadata, String columnName, Object v
         if (type instanceof ShortType) {
             return Short.toUnsignedInt((short) value); // AVRO does not support INT16
         }
+        if ((type instanceof TimeUUIDType || type instanceof UUIDType) && value instanceof org.apache.cassandra.utils.TimeUUID) {
+            return ((org.apache.cassandra.utils.TimeUUID) value).asUUID(); // Handle Cassandra 5 TimeUUID type - convert to UUID string for Avro serialization
+        }
         return value;
     }
 }

From 59970afeb6ac31bddfd20930c58126a81004eb04 Mon Sep 17 00:00:00 2001
From: SreedevT <talisreedev@gmail.com>
Date: Tue, 30 Sep 2025 15:30:27 +0530
Subject: [PATCH 10/10] revert removal of other modules from ci

---
 .github/workflows/backfill-ci.yaml | 4 ++--
 .github/workflows/ci.yaml          | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/backfill-ci.yaml b/.github/workflows/backfill-ci.yaml
index 450625fd..7cf16b65 100644
--- a/.github/workflows/backfill-ci.yaml
+++ b/.github/workflows/backfill-ci.yaml
@@ -38,8 +38,8 @@ jobs:
       fail-fast: false
       matrix:
         jdk: ['11'] # TODO: Enable java 17 tests https://issues.apache.org/jira/browse/CASSANDRA-16895
-        pulsarImage: ['datastax/lunastreaming:4.0_3.6']
-        cassandraFamily: ['c5']
+        pulsarImage: ['datastax/lunastreaming:2.10_3.4', 'apachepulsar/pulsar:2.10.3', 'apachepulsar/pulsar:2.11.0']
+        cassandraFamily: ['c3', 'c4', 'c5', 'dse4']
     steps:
       - uses: actions/checkout@v3
       - name: Set up JDK ${{ matrix.jdk }}
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 7c2493db..8889ad97 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -37,9 +37,9 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        module: ['agent-c5', 'connector']
+        module: ['agent', 'agent-c3', 'agent-c4', 'agent-c5', 'agent-dse4', 'connector']
         jdk: ['11', '17']
-        pulsarImage: ['datastax/lunastreaming:4.0_3.6']
+        pulsarImage: ['datastax/lunastreaming:2.10_3.4', 'apachepulsar/pulsar:2.10.3', 'apachepulsar/pulsar:2.11.0']
     steps:
       - uses: actions/checkout@v3
       - name: Set up JDK ${{ matrix.jdk }}