Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions src/main/java/de/rub/nds/crawler/core/BulkScanWorker.java
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,13 @@ protected BulkScanWorker(String bulkScanId, T scanConfig, int parallelScanThread
new NamedThreadFactory("crawler-worker: scan executor"));
}

/**
* Handles a scan target by submitting it to the executor and managing initialization and
* cleanup.
*
* @param scanTarget the target to scan
* @return a Future containing the scan result as a Document
*/
public Future<Document> handle(ScanTarget scanTarget) {
// if we initialized ourselves, we also clean up ourselves
shouldCleanupSelf.weakCompareAndSetAcquire(false, init());
Expand All @@ -62,8 +69,19 @@ public Future<Document> handle(ScanTarget scanTarget) {
});
}

/**
* Performs the actual scan operation on the given target.
*
* @param scanTarget the target to scan
* @return a Document containing the scan results
*/
public abstract Document scan(ScanTarget scanTarget);

/**
* Initializes the bulk scan worker in a thread-safe manner.
*
* @return true if initialization was performed, false if already initialized
*/
public final boolean init() {
// synchronize such that no thread runs before being initialized
// but only synchronize if not already initialized
Expand All @@ -78,6 +96,11 @@ public final boolean init() {
return false;
}

/**
* Cleans up the bulk scan worker resources in a thread-safe manner.
*
* @return true if cleanup was performed, false if cleanup was deferred or not needed
*/
public final boolean cleanup() {
// synchronize such that init and cleanup do not run simultaneously
// but only synchronize if already initialized
Expand Down
31 changes: 31 additions & 0 deletions src/main/java/de/rub/nds/crawler/core/BulkScanWorkerManager.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ public class BulkScanWorkerManager {
private static final Logger LOGGER = LogManager.getLogger();
private static volatile BulkScanWorkerManager instance;

/**
* Gets the singleton instance of BulkScanWorkerManager.
*
* @return the singleton instance
*/
public static BulkScanWorkerManager getInstance() {
if (instance == null) {
synchronized (BulkScanWorkerManager.class) {
Expand All @@ -37,6 +42,14 @@ public static BulkScanWorkerManager getInstance() {
return instance;
}

/**
* Handles a scan job using the singleton instance.
*
* @param scanJobDescription the scan job description
* @param parallelConnectionThreads number of parallel connection threads
* @param parallelScanThreads number of parallel scan threads
* @return a Future containing the scan result document
*/
public static Future<Document> handleStatic(
ScanJobDescription scanJobDescription,
int parallelConnectionThreads,
Expand All @@ -62,6 +75,16 @@ private BulkScanWorkerManager() {
.build();
}

/**
* Gets or creates a BulkScanWorker for the given bulk scan ID.
*
* @param bulkScanId the unique identifier for the bulk scan
* @param scanConfig the scan configuration
* @param parallelConnectionThreads number of parallel connection threads
* @param parallelScanThreads number of parallel scan threads
* @return the BulkScanWorker instance
* @throws UncheckedException if worker creation fails
*/
public BulkScanWorker<?> getBulkScanWorker(
String bulkScanId,
ScanConfig scanConfig,
Expand All @@ -83,6 +106,14 @@ public BulkScanWorker<?> getBulkScanWorker(
}
}

/**
* Handles a scan job by delegating to the appropriate BulkScanWorker.
*
* @param scanJobDescription the scan job description
* @param parallelConnectionThreads number of parallel connection threads
* @param parallelScanThreads number of parallel scan threads
* @return a Future containing the scan result document
*/
public Future<Document> handle(
ScanJobDescription scanJobDescription,
int parallelConnectionThreads,
Expand Down
13 changes: 13 additions & 0 deletions src/main/java/de/rub/nds/crawler/core/Controller.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,13 @@ public class Controller {
private final ControllerCommandConfig config;
private IDenylistProvider denylistProvider;

/**
* Creates a new Controller instance.
*
* @param config the controller command configuration
* @param orchestrationProvider the orchestration provider for job management
* @param persistenceProvider the persistence provider for data storage
*/
public Controller(
ControllerCommandConfig config,
IOrchestrationProvider orchestrationProvider,
Expand All @@ -45,6 +52,7 @@ public Controller(
}
}

/** Starts the controller and schedules bulk scan publishing jobs. */
public void start() {
ITargetListProvider targetListProvider = config.getTargetListProvider();

Expand Down Expand Up @@ -91,6 +99,11 @@ private ScheduleBuilder<?> getScanSchedule() {
}
}

/**
* Shuts down the scheduler if all triggers have been finalized.
*
* @param scheduler the scheduler to potentially shut down
*/
public static void shutdownSchedulerIfAllTriggersFinalized(Scheduler scheduler) {
try {
boolean allTriggersFinalized =
Expand Down
7 changes: 7 additions & 0 deletions src/main/java/de/rub/nds/crawler/core/ProgressMonitor.java
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,13 @@ public class ProgressMonitor {

private boolean listenerRegistered;

/**
* Creates a new ProgressMonitor instance.
*
* @param orchestrationProvider the orchestration provider for job management
* @param persistenceProvider the persistence provider for data storage
* @param scheduler the Quartz scheduler instance
*/
public ProgressMonitor(
IOrchestrationProvider orchestrationProvider,
IPersistenceProvider persistenceProvider,
Expand Down
1 change: 1 addition & 0 deletions src/main/java/de/rub/nds/crawler/core/Worker.java
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ public Worker(
new NamedThreadFactory("crawler-worker: result handler"));
}

/** Starts the worker by registering a scan job consumer with the orchestration provider. */
public void start() {
this.orchestrationProvider.registerScanJobConsumer(
this::handleScanJob, this.parallelScanThreads);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,12 @@ public class PublishBulkScanJob implements Job {

private static final Logger LOGGER = LogManager.getLogger();

/**
* Executes the bulk scan publishing job.
*
* @param context the job execution context
* @throws JobExecutionException if an error occurs during job execution
*/
public void execute(JobExecutionContext context) throws JobExecutionException {
try {
JobDataMap data = context.getMergedJobDataMap();
Expand Down