Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 62 additions & 5 deletions src/main/java/de/rub/nds/crawler/data/ScanTarget.java
Original file line number Diff line number Diff line change
Expand Up @@ -76,21 +76,58 @@ public static Pair<ScanTarget, JobStatus> fromTargetString(
} catch (UnknownHostException e) {
LOGGER.error(
"Host {} is unknown or can not be reached with error {}.", targetString, e);
// TODO in the current design we discard the exception info; maybe we want to keep
// this in the future
// Store the exception information for diagnostic purposes
target.setUnresolvedReason(e.getMessage());
return Pair.of(target, JobStatus.UNRESOLVABLE);
}
}
if (denylistProvider != null && denylistProvider.isDenylisted(target)) {
LOGGER.error("Host {} is denylisted and will not be scanned.", targetString);
// TODO similar to the unknownHostException, we do not keep any information as to why
// the target is blocklisted it may be nice to distinguish cases where the domain is
// blocked or where the IP is blocked
// Store information about why the target was denylisted
String denylistReason = determineDenylistReason(target, denylistProvider);
target.setDenylistReason(denylistReason);
return Pair.of(target, JobStatus.DENYLISTED);
}
return Pair.of(target, JobStatus.TO_BE_EXECUTED);
}

/**
 * Explains a denylist hit by probing the provider with the hostname and the IP separately.
 *
 * <p>For each of the two attributes that is non-null on {@code target}, a fresh
 * {@code ScanTarget} carrying only that attribute is handed to
 * {@link IDenylistProvider#isDenylisted}. The hostname probe always runs before the IP probe.
 * When neither probe matches (or both attributes are {@code null}), a generic message is
 * returned instead.
 *
 * @param target the target whose hostname and IP are probed; must not be {@code null}
 * @param denylistProvider the provider queried for each probe; must not be {@code null}
 * @return one of {@code "Both domain and IP are denylisted"}, {@code "Domain is denylisted"},
 *     {@code "IP is denylisted"} or {@code "Target is denylisted"}
 */
private static String determineDenylistReason(
        ScanTarget target, IDenylistProvider denylistProvider) {
    // Probe 1: hostname on its own (skipped when the target has no hostname).
    boolean hostnameHit = false;
    String hostname = target.getHostname();
    if (hostname != null) {
        ScanTarget probe = new ScanTarget();
        probe.setHostname(hostname);
        hostnameHit = denylistProvider.isDenylisted(probe);
    }

    // Probe 2: IP on its own (skipped when the target has no IP).
    boolean addressHit = false;
    String address = target.getIp();
    if (address != null) {
        ScanTarget probe = new ScanTarget();
        probe.setIp(address);
        addressHit = denylistProvider.isDenylisted(probe);
    }

    if (hostnameHit) {
        return addressHit ? "Both domain and IP are denylisted" : "Domain is denylisted";
    }
    // Neither probe matching still yields a message: the caller only asks for a reason
    // after the full target was already reported as denylisted.
    return addressHit ? "IP is denylisted" : "Target is denylisted";
}

/**
 * IP of the target, kept as a string. May be {@code null}: {@code determineDenylistReason}
 * skips its IP probe in that case.
 */
private String ip;

/**
 * Hostname of the target. May be {@code null}: {@code determineDenylistReason} skips its
 * hostname probe in that case.
 */
private String hostname;
Expand All @@ -99,6 +136,10 @@ public static Pair<ScanTarget, JobStatus> fromTargetString(

/**
 * Rank value assigned through {@link #setTrancoRank(int)}.
 * NOTE(review): presumably the target's position in the Tranco top-sites list - confirm.
 */
private int trancoRank;

/**
 * Diagnostic text for a target that could not be resolved. {@code fromTargetString} stores
 * the message of the caught {@code UnknownHostException} here before returning
 * {@code JobStatus.UNRESOLVABLE}. No initializer, so it is {@code null} until set.
 */
private String unresolvedReason;

/**
 * Diagnostic text for a denylisted target. {@code fromTargetString} stores the result of
 * {@code determineDenylistReason} here before returning {@code JobStatus.DENYLISTED}.
 * No initializer, so it is {@code null} until set.
 */
private String denylistReason;
Comment on lines +139 to +141
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I dislike the approach of adding error-only fields to the scan target.


/**
 * Creates a scan target without assigning any field; callers populate it afterwards through
 * the setters (as {@code determineDenylistReason} does for its probe targets).
 */
public ScanTarget() {
    // Intentionally empty: all state is supplied via setters.
}

@Override
Expand Down Expand Up @@ -137,4 +178,20 @@ public void setPort(int port) {
/**
 * Stores the given rank on this target, replacing any previous value.
 *
 * @param trancoRank the rank value to store
 */
public void setTrancoRank(final int trancoRank) {
    this.trancoRank = trancoRank;
}

/**
 * Returns the stored explanation of why this target could not be resolved.
 *
 * @return the stored reason, or {@code null} if none has been set
 */
public String getUnresolvedReason() {
    return unresolvedReason;
}

public void setUnresolvedReason(String unresolvedReason) {
this.unresolvedReason = unresolvedReason;
}

/**
 * Returns the stored explanation of why this target is denylisted.
 *
 * @return the stored reason, or {@code null} if none has been set
 */
public String getDenylistReason() {
    return denylistReason;
}

/**
 * Stores an explanation of why this target is denylisted, replacing any previous value.
 *
 * @param denylistReason the reason text to store; {@code null} is stored as-is
 */
public void setDenylistReason(final String denylistReason) {
    this.denylistReason = denylistReason;
}
}