diff --git a/src/main/java/de/rub/nds/crawler/data/ScanTarget.java b/src/main/java/de/rub/nds/crawler/data/ScanTarget.java index b5299b6..e0278f4 100644 --- a/src/main/java/de/rub/nds/crawler/data/ScanTarget.java +++ b/src/main/java/de/rub/nds/crawler/data/ScanTarget.java @@ -76,21 +76,58 @@ public static Pair fromTargetString( } catch (UnknownHostException e) { LOGGER.error( "Host {} is unknown or can not be reached with error {}.", targetString, e); - // TODO in the current design we discard the exception info; maybe we want to keep - // this in the future + // Store the exception information for diagnostic purposes + target.setUnresolvedReason(e.getMessage()); return Pair.of(target, JobStatus.UNRESOLVABLE); } } if (denylistProvider != null && denylistProvider.isDenylisted(target)) { LOGGER.error("Host {} is denylisted and will not be scanned.", targetString); - // TODO similar to the unknownHostException, we do not keep any information as to why - // the target is blocklisted it may be nice to distinguish cases where the domain is - // blocked or where the IP is blocked + // Store information about why the target was denylisted + String denylistReason = determineDenylistReason(target, denylistProvider); + target.setDenylistReason(denylistReason); return Pair.of(target, JobStatus.DENYLISTED); } return Pair.of(target, JobStatus.TO_BE_EXECUTED); } + /** + * Determines the reason why a target is denylisted by checking both the hostname and IP. 
+     *
+     * <p>Each attribute is probed separately via a fresh target carrying only that attribute.
+     * NOTE(review): assumes the provider accepts such partially populated targets - confirm.
+     *
+     * @param target the denylisted target; a null hostname or IP is simply not probed
+     * @param denylistProvider the provider queried with the single-attribute probes
+     * @return a fixed text naming the domain, the IP or both; a generic fallback text if
+     *     neither probe matches on its own
+     */
+    private static String determineDenylistReason(
+            ScanTarget target, IDenylistProvider denylistProvider) {
+        // A missing attribute is never probed and therefore counts as not blocked.
+        boolean domainBlocked = false;
+        String host = target.getHostname();
+        if (host != null) {
+            ScanTarget hostProbe = new ScanTarget();
+            hostProbe.setHostname(host);
+            domainBlocked = denylistProvider.isDenylisted(hostProbe);
+        }
+
+        boolean ipBlocked = false;
+        String address = target.getIp();
+        if (address != null) {
+            ScanTarget ipProbe = new ScanTarget();
+            ipProbe.setIp(address);
+            ipBlocked = denylistProvider.isDenylisted(ipProbe);
+        }
+
+        // Map the two probe results onto the four fixed reason strings.
+        if (domainBlocked) {
+            return ipBlocked ? "Both domain and IP are denylisted" : "Domain is denylisted";
+        }
+        return ipBlocked ? "IP is denylisted" : "Target is denylisted";
+    }
+
     private String ip;
 
     private String hostname;
@@ -99,6 +136,12 @@ public static Pair fromTargetString(
 
     private int trancoRank;
 
+    /** Why resolution failed; fromTargetString stores the exception message here. */
+    private String unresolvedReason;
+
+    /** Why the target is denylisted; fromTargetString stores the determined reason here. */
+    private String denylistReason;
+
     public ScanTarget() {}
 
     @Override
@@ -137,4 +180,24 @@ public void setPort(int port) {
     public void setTrancoRank(int trancoRank) {
         this.trancoRank = trancoRank;
     }
+
+    /** Returns the stored reason why this target could not be resolved. */
+    public String getUnresolvedReason() {
+        return unresolvedReason;
+    }
+
+    /** Stores the reason why this target could not be resolved. */
+    public void setUnresolvedReason(String unresolvedReason) {
+        this.unresolvedReason = unresolvedReason;
+    }
+
+    /** Returns the stored reason why this target is denylisted. */
+    public String getDenylistReason() {
+        return denylistReason;
+    }
+
+    /** Stores the reason why this target is denylisted. */
+    public void setDenylistReason(String denylistReason) {
+        this.denylistReason = denylistReason;
+    }
 }