11package io .github .jycr .javadataurlhandler ;
22
33import java .io .ByteArrayInputStream ;
4- import java .io .IOException ;
54import java .io .InputStream ;
6- import java .io .UnsupportedEncodingException ;
75import java .net .MalformedURLException ;
86import java .net .URL ;
97import java .net .URLConnection ;
108import java .net .URLDecoder ;
119import java .nio .charset .Charset ;
1210import java .util .Base64 ;
13- import java .util .regex .Matcher ;
14- import java .util .regex .Pattern ;
1511
1612import static java .nio .charset .StandardCharsets .US_ASCII ;
1713
/**
 * <p>The data scheme URLConnection.</p>
 * <p>Syntax of data URL scheme:</p>
 * <pre>
 * dataurl    := "data:" [ mediatype ] [ ";base64" ] "," data
 * mediatype  := [ type "/" subtype ] *( ";" parameter )
 * data       := *urlchar
 * parameter  := attribute "=" value
 * </pre>
 * <p>The URL is parsed once in the constructor; the payload is decoded each time it is requested.</p>
 *
 * @see <a href="https://www.rfc-editor.org/rfc/rfc2397#section-2">RFC-2397</a>
 * @see <a href="https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs">mdn web docs - Data URLs</a>
 */
public class DataUriConnection extends URLConnection {

    private static final String SCHEME_PREFIX = "data:";
    private static final String BASE64_TOKEN = "base64";
    private static final String CHARSET_ATTRIBUTE = "charset=";

    /** Charset used when the URL carries no {@code charset} parameter. */
    private static final Charset DEFAULT_CONTENT_CHARSET = US_ASCII;
    /** Media type used when the URL carries none. */
    private static final String DEFAULT_MEDIATYPE = "text/plain";

    private final Charset charset;
    private final boolean isBase64;
    /** Raw (still escaped / still base64) payload: everything after the first comma. */
    private final String data;
    private final String contentType;

    /**
     * Parses the given data URL into media type, charset, base64 flag and raw payload.
     * The connection is marked as connected immediately since there is nothing to open.
     *
     * @param url the data URL to expose as a connection
     * @throws MalformedURLException if the URL does not start with {@code data:} or has no comma
     * @throws IllegalArgumentException if the {@code charset} parameter names an illegal or
     *         unsupported charset (propagated from {@link Charset#forName(String)})
     */
    public DataUriConnection(final URL url) throws MalformedURLException {
        super(url);
        final String urlString = url.toString();
        if (!urlString.startsWith(SCHEME_PREFIX)) {
            throw new MalformedURLException("Invalid data URL: " + url);
        }

        // The first comma separates the metadata from the payload.
        final int commaIndex = urlString.indexOf(',');
        if (commaIndex == -1) {
            throw new MalformedURLException("Invalid data URL: " + url);
        }

        final String metadata = urlString.substring(SCHEME_PREFIX.length(), commaIndex);
        this.data = urlString.substring(commaIndex + 1);

        final String[] parts = metadata.split(";");
        final String mediatype = parts.length > 0 && !parts[0].isEmpty() ? parts[0] : DEFAULT_MEDIATYPE;

        boolean base64Flag = false;
        Charset extractedCharset = DEFAULT_CONTENT_CHARSET;
        for (final String part : parts) {
            // Attribute names and the base64 marker are matched case-insensitively.
            if (BASE64_TOKEN.equalsIgnoreCase(part)) {
                base64Flag = true;
            } else if (part.regionMatches(true, 0, CHARSET_ATTRIBUTE, 0, CHARSET_ATTRIBUTE.length())) {
                extractedCharset = Charset.forName(part.substring(CHARSET_ATTRIBUTE.length()));
            }
        }
        this.isBase64 = base64Flag;
        this.charset = extractedCharset;
        // Only textual media types advertise a charset in the reported content type.
        this.contentType = mediatype + (isText(mediatype) ? ";charset=" + this.charset.name() : "");
        this.connected = true;
    }

    /** Tells whether the media type is treated as text: {@code text/*} or any {@code *+xml} type. */
    private static boolean isText(final String mediatype) {
        return mediatype != null && (mediatype.startsWith("text/") || mediatype.endsWith("+xml"));
    }

    /** Marks the connection as connected; no I/O is performed. */
    @Override
    public void connect() {
        this.connected = true;
    }

    /**
     * Returns a fresh in-memory stream over the decoded payload.
     *
     * @throws IllegalArgumentException if the payload contains a malformed percent escape
     *         or is flagged base64 but is not valid base64
     */
    @Override
    public InputStream getInputStream() {
        return new ByteArrayInputStream(getData());
    }

    /**
     * Returns the media type of the URL (or {@code text/plain} when absent), followed by
     * {@code ;charset=<name>} for textual media types.
     */
    @Override
    public String getContentType() {
        return contentType;
    }

    /**
     * Decodes the payload. Percent escapes are resolved straight to bytes so that binary
     * data survives whatever the charset is, and {@code '+'} stays a literal plus sign
     * (data URLs use URL escaping, not form encoding). Base64 payloads are unescaped
     * first, so sequences such as {@code %3D} are accepted.
     */
    private byte[] getData() {
        if (isBase64) {
            return Base64.getDecoder().decode(percentDecode(data, US_ASCII));
        }
        return percentDecode(data, charset);
    }

    /**
     * Resolves {@code %XX} escapes to raw bytes; every other character is encoded with
     * {@code literalCharset}.
     *
     * @throws IllegalArgumentException on a truncated or non-hexadecimal escape
     */
    private static byte[] percentDecode(final String encoded, final Charset literalCharset) {
        final int length = encoded.length();
        // Fully-qualified on purpose: keeps the file's import list untouched.
        final java.io.ByteArrayOutputStream out = new java.io.ByteArrayOutputStream(length);
        final StringBuilder literal = new StringBuilder();
        int i = 0;
        while (i < length) {
            final char c = encoded.charAt(i);
            if (c != '%') {
                literal.append(c);
                i++;
                continue;
            }
            if (i + 2 >= length) {
                throw new IllegalArgumentException("Incomplete percent escape at index " + i);
            }
            final int hi = hexValue(encoded.charAt(i + 1));
            final int lo = hexValue(encoded.charAt(i + 2));
            if (hi < 0 || lo < 0) {
                throw new IllegalArgumentException("Illegal hex characters in percent escape at index " + i);
            }
            flushLiteral(literal, literalCharset, out);
            out.write((hi << 4) | lo);
            i += 3;
        }
        flushLiteral(literal, literalCharset, out);
        return out.toByteArray();
    }

    /** Encodes the pending literal characters into {@code out} and clears the buffer. */
    private static void flushLiteral(final StringBuilder literal, final Charset cs,
                                     final java.io.ByteArrayOutputStream out) {
        if (literal.length() > 0) {
            final byte[] bytes = literal.toString().getBytes(cs);
            out.write(bytes, 0, bytes.length);
            literal.setLength(0);
        }
    }

    /** Returns the value of an ASCII hex digit, or -1 if {@code c} is not one. */
    private static int hexValue(final char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        return -1;
    }

    /** Returns the charset taken from the URL's {@code charset} parameter, or US-ASCII when absent. */
    Charset getCharset() {
        return charset;
    }

    /**
     * Supports {@code Content-Length} (size of the decoded payload) and {@code Content-Type}
     * (same value as {@link #getContentType()}), both matched case-insensitively.
     *
     * @return the header value, or {@code null} for any other name
     */
    @Override
    public String getHeaderField(final String name) {
        if ("Content-Length".equalsIgnoreCase(name)) {
            return String.valueOf(getData().length);
        }
        if ("Content-Type".equalsIgnoreCase(name)) {
            return contentType;
        }
        return null;
    }
}
0 commit comments