@@ -1176,6 +1176,166 @@ diff_match_patch.prototype.diff_cleanupMerge = function(diffs) {
11761176  } 
11771177} ; 
11781178
/**
 * Rearrange diff boundaries that split Unicode surrogate pairs.
 *
 * A trailing high surrogate is stripped from a diff and remembered; any later
 * diff whose text starts with a low surrogate gets the remembered high
 * surrogate prepended. The remembered value is intentionally kept across
 * diffs so that a DELETE/INSERT pair following a split both receive it.
 * Diffs that are (or become) empty are removed.
 *
 * The array and its tuples are modified in place; the same array is returned.
 *
 * @param {!Array.<!diff_match_patch.Diff>} diffs Array of diff tuples.
 * @return {!Array.<!diff_match_patch.Diff>} The same array, after cleanup.
 */
diff_match_patch.prototype.diff_cleanupSplitSurrogates = function(diffs) {
  var lastEnd;
  for (var x = 0; x < diffs.length; x++) {
    var thisDiff = diffs[x];
    var text = thisDiff[1];

    if (0 === text.length) {
      diffs.splice(x--, 1);
      continue;
    }

    var thisTop = text[0];
    var thisEnd = text[text.length - 1];
    // Snapshot the high surrogate carried from earlier diffs BEFORE this diff
    // can overwrite it with its own trailing high surrogate; otherwise a diff
    // like "\uDD70...\uD83D" would have its own tail moved to its front.
    var carried = lastEnd;

    if (this.isHighSurrogate(thisEnd)) {
      lastEnd = thisEnd;
      text = text.slice(0, -1);
    }

    if (carried && this.isLowSurrogate(thisTop)) {
      text = carried + text;
    }

    thisDiff[1] = text;

    if (0 === text.length) {
      diffs.splice(x--, 1);
      continue;
    }
  }

  return diffs;
};
1213+ 
/**
 * Test whether the first UTF-16 code unit of a string is a high (leading)
 * surrogate, i.e. lies in the range U+D800..U+DBFF.
 *
 * @param {string} c String whose first code unit is inspected.
 * @return {boolean} True for a high surrogate; false otherwise, including for
 *     empty, null or undefined input (e.g. an out-of-range string index).
 */
diff_match_patch.prototype.isHighSurrogate = function(c) {
  if (!c) {
    return false;
  }
  var v = c.charCodeAt(0);
  return v >= 0xD800 && v <= 0xDBFF;
};
1218+ 
/**
 * Test whether the first UTF-16 code unit of a string is a low (trailing)
 * surrogate, i.e. lies in the range U+DC00..U+DFFF.
 *
 * @param {string} c String whose first code unit is inspected.
 * @return {boolean} True for a low surrogate; false otherwise, including for
 *     empty, null or undefined input (e.g. an out-of-range string index).
 */
diff_match_patch.prototype.isLowSurrogate = function(c) {
  if (!c) {
    return false;
  }
  var v = c.charCodeAt(0);
  return v >= 0xDC00 && v <= 0xDFFF;
};
1223+ 
/**
 * Convert a single hexadecimal digit character to its numeric value.
 *
 * @param {string} c Exactly one character out of 0-9, A-F or a-f.
 * @return {number} The value 0..15.
 * @throws {Error} 'Invalid hex-code' for anything else (other characters,
 *     longer or empty strings, non-string values).
 */
diff_match_patch.prototype.digit16 = function(c) {
  if (typeof c === 'string' && c.length === 1) {
    var code = c.charCodeAt(0);
    if (code >= 0x30 && code <= 0x39) {  // '0'..'9'
      return code - 0x30;
    }
    if (code >= 0x41 && code <= 0x46) {  // 'A'..'F'
      return code - 0x41 + 10;
    }
    if (code >= 0x61 && code <= 0x66) {  // 'a'..'f'
      return code - 0x61 + 10;
    }
  }
  throw new Error('Invalid hex-code');
};
1245+ 
/**
 * Decode a URI-encoded string, tolerating encoded surrogate halves.
 *
 * diff_match_patch needs this relaxation because not every library and
 * version produces valid URI strings in toDelta, and input that is a valid
 * delta but invalid UTF-8 should not crash the decoder.
 *
 * The native decodeURI() is tried first. Only when it throws is the text
 * decoded by hand, accepting 1- to 4-byte UTF-8 sequences and letting a
 * 3-byte sequence that encodes a lone surrogate through.
 *
 * NOTE: the two paths differ for reserved characters: native decodeURI()
 * leaves escapes such as %3A encoded, while the fallback decodes every
 * single-byte escape.
 *
 * @example: decodeURI('ab%20%F0%9F%85%B0') = 'ab \ud83c\udd70'
 * @example: decodeURI('abcd%3A%20%ED%A0%BC') = 'abcd: \ud83c'
 *
 * @cite: @mathiasbynens utf8.js at https://github.com/mathiasbynens/utf8.js
 *
 * @param {String} text input string encoded by encodeURI() or equivalent
 * @return {String} decoded text
 * @throws {URIError} if the fallback decoder meets a malformed sequence.
 * @throws {Error} if a percent sign is not followed by two hex digits
 *     (raised by digit16).
 */
diff_match_patch.prototype.decodeURI = function(text) {
  try {
    return decodeURI(text);
  } catch (e) {
    // Native decoding rejected the input; decode leniently by hand.
    var i = 0;
    var decoded = '';

    while (i < text.length) {
      if (text[i] !== '%') {
        decoded += text[i++];
        continue;
      }

      // start a percent-sequence
      var byte1 = (this.digit16(text[i + 1]) << 4) + this.digit16(text[i + 2]);
      if ((byte1 & 0x80) === 0) {
        // single byte (ASCII)
        decoded += String.fromCharCode(byte1);
        i += 3;
        continue;
      }

      if ('%' !== text[i + 3]) {
        throw new URIError('URI malformed');
      }

      var byte2 = (this.digit16(text[i + 4]) << 4) + this.digit16(text[i + 5]);
      if ((byte2 & 0xC0) !== 0x80) {
        throw new URIError('URI malformed');
      }
      byte2 = byte2 & 0x3F;
      if ((byte1 & 0xE0) === 0xC0) {
        // two-byte sequence
        decoded += String.fromCharCode(((byte1 & 0x1F) << 6) | byte2);
        i += 6;
        continue;
      }

      if ('%' !== text[i + 6]) {
        throw new URIError('URI malformed');
      }

      var byte3 = (this.digit16(text[i + 7]) << 4) + this.digit16(text[i + 8]);
      if ((byte3 & 0xC0) !== 0x80) {
        throw new URIError('URI malformed');
      }
      byte3 = byte3 & 0x3F;
      if ((byte1 & 0xF0) === 0xE0) {
        // three-byte sequence; unpaired surrogates are fine here
        decoded += String.fromCharCode(((byte1 & 0x0F) << 12) | (byte2 << 6) | byte3);
        i += 9;
        continue;
      }

      if ('%' !== text[i + 9]) {
        throw new URIError('URI malformed');
      }

      var byte4 = (this.digit16(text[i + 10]) << 4) + this.digit16(text[i + 11]);
      if ((byte4 & 0xC0) !== 0x80) {
        throw new URIError('URI malformed');
      }
      byte4 = byte4 & 0x3F;
      if ((byte1 & 0xF8) === 0xF0) {
        var codePoint = ((byte1 & 0x07) << 0x12) | (byte2 << 0x0C) | (byte3 << 0x06) | byte4;
        if (codePoint >= 0x010000 && codePoint <= 0x10FFFF) {
          // UTF-16 encodes (codePoint - 0x10000) as two 10-bit halves.
          // Masking with 0xFFFF instead is only right for plane 1.
          var offset = codePoint - 0x10000;
          decoded += String.fromCharCode(0xD800 | (offset >>> 10));
          decoded += String.fromCharCode(0xDC00 | (offset & 0x3FF));
          i += 12;
          continue;
        }
      }

      throw new URIError('URI malformed');
    }

    return decoded;
  }
};
11791339
11801340/** 
11811341 * loc is a location in text1, compute and return the equivalent location in 
@@ -1219,6 +1379,7 @@ diff_match_patch.prototype.diff_xIndex = function(diffs, loc) {
12191379 * @return  {string } HTML representation. 
12201380 */ 
12211381diff_match_patch . prototype . diff_prettyHtml  =  function ( diffs )  { 
1382+   diffs  =  this . diff_cleanupSplitSurrogates ( diffs ) ; 
12221383  var  html  =  [ ] ; 
12231384  var  pattern_amp  =  / & / g; 
12241385  var  pattern_lt  =  / < / g; 
@@ -1319,6 +1480,7 @@ diff_match_patch.prototype.diff_levenshtein = function(diffs) {
13191480 * @return  {string } Delta text. 
13201481 */ 
13211482diff_match_patch . prototype . diff_toDelta  =  function ( diffs )  { 
1483+   diffs  =  this . diff_cleanupSplitSurrogates ( diffs ) ; 
13221484  var  text  =  [ ] ; 
13231485  for  ( var  x  =  0 ;  x  <  diffs . length ;  x ++ )  { 
13241486    switch  ( diffs [ x ] [ 0 ] )  { 
@@ -1361,7 +1523,7 @@ diff_match_patch.prototype.diff_fromDelta = function(text1, delta) {
13611523    switch  ( tokens [ x ] . charAt ( 0 ) )  { 
13621524      case  '+' :
13631525        try  { 
1364-           diffs [ diffsLength ++ ]  =  [ DIFF_INSERT ,  decodeURI ( param ) ] ; 
1526+           diffs [ diffsLength ++ ]  =  [ DIFF_INSERT ,  this . decodeURI ( param ) ] ; 
13651527        }  catch  ( ex )  { 
13661528          // Malformed URI sequence. 
13671529          throw  new  Error ( 'Illegal escape in diff_fromDelta: '  +  param ) ; 
@@ -1597,11 +1759,23 @@ diff_match_patch.prototype.patch_addContext_ = function(patch, text) {
15971759  padding  +=  this . Patch_Margin ; 
15981760
15991761  // Add the prefix. 
1762+   if  ( 
1763+     patch . start2  -  padding  >  0  && 
1764+     diff_match_patch . prototype . isLowSurrogate ( text [ patch . start2  -  padding ] ) 
1765+   )  { 
1766+     padding ++ ; 
1767+   } 
16001768  var  prefix  =  text . substring ( patch . start2  -  padding ,  patch . start2 ) ; 
16011769  if  ( prefix )  { 
16021770    patch . diffs . unshift ( [ DIFF_EQUAL ,  prefix ] ) ; 
16031771  } 
16041772  // Add the suffix. 
1773+   if  ( 
1774+     patch . start2  +  patch . length1  +  padding  <  text . length  && 
1775+     diff_match_patch . prototype . isHighSurrogate ( text [ patch . start2  +  patch . length1  +  padding ] ) 
1776+   )  { 
1777+     padding ++ ; 
1778+   } 
16051779  var  suffix  =  text . substring ( patch . start2  +  patch . length1 , 
16061780                              patch . start2  +  patch . length1  +  padding ) ; 
16071781  if  ( suffix )  { 
@@ -1675,6 +1849,7 @@ diff_match_patch.prototype.patch_make = function(a, opt_b, opt_c) {
16751849  if  ( diffs . length  ===  0 )  { 
16761850    return  [ ] ;   // Get rid of the null case. 
16771851  } 
1852+   diffs  =  this . diff_cleanupSplitSurrogates ( diffs ) ; 
16781853  var  patches  =  [ ] ; 
16791854  var  patch  =  new  diff_match_patch . patch_obj ( ) ; 
16801855  var  patchDiffLength  =  0 ;   // Keeping our own length var is faster in JS. 
@@ -2171,6 +2346,7 @@ diff_match_patch.patch_obj.prototype.toString = function() {
21712346  var  text  =  [ '@@ -'  +  coords1  +  ' +'  +  coords2  +  ' @@\n' ] ; 
21722347  var  op ; 
21732348  // Escape the body of the patch with %xx notation. 
2349+   diff_match_patch . prototype . diff_cleanupSplitSurrogates ( this . diffs ) ; 
21742350  for  ( var  x  =  0 ;  x  <  this . diffs . length ;  x ++ )  { 
21752351    switch  ( this . diffs [ x ] [ 0 ] )  { 
21762352      case  DIFF_INSERT :
0 commit comments