再帰 CTE を使用した 5 つの特殊文字のエンコード:
-- HTML-encode a string one character at a time with a recursive CTE.
--
-- Input : @unsafe  the raw text
-- Output: @safe    the encoded text; NULL when @unsafe is NULL or empty
--
-- Encoding rules:
--   * code point above 128      -> numeric reference  &#NNN;
--   * " & ' < >                 -> &quot; &amp; &#39; &lt; &gt;
--   * anything else             -> copied unchanged
--
-- Every reference is terminated with ';'. Without the terminator the
-- reference runs into the following text (e.g. "&aposs").
-- The apostrophe uses &#39; because &apos; is not part of HTML 4.
DECLARE @unsafe NVARCHAR(MAX),
        @safe   NVARCHAR(MAX),
        @len    INT;

-- Sample input containing all five special characters and one
-- character above the ASCII range.
SET @unsafe = N'html''s encoding "method" is <= or >= & 1234 ' + NCHAR(129);

-- LEN() ignores trailing spaces, which would silently drop trailing
-- blanks from the result. DATALENGTH() / 2 counts every NVARCHAR
-- character (2 bytes each), including trailing spaces.
SET @len = DATALENGTH(@unsafe) / 2;

WITH cte AS (
    -- Anchor row: the original string, an empty accumulator for the
    -- encoded text, and the position of the first character to inspect.
    SELECT
        @unsafe                     AS unsafe_html,
        CONVERT(NVARCHAR(MAX), N'') AS safe_html,
        1                           AS pos
    WHERE @unsafe IS NOT NULL
      AND @len > 0

    UNION ALL

    -- Recursive step: append the encoded form of the character at "pos"
    -- and advance by one. Recursion stops once pos has moved past the
    -- last character.
    SELECT
        @unsafe AS unsafe_html,
        CONVERT(
            NVARCHAR(MAX),
            safe_html
            + CASE
                  WHEN UNICODE(SUBSTRING(unsafe_html, pos, 1)) > 128
                      THEN N'&#'
                           + CONVERT(NVARCHAR(10), UNICODE(SUBSTRING(unsafe_html, pos, 1)))
                           + N';'
                  ELSE
                      CASE SUBSTRING(unsafe_html, pos, 1)
                          WHEN N'"'  THEN N'&quot;'
                          WHEN N'&'  THEN N'&amp;'
                          WHEN N'''' THEN N'&#39;'
                          WHEN N'<'  THEN N'&lt;'
                          WHEN N'>'  THEN N'&gt;'
                          ELSE SUBSTRING(unsafe_html, pos, 1)
                      END
              END
        ) AS safe_html,
        pos + 1 AS pos
    FROM cte
    WHERE pos <= @len
)
-- Each pass adds one row to the CTE. The row whose position is one past
-- the end of the string holds the fully encoded text.
-- MAXRECURSION 32767 is the largest explicit limit SQL Server accepts,
-- so input longer than 32767 characters fails with a recursion error.
SELECT @safe = (
    SELECT safe_html
    FROM cte
    WHERE pos = @len + 1
)
OPTION (MAXRECURSION 32767);

SELECT @safe;

-- Result:
-- html&#39;s encoding &quot;method&quot; is &lt;= or &gt;= &amp; 1234 &#129;
初期バージョン:
-- Initial version: let the XML serializer do the escaping.
-- FOR XML PATH('') returns the value as XML text, so the markup
-- characters in the sample (<, > and &) come back as &lt;, &gt; and &amp;.
DECLARE @s NVARCHAR(100);

SET @s = N'<html>unsafe & safe<html>';

-- Raw value, returned unchanged.
SELECT @s;

-- Same value after XML serialization.
SELECT (SELECT @s FOR XML PATH(''));

-- Results:
--
-- ---------------------------------------
-- <html>unsafe & safe<html>
--
-- -----------------------------------------
-- &lt;html&gt;unsafe &amp; safe&lt;html&gt;
すべての公式リファレンスを含む完全なエンコード:
-- Full HTML encoding using a lookup table of named character entity
-- references plus a numeric-reference fallback.
--
-- Input : @unsafe  the raw text (built here from the first 10,000
--                  Unicode code points, each followed by a space)
-- Output: @safe    the encoded text
DECLARE @unsafe NVARCHAR(MAX),
        @safe   NVARCHAR(MAX);

-- Build string with first 10,000 unicode chars.
-- NOTE(review): "SELECT @var = @var + ..." over several rows relies on
-- the rows being processed in ascending order, which SQL Server does not
-- guarantee. Confirm the output order on the target server/version.
SELECT @unsafe = COALESCE(@unsafe, N'') + NCHAR(number) + N' '
FROM (
    SELECT TOP 10000
        ROW_NUMBER() OVER (ORDER BY (SELECT 0)) AS number
    FROM sys.all_objects AS s1
    CROSS JOIN sys.all_objects AS s2
) AS t;

-- Build table variable with the character entity references defined in
-- HTML 4.0, keyed by Unicode code point.
-- Reference: http://www.htmlcodetutorial.com/characterentities_famsupp_69.html
-- Every reference carries its terminating ';'. The apostrophe is stored
-- as &#39; because &apos; is not an HTML 4 entity.
DECLARE @t TABLE (
    name        NVARCHAR(25) NOT NULL,
    unicode_val INT          NOT NULL PRIMARY KEY
);

INSERT INTO @t (name, unicode_val)
VALUES
    -- Markup-significant characters
    (N'&quot;', 34), (N'&amp;', 38), (N'&#39;', 39), (N'&lt;', 60), (N'&gt;', 62),
    -- ISO 8859-1 (Latin-1) symbols
    (N'&nbsp;', 160), (N'&iexcl;', 161), (N'&cent;', 162), (N'&pound;', 163),
    (N'&curren;', 164), (N'&yen;', 165), (N'&brvbar;', 166), (N'&sect;', 167),
    (N'&uml;', 168), (N'&copy;', 169), (N'&ordf;', 170), (N'&laquo;', 171),
    (N'&not;', 172), (N'&shy;', 173), (N'&reg;', 174), (N'&macr;', 175),
    (N'&deg;', 176), (N'&plusmn;', 177), (N'&sup2;', 178), (N'&sup3;', 179),
    (N'&acute;', 180), (N'&micro;', 181), (N'&para;', 182), (N'&middot;', 183),
    (N'&cedil;', 184), (N'&sup1;', 185), (N'&ordm;', 186), (N'&raquo;', 187),
    (N'&frac14;', 188), (N'&frac12;', 189), (N'&frac34;', 190), (N'&iquest;', 191),
    -- ISO 8859-1 (Latin-1) letters
    (N'&Agrave;', 192), (N'&Aacute;', 193), (N'&Acirc;', 194), (N'&Atilde;', 195),
    (N'&Auml;', 196), (N'&Aring;', 197), (N'&AElig;', 198), (N'&Ccedil;', 199),
    (N'&Egrave;', 200), (N'&Eacute;', 201), (N'&Ecirc;', 202), (N'&Euml;', 203),
    (N'&Igrave;', 204), (N'&Iacute;', 205), (N'&Icirc;', 206), (N'&Iuml;', 207),
    (N'&ETH;', 208), (N'&Ntilde;', 209), (N'&Ograve;', 210), (N'&Oacute;', 211),
    (N'&Ocirc;', 212), (N'&Otilde;', 213), (N'&Ouml;', 214), (N'&times;', 215),
    (N'&Oslash;', 216), (N'&Ugrave;', 217), (N'&Uacute;', 218), (N'&Ucirc;', 219),
    (N'&Uuml;', 220), (N'&Yacute;', 221), (N'&THORN;', 222), (N'&szlig;', 223),
    (N'&agrave;', 224), (N'&aacute;', 225), (N'&acirc;', 226), (N'&atilde;', 227),
    (N'&auml;', 228), (N'&aring;', 229), (N'&aelig;', 230), (N'&ccedil;', 231),
    (N'&egrave;', 232), (N'&eacute;', 233), (N'&ecirc;', 234), (N'&euml;', 235),
    (N'&igrave;', 236), (N'&iacute;', 237), (N'&icirc;', 238), (N'&iuml;', 239),
    (N'&eth;', 240), (N'&ntilde;', 241), (N'&ograve;', 242), (N'&oacute;', 243),
    (N'&ocirc;', 244), (N'&otilde;', 245), (N'&ouml;', 246), (N'&divide;', 247),
    (N'&oslash;', 248), (N'&ugrave;', 249), (N'&uacute;', 250), (N'&ucirc;', 251),
    (N'&uuml;', 252), (N'&yacute;', 253), (N'&thorn;', 254), (N'&yuml;', 255),
    -- Latin Extended and spacing modifier letters
    (N'&OElig;', 338), (N'&oelig;', 339), (N'&Scaron;', 352), (N'&scaron;', 353),
    (N'&Yuml;', 376), (N'&fnof;', 402), (N'&circ;', 710), (N'&tilde;', 732),
    -- Greek
    (N'&Alpha;', 913), (N'&Beta;', 914), (N'&Gamma;', 915), (N'&Delta;', 916),
    (N'&Epsilon;', 917), (N'&Zeta;', 918), (N'&Eta;', 919), (N'&Theta;', 920),
    (N'&Iota;', 921), (N'&Kappa;', 922), (N'&Lambda;', 923), (N'&Mu;', 924),
    (N'&Nu;', 925), (N'&Xi;', 926), (N'&Omicron;', 927), (N'&Pi;', 928),
    (N'&Rho;', 929), (N'&Sigma;', 931), (N'&Tau;', 932), (N'&Upsilon;', 933),
    (N'&Phi;', 934), (N'&Chi;', 935), (N'&Psi;', 936), (N'&Omega;', 937),
    (N'&alpha;', 945), (N'&beta;', 946), (N'&gamma;', 947), (N'&delta;', 948),
    (N'&epsilon;', 949), (N'&zeta;', 950), (N'&eta;', 951), (N'&theta;', 952),
    (N'&iota;', 953), (N'&kappa;', 954), (N'&lambda;', 955), (N'&mu;', 956),
    (N'&nu;', 957), (N'&xi;', 958), (N'&omicron;', 959), (N'&pi;', 960),
    (N'&rho;', 961), (N'&sigmaf;', 962), (N'&sigma;', 963), (N'&tau;', 964),
    (N'&upsilon;', 965), (N'&phi;', 966), (N'&chi;', 967), (N'&psi;', 968),
    (N'&omega;', 969), (N'&thetasym;', 977), (N'&upsih;', 978), (N'&piv;', 982),
    -- General punctuation
    (N'&ensp;', 8194), (N'&emsp;', 8195), (N'&thinsp;', 8201), (N'&zwnj;', 8204),
    (N'&zwj;', 8205), (N'&lrm;', 8206), (N'&rlm;', 8207), (N'&ndash;', 8211),
    (N'&mdash;', 8212), (N'&lsquo;', 8216), (N'&rsquo;', 8217), (N'&sbquo;', 8218),
    (N'&ldquo;', 8220), (N'&rdquo;', 8221), (N'&bdquo;', 8222), (N'&dagger;', 8224),
    (N'&Dagger;', 8225), (N'&bull;', 8226), (N'&hellip;', 8230), (N'&permil;', 8240),
    (N'&prime;', 8242), (N'&Prime;', 8243), (N'&lsaquo;', 8249), (N'&rsaquo;', 8250),
    (N'&oline;', 8254), (N'&frasl;', 8260), (N'&euro;', 8364),
    -- Letterlike symbols
    (N'&image;', 8465), (N'&weierp;', 8472), (N'&real;', 8476), (N'&trade;', 8482),
    (N'&alefsym;', 8501),
    -- Arrows
    (N'&larr;', 8592), (N'&uarr;', 8593), (N'&rarr;', 8594), (N'&darr;', 8595),
    (N'&harr;', 8596), (N'&crarr;', 8629), (N'&lArr;', 8656), (N'&uArr;', 8657),
    (N'&rArr;', 8658), (N'&dArr;', 8659), (N'&hArr;', 8660),
    -- Mathematical operators
    (N'&forall;', 8704), (N'&part;', 8706), (N'&exist;', 8707), (N'&empty;', 8709),
    (N'&nabla;', 8711), (N'&isin;', 8712), (N'&notin;', 8713), (N'&ni;', 8715),
    (N'&prod;', 8719), (N'&sum;', 8721), (N'&minus;', 8722), (N'&lowast;', 8727),
    (N'&radic;', 8730), (N'&prop;', 8733), (N'&infin;', 8734), (N'&ang;', 8736),
    (N'&and;', 8743), (N'&or;', 8744), (N'&cap;', 8745), (N'&cup;', 8746),
    (N'&int;', 8747), (N'&there4;', 8756), (N'&sim;', 8764), (N'&cong;', 8773),
    (N'&asymp;', 8776), (N'&ne;', 8800), (N'&equiv;', 8801), (N'&le;', 8804),
    (N'&ge;', 8805), (N'&sub;', 8834), (N'&sup;', 8835), (N'&nsub;', 8836),
    (N'&sube;', 8838), (N'&supe;', 8839), (N'&oplus;', 8853), (N'&otimes;', 8855),
    (N'&perp;', 8869), (N'&sdot;', 8901),
    -- Miscellaneous technical, geometric shapes and card suits
    (N'&lceil;', 8968), (N'&rceil;', 8969), (N'&lfloor;', 8970), (N'&rfloor;', 8971),
    (N'&lang;', 9001), (N'&rang;', 9002), (N'&loz;', 9674), (N'&spades;', 9824),
    (N'&clubs;', 9827), (N'&hearts;', 9829), (N'&diams;', 9830);

-- Build numbers table to parse the string: one row per character position.
-- DATALENGTH() / 2 is used instead of LEN() because LEN() ignores
-- trailing spaces and @unsafe ends with one.
DECLARE @numbers TABLE (number INT NOT NULL PRIMARY KEY);

INSERT INTO @numbers (number)
SELECT TOP (DATALENGTH(@unsafe) / 2)
    ROW_NUMBER() OVER (ORDER BY (SELECT 0)) AS number
FROM sys.all_objects AS s1
CROSS JOIN sys.all_objects AS s2;

-- Use numbers table to parse each character.
-- If a match is found in the character entity reference table, use the
-- safe substitute. Otherwise, if the code point is greater than 128, use
-- the numeric reference &#<code point>;. Finally, use the original
-- character if nothing else is a match.
-- NOTE(review): as above, the concatenation order depends on the rows
-- being read in ascending "number" order, which is not guaranteed.
SELECT @safe = COALESCE(@safe, N'')
    + COALESCE(
          name,
          CASE
              WHEN UNICODE(SUBSTRING(@unsafe, number, 1)) > 128
                  THEN N'&#'
                       + CONVERT(NVARCHAR(10), UNICODE(SUBSTRING(@unsafe, number, 1)))
                       + N';'
              ELSE SUBSTRING(@unsafe, number, 1)
          END
      )
FROM @numbers
LEFT JOIN @t
    ON UNICODE(SUBSTRING(@unsafe, number, 1)) = unicode_val;

SELECT @safe AS [safe];

-- Results (abbreviated; starts at code point 33, the leading control
-- characters and the tail of the output are not shown):
-- ! &quot; # $ % &amp; &#39; ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; &lt; = &gt; ? @
-- A B C ... X Y Z [ \ ] ^ _ ` a b c ... x y z { | } ~ ...
-- &#129; &#130; ... &#159; &nbsp; &iexcl; &cent; &pound; ... &yacute; &thorn; &yuml;
-- &#256; &#257; &#258; &#259; ...
プレ>