Friday, October 8, 2010
Fast and correct htmlEncoding for JavaScript
Feel free to adjust the last two alternatives in the regexp to suit your needs.
I encode everything above 0x7F, and from the allowed ASCII range I encode only the "big five" characters (" & ' < >).
Using a regexp is still faster (especially on MSIE) than any other implementation.
/**
 * Encodes a string as HTML-safe text using numeric character references.
 *
 * - A valid surrogate pair becomes one decimal reference to its combined
 *   code point (e.g. U+1F600 -> "&#128512;").
 * - Characters matched by the "unused" class below (C0 controls other than
 *   TAB/LF/CR, U+007F-U+009F, and standalone surrogates) become "&#xFFFD;".
 * - Any other character outside U+0000-U+007F becomes a decimal reference.
 * - The five characters " & ' < > become decimal references.
 * - Everything else is left untouched.
 *
 * @param {string} s - text to encode; must provide a `replace` method.
 * @returns {string} the encoded text.
 */
var _encodeHtmlRegExpImpl = (function() {
  // performance is 78ms on MSIE 7 (the slowest one)
  // on 80KB html markup from: http://www.w3.org/TR/html4/
  // Alternative order matters: a well-formed pair must be tried before the
  // "unused" class, which would otherwise claim each half as a lone surrogate.
  var re = new RegExp(
        // surrogate pair (sp)
        "([\uD800-\uDBFF][\uDC00-\uDFFF])" +
        // html UNUSED including standalone surrogates (un)
        "|([\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F-\u009F\uD800-\uDFFF])" +
        // out of ascii (oa)
        "|([^\u0000-\u007F])" +
        // big 5 + add others (b5)
        "|([\u0022\u0026\u0027\u003C\u003E])",
        "g"
      ),
      // Combines a high/low surrogate code-unit pair into one code point.
      toCodePoint = function(high, low) {
        return ((high - 0xD800) << 10) + (low - 0xDC00) + 0x010000;
      },
      // Replacement callback: exactly one of sp/un/oa/b5 is defined per match.
      enc = function(m, sp, un, oa, b5) {
        // extracted out from main function and ifs changed to ternary
        // thanks to Andrea Giammarchi
        // The "&#" prefix is required: without it the output is plain text
        // such as "60;" instead of the character reference "&#60;".
        return "&#" + (oa || b5
          ? m.charCodeAt(0)
          : (un ? "xFFFD"
            : toCodePoint(m.charCodeAt(0), m.charCodeAt(1)))) + ";";
      };
  return function(s) {
    return s.replace(re, enc);
  };
}());
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment