Fix keep_bad=0 mode

master v1.0.1
Vitaliy Filippov 2016-10-03 14:06:20 +03:00
parent 504ffed327
commit 05177d2f3f
5 changed files with 114 additions and 47 deletions

View File

@@ -1,4 +1,4 @@
{ {
"plugins": [ "transform-es2015-destructuring", "transform-object-rest-spread", "transform-es2015-arrow-functions" ], "plugins": [ "transform-es2015-destructuring", "transform-object-rest-spread", "transform-es2015-arrow-functions", "transform-es2015-block-scoping" ],
"retainLines": true "retainLines": true
} }

View File

@@ -5,21 +5,21 @@ var htmLawed = module.exports =
_flip: function (a) _flip: function (a)
{ {
var e = {}; var e = {};
for (var i = 0; i < a.length; i++) for (var i = 0; i < a.length; i++) {
e[a[i]] = true; e[a[i]] = true;}
return e; return e;
}, },
_strtr: function (t, h) _strtr: function (t, h)
{ {
for (var i in h) for (var i in h) {
t = t.replace(new RegExp(i, 'g'), h[i]); t = t.replace(new RegExp(i, 'g'), h[i]);}
return t; return t;
}, },
_keys: function (h) _keys: function (h)
{ {
var r = []; var r = [];
for (var i in h) for (var i in h) {
r.push(i); r.push(i);}
return r; return r;
}, },
_htmlspecialchars: function (t) _htmlspecialchars: function (t)
@@ -61,8 +61,8 @@ var htmLawed = module.exports =
{ {
var re = /(?:^|-|\+)[^\-+]+?(?=-|\+|$)/g; var re = /(?:^|-|\+)[^\-+]+?(?=-|\+|$)/g;
m = {}; m = {};
while (v = re.exec(x)) while (v = re.exec(x)) {
m[v[0]] = true; m[v[0]] = true;}
for (v in m) for (v in m)
{ {
if (v[0] == '+') if (v[0] == '+')
@@ -79,8 +79,8 @@ var htmLawed = module.exports =
{ {
delete x['on*']; delete x['on*'];
v = { 'onblur': 1, 'onchange': 1, 'onclick': 1, 'ondblclick': 1, 'onfocus': 1, 'onkeydown': 1, 'onkeypress': 1, 'onkeyup': 1, 'onmousedown': 1, 'onmousemove': 1, 'onmouseout': 1, 'onmouseover': 1, 'onmouseup': 1, 'onreset': 1, 'onselect': 1, 'onsubmit': 1 }; v = { 'onblur': 1, 'onchange': 1, 'onclick': 1, 'ondblclick': 1, 'onfocus': 1, 'onkeydown': 1, 'onkeypress': 1, 'onkeyup': 1, 'onmousedown': 1, 'onmousemove': 1, 'onmouseout': 1, 'onmouseover': 1, 'onmouseup': 1, 'onreset': 1, 'onselect': 1, 'onsubmit': 1 };
for (i in v) for (i in v) {
x[i] = true; x[i] = true;}
} }
C.deny_attribute = x; C.deny_attribute = x;
// config URL // config URL
@@ -142,8 +142,8 @@ var htmLawed = module.exports =
y = { "\x82": '&#8218;', "\x84": '&#8222;', "\x91": '&#8216;', "\x92": '&#8217;', "\x93": '&#8220;', "\x94": '&#8221;' };else y = { "\x82": '&#8218;', "\x84": '&#8222;', "\x91": '&#8216;', "\x92": '&#8217;', "\x93": '&#8220;', "\x94": '&#8221;' };else
y = { "\x82": '\'', "\x84": '"', "\x91": '\'', "\x92": '\'', "\x93": '"', "\x94": '"' }; y = { "\x82": '\'', "\x84": '"', "\x91": '\'', "\x92": '\'', "\x93": '"', "\x94": '"' };
for (i in y) for (i in y) {
x[i] = y[i]; x[i] = y[i];}
t = htmLawed._strtr(t, x); t = htmLawed._strtr(t, x);
} }
if (C.cdata || C.comment) if (C.cdata || C.comment)
@@ -210,6 +210,7 @@ var htmLawed = module.exports =
}, },
hl_bal: function (t, keep_bad, intag) hl_bal: function (t, keep_bad, intag)
{ {
var C = htmLawed.C;
if (keep_bad === undefined) if (keep_bad === undefined)
keep_bad = 1; keep_bad = 1;
// balance tags // balance tags
@@ -262,8 +263,8 @@ var htmLawed = module.exports =
if (cont.N[intag]) if (cont.N[intag])
{ {
inOk = _extends({}, inOk); inOk = _extends({}, inOk);
for (var k in cont.N[intag]) for (var k in cont.N[intag]) {
delete inOk[k]; delete inOk[k];}
} }
return inOk; return inOk;
} }
@@ -277,7 +278,7 @@ var htmLawed = module.exports =
var _ob = ''; var _ob = '';
var r, s, e, a, x, p; var r, s, e, a, x, p;
t = t.split('<'); t = t.split('<');
for (var i = 0, ci = t.length; i < ci; i++) for (var i = 0; i < t.length; i++)
{ {
// get markup // get markup
r = /^(\/?)([a-z1-6]+)([^>]*)>([\s\S]*)$/.exec(t[i]); r = /^(\/?)([a-z1-6]+)([^>]*)>([\s\S]*)$/.exec(t[i]);
@@ -296,6 +297,7 @@ var htmLawed = module.exports =
} else } else
if (p == e) if (p == e)
{ {
if (!cont.E[e])
q.pop(); q.pop();
_ob += '</' + e + '>'; _ob += '</' + e + '>';
e = null; e = null;
@@ -315,6 +317,31 @@ var htmLawed = module.exports =
_ob += add + '</' + e + '>'; _ob += add + '</' + e + '>';
e = null; e = null;
} }
} else
if (!C.elements[e])
{
// Forbidden tag not handled by hl_tag() - remove everything up to its end
for (var j = i + 1, _in = 1; j < t.length; j++)
{
r = /^(\/?)([a-z1-6]+)([^>]*)>/.exec(t[j]);
if (r && r[2] == e)
{
_in += r[1] ? -1 : 1;
}
if (_in <= 0)
{
t[j] = t[j].substr(r[0].length);
t.splice(i, j - i);
break;
} else
if (j == t.length - 1)
{
t.splice(i, t.length - i);
break;
}
}
i--;
continue;
} }
// open tag // open tag
// cont.B ele needs el.B ele as child // cont.B ele needs el.B ele as child
@@ -322,21 +349,21 @@ var htmLawed = module.exports =
{ {
t[i] = e + a + '>'; t[i] = e + a + '>';
t.splice(i + 1, 0, 'div>' + x); t.splice(i + 1, 0, 'div>' + x);
ci++;i--; i--;
e = x = null; e = x = null;
} else } else
if ((ql && cont.B[p] || cont.B[intag] && !ql) && !el.B[e] && !ok[e]) if ((q.length && cont.B[p] || cont.B[intag] && !q.length) && !el.B[e] && !ok[e])
{ {
t.splice(i, 0, 'div>'); t.splice(i, 0, 'div>');
ci++;i--; i--;
e = x = null; e = x = null;
} }
// if no open ele, intag = parent; mostly immediate parent-child relation should hold // if no open ele, intag = parent; mostly immediate parent-child relation should hold
else if (!ql || !el.N[e] || !q.filter(function (_k) {return cont.N[_k];}).length) else if (!q.length || !el.N[e] || !q.filter(function (_k) {return cont.N[_k];}).length)
{ {
if (!ok[e]) if (!ok[e])
{ {
if (ql && cont.T[p]) if (q.length && cont.T[p])
{ {
_ob += '</' + q.pop() + '>'; _ob += '</' + q.pop() + '>';
e = x = null; e = x = null;
@@ -379,8 +406,8 @@ var htmLawed = module.exports =
if (cont.N[d]) if (cont.N[d])
{ {
ok2 = _extends({}, ok2); ok2 = _extends({}, ok2);
for (var _k in cont.N[d]) for (var _k in cont.N[d]) {
delete ok2[_k]; delete ok2[_k];}
} }
if (!ok2[e]) if (!ok2[e])
{ {
@@ -390,8 +417,8 @@ var htmLawed = module.exports =
break; break;
} }
add = '</' + d + '>'; add = '</' + d + '>';
while (++k < kc) while (++k < kc) {
add = '</' + q[k] + '>' + add; add = '</' + q[k] + '>' + add;}
break; break;
} else } else
@@ -444,8 +471,8 @@ var htmLawed = module.exports =
} }
// end // end
while (e = q.pop()) while (e = q.pop()) {
_ob += '</' + e + '>'; _ob += '</' + e + '>';}
return _ob; return _ob;
// eof // eof
}, },
@@ -532,8 +559,8 @@ var htmLawed = module.exports =
{ {
m = /^([a-zA-Z\d\-+\.]+:\/\/[^\/]+)([\s\S]*)/.exec(C.base_url); m = /^([a-zA-Z\d\-+\.]+:\/\/[^\/]+)([\s\S]*)/.exec(C.base_url);
p = (m[2] + p).replace(/\/\.\//g, '/'); p = (m[2] + p).replace(/\/\.\//g, '/');
while (/\/([^\/]{3,}|[^\/\.]+?|\.[^\/\.]|[^\/\.]\.)\/\.\.\//.exec(p)) while (/\/([^\/]{3,}|[^\/\.]+?|\.[^\/\.]|[^\/\.]\.)\/\.\.\//.exec(p)) {
p = p.replace(/\/([^\/]{3,}|[^\/\.]+?|\.[^\/\.]|[^\/\.]\.)\/\.\.\//g, '/'); p = p.replace(/\/([^\/]{3,}|[^\/\.]+?|\.[^\/\.]|[^\/\.]\.)\/\.\.\//g, '/');}
p = m[1] + p; p = m[1] + p;
} }
} }
@@ -864,8 +891,11 @@ var htmLawed = module.exports =
var m = /^<(\/?)([a-zA-Z][a-zA-Z1-6]*)([^>]*?)\s?>$/m.exec(t); var m = /^<(\/?)([a-zA-Z][a-zA-Z1-6]*)([^>]*?)\s?>$/m.exec(t);
if (!m) if (!m)
return t.replace(/</g, '&lt;').replace(/>/g, '&gt;');else return t.replace(/</g, '&lt;').replace(/>/g, '&gt;');else
if (!C.elements[e = m[2].toLowerCase()]) if (!C.elements[e = m[2].toLowerCase()] && C.keep_bad > 0)
return C.keep_bad % 2 ? t.replace(/</g, '&lt;').replace(/>/g, '&gt;') : ''; {
// C.keep_bad == 0 (remove bad elements with their content) is handled by hl_bal
return C.keep_bad % 2 ? t.replace(/</g, '&lt;').replace(/>/g, '&gt;') : '';
}
// attr string // attr string
var a = m[3].trim().replace(/[\n\r\t]/g, ' '); var a = m[3].trim().replace(/[\n\r\t]/g, ' ');
// tag transform // tag transform
@@ -1026,9 +1056,9 @@ var htmLawed = module.exports =
// rqd attr // rqd attr
if (TAG.AR[e]) if (TAG.AR[e])
{ {
for (k in TAG.AR[e]) for (k in TAG.AR[e]) {
if (!a[k]) if (!a[k])
a[k] = TAG.AR[e][k] || k; a[k] = TAG.AR[e][k] || k;}
} }
// depr attrs // depr attrs
@@ -1112,8 +1142,8 @@ var htmLawed = module.exports =
delete a.id;else delete a.id;else
{ {
while (htmLawed.hl_Ids[a.id]) while (htmLawed.hl_Ids[a.id]) {
a.id = C.unique_ids + a.id; // FIXME 1 2 3 4 ... ? a.id = C.unique_ids + a.id;} // FIXME 1 2 3 4 ... ?
htmLawed.hl_Ids[a.id] = 1; htmLawed.hl_Ids[a.id] = 1;
} }
} }
@@ -1134,8 +1164,8 @@ var htmLawed = module.exports =
if (!C.hook_tag) if (!C.hook_tag)
{ {
aA = ''; aA = '';
for (k in a) for (k in a) {
aA += ' ' + k + '="' + a[k] + '"'; aA += ' ' + k + '="' + a[k] + '"';}
return '<' + e + aA + (TAG.E[e] ? ' /' : '') + '>'; return '<' + e + aA + (TAG.E[e] ? ' /' : '') + '>';
} }
return C.hook_tag(e, a); return C.hook_tag(e, a);

View File

@@ -210,6 +210,7 @@ var htmLawed = module.exports =
}, },
hl_bal: function(t, keep_bad, intag) hl_bal: function(t, keep_bad, intag)
{ {
var C = htmLawed.C;
if (keep_bad === undefined) if (keep_bad === undefined)
keep_bad = 1; keep_bad = 1;
// balance tags // balance tags
@@ -277,7 +278,7 @@ var htmLawed = module.exports =
var _ob = ''; var _ob = '';
var r, s, e, a, x, p; var r, s, e, a, x, p;
t = t.split('<'); t = t.split('<');
for (var i = 0, ci = t.length; i < ci; i++) for (var i = 0; i < t.length; i++)
{ {
// get markup // get markup
r = /^(\/?)([a-z1-6]+)([^>]*)>([\s\S]*)$/.exec(t[i]); r = /^(\/?)([a-z1-6]+)([^>]*)>([\s\S]*)$/.exec(t[i]);
@@ -296,7 +297,8 @@ var htmLawed = module.exports =
} }
else if (p == e) else if (p == e)
{ {
q.pop(); if (!cont.E[e])
q.pop();
_ob += '</'+e+'>'; _ob += '</'+e+'>';
e = null; e = null;
// Last open // Last open
@@ -316,27 +318,52 @@ var htmLawed = module.exports =
e = null; e = null;
} }
} }
else if (!C.elements[e])
{
// Forbidden tag not handled by hl_tag() - remove everything up to its end
for (let j = i+1, _in = 1; j < t.length; j++)
{
r = /^(\/?)([a-z1-6]+)([^>]*)>/.exec(t[j]);
if (r && r[2] == e)
{
_in += (r[1] ? -1 : 1);
}
if (_in <= 0)
{
t[j] = t[j].substr(r[0].length);
t.splice(i, j-i);
break;
}
else if (j == t.length-1)
{
t.splice(i, t.length-i);
break;
}
}
i--;
continue;
}
// open tag // open tag
// cont.B ele needs el.B ele as child // cont.B ele needs el.B ele as child
else if (cont.B[e] && x.trim().length > 0) // FIXME trim else if (cont.B[e] && x.trim().length > 0) // FIXME trim
{ {
t[i] = e+a+'>'; t[i] = e+a+'>';
t.splice(i+1, 0, 'div>'+x); t.splice(i+1, 0, 'div>'+x);
ci++; i--; i--;
e = x = null; e = x = null;
} }
else if (((ql && cont.B[p]) || (cont.B[intag] && !ql)) && !el.B[e] && !ok[e]) else if (((q.length && cont.B[p]) || (cont.B[intag] && !q.length)) && !el.B[e] && !ok[e])
{ {
t.splice(i, 0, 'div>'); t.splice(i, 0, 'div>');
ci++; i--; i--;
e = x = null; e = x = null;
} }
// if no open ele, intag = parent; mostly immediate parent-child relation should hold // if no open ele, intag = parent; mostly immediate parent-child relation should hold
else if (!ql || !el.N[e] || !q.filter(_k => cont.N[_k]).length) else if (!q.length || !el.N[e] || !q.filter(_k => cont.N[_k]).length)
{ {
if (!ok[e]) if (!ok[e])
{ {
if (ql && cont.T[p]) if (q.length && cont.T[p])
{ {
_ob += '</'+q.pop()+'>'; _ob += '</'+q.pop()+'>';
e = x = null; e = x = null;
@@ -864,8 +891,11 @@ var htmLawed = module.exports =
var m = /^<(\/?)([a-zA-Z][a-zA-Z1-6]*)([^>]*?)\s?>$/m.exec(t); var m = /^<(\/?)([a-zA-Z][a-zA-Z1-6]*)([^>]*?)\s?>$/m.exec(t);
if (!m) if (!m)
return t.replace(/</g, '&lt;').replace(/>/g, '&gt;'); return t.replace(/</g, '&lt;').replace(/>/g, '&gt;');
else if (!C.elements[e = m[2].toLowerCase()]) else if (!C.elements[e = m[2].toLowerCase()] && C.keep_bad > 0)
{
// C.keep_bad == 0 (remove bad elements with their content) is handled by hl_bal
return (C.keep_bad%2) ? t.replace(/</g, '&lt;').replace(/>/g, '&gt;') : ''; return (C.keep_bad%2) ? t.replace(/</g, '&lt;').replace(/>/g, '&gt;') : '';
}
// attr string // attr string
var a = m[3].trim().replace(/[\n\r\t]/g, ' '); var a = m[3].trim().replace(/[\n\r\t]/g, ' ');
// tag transform // tag transform

View File

@@ -1,6 +1,6 @@
{ {
"name": "htmlawed", "name": "htmlawed",
"version": "1.0.0", "version": "1.0.1",
"author": { "author": {
"name": "Vitaliy Filippov", "name": "Vitaliy Filippov",
"email": "vitalif@yourcmc.ru", "email": "vitalif@yourcmc.ru",
@@ -27,6 +27,7 @@
"devDependencies": { "devDependencies": {
"babel-cli": "latest", "babel-cli": "latest",
"babel-plugin-transform-es2015-destructuring": "latest", "babel-plugin-transform-es2015-destructuring": "latest",
"babel-plugin-transform-es2015-block-scoping": "latest",
"babel-plugin-transform-object-rest-spread": "latest", "babel-plugin-transform-object-rest-spread": "latest",
"babel-plugin-transform-es2015-arrow-functions": "latest", "babel-plugin-transform-es2015-arrow-functions": "latest",
"eslint": "latest", "eslint": "latest",

View File

@@ -27,3 +27,9 @@ var src = '<body><style>a { }</style> <img style="abc: 1">zhopa</img> <p>Hello &
var res = '<style>a { }</style> <img style="abc: 1" src="src" alt="image" />zhopa <p>Hello &nbsp; world!</p>'; var res = '<style>a { }</style> <img style="abc: 1" src="src" alt="image" />zhopa <p>Hello &nbsp; world!</p>';
var ok = htmLawed.sanitize(src, { safe: 1, elements: '* +style', style_pass: true }); var ok = htmLawed.sanitize(src, { safe: 1, elements: '* +style', style_pass: true });
console.log("[STYLE_PASS] "+(ok ? "OK" : "NOT OK")); console.log("[STYLE_PASS] "+(ok ? "OK" : "NOT OK"));
var str = 'ssss <script type="application/json" data-scope="inboxmarkup">\
{"api_version":"1.0","publisher":{"api_key":"05dde50f1d1a384dd78767c55493e4bb","name":"GitHub"},"entity":{"external_key":"github/vitalif/grive2","title":"vitalif/grive2","subtitle":"GitHub repository","main_image_url":"https://cloud.githubusercontent.com/assets/143418/17495839/a5054eac-5d88-11e6-95fc-7290892c7bb5.png","avatar_image_url":"https://cloud.githubusercontent.com/assets/143418/15842166/7c72db34-2c0b-11e6-9aed-b52498112777.png","action":{"name":"Open in GitHub","url":"https://github.com/vitalif/grive2"}},"updates":{"snippets":[{"icon":"DESCRIPTION","message":"Error syncing - TCP connection reset by peer (#111)"}],"action":{"name":"View Issue","url":"https://github.com/vitalif/grive2/issues/111"}}}\
</script> sss';
var ok = htmLawed.sanitize(str, { safe: 1, keep_bad: 0 }) == 'ssss sss';
console.log("[keep_bad=0] "+(ok ? "OK" : "NOT OK"));