likeopera-backend/sanitize.js

72 lines
2.3 KiB
JavaScript

const htmlawed = require('htmlawed');
const css = require('css');
/**
 * Sanitize an untrusted HTML document so it can be embedded into the frontend.
 *
 * Processing order:
 *  1. drop every <script> element together with its content;
 *  2. collect the content of all <style> elements and remove them from the markup;
 *  3. keep only the inner part of <body> / <html> when those wrappers are present;
 *  4. re-insert the collected CSS as a single leading <style> element;
 *  5. run the result through htmLawed;
 *  6. add target="_blank" to every <a ...> tag that has attributes;
 *  7. parse each remaining <style> element, pass it through rewriteCss()
 *     and serialize it back.
 *
 * @param {*} html - HTML source. Any falsy value yields an empty string;
 *   everything else is converted to a string first.
 * @returns {string} The sanitized HTML fragment.
 */
function sanitizeHtml(html)
{
    if (!html)
        return '';
    let text = String(html);
    // GitHub tends to insert some metadata script. Cut them off here,
    // because htmLawed has a global policy for bad tags and we use "leave content in place".
    // The content match is lazy on purpose: a greedy match would span from the first
    // <script> to the last </script> and delete all the markup between two scripts.
    text = text.replace(/<script[^<>]*>[\s\S]*?<\/script\s*>/ig, '');
    // Pull all stylesheets out of the markup. A <style> that is not closed before
    // the next <style> opens is treated as ending right there (the lookahead branch).
    let styles = '';
    text = text.replace(/<style[^<>]*>([\s\S]*?)(<\/style\s*>|(?=<style[^<>]*>))/ig, (m, m1) =>
    {
        styles += m1+'\n';
        return '';
    });
    // Unwrap <body>...</body> and <html>...</html>, discarding whatever surrounds them.
    text = text.replace(/^[\s\S]*?<body[^<>]*>([\s\S]*)<\/body>[\s\S]*$/i, '$1');
    text = text.replace(/^[\s\S]*?<html[^<>]*>([\s\S]*)<\/html>[\s\S]*$/i, '$1');
    if (styles)
    {
        // Put the merged CSS back as one <style> element in front of the content.
        text = '<style>\n'+styles+'</style>\n'+text;
    }
    text = htmlawed.sanitize(text, { safe: 1, elements: '* +style', keep_bad: 6, comment: 1 });
    // Open links in a new window. Whitespace is required right after "<a" so that
    // other elements whose name starts with "a" (<abbr>, <article>, <audio>, ...)
    // are not given a target attribute.
    text = text.replace(/<a(\s[^>]*)>/ig, (m, m1) => '<a'+m1+' target="_blank">');
    // Rewrite the CSS of every <style> element separately (lazy match), so that markup
    // located between two <style> elements is never fed to the CSS parser and lost.
    text = text.replace(/<style[^>]*>([\s\S]*?)<\/style\s*>/ig, (m, m1) =>
    {
        const ast = css.parse(m1, { silent: true });
        rewriteCss(ast);
        return '<style>'+css.stringify(ast)+'</style>';
    });
    return text;
}
/**
 * Rewrite a parsed CSS syntax tree in place so that it is safe to embed
 * next to the application's own styles.
 *
 * - Removes the `parsingErrors` list from a stylesheet node.
 * - Prunes @document rules (may spy on current URL).
 * - Prunes @import rules: an imported stylesheet is fetched from a remote
 *   location and its selectors would not go through the prefixing below.
 * - Prunes plain rules that have no selectors or no declarations.
 * - Recurses into every remaining nested rule.
 * - Prepends `prefix` to each selector of a plain rule.
 *
 * @param {object} ast - A stylesheet node, a node containing `rules`, or a single rule
 *   node, in the shape produced by `css.parse()`. Null/undefined is ignored.
 * @param {string} [prefix='.message-view .text '] - Selector prefix that scopes
 *   the rules to the message container.
 * @returns {void} The tree is modified in place.
 */
function rewriteCss(ast, prefix = '.message-view .text ')
{
    if (!ast)
        return;
    const rules = ast.rules || ast.stylesheet && ast.stylesheet.rules;
    if (ast.stylesheet && ast.stylesheet.parsingErrors)
    {
        delete ast.stylesheet.parsingErrors;
    }
    if (rules)
    {
        // Walk backwards so that removing an entry does not shift the ones still to visit.
        for (let i = rules.length - 1; i >= 0; i--)
        {
            const rule = rules[i];
            const isUnsafe = !rule || rule.type === 'document' || rule.type === 'import';
            const isBrokenRule = !isUnsafe && rule.type === 'rule' &&
                (!rule.selectors || !rule.declarations);
            if (isUnsafe || isBrokenRule)
                rules.splice(i, 1);
            else
                rewriteCss(rule, prefix);
        }
    }
    else if (ast.type === 'rule' && ast.selectors)
    {
        for (let i = 0; i < ast.selectors.length; i++)
        {
            // FIXME: Do not hardcode css selector for frontend in the default value.
            // This will require generating unique substitution string,
            // so we may also generate 'blocked images' stubs when we do it.
            ast.selectors[i] = prefix+ast.selectors[i];
        }
    }
}
// The module's only export is the sanitizeHtml() function itself.
module.exports = sanitizeHtml;