Extract attachments into local FS

master
Vitaliy Filippov 2019-05-18 03:01:13 +03:00
parent 3b62de9e2d
commit 006e180553
7 changed files with 157 additions and 20 deletions

View File

@ -166,7 +166,7 @@ class ImapManager
end(); end();
} }
}) })
.catch(e => reject(e)); .catch(reject);
}); });
f.once('end', () => f.once('end', () =>

View File

@ -1,9 +1,14 @@
const fs = require('fs');
const path = require('path');
const crypto = require('crypto');
const Imap = require('imap'); const Imap = require('imap');
const EventEmitter = require('events').EventEmitter; const EventEmitter = require('events').EventEmitter;
const iconv = require('iconv-lite'); const iconv = require('iconv-lite');
const MailParser = require('mailparser').MailParser; const MailParser = require('mailparser').MailParser;
const mimelib = require('mimelib'); const mimelib = require('mimelib');
const fsp = require('./fsp.js');
const ImapManager = require('./ImapManager.js'); const ImapManager = require('./ImapManager.js');
const sanitizeHtml = require('./sanitize.js'); const sanitizeHtml = require('./sanitize.js');
const SQL = require('./select-builder-pgsql.js'); const SQL = require('./select-builder-pgsql.js');
@ -23,7 +28,16 @@ class Syncer
// public // public
async init(cfg) async init(cfg)
{ {
for (var i = 0; i < cfg.accounts.length; i++) this.files_path = path.resolve(cfg.files_path);
// Fail fast at startup when the attachment directory cannot be used.
// The access modes are bit masks and must be combined with bitwise OR:
// the logical form `R_OK || W_OK` evaluates to R_OK alone, so write
// permission would never actually be checked.
try
{
fs.accessSync(this.files_path, fs.constants.R_OK | fs.constants.W_OK);
}
catch (e)
{
throw new Error(this.files_path+' is not writable');
}
for (let i = 0; i < cfg.accounts.length; i++)
{ {
await this.addAccount(cfg.accounts[i]); await this.addAccount(cfg.accounts[i]);
} }
@ -402,15 +416,60 @@ class Syncer
} }
} }
async parseMsg(msg) async parseMsg(msg_text)
{ {
let parser = new MailParser({ streamAttachments: false, defaultCharset: 'windows-1251' }); let parser = new MailParser({ streamAttachments: false, defaultCharset: 'windows-1251' });
return await new Promise((r, j) => let msg = await new Promise((resolve, reject) =>
{ {
parser.once('end', r); parser.on('error', reject);
parser.write(msg); parser.once('end', resolve);
parser.write(msg_text);
parser.end(); parser.end();
}); });
let byid = {};
for (let a of msg.attachments||[])
{
byid[a.contentId||''] = a;
}
msg.html = msg.html.replace(/(<img[^<>]*src=["']?)cid:([^'"\s]{1,256})/g, (m, m1, m2) =>
{
if (!byid[m2])
{
return m1 + 'cid:' + m2;
}
return m1 + 'data:' + byid[m2].contentType + ';base64,' + byid[m2].toString('base64');
});
let attachments = [];
for (let a of msg.attachments||[])
{
let hash = crypto.createHash('sha1');
hash.update(a.content);
let sha1 = hash.digest('hex');
let subdir = sha1.substr(0, 2)+'/'+sha1.substr(2, 2);
let filename = subdir+'/'+sha1+'.bin';
if (!await fsp.exists(this.files_path+'/'+filename))
{
if (!await fsp.exists(this.files_path+'/'+sha1.substr(0, 2)))
{
await fsp.mkdir(this.files_path+'/'+sha1.substr(0, 2));
}
if (!await fsp.exists(this.files_path+'/'+subdir))
{
await fsp.mkdir(this.files_path+'/'+subdir);
}
await fsp.writeFile(this.files_path+'/'+filename, a.content);
}
attachments.push({
id: a.contentId,
name: a.fileName,
mimetype: a.contentType,
size: a.length,
sha1,
filename,
});
}
msg.attachments = attachments;
return msg;
} }
extractAttachments(struct, attachments) extractAttachments(struct, attachments)
@ -424,11 +483,11 @@ class Syncer
} }
else if (struct[i].disposition && struct[i].disposition.type == 'attachment') else if (struct[i].disposition && struct[i].disposition.type == 'attachment')
{ {
attachments.push([ attachments.push({
mimelib.parseMimeWords(struct[i].disposition.params && struct[i].disposition.params.filename || struct[i].description || ''), name: mimelib.parseMimeWords(struct[i].disposition.params && struct[i].disposition.params.filename || struct[i].description || ''),
struct[i].type+'/'+struct[i].subtype, mimetype: struct[i].type+'/'+struct[i].subtype,
struct[i].size, size: struct[i].size,
]); });
} }
} }
return attachments; return attachments;
@ -538,12 +597,10 @@ class Syncer
async fetchFullMessage(account_id, folder_id, folder_name, msg_uid) async fetchFullMessage(account_id, folder_id, folder_name, msg_uid)
{ {
// FIXME: parse and save attachments
// FIXME: replace inline images
let srv = await this.imap.getConnection(account_id, folder_name); let srv = await this.imap.getConnection(account_id, folder_name);
let upd = await this.imap.runFetch( let upd = await this.imap.runFetch(
srv, msg_uid, { bodies: '' }, srv, msg_uid, { bodies: '' },
async(messages, state) => await this._parseBody(messages, folder_id) (messages, state) => this._parseBody(messages, folder_id)
); );
this.imap.releaseConnection(account_id); this.imap.releaseConnection(account_id);
return upd; return upd;
@ -556,11 +613,20 @@ class Syncer
let msg = messages[i]; let msg = messages[i];
let obj = await this.parseMsg(msg[0].headers); let obj = await this.parseMsg(msg[0].headers);
obj.html = sanitizeHtml(obj.html); obj.html = sanitizeHtml(obj.html);
let upd = { body_text: obj.text||'', body_html: obj.html }; let upd = {
upd.body_html_text = obj.html.replace(/<style[^>]*>.*<\/style\s*>|<\/?[^>]*>/g, ''); body_text: obj.text||'',
await SQL.update(this.pg, 'messages m', upd, { folder_id: boxId, uid: msg[0].uid }); body_html: obj.html,
body_html_text: obj.html.replace(/<style[^>]*>.*<\/style\s*>|<\/?[^>]*>/g, ''),
};
/*await SQL.update(
this.pg, 'messages m', {
...upd,
'props = props || ?': [ { attachments: obj.attachments } ]
}, { folder_id: boxId, uid: msg[0].uid }
);*/
if (messages.length == 1) if (messages.length == 1)
{ {
upd.props = { attachments: obj.attachments };
return [ upd ]; return [ upd ];
} }
} }

View File

@ -251,7 +251,7 @@ class SyncerWeb
if (!msg.body_html && !msg.body_text) if (!msg.body_html && !msg.body_text)
{ {
let upd = await this.syncer.fetchFullMessage(msg.account_id, msg.folder_id, msg.folder_name, msg.uid); let upd = await this.syncer.fetchFullMessage(msg.account_id, msg.folder_id, msg.folder_name, msg.uid);
return res.send({ msg: { ...msg, ...upd[0] } }); return res.send({ msg: { ...msg, ...upd[0], props: { ...msg.props, ...upd[0].props } } });
} }
return res.send({ msg: msg }); return res.send({ msg: msg });
} }

View File

@ -0,0 +1,27 @@
begin;
-- Builds the full-text search vector of a message.
-- Weight 'A': addresses (from, replyto, to, cc, bcc), the attachment
-- descriptors (name, mimetype, size) and the subject.
-- Weight 'B': the body texts.
-- Every operand is wrapped in coalesce() because `||` yields NULL as soon as
-- one side is NULL; in particular string_agg() returns NULL when the message
-- has no attachments, which would otherwise null the whole vector.
create or replace function messages_fulltext(msg messages) returns tsvector
language plpgsql immutable as $$
begin
    return setweight(to_tsvector('russian', regexp_replace(
        coalesce(msg.props->>'from', '') || ' ' ||
        coalesce(msg.props->>'replyto', '') || ' ' ||
        coalesce(msg.props->>'to', '') || ' ' ||
        coalesce(msg.props->>'cc', '') || ' ' ||
        coalesce(msg.props->>'bcc', '') || ' ' ||
        coalesce((
            select string_agg(
                coalesce(a->>'name', '') || ' ' || coalesce(a->>'mimetype', '') || ' ' || coalesce(a->>'size', ''),
                ' '
            )
            from jsonb_array_elements(coalesce(msg.props->'attachments', '[]'::jsonb)) as t (a)
        ), '') || ' ' ||
        coalesce(msg.subject, ''),
        '\W+', ' ', 'g'
    )), 'A')
    || setweight(to_tsvector('russian', coalesce(msg.body_html_text, '') || ' ' || coalesce(msg.body_text, '')), 'B');
end
$$;
-- Convert attachment tuples [name, mimetype, size] into
-- {name, mimetype, size} objects.
-- `a->2` (not `a->>2`) keeps the size as the JSON value it already is instead
-- of turning it into a string.
-- The WHERE clause only selects rows whose first attachment is still an array,
-- so running the migration again leaves already-converted rows untouched; it
-- also skips rows where `attachments` is missing, empty or not an array.
update messages
set props = props || jsonb_build_object('attachments', (
    select jsonb_agg(jsonb_build_object('name', a->>0, 'mimetype', a->>1, 'size', a->2))
    from jsonb_array_elements(props->'attachments') as t (a)
))
where jsonb_typeof(props->'attachments'->0) = 'array';
commit;

View File

@ -82,7 +82,8 @@ begin
coalesce(msg.props->>'to', '') || ' ' || coalesce(msg.props->>'to', '') || ' ' ||
coalesce(msg.props->>'cc', '') || ' ' || coalesce(msg.props->>'cc', '') || ' ' ||
coalesce(msg.props->>'bcc', '') || ' ' || coalesce(msg.props->>'bcc', '') || ' ' ||
coalesce(msg.props->>'attachments', '') || ' ' || (select string_agg((a->>'name') || ' ' || (a->>'mimetype') || ' ' || (a->>'size'), ' ')
from jsonb_array_values(coalesce(msg.props->'attachments', '[]'::jsonb)) as t (a)) || ' ' ||
msg.subject, msg.subject,
'\W+', ' ', 'g' '\W+', ' ', 'g'
)), 'A') )), 'A')

43
fsp.js Normal file
View File

@ -0,0 +1,43 @@
const fs = require('fs');
module.exports = {
async writeFile(path, content, options)
{
return await new Promise((ok, no) =>
{
fs.writeFile(path, content, options, err => err ? no(err) : ok());
});
},
async rename(from, to)
{
return await new Promise((ok, no) =>
{
fs.rename(from, to, err => err ? no(err) : ok());
});
},
async mkdir(path, options)
{
return await new Promise((ok, no) =>
{
fs.mkdir(path, options, err => err ? no(err) : ok());
});
},
async exists(path)
{
return await new Promise((ok, no) =>
{
fs.access(path, fs.constants.R_OK, err => err ? (err.code == 'ENOENT' ? ok(false) : no(err)) : ok(true));
});
},
async is_writable(path)
{
return await new Promise((ok, no) =>
{
fs.access(path, fs.constants.R_OK | fs.constants.W_OK, err => ok(!err));
});
},
};

View File

@ -61,7 +61,7 @@ async function startSync(cfg)
let syncerweb = new SyncerWeb(syncer, dbh, cfg); let syncerweb = new SyncerWeb(syncer, dbh, cfg);
await syncer.init(cfg); await syncer.init(cfg);
syncerweb.listen(8057); syncerweb.listen(8057);
await syncer.syncAll(); //await syncer.syncAll();
} }
let cfg = require('./cfg.json'); let cfg = require('./cfg.json');