Extract attachments into local FS

master
Vitaliy Filippov 2019-05-18 03:01:13 +03:00
parent 3b62de9e2d
commit 006e180553
7 changed files with 157 additions and 20 deletions

View File

@ -166,7 +166,7 @@ class ImapManager
end();
}
})
.catch(e => reject(e));
.catch(reject);
});
f.once('end', () =>

View File

@ -1,9 +1,14 @@
const fs = require('fs');
const path = require('path');
const crypto = require('crypto');
const Imap = require('imap');
const EventEmitter = require('events').EventEmitter;
const iconv = require('iconv-lite');
const MailParser = require('mailparser').MailParser;
const mimelib = require('mimelib');
const fsp = require('./fsp.js');
const ImapManager = require('./ImapManager.js');
const sanitizeHtml = require('./sanitize.js');
const SQL = require('./select-builder-pgsql.js');
@ -23,7 +28,16 @@ class Syncer
// public
async init(cfg)
{
for (var i = 0; i < cfg.accounts.length; i++)
this.files_path = path.resolve(cfg.files_path);
try
{
fs.accessSync(this.files_path, fs.constants.R_OK || fs.constants.W_OK);
}
catch (e)
{
throw new Error(this.files_path+' is not writable');
}
for (let i = 0; i < cfg.accounts.length; i++)
{
await this.addAccount(cfg.accounts[i]);
}
@ -402,15 +416,60 @@ class Syncer
}
}
async parseMsg(msg)
async parseMsg(msg_text)
{
let parser = new MailParser({ streamAttachments: false, defaultCharset: 'windows-1251' });
return await new Promise((r, j) =>
let msg = await new Promise((resolve, reject) =>
{
parser.once('end', r);
parser.write(msg);
parser.on('error', reject);
parser.once('end', resolve);
parser.write(msg_text);
parser.end();
});
let byid = {};
for (let a of msg.attachments||[])
{
byid[a.contentId||''] = a;
}
msg.html = msg.html.replace(/(<img[^<>]*src=["']?)cid:([^'"\s]{1,256})/g, (m, m1, m2) =>
{
if (!byid[m2])
{
return m1 + 'cid:' + m2;
}
return m1 + 'data:' + byid[m2].contentType + ';base64,' + byid[m2].toString('base64');
});
let attachments = [];
for (let a of msg.attachments||[])
{
let hash = crypto.createHash('sha1');
hash.update(a.content);
let sha1 = hash.digest('hex');
let subdir = sha1.substr(0, 2)+'/'+sha1.substr(2, 2);
let filename = subdir+'/'+sha1+'.bin';
if (!await fsp.exists(this.files_path+'/'+filename))
{
if (!await fsp.exists(this.files_path+'/'+sha1.substr(0, 2)))
{
await fsp.mkdir(this.files_path+'/'+sha1.substr(0, 2));
}
if (!await fsp.exists(this.files_path+'/'+subdir))
{
await fsp.mkdir(this.files_path+'/'+subdir);
}
await fsp.writeFile(this.files_path+'/'+filename, a.content);
}
attachments.push({
id: a.contentId,
name: a.fileName,
mimetype: a.contentType,
size: a.length,
sha1,
filename,
});
}
msg.attachments = attachments;
return msg;
}
extractAttachments(struct, attachments)
@ -424,11 +483,11 @@ class Syncer
}
else if (struct[i].disposition && struct[i].disposition.type == 'attachment')
{
attachments.push([
mimelib.parseMimeWords(struct[i].disposition.params && struct[i].disposition.params.filename || struct[i].description || ''),
struct[i].type+'/'+struct[i].subtype,
struct[i].size,
]);
attachments.push({
name: mimelib.parseMimeWords(struct[i].disposition.params && struct[i].disposition.params.filename || struct[i].description || ''),
mimetype: struct[i].type+'/'+struct[i].subtype,
size: struct[i].size,
});
}
}
return attachments;
@ -538,12 +597,10 @@ class Syncer
async fetchFullMessage(account_id, folder_id, folder_name, msg_uid)
{
// FIXME: parse and save attachments
// FIXME: replace inline images
let srv = await this.imap.getConnection(account_id, folder_name);
let upd = await this.imap.runFetch(
srv, msg_uid, { bodies: '' },
async(messages, state) => await this._parseBody(messages, folder_id)
(messages, state) => this._parseBody(messages, folder_id)
);
this.imap.releaseConnection(account_id);
return upd;
@ -556,11 +613,20 @@ class Syncer
let msg = messages[i];
let obj = await this.parseMsg(msg[0].headers);
obj.html = sanitizeHtml(obj.html);
let upd = { body_text: obj.text||'', body_html: obj.html };
upd.body_html_text = obj.html.replace(/<style[^>]*>.*<\/style\s*>|<\/?[^>]*>/g, '');
await SQL.update(this.pg, 'messages m', upd, { folder_id: boxId, uid: msg[0].uid });
let upd = {
body_text: obj.text||'',
body_html: obj.html,
body_html_text: obj.html.replace(/<style[^>]*>.*<\/style\s*>|<\/?[^>]*>/g, ''),
};
/*await SQL.update(
this.pg, 'messages m', {
...upd,
'props = props || ?': [ { attachments: obj.attachments } ]
}, { folder_id: boxId, uid: msg[0].uid }
);*/
if (messages.length == 1)
{
upd.props = { attachments: obj.attachments };
return [ upd ];
}
}

View File

@ -251,7 +251,7 @@ class SyncerWeb
if (!msg.body_html && !msg.body_text)
{
let upd = await this.syncer.fetchFullMessage(msg.account_id, msg.folder_id, msg.folder_name, msg.uid);
return res.send({ msg: { ...msg, ...upd[0] } });
return res.send({ msg: { ...msg, ...upd[0], props: { ...msg.props, ...upd[0].props } } });
}
return res.send({ msg: msg });
}

View File

@ -0,0 +1,27 @@
begin;

-- Full-text vector of a message.
-- Weight 'A': address headers, attachment descriptions (name, mimetype, size)
-- and the subject, with every run of non-word characters collapsed to a space.
-- Weight 'B': the text extracted from the HTML body plus the plain-text body.
create or replace function messages_fulltext(msg messages) returns tsvector
language plpgsql immutable as $$
begin
    return setweight(to_tsvector('russian', regexp_replace(
        coalesce(msg.props->>'from', '') || ' ' ||
        coalesce(msg.props->>'replyto', '') || ' ' ||
        coalesce(msg.props->>'to', '') || ' ' ||
        coalesce(msg.props->>'cc', '') || ' ' ||
        coalesce(msg.props->>'bcc', '') || ' ' ||
        -- string_agg() over zero rows yields NULL, and NULL || text is NULL,
        -- which would make the whole vector NULL for messages without
        -- attachments; hence the outer coalesce. The inner ones keep an
        -- attachment with a missing field from being dropped entirely.
        coalesce((
            select string_agg(
                coalesce(a->>'name', '') || ' ' ||
                coalesce(a->>'mimetype', '') || ' ' ||
                coalesce(a->>'size', ''),
                ' '
            )
            from jsonb_array_elements(
                -- jsonb_array_elements() raises an error on non-arrays
                case when jsonb_typeof(msg.props->'attachments') = 'array'
                    then msg.props->'attachments'
                    else '[]'::jsonb
                end
            ) as t (a)
        ), '') || ' ' ||
        -- NOTE(review): subject and the body columns below are not coalesced;
        -- if any of them can be NULL the result is NULL - confirm column constraints
        msg.subject,
        '\W+', ' ', 'g'
    )), 'A')
    || setweight(to_tsvector('russian', msg.body_html_text || ' ' || msg.body_text), 'B');
end
$$;

-- Convert stored attachments from positional arrays [name, mimetype, size]
-- to objects {name, mimetype, size}.
update messages
set props = props || jsonb_build_object('attachments', (
    -- a->2 (not a->>2) keeps the size as a JSON value instead of casting it to text
    select jsonb_agg(jsonb_build_object('name', a->>0, 'mimetype', a->>1, 'size', a->2))
    from jsonb_array_elements(props->'attachments') as t (a)
))
-- Only rows still in the old format: the first element must itself be an array.
-- This is NULL (row skipped) when attachments is missing, empty, not an array,
-- or already converted, so the migration can be re-run safely.
where jsonb_typeof(props->'attachments'->0) = 'array';

commit;

View File

@ -82,7 +82,8 @@ begin
coalesce(msg.props->>'to', '') || ' ' ||
coalesce(msg.props->>'cc', '') || ' ' ||
coalesce(msg.props->>'bcc', '') || ' ' ||
coalesce(msg.props->>'attachments', '') || ' ' ||
(select string_agg((a->>'name') || ' ' || (a->>'mimetype') || ' ' || (a->>'size'), ' ')
from jsonb_array_values(coalesce(msg.props->'attachments', '[]'::jsonb)) as t (a)) || ' ' ||
msg.subject,
'\W+', ' ', 'g'
)), 'A')

43
fsp.js Normal file
View File

@ -0,0 +1,43 @@
const fs = require('fs');
module.exports = {
async writeFile(path, content, options)
{
return await new Promise((ok, no) =>
{
fs.writeFile(path, content, options, err => err ? no(err) : ok());
});
},
async rename(from, to)
{
return await new Promise((ok, no) =>
{
fs.rename(from, to, err => err ? no(err) : ok());
});
},
async mkdir(path, options)
{
return await new Promise((ok, no) =>
{
fs.mkdir(path, options, err => err ? no(err) : ok());
});
},
async exists(path)
{
return await new Promise((ok, no) =>
{
fs.access(path, fs.constants.R_OK, err => err ? (err.code == 'ENOENT' ? ok(false) : no(err)) : ok(true));
});
},
async is_writable(path)
{
return await new Promise((ok, no) =>
{
fs.access(path, fs.constants.R_OK | fs.constants.W_OK, err => ok(!err));
});
},
};

View File

@ -61,7 +61,7 @@ async function startSync(cfg)
let syncerweb = new SyncerWeb(syncer, dbh, cfg);
await syncer.init(cfg);
syncerweb.listen(8057);
await syncer.syncAll();
//await syncer.syncAll();
}
let cfg = require('./cfg.json');