-- File Manager
--[[
Copyright (c) 2015, Vsevolod Stakhov <vsevolod@highsecure.ru>
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
]]--
-- Nothing to set up when the file is loaded only to collect configuration
-- help: stop before any state is created.
if confighelp then return end
-- This plugin is intended to read and parse spamassassin rules with regexp
-- rules. SA plugins or statistics are not supported
local E = {}
local N = 'spamassassin'
local rspamd_logger = require "rspamd_logger"
local rspamd_regexp = require "rspamd_regexp"
local rspamd_expression = require "rspamd_expression"
local rspamd_trie = require "rspamd_trie"
local util = require "rspamd_util"
local lua_util = require "lua_util"
local fun = require "fun"
-- Known plugins
-- process_sa_conf compares the plugin named by an `ifplugin <name>` or
-- `if !plugin(<name>)` line against this list: an `ifplugin` section for a
-- name that is NOT listed is skipped, and an `if !plugin(...)` section for a
-- name that IS listed is skipped.
local known_plugins = {
'Mail::SpamAssassin::Plugin::FreeMail',
'Mail::SpamAssassin::Plugin::HeaderEval',
'Mail::SpamAssassin::Plugin::ReplaceTags',
'Mail::SpamAssassin::Plugin::RelayEval',
'Mail::SpamAssassin::Plugin::MIMEEval',
'Mail::SpamAssassin::Plugin::BodyEval',
'Mail::SpamAssassin::Plugin::MIMEHeader',
'Mail::SpamAssassin::Plugin::WLBLEval',
'Mail::SpamAssassin::Plugin::HTMLEval',
}
-- Table that replaces SA symbol with rspamd equivalent
-- Used for dependency resolution: replace_symbol and the external-atom branch
-- of gen_process_atom_cb look a SpamAssassin symbol name up here and use the
-- mapped rspamd symbol instead.
local symbols_replacements = {
  -- SPF replacements
  USER_IN_SPF_WHITELIST = 'WHITELIST_SPF',
  USER_IN_DEF_SPF_WL = 'WHITELIST_SPF',
  SPF_PASS = 'R_SPF_ALLOW',
  SPF_FAIL = 'R_SPF_FAIL',
  SPF_SOFTFAIL = 'R_SPF_SOFTFAIL',
  SPF_HELO_PASS = 'R_SPF_ALLOW',
  -- SpamAssassin names these rules SPF_HELO_* (single "L")
  SPF_HELO_FAIL = 'R_SPF_FAIL',
  SPF_HELO_SOFTFAIL = 'R_SPF_SOFTFAIL',
  -- Historical misspellings, kept so that configurations relying on them
  -- continue to resolve
  SPF_HELLO_FAIL = 'R_SPF_FAIL',
  SPF_HELLO_SOFTFAIL = 'R_SPF_SOFTFAIL',
  -- DKIM replacements
  USER_IN_DKIM_WHITELIST = 'WHITELIST_DKIM',
  USER_IN_DEF_DKIM_WL = 'WHITELIST_DKIM',
  DKIM_VALID = 'R_DKIM_ALLOW',
  -- SURBL replacements
  URIBL_SBL_A = 'URIBL_SBL',
  URIBL_DBL_SPAM = 'DBL_SPAM',
  URIBL_DBL_PHISH = 'DBL_PHISH',
  URIBL_DBL_MALWARE = 'DBL_MALWARE',
  URIBL_DBL_BOTNETCC = 'DBL_BOTNET',
  URIBL_DBL_ABUSE_SPAM = 'DBL_ABUSE',
  URIBL_DBL_ABUSE_REDIR = 'DBL_ABUSE_REDIR',
  URIBL_DBL_ABUSE_MALW = 'DBL_ABUSE_MALWARE',
  URIBL_DBL_ABUSE_BOTCC = 'DBL_ABUSE_BOTNET',
  URIBL_WS_SURBL = 'WS_SURBL_MULTI',
  URIBL_PH_SURBL = 'PH_SURBL_MULTI',
  URIBL_MW_SURBL = 'MW_SURBL_MULTI',
  URIBL_CR_SURBL = 'CRACKED_SURBL',
  URIBL_ABUSE_SURBL = 'ABUSE_SURBL',
  -- Misc rules
  BODY_URI_ONLY = 'R_EMPTY_IMAGE',
  HTML_IMAGE_ONLY_04 = 'HTML_SHORT_LINK_IMG_1',
  HTML_IMAGE_ONLY_08 = 'HTML_SHORT_LINK_IMG_1',
  HTML_IMAGE_ONLY_12 = 'HTML_SHORT_LINK_IMG_1',
  HTML_IMAGE_ONLY_16 = 'HTML_SHORT_LINK_IMG_2',
  HTML_IMAGE_ONLY_20 = 'HTML_SHORT_LINK_IMG_2',
  HTML_IMAGE_ONLY_24 = 'HTML_SHORT_LINK_IMG_3',
  HTML_IMAGE_ONLY_28 = 'HTML_SHORT_LINK_IMG_3',
  HTML_IMAGE_ONLY_32 = 'HTML_SHORT_LINK_IMG_3',
}
-- Internal variables
-- Parsed rules keyed by symbol name (filled by insert_cur_rule/add_sole_meta)
local rules = {}
-- Symbol name -> callback evaluating that rule for a task (filled by post_process)
local atoms = {}
-- Symbol name -> score taken from `score` lines of the SA configuration
local scores = {}
-- NOTE(review): not referenced in the visible part of the file; purpose to be
-- confirmed against the remainder of the module
local scores_added = {}
-- NOTE(review): not referenced in the visible part of the file; purpose to be
-- confirmed against the remainder of the module
local external_deps = {}
-- '@domain' strings collected from `freemail_domains` lines
local freemail_domains = {}
-- Rule names whose regexps are registered with pcre_only = true (see
-- is_pcre_only); nothing in the visible part of the file populates it
local pcre_only_regexps = {}
-- Trie matched by freemail_search; it is assigned outside the visible part of
-- the file
local freemail_trie
-- Data gathered from the replace_tag/replace_pre/replace_inter/replace_post
-- and replace_rules directives
local replace = {
tags = {},
pre = {},
inter = {},
post = {},
rules = {},
}
local internal_regexp = {
-- Parses the ('<min>', '<max>') argument list of check_for_shifted_date; each
-- value is either a signed integer or the literal undef
date_shift = rspamd_regexp.create("^\\(\\s*'((?:-?\\d+)|(?:undef))'\\s*,\\s*'((?:-?\\d+)|(?:undef))'\\s*\\)$")
}
-- Mail::SpamAssassin::Plugin::WLBLEval plugin
-- Each list maps an address to 1; `elts` counts the entries added by
-- process_sa_conf
local sa_lists = {
from_blacklist = {},
from_whitelist = {},
from_def_whitelist = {},
to_blacklist = {},
to_whitelist = {},
elts = 0,
}
-- Eval argument string -> function produced by gen_eval_rule
local func_cache = {}
local section = rspamd_config:get_all_opt("spamassassin")
-- Only an informational message is logged here; loading continues even when
-- the section is missing
if not (section and type(section) == 'table') then
rspamd_logger.infox(rspamd_config, 'Module is unconfigured')
end
-- Minimum score to treat symbols as meta
local meta_score_alpha = 0.5
-- Maximum size of regexp checked
-- (passed to re:set_limit for every regexp created from a rule)
local match_limit = 0
--- Collect every substring of `str` that matches a Lua pattern.
-- @param str string to scan
-- @param delim Lua pattern describing one token; when it is nil or false,
--   runs of non-whitespace characters are used
-- @return array with the matched tokens in order of appearance
local function split(str, delim)
  local pattern = delim or '[^%s]+'
  local tokens = {}

  for piece in string.gmatch(str, pattern) do
    tokens[#tokens + 1] = piece
  end

  return tokens
end
--- Translate a SpamAssassin symbol name to its rspamd counterpart.
-- @param s symbol name to look up in symbols_replacements
-- @return the mapped name and true when a mapping exists, otherwise `s`
--   itself and false
local function replace_symbol(s)
  local mapped = symbols_replacements[s]

  if mapped then
    return mapped, true
  end

  return s, false
end
-- When running under LuaJIT (the global `jit` is a table) load the ffi module
-- and declare the two C functions of the regexp cache. In the visible part of
-- the file they are referenced only from the commented-out fast path inside
-- process_regexp_opt.
local ffi
if type(jit) == 'table' then
ffi = require("ffi")
ffi.cdef[[
int rspamd_re_cache_type_from_string (const char *str);
int rspamd_re_cache_process_ffi (void *ptask,
void *pre,
int type,
const char *type_data,
int is_strong);
]]
end
--- Evaluate a regexp for a task by delegating to task:process_regexp.
-- @param re regexp object
-- @param task object providing the `process_regexp` method
-- @param re_type regexp class name (callers pass e.g. 'header', 'rawheader',
--   'allheader', 'url', 'sabody')
-- @param header header name; callers supply it for header classes only
-- @param strong forwarded unchanged (handle_header_def sets it from the
--   `case` header modifier)
-- @return whatever task:process_regexp returns
local function process_regexp_opt(re, task, re_type, header, strong)
  -- A LuaJIT-only variant used to live here: it resolved `re_type` with
  -- ffi.C.rspamd_re_cache_type_from_string, converted `strong` to 0/1 and
  -- returned tonumber(ffi.C.rspamd_re_cache_process_ffi(task, re, itype,
  -- header, strong)). It was disabled with the remark
  -- "This is now broken with lua regexp conditions!", so every runtime goes
  -- through the task method.
  return task:process_regexp(re, re_type, header, strong)
end
--- Tell whether the regexp of rule `name` is listed in pcre_only_regexps.
-- A positive answer is also reported through the info log.
-- @param name rule (symbol) name
-- @return true when the rule is listed, false otherwise
local function is_pcre_only(name)
  if not pcre_only_regexps[name] then
    return false
  end

  rspamd_logger.infox(rspamd_config, 'mark re %s as PCRE only', name)
  return true
end
--- Interpret the header specification of an SA `header`/`mimeheader` rule.
-- `hline` is a list of header names separated by '|'; each name may carry
-- ':'-separated modifiers (addr, name, raw, case). The special names ALL and
-- ALL:raw turn the rule into a function rule matching all headers.
-- On return `cur_rule['header']` holds one description table per header
-- ({header=, raw=, strong=, ['function']=}) and `cur_rule['ordinary']` tells
-- whether the rule may use the cached single-header path.
-- @param hline header specification (third word of the rule line)
-- @param cur_rule rule table being filled; its `re` and `symbol` fields are read
local function handle_header_def(hline, cur_rule)
  -- Now check for modifiers inside header's name
  local hdrs = split(hline, '[^|]+')
  local hdr_params = {}
  -- Check if an re is an ordinary re. The cached path evaluates only the
  -- first header description, so a rule that lists several alternative
  -- headers has to use the slow path that visits all of them.
  local ordinary = #hdrs <= 1

  -- Append a copy of `param` to hdr_params for each name in `headers`
  local function split_hdr_param(param, headers)
    for _, hh in ipairs(headers) do
      local nparam = {}
      for k, v in pairs(param) do
        if k ~= 'header' then
          nparam[k] = v
        end
      end
      nparam['header'] = hh
      table.insert(hdr_params, nparam)
    end
  end

  for _, h in ipairs(hdrs) do
    if h == 'ALL' or h == 'ALL:raw' then
      ordinary = false
      cur_rule['type'] = 'function'
      -- Pack closure
      local re = cur_rule['re']
      -- Rule to match all headers
      rspamd_config:register_regexp({
        re = re,
        type = 'allheader',
        pcre_only = is_pcre_only(cur_rule['symbol']),
      })
      cur_rule['function'] = function(task)
        if not re then
          rspamd_logger.errx(task, 're is missing for rule %1', h)
          return 0
        end

        return process_regexp_opt(re, task, 'allheader')
      end
    else
      local args = split(h, '[^:]+')
      -- A fresh description per header: sharing one table between iterations
      -- would make every stored entry refer to the last header and would let
      -- a modifier function leak from one header to the next
      local cur_param = {
        strong = false,
        raw = false,
        header = args[1],
      }

      if args[2] then
        -- We have some ops that are required for the header, so it's not ordinary
        ordinary = false
      end

      fun.each(function(func)
        if func == 'addr' then
          -- Reduce the header value to the list of addresses found in it
          cur_param['function'] = function(str)
            local addr_parsed = util.parse_mail_address(str)
            local ret = {}
            if addr_parsed then
              for _, elt in ipairs(addr_parsed) do
                if elt['addr'] then
                  table.insert(ret, elt['addr'])
                end
              end
            end

            return ret
          end
        elseif func == 'name' then
          -- Reduce the header value to the list of display names found in it
          cur_param['function'] = function(str)
            local addr_parsed = util.parse_mail_address(str)
            local ret = {}
            if addr_parsed then
              for _, elt in ipairs(addr_parsed) do
                if elt['name'] then
                  table.insert(ret, elt['name'])
                end
              end
            end

            return ret
          end
        elseif func == 'raw' then
          cur_param['raw'] = true
        elseif func == 'case' then
          cur_param['strong'] = true
        else
          rspamd_logger.warnx(rspamd_config, 'Function %1 is not supported in %2',
            func, cur_rule['symbol'])
        end
      end, fun.tail(args))

      -- Some header rules require splitting to check of multiple headers
      if cur_param['header'] == 'MESSAGEID' then
        -- Special case for spamassassin
        ordinary = false
        split_hdr_param(cur_param, {
          'Message-ID',
          'X-Message-ID',
          'Resent-Message-ID'})
      elseif cur_param['header'] == 'ToCc' then
        ordinary = false
        split_hdr_param(cur_param, { 'To', 'Cc', 'Bcc' })
      else
        table.insert(hdr_params, cur_param)
      end
    end

    cur_rule['ordinary'] = ordinary
    cur_rule['header'] = hdr_params
  end
end
--- Count the matches that freemail_trie reports for `input`.
-- @param input text to search; nil (or false) yields 0 without touching the trie
-- @return number of times the trie invoked the match callback
local function freemail_search(input)
  local hits = 0

  if input then
    freemail_trie:match(input, function(number, pos)
      lua_util.debugm(N, rspamd_config, 'Matched pattern %1 at pos %2', freemail_domains[number], pos)
      hits = hits + 1
    end, true)
  end

  return hits
end
--- Build the callback for an SA `eval:` rule.
-- `arg` is the text following `eval:`, i.e. a function name immediately
-- followed by its argument list, for example "check_for_mime('mime_attachment')".
-- The first supported function whose name is a prefix of `arg` is selected;
-- the remainder of `arg` (the parenthesised argument list) is handed to it on
-- every call.
-- @param arg eval expression without the `eval:` prefix
-- @return function(task) returning a number (0 means "no match"), or nil when
--   no supported function matches
local function gen_eval_rule(arg)
  -- Pattern extracting the single quoted argument of "('value')"
  local single_arg_pat = "^%(%s*['\"]([^%s]+)['\"]%s*%)$"

  -- Shared implementation of the From-address list checks
  local function from_in_list(task, list_name)
    local from = task:get_from('mime')
    if ((from or E)[1] or E).addr then
      if sa_lists[list_name][string.lower(from[1]['addr'])] then
        return 1
      end
    end

    return 0
  end

  -- Shared implementation of the recipient list checks
  local function rcpt_in_list(task, list_name)
    local rcpt = task:get_recipients('mime')
    if rcpt then
      for _, r in ipairs(rcpt) do
        local addr = r['addr']
        -- A recipient without an address cannot be listed
        if addr and sa_lists[list_name][string.lower(addr)] then
          return 1
        end
      end
    end

    return 0
  end

  local eval_funcs = {
    {'check_freemail_from', function(task)
      local from = task:get_from('mime')
      -- Guard against an entry that carries no address
      local addr = ((from or E)[1] or E).addr
      if addr then
        return freemail_search(string.lower(addr))
      end
      return 0
    end},
    {'check_freemail_replyto',
      function(task)
        return freemail_search(task:get_header('Reply-To'))
      end
    },
    {'check_freemail_header',
      function(task, remain)
        -- Remain here contains one or two args: header and regexp to match
        local larg = string.match(remain, single_arg_pat)
        local re = nil
        if not larg then
          larg, re = string.match(remain, "^%(%s*['\"]([^%s]+)['\"]%s*,%s*['\"]([^%s]+)['\"]%s*%)$")
        end

        if larg then
          local h
          if larg == 'EnvelopeFrom' then
            -- The envelope sender list may be missing or empty
            local from = task:get_from('smtp')
            h = ((from or E)[1] or E).addr
          else
            h = task:get_header(larg)
          end
          if h then
            local hdr_freemail = freemail_search(string.lower(h))

            if hdr_freemail > 0 and re then
              local r = rspamd_regexp.create_cached(re)
              if r then
                if r:match(h) then
                  return 1
                end
                return 0
              else
                rspamd_logger.infox(rspamd_config, 'cannot create regexp %1', re)
                return 0
              end
            end

            return hdr_freemail
          end
        end

        return 0
      end
    },
    {
      'check_for_missing_to_header',
      function (task)
        local th = task:get_recipients('mime')
        if not th or #th == 0 then
          return 1
        end

        return 0
      end
    },
    {
      'check_relays_unparseable',
      function(task)
        -- Difference between the number of Received headers and the number
        -- of them that were parsed
        local rh_mime = task:get_header_full('Received')
        local rh_parsed = task:get_received_headers()

        local rh_cnt = 0
        if rh_mime then rh_cnt = #rh_mime end
        local parsed_cnt = 0
        if rh_parsed then parsed_cnt = #rh_parsed end

        return rh_cnt - parsed_cnt
      end
    },
    {
      'check_for_shifted_date',
      function (task, remain)
        -- Remain here contains two args: start and end hours shift
        local matches = internal_regexp['date_shift']:search(remain, true, true)
        if matches and matches[1] then
          local min_diff = matches[1][2]
          local max_diff = matches[1][3]

          -- 'undef' is represented as 0, hours are converted to seconds
          if min_diff == 'undef' then
            min_diff = 0
          else
            min_diff = tonumber(min_diff) * 3600
          end
          if max_diff == 'undef' then
            max_diff = 0
          else
            max_diff = tonumber(max_diff) * 3600
          end

          -- Now get the difference between Date and message received date
          local dm = task:get_date { format = 'message', gmt = true}
          local dt = task:get_date { format = 'connect', gmt = true}
          local diff = dm - dt

          if (max_diff == 0 and diff >= min_diff) or
              (min_diff == 0 and diff <= max_diff) or
              (diff >= min_diff and diff <= max_diff) then
            return 1
          end
        end

        return 0
      end
    },
    {
      'check_for_mime',
      function(task, remain)
        local larg = string.match(remain, single_arg_pat)

        if larg then
          if larg == 'mime_attachment' then
            local parts = task:get_parts()
            if parts then
              for _, p in ipairs(parts) do
                if p:get_filename() then
                  return 1
                end
              end
            end
          else
            rspamd_logger.infox(task, 'unimplemented mime check %1', arg)
          end
        end

        return 0
      end
    },
    {
      'check_from_in_blacklist',
      function(task)
        return from_in_list(task, 'from_blacklist')
      end
    },
    {
      'check_from_in_whitelist',
      function(task)
        return from_in_list(task, 'from_whitelist')
      end
    },
    {
      'check_from_in_default_whitelist',
      function(task)
        return from_in_list(task, 'from_def_whitelist')
      end
    },
    {
      'check_to_in_blacklist',
      function(task)
        return rcpt_in_list(task, 'to_blacklist')
      end
    },
    {
      'check_to_in_whitelist',
      function(task)
        return rcpt_in_list(task, 'to_whitelist')
      end
    },
    {
      'html_tag_exists',
      function(task, remain)
        -- `remain` is the raw argument list such as ('img'); the tag name has
        -- to be taken out of the quotes before it can be looked up. Fall back
        -- to the raw text when it is not in that form.
        local tag = string.match(remain, "^%(%s*['\"]([^%s'\"]+)['\"]%s*%)$") or remain
        local tp = task:get_text_parts() or {}

        for _, p in ipairs(tp) do
          if p:is_html() then
            local hc = p:get_html()

            if hc and hc:has_tag(tag) then
              return 1
            end
          end
        end

        return 0
      end
    }
  }

  for _, f in ipairs(eval_funcs) do
    local pat = string.format('^%s', f[1])
    local first, last = string.find(arg, pat)

    if first then
      -- Everything after the function name is its argument list
      local func_arg = string.sub(arg, last + 1)
      return function(task)
        return f[2](task, func_arg)
      end
    end
  end
end
--- Recognise an SA pseudo-function (`exists:<header>` or `eval:<call>`).
-- @param line rule text starting with the function prefix, e.g.
--   "exists:Subject" or "eval:check_for_mime('mime_attachment')"
-- @return function(task) returning a number, or nil when `line` starts with
--   neither `exists:` nor `eval:`
local function maybe_parse_sa_function(line)
  -- The argument is everything after the FIRST ':'; later colons (for example
  -- "(?:...)" inside a regexp argument) belong to the argument itself
  local fname, arg = string.match(line, '^([^:]*):(.*)$')
  if arg == '' then
    arg = nil
  end

  lua_util.debugm(N, rspamd_config, 'trying to parse SA function %1 with args %2',
    fname or line, arg)
  local substitutions = {
    {'^exists:',
      function(task) -- filter
        local hdrs_check
        if arg == 'MESSAGEID' then
          hdrs_check = {
            'Message-ID',
            'X-Message-ID',
            'Resent-Message-ID'
          }
        elseif arg == 'ToCc' then
          hdrs_check = { 'To', 'Cc', 'Bcc' }
        else
          hdrs_check = {arg}
        end

        for _, h in ipairs(hdrs_check) do
          if task:has_header(h) then
            return 1
          end
        end
        return 0
      end,
    },
    {'^eval:',
      function(task)
        if not arg then
          -- `eval:` without a function cannot be resolved (and nil is not a
          -- usable cache key)
          rspamd_logger.errx(task, 'cannot find appropriate eval rule for function %1',
            arg)
          return 0
        end

        local func = func_cache[arg]
        if not func then
          func = gen_eval_rule(arg)
          func_cache[arg] = func
        end

        if not func then
          rspamd_logger.errx(task, 'cannot find appropriate eval rule for function %1',
            arg)
        else
          return func(task)
        end

        return 0
      end
    },
  }

  for _, s in ipairs(substitutions) do
    if string.find(line, s[1]) then
      return s[2]
    end
  end

  return nil
end
--- Join the words that follow the first `start` entries with single spaces.
-- @param words array of words of one configuration line
-- @param start number of leading words to leave out
-- @return the remaining words concatenated with " "
local function words_to_re(words, start)
  local remaining = fun.totable(fun.drop_n(start, words))

  return table.concat(remaining, " ")
end
--- Apply the flags of a `tflags` line to a rule.
-- Recognised flags are publish, multiple, nice and maxhits=<n>; anything else
-- is ignored. When the rule owns a regexp, its hit limit is then set to
-- maxhits if given, to 0 for `multiple`, and to 1 otherwise.
-- @param rule rule table to update
-- @param flags words of the line; the first word is not treated as a flag
local function process_tflags(rule, flags)
  local function apply_flag(flag)
    local hits = string.match(flag, '^maxhits=(%d+)$')

    if flag == 'publish' then
      rule['publish'] = true
    elseif flag == 'multiple' then
      rule['multiple'] = true
    elseif hits then
      rule['maxhits'] = tonumber(hits)
    elseif flag == 'nice' then
      rule['nice'] = true
    end
  end

  fun.each(apply_flag, fun.drop_n(1, flags))

  local re = rule['re']
  if not re then
    return
  end

  local limit = 1
  if rule['maxhits'] then
    limit = rule['maxhits']
  elseif rule['multiple'] then
    limit = 0
  end
  re:set_max_hits(limit)
end
--- Store the definition from a replace_tag/pre/inter/post line.
-- @param words words of the line: directive, name, then the replacement text
-- @param tbl table receiving name -> replacement text (words after the name
--   joined with spaces)
local function process_replace(words, tbl)
  local name = words[2]

  tbl[name] = words_to_re(words, 2)
end
--- Parse a SpamAssassin configuration stream and collect its rules.
-- Each line is trimmed, comments are dropped and the first word selects the
-- directive. Rule definitions (header, mimeheader, body, rawbody, full, uri,
-- meta) open a new current rule; the previous valid one is stored in `rules`.
-- `describe` and `tflags` refine the current rule; `score`, `freemail_domains`,
-- the black/whitelist directives and the replace_* directives fill the
-- corresponding module tables. Conditional sections (ifplugin, if !plugin(),
-- if, else, endif) are honoured as far as the plugin list allows; any other
-- `if` is treated as false.
-- @param f object providing a `lines()` iterator (such as an open file)
local function process_sa_conf(f)
  local cur_rule = {}
  local valid_rule = false

  -- Store the current rule in `rules` and start a new empty one
  local function insert_cur_rule()
    if not cur_rule['symbol'] then
      -- A rule without a name cannot be stored: report and drop it instead of
      -- indexing `rules` with nil
      rspamd_logger.errx(rspamd_config, 'bad rule definition: %1', cur_rule)
      cur_rule = {}
      valid_rule = false
      return
    end

    if cur_rule['type'] ~= 'meta' and cur_rule['publish'] then
      -- Create meta rule from this rule
      local nsym = '__fake' .. cur_rule['symbol']
      local nrule = {
        type = 'meta',
        symbol = cur_rule['symbol'],
        score = cur_rule['score'],
        meta = nsym,
        description = cur_rule['description'],
      }
      rules[nrule['symbol']] = nrule
      cur_rule['symbol'] = nsym
    end
    -- We have previous rule valid
    rules[cur_rule['symbol']] = cur_rule
    cur_rule = {}
    valid_rule = false
  end

  -- Extract the score from the words of a `score` line
  local function parse_score(words)
    if #words == 3 then
      -- score rule <x>
      lua_util.debugm(N, rspamd_config, 'found score for %1: %2', words[2], words[3])
      return tonumber(words[3])
    elseif #words == 6 then
      -- score rule <x1> <x2> <x3> <x4>
      -- we assume here that bayes and network are enabled and select <x4>
      lua_util.debugm(N, rspamd_config, 'found score for %1: %2', words[2], words[6])
      return tonumber(words[6])
    else
      rspamd_logger.errx(rspamd_config, 'invalid score for %1', words[2])
    end

    return 0
  end

  -- Add every address of a black/whitelist line to the named list. Keys are
  -- lowercased because the eval checks look addresses up in lower case.
  local function add_list_entries(list_name, words)
    for i = 2, #words do
      sa_lists[list_name][string.lower(words[i])] = 1
      sa_lists['elts'] = sa_lists['elts'] + 1
    end
  end

  -- Shared handling of `body`, `rawbody` and `full` lines when the third word
  -- is not a regexp: try to interpret the definition as a known function
  local function try_function_rule(words)
    local args = words_to_re(words, 2)
    local func = maybe_parse_sa_function(args)

    if func then
      cur_rule['type'] = 'function'
      cur_rule['symbol'] = words[2]
      cur_rule['function'] = func
      valid_rule = true
    else
      rspamd_logger.infox(rspamd_config, 'unknown function %1', args)
    end
  end

  local skip_to_endif = false
  local if_nested = 0
  -- Nesting depth of the conditional that switched skipping on; only an
  -- `else`/`endif` at this depth may switch it off again
  local skip_level = 0

  for l in f:lines() do
    -- The closure lets `return` act as `continue`
    (function ()
      local line = lua_util.rspamd_str_trim(l)
      -- Replace bla=~/re/ with bla =~ /re/ (#2372)
      line = line:gsub('([^%s])%s*([=!]~)%s*([^%s])', '%1 %2 %3')

      if string.len(line) == 0 or string.sub(line, 1, 1) == '#' then
        return
      end

      -- Unbalanced if/endif
      if if_nested < 0 then if_nested = 0 end
      if skip_to_endif then
        if string.match(line, '^endif') then
          if if_nested <= skip_level then
            skip_to_endif = false
          end
          if_nested = if_nested - 1
        elseif string.match(line, '^if') then
          if_nested = if_nested + 1
        elseif string.match(line, '^else') then
          -- Else counterpart for if: only the `else` of the conditional that
          -- is being skipped re-enables parsing
          if if_nested <= skip_level then
            skip_to_endif = false
          end
        end

        return
      else
        if string.match(line, '^ifplugin') then
          local ls = split(line)

          if_nested = if_nested + 1
          if not fun.any(function(pl)
            if pl == ls[2] then return true end
            return false
          end, known_plugins) then
            skip_to_endif = true
            skip_level = if_nested
          end
        elseif string.match(line, '^if !plugin%(') then
          local pname = string.match(line, '^if !plugin%(([A-Za-z:]+)%)')

          if_nested = if_nested + 1
          if fun.any(function(pl)
            if pl == pname then return true end
            return false
          end, known_plugins) then
            skip_to_endif = true
            skip_level = if_nested
          end
        elseif string.match(line, '^if') then
          -- Unknown if
          if_nested = if_nested + 1
          skip_to_endif = true
          skip_level = if_nested
        elseif string.match(line, '^else') then
          -- Else counterpart for if
          skip_to_endif = true
          skip_level = if_nested
        elseif string.match(line, '^endif') then
          if_nested = if_nested - 1
        end
      end

      -- Skip comments
      local words = fun.totable(fun.take_while(
        function(w) return string.sub(w, 1, 1) ~= '#' end,
        fun.filter(function(w)
          return w ~= "" end,
        fun.iter(split(line)))))

      if words[1] == "header" or words[1] == 'mimeheader' then
        -- header SYMBOL Header ~= /regexp/
        if valid_rule then
          insert_cur_rule()
        end
        if words[4] and (words[4] == '=~' or words[4] == '!~') then
          cur_rule['type'] = 'header'
          cur_rule['symbol'] = words[2]

          if words[4] == '!~' then
            cur_rule['not'] = true
          end

          cur_rule['re_expr'] = words_to_re(words, 4)
          local unset_comp = string.find(cur_rule['re_expr'], '%s+%[if%-unset:')
          if unset_comp then
            -- We have optional part that needs to be processed
            local unset = string.match(string.sub(cur_rule['re_expr'], unset_comp),
              '%[if%-unset:%s*([^%]%s]+)]')
            cur_rule['unset'] = unset
            -- Cut it down
            cur_rule['re_expr'] = string.sub(cur_rule['re_expr'], 1, unset_comp - 1)
          end

          cur_rule['re'] = rspamd_regexp.create(cur_rule['re_expr'])

          if not cur_rule['re'] then
            rspamd_logger.warnx(rspamd_config, "Cannot parse regexp '%1' for %2",
              cur_rule['re_expr'], cur_rule['symbol'])
          else
            cur_rule['re']:set_max_hits(1)
            handle_header_def(words[3], cur_rule)
          end

          if cur_rule['unset'] then
            cur_rule['ordinary'] = false
          end

          if words[1] == 'mimeheader' then
            cur_rule['mime'] = true
          else
            cur_rule['mime'] = false
          end

          if cur_rule['re'] and cur_rule['symbol'] and
            (cur_rule['header'] or cur_rule['function']) then
            valid_rule = true
            cur_rule['re']:set_max_hits(1)
            if cur_rule['header'] and cur_rule['ordinary'] then
              for _, h in ipairs(cur_rule['header']) do
                if type(h) == 'string' then
                  if cur_rule['mime'] then
                    rspamd_config:register_regexp({
                      re = cur_rule['re'],
                      type = 'mimeheader',
                      header = h,
                      pcre_only = is_pcre_only(cur_rule['symbol']),
                    })
                  else
                    rspamd_config:register_regexp({
                      re = cur_rule['re'],
                      type = 'header',
                      header = h,
                      pcre_only = is_pcre_only(cur_rule['symbol']),
                    })
                  end
                else
                  h['mime'] = cur_rule['mime']
                  if cur_rule['mime'] then
                    rspamd_config:register_regexp({
                      re = cur_rule['re'],
                      type = 'mimeheader',
                      header = h['header'],
                      pcre_only = is_pcre_only(cur_rule['symbol']),
                    })
                  else
                    if h['raw'] then
                      rspamd_config:register_regexp({
                        re = cur_rule['re'],
                        type = 'rawheader',
                        header = h['header'],
                        pcre_only = is_pcre_only(cur_rule['symbol']),
                      })
                    else
                      rspamd_config:register_regexp({
                        re = cur_rule['re'],
                        type = 'header',
                        header = h['header'],
                        pcre_only = is_pcre_only(cur_rule['symbol']),
                      })
                    end
                  end
                end
              end
              cur_rule['re']:set_limit(match_limit)
              cur_rule['re']:set_max_hits(1)
            end
          end
        else
          -- Maybe we know the function and can convert it
          try_function_rule(words)
        end
      elseif words[1] == "body" then
        -- body SYMBOL /regexp/
        if valid_rule then
          insert_cur_rule()
        end

        cur_rule['symbol'] = words[2]
        if words[3] and (string.sub(words[3], 1, 1) == '/'
            or string.sub(words[3], 1, 1) == 'm') then
          cur_rule['type'] = 'sabody'
          cur_rule['re_expr'] = words_to_re(words, 2)
          cur_rule['re'] = rspamd_regexp.create(cur_rule['re_expr'])
          if cur_rule['re'] then
            rspamd_config:register_regexp({
              re = cur_rule['re'],
              type = 'sabody',
              pcre_only = is_pcre_only(cur_rule['symbol']),
            })
            valid_rule = true
            cur_rule['re']:set_limit(match_limit)
            cur_rule['re']:set_max_hits(1)
          end
        else
          -- might be function
          try_function_rule(words)
        end
      elseif words[1] == "rawbody" then
        -- rawbody SYMBOL /regexp/
        if valid_rule then
          insert_cur_rule()
        end

        cur_rule['symbol'] = words[2]
        if words[3] and (string.sub(words[3], 1, 1) == '/'
            or string.sub(words[3], 1, 1) == 'm') then
          cur_rule['type'] = 'sarawbody'
          cur_rule['re_expr'] = words_to_re(words, 2)
          cur_rule['re'] = rspamd_regexp.create(cur_rule['re_expr'])
          if cur_rule['re'] then
            rspamd_config:register_regexp({
              re = cur_rule['re'],
              type = 'sarawbody',
              pcre_only = is_pcre_only(cur_rule['symbol']),
            })
            valid_rule = true
            cur_rule['re']:set_limit(match_limit)
            cur_rule['re']:set_max_hits(1)
          end
        else
          -- might be function
          try_function_rule(words)
        end
      elseif words[1] == "full" then
        -- full SYMBOL /regexp/
        if valid_rule then
          insert_cur_rule()
        end

        cur_rule['symbol'] = words[2]

        if words[3] and (string.sub(words[3], 1, 1) == '/'
            or string.sub(words[3], 1, 1) == 'm') then
          cur_rule['type'] = 'message'
          cur_rule['re_expr'] = words_to_re(words, 2)
          cur_rule['re'] = rspamd_regexp.create(cur_rule['re_expr'])
          cur_rule['raw'] = true
          if cur_rule['re'] then
            valid_rule = true
            rspamd_config:register_regexp({
              re = cur_rule['re'],
              type = 'body',
              pcre_only = is_pcre_only(cur_rule['symbol']),
            })
            cur_rule['re']:set_limit(match_limit)
            cur_rule['re']:set_max_hits(1)
          end
        else
          -- might be function
          try_function_rule(words)
        end
      elseif words[1] == "uri" then
        -- uri SYMBOL /regexp/
        if valid_rule then
          insert_cur_rule()
        end
        cur_rule['type'] = 'uri'
        cur_rule['symbol'] = words[2]
        cur_rule['re_expr'] = words_to_re(words, 2)
        cur_rule['re'] = rspamd_regexp.create(cur_rule['re_expr'])
        if cur_rule['re'] and cur_rule['symbol'] then
          valid_rule = true
          rspamd_config:register_regexp({
            re = cur_rule['re'],
            type = 'url',
            pcre_only = is_pcre_only(cur_rule['symbol']),
          })
          cur_rule['re']:set_limit(match_limit)
          cur_rule['re']:set_max_hits(1)
        end
      elseif words[1] == "meta" then
        -- meta SYMBOL expression
        if valid_rule then
          insert_cur_rule()
        end
        cur_rule['type'] = 'meta'
        cur_rule['symbol'] = words[2]
        cur_rule['meta'] = words_to_re(words, 2)
        if cur_rule['meta'] and cur_rule['symbol']
          and cur_rule['meta'] ~= '0' then
            valid_rule = true
        end
      elseif words[1] == "describe" and valid_rule then
        cur_rule['description'] = words_to_re(words, 2)
      elseif words[1] == "score" then
        scores[words[2]] = parse_score(words)
      elseif words[1] == 'freemail_domains' then
        fun.each(function(dom)
            table.insert(freemail_domains, '@' .. dom)
          end, fun.drop_n(1, words))
      elseif words[1] == 'blacklist_from' then
        add_list_entries('from_blacklist', words)
      elseif words[1] == 'whitelist_from' then
        add_list_entries('from_whitelist', words)
      elseif words[1] == 'whitelist_to' then
        add_list_entries('to_whitelist', words)
      elseif words[1] == 'blacklist_to' then
        add_list_entries('to_blacklist', words)
      elseif words[1] == 'tflags' then
        process_tflags(cur_rule, words)
      elseif words[1] == 'replace_tag' then
        process_replace(words, replace['tags'])
      elseif words[1] == 'replace_pre' then
        process_replace(words, replace['pre'])
      elseif words[1] == 'replace_inter' then
        process_replace(words, replace['inter'])
      elseif words[1] == 'replace_post' then
        process_replace(words, replace['post'])
      elseif words[1] == 'replace_rules' then
        fun.each(function(r) table.insert(replace['rules'], r) end,
          fun.drop_n(1, words))
      end
    end)()
  end
  if valid_rule then
    insert_cur_rule()
  end
end
--- Return the sign factor applied to a rule score.
-- @param sym symbol name
-- @param rule rule table; its `nice` and `score` fields are read
-- @return 0.0 when none of the first two characters of `sym` differs from
--   '_' (so "__NAME", "_" and "" all qualify), -1.0 for rules flagged nice or
--   having a negative score, 1.0 otherwise
local function calculate_score(sym, rule)
  local prefix = string.sub(sym, 1, 2)

  -- No character other than '_' among the first two
  if not string.find(prefix, '[^_]') then
    return 0.0
  end

  local negative = rule['nice'] or (rule['score'] and rule['score'] < 0.0)
  if negative then
    return -1.0
  end

  return 1.0
end
--- Register under `sym` a meta rule whose expression is just the symbol of
-- `rule`, carrying over that rule's score and description.
-- @param sym key in `rules` that receives the new meta rule
-- @param rule rule table providing symbol, score and description
local function add_sole_meta(sym, rule)
  rules[sym] = {
    type = 'meta',
    meta = rule.symbol,
    score = rule.score,
    description = rule.description,
  }
end
--- Match `data` against a regexp the way the rule flags demand.
-- @param data text to match
-- @param re regexp object providing `match` and `matchn`; nil/false gives 0
-- @param raw forwarded to the regexp methods unchanged
-- @param rule rule table; `multiple` selects counting mode and `maxhits`
--   (default -1) is the limit passed to `matchn`
-- @return in counting mode the value returned by `matchn`; otherwise 1 when
--   `match` succeeds and 0 when it does not
local function sa_regexp_match(data, re, raw, rule)
  if not re then
    return 0
  end

  if not rule['multiple'] then
    if re:match(data, raw) then
      return 1
    end
    return 0
  end

  local limit = rule['maxhits'] or -1
  return 0 + re:matchn(data, limit, raw)
end
--- Expand ReplaceTags markup inside a rule expression.
-- `<pre NAME>`, `<inter NAME>` and `<post NAME>` markers are removed and
-- select the text put before, between and after tags; every `<TAG>` known to
-- replace['tags'] is then substituted by pre .. tag value .. post.
-- @param str rule expression text
-- @return true and the rewritten text when a tag was substituted, otherwise
--   false and the text with pre/inter/post markers processed
local function apply_replacements(str)
  local pre = ""
  local post = ""
  local inter = ""

  -- Quote Lua pattern magic characters so that a name is matched literally
  local function escape_pattern(s)
    return (string.gsub(s, '[%^%$%(%)%%%.%[%]%*%+%-%?]', '%%%0'))
  end

  -- Quote '%' so that a text can be used as gsub replacement verbatim
  local function escape_replacement(s)
    return (string.gsub(s, '%%', '%%%%'))
  end

  -- Remove `<prefix NAME>` markers for the names of `tbl`; returns the
  -- stripped string and the value of the matching entry (if any)
  local function check_specific_tag(prefix, s, tbl)
    local replacement = nil
    local ret = s
    fun.each(function(n, t)
      local ns, matches = string.gsub(s,
        string.format("<%s%s>", prefix, escape_pattern(n)), "")
      if matches > 0 then
        replacement = t
        ret = ns
      end
    end, tbl)

    return ret, replacement
  end

  local repl
  str, repl = check_specific_tag("pre ", str, replace['pre'])
  if repl then
    pre = repl
  end
  str, repl = check_specific_tag("inter ", str, replace['inter'])
  if repl then
    inter = repl
  end
  str, repl = check_specific_tag("post ", str, replace['post'])
  if repl then
    post = repl
  end

  -- XXX: ugly hack
  -- Put the `inter` text between adjacent tags. An empty `inter` would leave
  -- the string unchanged, so the substitution is skipped in that case.
  if inter ~= "" then
    str = string.gsub(str, "><", ">" .. escape_replacement(inter) .. "<")
  end

  local function replace_all_tags(s)
    local sstr = s
    fun.each(function(n, t)
      local rep = escape_replacement(string.format("%s%s%s", pre, t, post))
      sstr = string.gsub(sstr, string.format("<%s>", escape_pattern(n)), rep)
    end, replace['tags'])

    return sstr
  end

  local s = replace_all_tags(str)

  if str ~= s then
    return true, s
  end

  return false, str
end
--- Extract the atom at the beginning of an expression string.
-- The atom is the longest prefix that contains none of the delimiters
-- comma, space, tab, newline, '(', ')', '>', '<', '+', '!', '|' and '&'.
-- The delimiters are listed in a character set, so every character of the
-- input is compared literally and is never interpreted as a Lua pattern.
-- @param str expression text positioned at the start of an atom
-- @return the atom (an empty string when `str` starts with a delimiter)
local function parse_atom(str)
  return (string.match(str, '^[^, \t()><+!|&\n]*'))
end
--- Create the atom evaluator used while processing a meta expression.
-- @param result_name name of the scan result the expression is evaluated for
-- @param task task object
-- @return function(atom): for an atom registered in `atoms` the result of its
--   callback (which may be nil); otherwise 1 when the task has the symbol
--   (after translation through symbols_replacements) in `result_name`, else 0
local function gen_process_atom_cb(result_name, task)
  return function (atom)
    local handler = atoms[atom]

    if handler then
      local outcome = handler(task, result_name)

      if not outcome then
        lua_util.debugm(N, task, 'metric: %s, atom: %s, NULL result', result_name, atom)
      elseif outcome > 0 then
        lua_util.debugm(N, task, 'metric: %s, atom: %s, result: %s', result_name, atom, outcome)
      end

      return outcome
    end

    -- This is likely external atom
    local real_sym = symbols_replacements[atom] or atom

    if task:has_symbol(real_sym, result_name) then
      lua_util.debugm(N, task, 'external atom: %s, result: 1, named_result: %s', real_sym, result_name)
      return 1
    end

    lua_util.debugm(N, task, 'external atom: %s, result: 0, , named_result: %s', real_sym, result_name)
    return 0
  end
end
local function post_process()
-- Replace rule tags
local ntags = {}
local function rec_replace_tags(tag, tagv)
if ntags[tag] then return ntags[tag] end
fun.each(function(n, t)
if n ~= tag then
local s, matches = string.gsub(tagv, string.format("<%s>", n), t)
if matches > 0 then
ntags[tag] = rec_replace_tags(tag, s)
end
end
end, replace['tags'])
if not ntags[tag] then ntags[tag] = tagv end
return ntags[tag]
end
fun.each(function(n, t)
rec_replace_tags(n, t)
end, replace['tags'])
fun.each(function(n, t)
replace['tags'][n] = t
end, ntags)
fun.each(function(r)
local rule = rules[r]
if rule['re_expr'] and rule['re'] then
local res, nexpr = apply_replacements(rule['re_expr'])
if res then
local nre = rspamd_regexp.create(nexpr)
if not nre then
rspamd_logger.errx(rspamd_config, 'cannot apply replacement for rule %1', r)
--rule['re'] = nil
else
local old_max_hits = rule['re']:get_max_hits()
lua_util.debugm(N, rspamd_config, 'replace %1 -> %2', r, nexpr)
rspamd_config:replace_regexp({
old_re = rule['re'],
new_re = nre,
pcre_only = is_pcre_only(rule['symbol']),
})
rule['re'] = nre
rule['re_expr'] = nexpr
nre:set_limit(match_limit)
nre:set_max_hits(old_max_hits)
end
end
end
end, replace['rules'])
fun.each(function(key, score)
if rules[key] then
rules[key]['score'] = score
end
end, scores)
-- Header rules
fun.each(function(k, r)
local f = function(task)
local raw = false
local check = {}
-- Cached path for ordinary expressions
if r['ordinary'] then
local h = r['header'][1]
local t = 'header'
if h['raw'] then
t = 'rawheader'
end
if not r['re'] then
rspamd_logger.errx(task, 're is missing for rule %1 (%2 header)', k,
h['header'])
return 0
end
local ret = process_regexp_opt(r.re, task, t, h.header, h.strong)
if r['not'] then
if ret ~= 0 then
ret = 0
else
ret = 1
end
end
return ret
end
-- Slow path
fun.each(function(h)
local hname = h['header']
local hdr
if h['mime'] then
local parts = task:get_parts()
for _, p in ipairs(parts) do
local m_hdr = p:get_header_full(hname, h['strong'])
if m_hdr then
if not hdr then
hdr = {}
end
for _, mh in ipairs(m_hdr) do
table.insert(hdr, mh)
end
end
end
else
hdr = task:get_header_full(hname, h['strong'])
end
if hdr then
for _, rh in ipairs(hdr) do
-- Subject for optimization
local str
if h['raw'] then
str = rh['value']
raw = true
else
str = rh['decoded']
end
if not str then return 0 end
if h['function'] then
str = h['function'](str)
end
if type(str) == 'string' then
table.insert(check, str)
else
for _, c in ipairs(str) do
table.insert(check, c)
end
end
end
elseif r['unset'] then
table.insert(check, r['unset'])
end
end, r['header'])
if #check == 0 then
if r['not'] then return 1 end
return 0
end
local ret = 0
for _, c in ipairs(check) do
local match = sa_regexp_match(c, r['re'], raw, r)
if (match > 0 and not r['not']) or (match == 0 and r['not']) then
ret = 1
end
end
return ret
end
if r['score'] then
local real_score = r['score'] * calculate_score(k, r)
if math.abs(real_score) > meta_score_alpha then
add_sole_meta(k, r)
end
end
atoms[k] = f
end,
fun.filter(function(_, r)
return r['type'] == 'header' and r['header']
end,
rules))
-- Custom function rules
fun.each(function(k, r)
local f = function(task)
local res = r['function'](task)
if res and res > 0 then
return res
end
return 0
end
if r['score'] then
local real_score = r['score'] * calculate_score(k, r)
if math.abs(real_score) > meta_score_alpha then
add_sole_meta(k, r)
end
end
atoms[k] = f
end,
fun.filter(function(_, r)
return r['type'] == 'function' and r['function']
end,
rules))
-- Parts rules
fun.each(function(k, r)
local f = function(task)
if not r['re'] then
rspamd_logger.errx(task, 're is missing for rule %1', k)
return 0
end
local t = 'mime'
if r['raw'] then t = 'rawmime' end
return process_regexp_opt(r.re, task, t)
end
if r['score'] then
local real_score = r['score'] * calculate_score(k, r)
if math.abs(real_score) > meta_score_alpha then
add_sole_meta(k, r)
end
end
atoms[k] = f
end,
fun.filter(function(_, r)
return r['type'] == 'part'
end, rules))
-- SA body rules
fun.each(function(k, r)
local f = function(task)
if not r['re'] then
rspamd_logger.errx(task, 're is missing for rule %1', k)
return 0
end
local t = r['type']
local ret = process_regexp_opt(r.re, task, t)
return ret
end
if r['score'] then
local real_score = r['score'] * calculate_score(k, r)
if math.abs(real_score) > meta_score_alpha then
add_sole_meta(k, r)
end
end
atoms[k] = f
end,
fun.filter(function(_, r)
return r['type'] == 'sabody' or r['type'] == 'message' or r['type'] == 'sarawbody'
end, rules))
-- URL rules
fun.each(function(k, r)
local f = function(task)
if not r['re'] then
rspamd_logger.errx(task, 're is missing for rule %1', k)
return 0
end
return process_regexp_opt(r.re, task, 'url')
end
if r['score'] then
local real_score = r['score'] * calculate_score(k, r)
if math.abs(real_score) > meta_score_alpha then
add_sole_meta(k, r)
end
end
atoms[k] = f
end,
fun.filter(function(_, r)
return r['type'] == 'uri'
end,
rules))
-- Meta rules
-- For every rule of type 'meta': compile r['meta'] into an expression, register
-- the rule as a symbol whose callback evaluates that expression, and store the
-- callback in `atoms` (unless an atom with the same name already exists)
fun.each(function(k, r)
  -- Assigned after meta_cb is defined; meta_cb reads it as an upvalue
  local expression = nil
  -- Meta function callback
  -- Here are dragons!
  -- This function can be called from 2 DIFFERENT types of invocations:
  -- 1) Invocation from Rspamd itself where `res_name` will be nil
  -- 2) Invocation from other meta during expression:process_traced call
  -- So we need to distinguish that and return different stuff to be able to deal with atoms
  local meta_cb = function(task, res_name)
    lua_util.debugm(N, task, 'meta callback for %s; result name: %s', k, res_name)
    -- Per-task table: cached[symbol][result name] = expression result
    local cached = task:cache_get('sa_metas_processed')
    -- We avoid many task methods invocations here (likely)
    if not cached then
      cached = {}
      task:cache_set('sa_metas_processed', cached)
    end
    local already_processed = cached[k]
    -- Exclude elements that are named in the same way as the symbol itself
    local function exclude_sym_filter(sopt)
      return sopt ~= k
    end
    if not (already_processed and already_processed[res_name or 'default']) then
      -- Execute symbol: evaluate the expression for one named result, insert
      -- the symbol when the result is positive and remember the outcome
      local function exec_symbol(cur_res)
        local res,trace = expression:process_traced(gen_process_atom_cb(cur_res, task))
        lua_util.debugm(N, task, 'meta result for %s: %s; result name: %s', k, res, cur_res)
        if res > 0 then
          -- Symbol should be one shot to make it working properly
          task:insert_result_named(cur_res, k, res, fun.totable(fun.filter(exclude_sym_filter, trace)))
        end
        if not cached[k] then
          cached[k] = {}
        end
        cached[k][cur_res] = res
      end
      if not res_name then
        -- Invoke for all named results
        local named_results = task:get_all_named_results()
        for _,cur_res in ipairs(named_results) do
          exec_symbol(cur_res)
        end
      else
        -- Invoked from another meta
        exec_symbol(res_name)
        return cached[k][res_name] or 0
      end
    else
      -- We have cached the result
      local res = already_processed[res_name or 'default'] or 0
      lua_util.debugm(N, task, 'cached meta result for %s: %s; result name: %s',
        k, res, res_name)
      if res_name then
        return res
      end
    end
    -- No return if invoked directly from Rspamd as we use task:insert_result_named directly
  end
  expression = rspamd_expression.create(r['meta'], parse_atom, rspamd_config:get_mempool())
  if not expression then
    -- Pass the expression text as an argument instead of concatenating it into
    -- the format string, so '%' sequences inside it are not read as placeholders
    rspamd_logger.errx(rspamd_config, 'Cannot parse expression %s', r['meta'])
  else
    if r['score'] then
      rspamd_config:set_metric_symbol{
        name = k, score = r['score'],
        description = r['description'],
        priority = 2,
        one_shot = true
      }
      scores_added[k] = 1
      rspamd_config:register_symbol{
        name = k,
        weight = calculate_score(k, r),
        callback = meta_cb
      }
    else
      -- Add 0 score to avoid issues
      rspamd_config:register_symbol{
        name = k,
        weight = calculate_score(k, r),
        callback = meta_cb,
        score = 0,
      }
    end
    -- Keep the compiled expression on the rule; the dependency passes read it
    r['expression'] = expression
    if not atoms[k] then
      atoms[k] = meta_cb
    end
  end
end,
fun.filter(function(_, r)
  return r['type'] == 'meta'
end,
rules))
-- Scan the atoms of every compiled meta expression: an atom with no entry in
-- `atoms` is treated as a foreign symbol and registered as a dependency.
-- This pass handles the direct dependencies:
fun.each(function(meta_name, meta_rule)
  local expr = meta_rule.expression
  if expr then
    for _, atom in ipairs(expr:atoms()) do
      if not atoms[atom] then
        local foreign_sym = replace_symbol(atom)
        -- Per-meta set of dependencies that were already registered
        local known = external_deps[meta_name]
        if not known then
          known = {}
          external_deps[meta_name] = known
        end
        if not known[foreign_sym] then
          rspamd_config:register_dependency(meta_name, foreign_sym)
          known[foreign_sym] = true
          lua_util.debugm(N, rspamd_config,
            'atom %1 is a direct foreign dependency, ' ..
            'register dependency for %2 on %3',
            atom, meta_name, foreign_sym)
        end
      end
    end
  end
end,
fun.filter(function(_, meta_rule)
  return meta_rule.type == 'meta'
end,
rules))
-- Indirect dependencies: a meta inherits the recorded dependencies of every
-- atom it references. Passes are repeated until one registers nothing new.
local nchanges
repeat
  nchanges = 0
  fun.each(function(meta_name, meta_rule)
    local expr = meta_rule.expression
    if not expr then
      return
    end
    for _, atom in ipairs(expr:atoms()) do
      local atom_deps = external_deps[atom]
      if type(atom_deps) == 'table' then
        for dep in pairs(atom_deps) do
          if not external_deps[meta_name] then
            external_deps[meta_name] = {}
          end
          if not external_deps[meta_name][dep] then
            rspamd_config:register_dependency(meta_name, dep)
            external_deps[meta_name][dep] = true
            lua_util.debugm(N, rspamd_config,
              'atom %1 is an indirect foreign dependency, ' ..
              'register dependency for %2 on %3',
              atom, meta_name, dep)
            nchanges = nchanges + 1
          end
        end
      else
        -- First time this atom is seen: seed its dependency set with the
        -- replacement symbol when there is one, otherwise with an empty set
        local rspamd_symbol, replaced_symbol = replace_symbol(atom)
        if replaced_symbol then
          external_deps[atom] = {[rspamd_symbol] = true}
        else
          external_deps[atom] = {}
        end
      end
    end
  end,
  fun.filter(function(_, meta_rule)
    return meta_rule.type == 'meta'
  end,
  rules))
until nchanges == 0
-- Every entry of `scores` that is not marked in `scores_added` is registered
-- as a metric symbol with the 'ignore' flag
fun.each(function(sym, sym_score)
  if scores_added[sym] then
    return
  end
  rspamd_config:set_metric_symbol{
    name = sym,
    score = sym_score,
    priority = 2,
    flags = 'ignore',
  }
end, scores)
-- Build the freemail trie when freemail domains are defined, then log how much
-- was loaded
if freemail_domains then
  freemail_trie = rspamd_trie.create(freemail_domains)
  local n_domains = #freemail_domains
  rspamd_logger.infox(rspamd_config, 'loaded %1 freemail domains definitions',
    n_domains)
end
local n_list_elts = sa_lists.elts
rspamd_logger.infox(rspamd_config, 'loaded %1 blacklist/whitelist elements',
  n_list_elts)
end
-- Module entry: read the plugin options, load every configured SA rules file
-- and either post-process the loaded rules or disable the module
local has_rules = false
if type(section) == "table" then
  --- Parses every file matching a glob pattern as an SA rules file.
  -- Sets `has_rules` once at least one file has been opened and parsed.
  -- @param pattern glob pattern taken from the configuration
  local function load_sa_files(pattern)
    local files = util.glob(pattern)
    if not files or #files == 0 then
      rspamd_logger.errx(rspamd_config, "cannot find any files matching pattern %s", pattern)
      return
    end
    for _, matched in ipairs(files) do
      local f = io.open(matched, "r")
      if f then
        rspamd_logger.infox(rspamd_config, 'loading SA rules from %s', matched)
        process_sa_conf(f)
        has_rules = true
        -- Release the handle instead of waiting for the garbage collector;
        -- io.type yields 'closed file' if process_sa_conf closed it already
        if io.type(f) == 'file' then
          f:close()
        end
      else
        rspamd_logger.errx(rspamd_config, "cannot open %1", matched)
      end
    end
  end

  -- Reserved option keys, applied only when they have the expected type
  if type(section.pcre_only) == 'table' then
    pcre_only_regexps = lua_util.list_to_hash(section.pcre_only)
  end
  if type(section.alpha) == 'number' then
    meta_score_alpha = section.alpha
  end
  if type(section.match_limit) == 'number' then
    match_limit = section.match_limit
  end

  -- Every other key holds either one glob pattern or a list of them
  for k, fn in pairs(section) do
    if k ~= 'pcre_only' and k ~= 'alpha' and k ~= 'match_limit' then
      if type(fn) == 'table' then
        for _, elt in ipairs(fn) do
          load_sa_files(elt)
        end
      else
        -- assume string
        load_sa_files(fn)
      end
    end
  end
end

if has_rules then
  post_process()
else
  lua_util.disable_module(N, "config")
end
File Manager Version 1.0, Coded By Lucas
Email: hehe@yahoo.com