# File Manager
#! /usr/bin/crm
#
# test_svm.crm - test SVM filter
# Copyright 2009 William S. Yerazunis.
# This file is under GPLv3, as described in COPYING.
# WINDOW is kept as the very first executable statement (per the CRM114
# reference, a leading WINDOW stops the interpreter from reading standard
# input up front; this script takes its data from files instead).
window

# --- Input selection ---------------------------------------------------
# Index of files to run, in TREC index format, that is, one entry per line:
#     "ham"|"spam" relative/file/name
isolate (:index_file:) /index/

# --- Command-line tunables (<default> leaves an already-set value alone) -
# :P:  if set with --P, learn by paragraphs instead of whole messages
isolate (:P:) <default> //
# :L:  run the microgroomed learn modulo this cycle
isolate (:L:) <default> /100000/
# :S:  number of messages to skip off the start of the index
isolate (:S:) <default> /0/
# :N:  number of index entries used for training
isolate (:N:) <default> /1000/
# :M:  number of index entries classified after training
isolate (:M:) <default> /1000/

# --- SVM statistics files ----------------------------------------------
isolate (:ham_svm:) /ham.svm/
isolate (:spam_svm:) /spam.svm/
isolate (:ham_vs_spam:) /ham_vs_spam.svmhyp/

# --- Counters ----------------------------------------------------------
isolate (:nham:) /0/
isolate (:nspam:) /0/
isolate (:correct:) /0/

# --- Scratch variables filled in by the loops --------------------------
isolate (:total: :lino: :messago: :filo: :label: :stat:)

#output /N = ':*:N:'\n/
# Remember the requested test-set size; :M: itself is counted down later.
eval (:total:)/:*:M: /
# Pull the whole index into memory; the loops walk it line by line.
input (:whole_index_file:) [:*:index_file:]
# Skip first :S: files
# Each pass consumes one line of the index and counts :S: down by one.
# The :S: > 0 guard comes FIRST so that :S: = 0 (the default) consumes
# nothing; checking after the match would always discard one index entry.
output /Skipping first :*:S: files.\n/
{
    # Loop guard: a false comparison FAILs out of the block.
    eval /:@: :*:S: > 0 :/
    # Advance past the next index line (also exits if the index runs out).
    match <fromend> [:whole_index_file:] /[^\n]*\n/ (:lino:)
    eval (:S:) /:@: :*:S: - 1 :/
    liaf
}
# Training phase.  Each pass of the outer loop takes the next line of the
# index, reads the file it names, and learns that text into the ham or the
# spam SVM file according to the label on the line.  :N: counts down to zero.
output /Learning the next :*:N: files.\n/
{
# Loop guard: the block is left once :N: is no longer above zero.
eval /:@: :*:N: > 0 :/
#output /N = ':*:N:'\n/
{
# A failing statement in this inner block (for instance no index line left
# to match) skips to its closing brace; the :N: decrement below still runs.
match <fromend> [:whole_index_file:] /[^\n]*\n/ (:lino:)
#output /:*:lino:/
# First token of the index line is the label, second token is the file name.
match <nomultiline> [:lino:] /([[:graph:]]+)[[:space:]]+([[:graph:]]+)/(:: :label: :filo:)
# Load the message text; the "0 10000" arguments bound how much is read.
input (:messago:) [:*:filo: 0 10000]
#output /learning file :*:filo:\n/
{
# Ham branch: taken when the label contains "ham".
match /ham/ [:label:]
#output /This is a ham!\n/
eval (:nham:) /:@: :*:nham: + 1:/
{
# Paragraph mode (:P: contains SET): learn one chunk per pass; liaf repeats
# the block until the <fromend> match finds no further chunk.
# NOTE(review): this loop ends on a failed match, so the alius branch that
# follows looks like it also learns the whole message in paragraph mode -
# confirm that this is intended.
match [:P:] /SET/
match [:messago:] <fromend> (:one_para:) \
/([[:graph:]]+[[:space:]]+){7}.*?\n\n/
learn (:*:ham_svm:) < svm unigram unique> \
[:one_para:] /[[:graph:]]+/ /0 0 100 1e-3 1 0.5 1/
liaf
}
alius
# Whole-message learn into the ham file.
learn (:*:ham_svm:) < svm unigram unique > \
[:messago:] /[[:graph:]]+/ /0 0 100 1e-3 1 0.5 1/
}
alius
{
# Spam branch: same structure as the ham branch, writing to the spam file.
match /spam/ [:label:]
#output /This is a spam!\n/
eval (:nspam:) /:@: :*:nspam: + 1:/
{
# Paragraph mode, see the NOTE(review) in the ham branch.
match [:P:] /SET/
match [:messago:] <fromend> (:one_para:) \
/([[:graph:]]+[[:space:]]+){7}.*?\n\n/
learn (:*:spam_svm:) < svm unigram unique > \
[:one_para:] /[[:graph:]]+/ /0 0 100 1e-3 1 0.5 1/
liaf
}
alius
# Whole-message learn into the spam file.
learn (:*:spam_svm:) < svm unigram unique > \
[:messago:] /[[:graph:]]+/ /0 0 100 1e-3 1 0.5 1/
}
}
# One index entry handled (or skipped); count it off.
eval (:N:) /:@: :*:N: - 1:/
{
# When the remaining count :N: is an exact multiple of :L:, run a
# microgroomed learn across the three SVM files and print a progress mark.
eval /:@: :*:N: % :*:L: = 0 :/
learn (:*:ham_svm: |:*:spam_svm:|:*:ham_vs_spam:) \
< svm unigram unique microgroom > \
/[[:graph:]]+/ /0 0 100 1e-3 1 0.5 1.0/
output /*/
}
liaf
}
# Training summary: how many messages of each label were counted.
output /total number of training spam is :*:nspam:\ntotal number of training ham is :*:nham:\n/

# Closing learn over the ham file, the spam file and the hyperplane file,
# this time without the microgroom flag.
output /Calculating ideal SVM to separate the classes.\n/
learn (:*:ham_svm: |:*:spam_svm:|:*:ham_vs_spam:) \
    < svm unigram unique > \
    /[[:graph:]]+/ /0 0 100 1e-3 1 0.5 1/
output /Starting classify on next :*:M: files.\n/
##start classify
# Test phase.  Each pass takes the next index line, classifies the file it
# names against the trained SVM files, prints the classifier status, and
# bumps :correct: when the verdict agrees with the label.  :M: counts down
# to zero.
{
    # Loop guard: leave once :M: is no longer above zero.
    eval /:@: :*:M: > 0 :/
    {
        # Any failure in here (for instance the index running out) skips to
        # the closing brace; the :M: decrement below still runs.
        match <fromend> [:whole_index_file:] /[^\n]*\n/ (:lino:)
        output /start classify :*:lino:/
        # First token is the label, second token is the file name.
        match <nomultiline> [:lino:] /([[:graph:]]+)[[:space:]]+([[:graph:]]+)/(:: :label: :filo:)
        input (:messago:) [:*:filo: 0 10000]
        {
            # Own brace pair on purpose: CLASSIFY FAILs when the text does
            # not land in the first class, and that failure must not skip
            # the scoring below.
            # The file names come from the same variables the training phase
            # writes to, so overriding :ham_svm: / :spam_svm: / :ham_vs_spam:
            # keeps learn and classify on the same files.
            classify (:*:ham_svm:|:*:spam_svm:|:*:ham_vs_spam:) (:stat:)[:messago:] < svm unigram unique > /[[:graph:]]+/ /0 0 100 1e-3 1 0.5 1/
        }
        output /:*:stat:\n/
        {
            # Score against the parsed :label:, not the raw index line: the
            # line also carries the file path, which may itself contain
            # "ham" or "spam" and would then be miscounted.
            match [:stat:] /succeeds/
            match [:label:] /ham/
            eval (:correct:) /:@: :*:correct: + 1:/
        }
        alius
        {
            match [:stat:] /fails/
            match [:label:] /spam/
            eval (:correct:) /:@: :*:correct: + 1:/
        }
    }
    eval (:M:) /:@: :*:M: - 1:/
    liaf
}
# Final report.  :total: holds the value :M: had before the classify loop
# counted it down; :correct: holds the number of matching verdicts.
output /total=:*:total:\n/
output /correct=:*:correct:\n/
# Convert the count into a percentage in two steps: ratio first, then scale
# by 100.  :correct: is overwritten, so the raw count is gone after this.
# NOTE(review): a :total: of 0 would make the first step divide by zero -
# confirm whether --M=0 needs to be supported.
eval (:correct:) /:@: (:*:correct:) \/ (:*:total:) : /
eval (:correct:) /:@: :*:correct: * 100 :/
# NOTE(review): unlike the other messages this one has no trailing newline.
output /accuracy = :*:correct:%/
#output /total number of predicted texts = :*:total:,number of correctly classified texts = :*:correct:\n/
# File Manager Version 1.0, Coded By Lucas
# Email: hehe@yahoo.com