00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025 #include <stdlib.h>
00026 #include <ctype.h>
00027 #include <string.h>
00028
00029 #ifdef _WIN32
00030 #include <io.h>
00031 #include <process.h>
00032 #else
00033 #include <errno.h>
00034 #include <sys/types.h>
00035 #include <unistd.h>
00036 #endif
00037
00038 #include "SpamUtil.h"
00039 #include "MailHeader.h"
00040 #include "MailBody.h"
00041 #include "MailFilter.h"
00042
00043
00044 using namespace std;
00045
00046
00050 const char *MailFilter::getNewTempFileName()
00051 {
00052 const char* TEMP_NAME_ROOT = "mail_temp";
00053 const size_t BUF_SIZE = 64;;
00054 char *pBuf = new char[ BUF_SIZE ];
00055
00056 int pid = getpid();
00057
00058 mFileCount++;
00059 sprintf(pBuf, "%s_%d_%d", TEMP_NAME_ROOT, pid, mFileCount );
00060 fileNames.push_back( pBuf );
00061 return pBuf;
00062 }
00063
00064
00065
00066 FILE *MailFilter::openFile( const char *fileName,
00067 const char *mode,
00068 const char *callingFunc )
00069 {
00070 char msgbuf[ 128];
00071
00072 FILE *fp = fopen( fileName, mode );
00073 if (fp == 0) {
00074 char *err_reason = strerror( errno);
00075 sprintf(msgbuf, "error opening %s. Reason = %s", fileName, err_reason );
00076 log.log(Logger::ERROR, callingFunc, msgbuf );
00077 }
00078 else {
00079 sprintf(msgbuf, "Opened file %s", fileName );
00080 log.log(Logger::DEBUG, callingFunc, msgbuf );
00081 }
00082 return fp;
00083 }
00084
00085
00086 void MailFilter::closeFile( FILE *fp,
00087 const char *fileName,
00088 const char *callingFunc )
00089 {
00090 char msgbuf[ 128];
00091 if (fp != 0) {
00092 fflush( fp );
00093 if (fclose( fp ) != 0) {
00094 char *err_reason = strerror( errno );
00095 sprintf(msgbuf, "Error closing %s. Reason = %s", fileName, err_reason );
00096 log.log(Logger::ERROR, callingFunc, msgbuf );
00097 }
00098 else {
00099 sprintf(msgbuf, "Close file %s", fileName );
00100 log.log(Logger::DEBUG, callingFunc, msgbuf );
00101 }
00102 }
00103 else {
00104 log.log(Logger::DEBUG, callingFunc, "MailFilter::closeFile called with null file ptr.");
00105 }
00106 }
00107
00108
00115 bool MailFilter::writeLine(const char *buf,
00116 FILE *fp,
00117 const char *fileName,
00118 const char *callingFunc )
00119 {
00120 bool writeOK = true;
00121
00122 if (fputs( buf, fp ) == EOF) {
00123 writeOK = false;
00124 char msgbuf[128];
00125 char *err_reason = strerror( errno );
00126 sprintf(msgbuf, "Error writing to %s. Reason = %s", fileName, err_reason );
00127 log.log(Logger::ERROR, callingFunc, msgbuf );
00128 }
00129 return writeOK;
00130 }
00131
00132
00137 const char *MailFilter::readLine(char *buf,
00138 const size_t bufSize,
00139 FILE *fp,
00140 const char *fileName,
00141 const char *callingFunc )
00142 {
00143 char *inLine = 0;
00144 *buf = '\0';
00145 if ((inLine = fgets( buf, bufSize, fp )) == 0) {
00146 if (! feof(fp)) {
00147 char msgbuf[128];
00148 char *err_reason = strerror( errno );
00149 if (fileName != 0) {
00150 sprintf(msgbuf, "Error reading from %s. Reason = %s", fileName, err_reason );
00151 }
00152 else if (fp == stdin) {
00153 sprintf(msgbuf, "Error reading from stdin. Reason = %s", err_reason );
00154 }
00155 log.log(Logger::ERROR, callingFunc, msgbuf );
00156 }
00157 }
00158 return inLine;
00159 }
00160
00161
00166 MailFilter::~MailFilter()
00167 {
00168 size_t numFiles = fileNames.size();
00169 for (int i = 0; i < numFiles; i++) {
00170 char *pStr = fileNames[i];
00171 delete [] pStr;
00172 }
00173 }
00174
00175
00176
00206 bool MailFilter::isFromLine(const char *buf)
00207 {
00208 static const char *FROM = "From ";
00209 static const size_t FROM_LEN = strlen( FROM );
00210 static const char *dow[] = {"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", 0 };
00211 static const char *mon[] = {"Jan", "Feb", "Mar", "Apr",
00212 "May", "Jun", "Jul", "Aug",
00213 "Sep", "Oct", "Nov", "Dec", 0 };
00214 char msgbuf[128];
00215 bool isFrom = false;
00216
00217 if (buf != 0) {
00218
00219 if (strncmp(buf, FROM, FROM_LEN) == 0) {
00220 sprintf(msgbuf, "Found from line: %s", buf );
00221 log.log(Logger::DEBUG, "isFromLine", msgbuf);
00222
00223 const char *pDOW = 0;
00224 for (const char **pDay = dow; *pDay != 0; pDay++) {
00225 if ((pDOW = strstr(buf, *pDay)) != 0) {
00226 break;
00227 }
00228 }
00229 if (pDOW != 0) {
00230 const char *pStartMonth = 0;
00231 for (const char **pMon = mon; *pMon != 0; pMon++) {
00232 if ((pStartMonth = strstr(pDOW+3, *pMon)) != 0) {
00233 break;
00234 }
00235 }
00236
00237 if (pStartMonth != 0) {
00238 const char *pDate = pStartMonth + 3;
00239 pDate = SpamUtil().skipWhiteSpace( pDate );
00240 if (isdigit(*pDate)) {
00241 int date = atoi( pDate );
00242 if (date >= 1 && date <= 31) {
00243 log.log(Logger::DEBUG, "isFromLine", "Found start of email");
00244 isFrom = true;
00245 }
00246 }
00247 }
00248 }
00249 }
00250 }
00251 return isFrom;
00252 }
00253
00254
00255
00285 bool MailFilter::copyToTempFiles()
00286 {
00287 const char *mode = "w";
00288 char buf[ 1024 ];
00289 char msgbuf[ 128];
00290 bool copyOK = true;
00291 const char *inLine = 0;
00292 const char *fileName = 0;
00293 FILE *fp = 0;
00294 bool firstFrom = true;
00295
00296 log.log(Logger::DEBUG, "copyToTempFiles", "enter");
00297
00298 do {
00299 if ((inLine = readLine(buf, sizeof(buf), stdin, 0, "copyToTempFiles"))) {
00300 if (isFromLine( buf )) {
00301 if (firstFrom) {
00302 firstFrom = false;
00303 }
00304 else {
00305 closeFile( fp, fileName, "copyToTempFiles");
00306 }
00307 fileName = getNewTempFileName();
00308 fp = openFile( fileName, mode, "copyToTempFiles" );
00309 }
00310 if (fp) {
00311 if (! writeLine(buf, fp, fileName, "copyToTempFiles")) {
00312 copyOK = false;
00313 break;
00314 }
00315 }
00316 }
00317 else {
00318 if (! feof(stdin)) {
00319 copyOK = false;
00320 }
00321 else {
00322 closeFile( fp, fileName, "copyToTempFiles");
00323 }
00324 }
00325 } while (inLine != 0);
00326
00327 log.log(Logger::DEBUG, "copyToTempFiles", "exit");
00328
00329 return copyOK;
00330 }
00331
00332
00333
00341 void MailFilter::error_append_file( const char *srcfile,
00342 const char *destfile)
00343 {
00344 static const char *SUBJECT = "subject";
00345 static size_t SUBJECT_LEN = strlen( SUBJECT );
00346 char msgbuf[ 128];
00347 const char *read_only = "r";
00348 const char *append = "a+";
00349 FILE *read_fp;
00350 FILE *write_fp;
00351
00352 log.log(Logger::DEBUG, "error_append_file", "enter");
00353
00354 if ((read_fp = fopen( srcfile, read_only )) != NULL) {
00355 if ((write_fp = fopen( destfile, append )) != NULL) {
00356 char line[ 4096 ];
00357 size_t amt_read;
00358 size_t amt_written;
00359
00360 fprintf(write_fp, "\n");
00361
00362 while (fgets(line, sizeof(line), read_fp) != 0) {
00363 fputs(line, write_fp);
00364
00365 if (SpamUtil().match(line, SUBJECT_LEN, SUBJECT)) {
00366 fprintf(write_fp, "X-MailFilterError:\n");
00367 }
00368 }
00369
00370 fclose( write_fp );
00371 }
00372 else {
00373 sprintf(msgbuf, "error opening file %s", destfile );
00374 log.log(Logger::ERROR, "error_append_file", msgbuf );
00375 }
00376 fclose( read_fp );
00377 }
00378 else {
00379 sprintf( msgbuf, "error opening file %s", srcfile );
00380 log.log(Logger::ERROR, "error_append_file", msgbuf );
00381 }
00382 log.log(Logger::DEBUG, "error_append_file", "exit");
00383 }
00384
00385
00397 void MailFilter::append_file( const char *srcfile,
00398 const char *destfile)
00399 {
00400 char msgbuf[ 128];
00401 const char *read_only = "r";
00402 const char *append = "a+";
00403 FILE *read_fp;
00404 FILE *write_fp;
00405
00406 log.log(Logger::DEBUG, "append_file", "enter");
00407
00408 if ((read_fp = fopen( srcfile, read_only )) != NULL) {
00409 if ((write_fp = fopen( destfile, append )) != NULL) {
00410 char buf[ 4096 ];
00411 size_t amt_read;
00412 size_t amt_written;
00413
00414 fprintf(write_fp, "\n");
00415
00416 while ((amt_read = fread(buf, 1, sizeof(buf), read_fp)) > 0) {
00417 amt_written = fwrite(buf, 1, amt_read, write_fp );
00418 if (amt_written < amt_read) {
00419 char *err_reason = strerror( errno );
00420 sprintf(msgbuf, "error writing file %s. Reason = %s", destfile, err_reason);
00421 log.log(Logger::ERROR, "append_file", msgbuf );
00422 }
00423 }
00424
00425 fclose( write_fp );
00426 }
00427 else {
00428 char *err_reason = strerror( errno );
00429 sprintf(msgbuf, "append_file: error opening file %s. Reason = %s",
00430 destfile, err_reason );
00431 log.log(Logger::ERROR, "append_file", msgbuf );
00432 }
00433 fclose( read_fp );
00434 }
00435 else {
00436 char *err_reason = strerror( errno );
00437 sprintf( msgbuf, "append_file: error opening file %s. Reason = %s",
00438 srcfile, err_reason );
00439 log.log(Logger::ERROR, "append_file", msgbuf );
00440 }
00441 log.log(Logger::DEBUG, "append_file", "exit");
00442 }
00443
00444
00445
00466 MailFilter::classification MailFilter::checkMail(const char *tempFileName,
00467 SpamParameters ¶ms,
00468 HeaderInfo &headInfo)
00469 {
00470 const char *mode = "r";
00471 classification mailClass = EMAIL;
00472 char msgbuf[256];
00473 log.log(Logger::DEBUG, "checkMail", "enter");
00474
00475 FILE *fp = openFile(tempFileName, mode, "checkMail");
00476 if (fp != NULL) {
00477 MailHeader headFilter( params, headInfo );
00478 mailClass = headFilter.checkHeader(fp);
00479 if (mailClass == UNKNOWN) {
00480 MailBody bodyFilter( params, headInfo );
00481 const char *boundaryStr = headFilter.getBoundaryStr();
00482 mailClass = bodyFilter.checkBody(boundaryStr, fp);
00483 headInfo.klass(mailClass);
00484 }
00485 fclose( fp );
00486 }
00487
00488 log.log(Logger::DEBUG, "checkMail", "exit");
00489 return mailClass;
00490 }
00491
00492
00513 MailFilter::MailFilter(SpamParameters ¶ms)
00514 {
00515
00516 const char* INBOX = "inbox";
00517
00518 const char* SPAM = "junk_mail";
00519
00520 const char* GARBAGE_MAIL = "garbage_mail";
00521
00522 mFileCount = 0;
00523 log = pLogger->getLogger("MailFilter");
00524 log.log(Logger::DEBUG, "MailFilter", "enter");
00525
00526 bool doGarbageTrace = params.hasFlag("trace_garbage") &&
00527 (! params.hasFlag("keep_garbage"));
00528
00529
00530 if (copyToTempFiles()) {
00531 size_t numFiles = fileNames.size();
00532 for (int i = 0; i < numFiles; i++) {
00533 const char *tempFileName = fileNames[i];
00534
00535 HeaderInfo headInfo( doGarbageTrace );
00536
00537 char msg[256];
00538 classification kind = checkMail(tempFileName,
00539 params,
00540 headInfo);
00541
00542 Logger::LogLevel mode;
00543
00544 switch (kind) {
00545 case UNKNOWN:
00546 {
00547
00548
00549
00550 sprintf(msg, "email classified as UNKNOWN");
00551 append_file( tempFileName, INBOX );
00552 mode = Logger::ERROR;
00553 }
00554 break;
00555 case EMAIL:
00556 {
00557 sprintf(msg, "Subject: %s added to mail in %s",
00558 headInfo.subject(), INBOX );
00559 append_file( tempFileName, INBOX );
00560 mode = Logger::DEBUG;
00561 }
00562 break;
00563 case SUSPECT: {
00564 sprintf(msg, "Subject: %s added to suspected spam in %s",
00565 headInfo.subject(), SPAM );
00566 append_file( tempFileName, SPAM );
00567 mode = Logger::DEBUG;
00568 }
00569 break;
00570 case GARBAGE: {
00571 if (params.hasFlag("keep_garbage")) {
00572 sprintf(msg, "Subject: %s is garbage, copied to %s",
00573 headInfo.subject(), GARBAGE_MAIL );
00574 append_file( tempFileName, GARBAGE_MAIL );
00575 }
00576 else {
00577 sprintf(msg, "Subject: %s deleted", headInfo.subject() );
00578 }
00579 mode = Logger::DEBUG;
00580 }
00581 break;
00582 case BAD_VALUE: {
00583 sprintf(msg, "Mail filter error: Subject = %s", headInfo.subject() );
00584
00585
00586
00587 error_append_file( tempFileName, INBOX );
00588 mode = Logger::ERROR;
00589 }
00590 break;
00591 default: {
00592 sprintf(msg, "bad classification value" );
00593 mode = Logger::ERROR;
00594 }
00595 break;
00596 }
00597
00598 log.log( mode, "MailFilter", msg );
00599
00600 if (! log.errorFound()) {
00601
00602 sprintf(msg, "removing %s", tempFileName );
00603 log.log(Logger::DEBUG, "MailFilter", msg );
00604 int unlinkRslt = unlink( tempFileName );
00605 if (unlinkRslt != 0) {
00606 sprintf(msg, "error unlinking %s. Error = %s\n",
00607 tempFileName, strerror(errno));
00608 log.log(Logger::ERROR, "MailFilter", msg );
00609 }
00610 }
00611 else {
00612 sprintf(msg, "email that caused the error is in %s", tempFileName );
00613 log.log(Logger::ERROR, "MailFilter", msg );
00614 }
00615 }
00616 }
00617
00618 log.log(Logger::DEBUG, "MailFilter", "exit");
00619 }