Main Page | Compound List | File List | Compound Members | File Members

MailFilter.C

00001 /*
00002   This email filter was written by Ian Kaplan, Bear Products
00003   International.  It is copyrighted by Ian Kaplan, 2004,
00004   www.bearcave.com.
00005 
00006   You have permission to use this software without restriction on two
00007   conditions:
00008 
00009     1. You must preserve this copyright notice in this software and
00010        any software derived from it.
00011 
00012     2. You accept any risk entailed in using this software.  By
00013        using this software, you acknowledge that you have a
00014        sophisticated background in software engineering and 
00015        understand the way this software functions.  You further
00016        acknowledge that using this software may result in the
00017        irretrievable loss of important e-email and you alone
00018        are responsible for this loss.
00019 
00020   If either of these conditions are unacceptable, you may not use any
00021   part of this software.
00022 
00023  */
00024 
00025 #include <stdlib.h>
00026 #include <ctype.h>
00027 #include <string.h>
00028 
00029 #ifdef _WIN32
00030 #include <io.h>      // for unlink on WIN32
00031 #include <process.h>
00032 #else
00033 #include <errno.h>
00034 #include <sys/types.h>
00035 #include <unistd.h>  // for unlink on UNIX
00036 #endif
00037 
00038 #include "SpamUtil.h"
00039 #include "MailHeader.h"
00040 #include "MailBody.h"
00041 #include "MailFilter.h"
00042 
00043 // for vector
00044 using namespace std;
00045 
00046 
00050 const char *MailFilter::getNewTempFileName()
00051 {
00052   const char* TEMP_NAME_ROOT = "mail_temp";
00053   const size_t BUF_SIZE = 64;;
00054   char *pBuf = new char[ BUF_SIZE ];
00055   
00056   int pid = getpid();
00057   // create a unique temporary file name
00058   mFileCount++;
00059   sprintf(pBuf, "%s_%d_%d", TEMP_NAME_ROOT, pid, mFileCount );
00060   fileNames.push_back( pBuf );
00061   return pBuf;
00062 } // getNewTempFileName
00063 
00064 
00065 
00066 FILE *MailFilter::openFile( const char *fileName, 
00067                             const char *mode,
00068                             const char *callingFunc )
00069 {
00070   char msgbuf[ 128];
00071 
00072   FILE *fp = fopen( fileName, mode );
00073   if (fp == 0) {
00074     char *err_reason = strerror( errno);
00075     sprintf(msgbuf, "error opening %s.  Reason = %s", fileName, err_reason );
00076     log.log(Logger::ERROR, callingFunc, msgbuf );
00077   }
00078   else {
00079     sprintf(msgbuf, "Opened file %s", fileName );
00080     log.log(Logger::DEBUG, callingFunc, msgbuf );
00081   }
00082   return fp;
00083 } // openFile
00084 
00085 
00086 void MailFilter::closeFile( FILE *fp, 
00087                             const char *fileName, 
00088                             const char *callingFunc )
00089 {
00090   char msgbuf[ 128];
00091   if (fp != 0) {
00092     fflush( fp );
00093     if (fclose( fp ) != 0) {
00094       char *err_reason = strerror( errno );
00095       sprintf(msgbuf, "Error closing %s.  Reason = %s", fileName, err_reason );
00096       log.log(Logger::ERROR, callingFunc, msgbuf );
00097     }
00098     else {
00099       sprintf(msgbuf, "Close file %s", fileName );
00100       log.log(Logger::DEBUG, callingFunc, msgbuf );
00101     }
00102   }
00103   else {
00104     log.log(Logger::DEBUG, callingFunc, "MailFilter::closeFile called with null file ptr.");
00105   }
00106 } // closeFile
00107 
00108 
00115 bool MailFilter::writeLine(const char *buf,
00116                            FILE *fp, 
00117                            const char *fileName, 
00118                            const char *callingFunc )
00119 {
00120   bool writeOK = true;
00121 
00122   if (fputs( buf, fp ) == EOF) {
00123     writeOK = false;
00124     char msgbuf[128];
00125     char *err_reason = strerror( errno );
00126     sprintf(msgbuf, "Error writing to %s.  Reason = %s", fileName, err_reason );
00127     log.log(Logger::ERROR, callingFunc, msgbuf );
00128   }
00129   return writeOK;
00130 } // writeLine
00131 
00132 
00137 const char *MailFilter::readLine(char *buf, 
00138                                  const size_t bufSize,
00139                                  FILE *fp, 
00140                                  const char *fileName, 
00141                                  const char *callingFunc )
00142 {
00143   char *inLine = 0;
00144   *buf = '\0';
00145   if ((inLine = fgets( buf, bufSize, fp )) == 0) {
00146     if (! feof(fp)) {
00147       char msgbuf[128];
00148       char *err_reason = strerror( errno );
00149       if (fileName != 0) {
00150         sprintf(msgbuf, "Error reading from %s.  Reason = %s", fileName, err_reason );
00151       }
00152       else if (fp == stdin) {
00153         sprintf(msgbuf, "Error reading from stdin.  Reason = %s", err_reason );
00154       }
00155       log.log(Logger::ERROR, callingFunc, msgbuf );
00156     }
00157   }
00158   return inLine;
00159 } // readLine
00160 
00161 
00166 MailFilter::~MailFilter()
00167 {
00168   size_t numFiles = fileNames.size();
00169   for (int i = 0; i < numFiles; i++) {
00170     char *pStr = fileNames[i];
00171     delete [] pStr;
00172   }
00173 } // ~MailFilter
00174 
00175 
00176 
00206 bool MailFilter::isFromLine(const char *buf)
00207 {
00208   static const char *FROM = "From ";
00209   static const size_t FROM_LEN = strlen( FROM );
00210   static const char *dow[] = {"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", 0 };
00211   static const char *mon[] =  {"Jan", "Feb", "Mar", "Apr",
00212                                "May", "Jun",  "Jul",  "Aug",  
00213                                "Sep",  "Oct",  "Nov", "Dec", 0 };
00214   char msgbuf[128];
00215   bool isFrom = false;
00216 
00217   if (buf != 0) {
00218     // find "From "
00219     if (strncmp(buf, FROM, FROM_LEN) == 0) {
00220       sprintf(msgbuf, "Found from line: %s", buf );
00221       log.log(Logger::DEBUG, "isFromLine", msgbuf);
00222       // find a day-of-the-week
00223       const char *pDOW = 0;
00224       for (const char **pDay = dow; *pDay != 0; pDay++) {
00225         if ((pDOW = strstr(buf, *pDay)) != 0) {
00226           break;
00227         }
00228       } // for
00229       if (pDOW != 0) {// if a day-of-the-week was found, find a month
00230         const char *pStartMonth = 0;
00231         for (const char **pMon = mon; *pMon != 0; pMon++) {
00232           if ((pStartMonth = strstr(pDOW+3, *pMon)) != 0) {
00233             break;
00234           }
00235         } // for
00236         // look for the day of the month (1..31)
00237         if (pStartMonth != 0) {
00238           const char *pDate = pStartMonth + 3;
00239           pDate = SpamUtil().skipWhiteSpace( pDate );
00240           if (isdigit(*pDate)) {
00241             int date = atoi( pDate );
00242             if (date >= 1 && date <= 31) {
00243               log.log(Logger::DEBUG, "isFromLine", "Found start of email");
00244               isFrom = true;
00245             }
00246           }
00247         }
00248       } // if pDow != 0
00249     }
00250   }
00251   return isFrom;
00252 } // isFromLine
00253 
00254 
00255 
00285 bool MailFilter::copyToTempFiles()
00286 {
00287   const char *mode = "w";
00288   char buf[ 1024 ];
00289   char msgbuf[ 128];
00290   bool copyOK = true;
00291   const char *inLine = 0;
00292   const char *fileName = 0;
00293   FILE *fp = 0;
00294   bool firstFrom = true;
00295 
00296   log.log(Logger::DEBUG, "copyToTempFiles", "enter");
00297 
00298   do {
00299     if ((inLine = readLine(buf, sizeof(buf), stdin, 0, "copyToTempFiles"))) {
00300       if (isFromLine( buf )) {
00301         if (firstFrom) {
00302           firstFrom = false;
00303         }
00304         else {
00305           closeFile( fp, fileName, "copyToTempFiles");
00306         }
00307         fileName = getNewTempFileName();
00308         fp = openFile( fileName, mode, "copyToTempFiles" );
00309       } // isFromLine
00310       if (fp) {
00311         if (! writeLine(buf, fp, fileName, "copyToTempFiles")) {
00312           copyOK = false;
00313           break;
00314         }
00315       }
00316     } // inLine = readLine
00317     else {
00318       if (! feof(stdin)) {
00319         copyOK = false;
00320       }
00321       else {
00322         closeFile( fp, fileName, "copyToTempFiles");
00323       }
00324     }
00325   } while (inLine != 0);
00326 
00327   log.log(Logger::DEBUG, "copyToTempFiles", "exit");
00328 
00329   return copyOK;
00330 } // copyToTempFiles
00331 
00332 
00333 
00341 void MailFilter::error_append_file( const char *srcfile, 
00342                                     const char *destfile)
00343 {
00344   static const char *SUBJECT = "subject";
00345   static size_t SUBJECT_LEN = strlen( SUBJECT );
00346   char msgbuf[ 128];
00347   const char *read_only = "r";
00348   const char *append = "a+";
00349   FILE *read_fp;
00350   FILE *write_fp;
00351 
00352   log.log(Logger::DEBUG, "error_append_file", "enter");
00353   
00354   if ((read_fp = fopen( srcfile, read_only )) != NULL) {
00355     if ((write_fp = fopen( destfile, append )) != NULL) {
00356       char line[ 4096 ];
00357       size_t amt_read;
00358       size_t amt_written;
00359 
00360       fprintf(write_fp, "\n");  // add a carriage return (blank line)
00361 
00362       while (fgets(line, sizeof(line), read_fp) != 0) {
00363         fputs(line, write_fp);
00364         // append "X-MailFilterError:" after the "Subject:" line
00365         if (SpamUtil().match(line, SUBJECT_LEN, SUBJECT)) {
00366           fprintf(write_fp, "X-MailFilterError:\n");
00367         }
00368       } // while 
00369 
00370       fclose( write_fp );
00371     }
00372     else {
00373       sprintf(msgbuf, "error opening file %s", destfile );
00374       log.log(Logger::ERROR, "error_append_file", msgbuf );
00375     }
00376     fclose( read_fp );
00377   }
00378   else {
00379     sprintf( msgbuf, "error opening file %s", srcfile );
00380     log.log(Logger::ERROR, "error_append_file", msgbuf );
00381   }
00382   log.log(Logger::DEBUG, "error_append_file", "exit");  
00383 } // error_append_file
00384 
00385 
00397 void MailFilter::append_file( const char *srcfile, 
00398                               const char *destfile)
00399 {
00400   char msgbuf[ 128];
00401   const char *read_only = "r";
00402   const char *append = "a+";
00403   FILE *read_fp;
00404   FILE *write_fp;
00405 
00406   log.log(Logger::DEBUG, "append_file", "enter");
00407   
00408   if ((read_fp = fopen( srcfile, read_only )) != NULL) {
00409     if ((write_fp = fopen( destfile, append )) != NULL) {
00410       char buf[ 4096 ];
00411       size_t amt_read;
00412       size_t amt_written;
00413 
00414       fprintf(write_fp, "\n");  // add a carriage return (blank line)
00415 
00416       while ((amt_read = fread(buf, 1, sizeof(buf), read_fp)) > 0) {
00417         amt_written = fwrite(buf, 1, amt_read, write_fp );
00418         if (amt_written < amt_read) {
00419           char *err_reason = strerror( errno );
00420           sprintf(msgbuf, "error writing file %s.  Reason = %s", destfile, err_reason);
00421           log.log(Logger::ERROR, "append_file", msgbuf );
00422         }
00423       } // while 
00424 
00425       fclose( write_fp );
00426     }
00427     else {
00428       char *err_reason = strerror( errno );
00429       sprintf(msgbuf, "append_file: error opening file %s.  Reason = %s", 
00430               destfile, err_reason );
00431       log.log(Logger::ERROR, "append_file", msgbuf );
00432     }
00433     fclose( read_fp );
00434   }
00435   else {
00436     char *err_reason = strerror( errno );
00437     sprintf( msgbuf, "append_file: error opening file %s.  Reason = %s", 
00438              srcfile, err_reason );
00439     log.log(Logger::ERROR, "append_file", msgbuf );
00440   }
00441   log.log(Logger::DEBUG, "append_file", "exit");
00442 }  // append_file
00443 
00444 
00445 
00466 MailFilter::classification MailFilter::checkMail(const char *tempFileName, 
00467                                                  SpamParameters &params,
00468                                                  HeaderInfo &headInfo)
00469 {
00470   const char *mode = "r";
00471   classification mailClass = EMAIL;
00472   char msgbuf[256];
00473   log.log(Logger::DEBUG, "checkMail", "enter");
00474 
00475   FILE *fp = openFile(tempFileName, mode, "checkMail");
00476   if (fp != NULL) {
00477     MailHeader headFilter( params, headInfo );
00478     mailClass = headFilter.checkHeader(fp);
00479     if (mailClass == UNKNOWN) {
00480       MailBody bodyFilter( params, headInfo );
00481       const char *boundaryStr = headFilter.getBoundaryStr();
00482       mailClass = bodyFilter.checkBody(boundaryStr, fp);
00483       headInfo.klass(mailClass);
00484     }
00485     fclose( fp );
00486   }
00487 
00488   log.log(Logger::DEBUG, "checkMail", "exit");
00489   return mailClass;
00490 } // checkMail
00491 
00492 
00513 MailFilter::MailFilter(SpamParameters &params) 
00514 {
00515   // file for email
00516   const char* INBOX          = "inbox";
00517   // fiile for email that is suspected of being spam
00518   const char* SPAM           = "junk_mail";
00519   // File for email that is "garbage".
00520   const char* GARBAGE_MAIL   = "garbage_mail";
00521 
00522   mFileCount = 0;
00523   log = pLogger->getLogger("MailFilter");
00524   log.log(Logger::DEBUG, "MailFilter", "enter");
00525 
00526   bool doGarbageTrace = params.hasFlag("trace_garbage") &&
00527                         (! params.hasFlag("keep_garbage"));
00528 
00529   // read mail file from stdin into one or more temporary file
00530   if (copyToTempFiles()) {
00531     size_t numFiles = fileNames.size();
00532     for (int i = 0; i < numFiles; i++) {
00533       const char *tempFileName = fileNames[i];
00534 
00535       HeaderInfo headInfo( doGarbageTrace );
00536 
00537       char msg[256];
00538       classification kind = checkMail(tempFileName, 
00539                                       params,
00540                                       headInfo);
00541 
00542       Logger::LogLevel mode;
00543 
00544       switch (kind) {
00545       case UNKNOWN:
00546         {
00547           // If the email is classified as "UNKNOWN" then something is
00548           // wrong.  But we don't want to lose the email, so append it
00549           // to the inbox.
00550           sprintf(msg, "email classified as UNKNOWN");
00551           append_file( tempFileName, INBOX );
00552           mode = Logger::ERROR;
00553         }
00554         break;
00555       case EMAIL: 
00556         {
00557           sprintf(msg, "Subject: %s added to mail in %s", 
00558                   headInfo.subject(), INBOX );
00559           append_file( tempFileName, INBOX );
00560           mode = Logger::DEBUG;
00561         }
00562         break;
00563       case SUSPECT: {
00564         sprintf(msg, "Subject: %s added to suspected spam in %s", 
00565                 headInfo.subject(), SPAM );
00566         append_file( tempFileName, SPAM );
00567         mode = Logger::DEBUG;
00568       }
00569         break;
00570       case GARBAGE: {
00571         if (params.hasFlag("keep_garbage")) {
00572           sprintf(msg, "Subject: %s is garbage, copied to %s", 
00573                   headInfo.subject(), GARBAGE_MAIL );
00574           append_file( tempFileName, GARBAGE_MAIL );
00575         }
00576         else {
00577           sprintf(msg, "Subject: %s deleted", headInfo.subject() );
00578         }
00579         mode = Logger::DEBUG;
00580       }
00581         break;
00582       case BAD_VALUE: { // something went wrong processing the e-mail
00583         sprintf(msg, "Mail filter error: Subject = %s", headInfo.subject() );
00584         // Append it to the inbox so it is not lost.  The error_append_file
00585         // function will add a marker to the file to indicate that there
00586         // was an error
00587         error_append_file( tempFileName, INBOX );
00588         mode = Logger::ERROR;
00589       }
00590         break;
00591       default: {
00592         sprintf(msg, "bad classification value" );
00593         mode = Logger::ERROR;
00594       }
00595         break;
00596       } // switch
00597 
00598       log.log( mode, "MailFilter", msg );
00599 
00600       if (! log.errorFound()) {
00601         // remove temporary file
00602         sprintf(msg, "removing %s", tempFileName );
00603         log.log(Logger::DEBUG, "MailFilter", msg );
00604         int unlinkRslt = unlink( tempFileName );
00605         if (unlinkRslt != 0) {
00606           sprintf(msg, "error unlinking %s.  Error = %s\n",
00607                   tempFileName, strerror(errno));
00608           log.log(Logger::ERROR, "MailFilter", msg );
00609         }
00610       }
00611       else {
00612         sprintf(msg, "email that caused the error is in %s", tempFileName );
00613         log.log(Logger::ERROR, "MailFilter", msg );
00614       }
00615     } // for
00616   } // if copyToTempFiles
00617 
00618   log.log(Logger::DEBUG, "MailFilter", "exit");
00619 } // MailFilter constructor

Generated on Sat Mar 27 13:07:38 2004 for Mail Filter by doxygen 1.3.3