00001
00037 #include "MailBody.h"
00038
00045 MailBody::MailBody( SpamParameters ¶m, HeaderInfo &headInfo ) :
00046 mParams( param ), mHeadInfo( headInfo )
00047 {
00048 log = pLogger->getLogger("MailBody");
00049 }
00050
00051
00072 MailBody::lineType MailBody::getLine(char *buf,
00073 const size_t bufSize,
00074 FILE *fp,
00075 const char *boundary)
00076 {
00077 lineType type = EndOfFile;
00078
00079 char *pStr;
00080
00081 do {
00082 pStr = fgets(buf, bufSize, fp);
00083 } while (pStr != 0 && SpamUtil().isBlankLine(buf));
00084
00085 if (pStr != 0) {
00086
00087 type = LINE;
00088 if (boundary != 0) {
00089 if (buf[0] == '-' && buf[1] == '-') {
00090 if (strstr(buf+2, boundary) != 0) {
00091 type = BOUNDARY;
00092 }
00093 }
00094 }
00095
00096 if (type != BOUNDARY) {
00097 SpamUtil().toLower(buf, buf, bufSize);
00098 }
00099 }
00100
00101 return type;
00102 }
00103
00104
00112 MailBody::lineType MailBody::getHtmlLine(char *buf,
00113 const size_t bufSize,
00114 FILE *fp,
00115 const char *boundary)
00116 {
00117 char *tmpBuf = new char[ bufSize ];
00118 lineType type = EndOfFile;
00119
00120 *buf = '\0';
00121
00122 char *pStr;
00123
00124 do {
00125 pStr = fgets(tmpBuf, bufSize, fp);
00126 } while (pStr != 0 && SpamUtil().isBlankLine(tmpBuf));
00127
00128 if (pStr != 0) {
00129
00130 type = LINE;
00131 if (boundary != 0) {
00132 if (tmpBuf[0] == '-' && tmpBuf[1] == '-') {
00133 if (strstr(tmpBuf+2, boundary) != 0) {
00134 type = BOUNDARY;
00135 }
00136 }
00137 }
00138
00139 if (type != BOUNDARY) {
00140 size_t i = 0;
00141 bool copyChar = true;
00142 for (size_t j = 0; j < bufSize-1 && tmpBuf[j] != '\0'; j++) {
00143 char ch = tmpBuf[j];
00144 if (ch == '<') {
00145 copyChar = false;
00146 }
00147 else if (ch == '>') {
00148 copyChar = true;
00149 }
00150 else if (copyChar) {
00151 buf[i] = tolower(ch);
00152 i++;
00153 }
00154 }
00155 buf[i] = '\0';
00156 }
00157 }
00158 delete [] tmpBuf;
00159
00160 return type;
00161 }
00162
00163
00170 MailBody::lineType MailBody::findSection( const char *boundary, FILE *fp )
00171 {
00172
00173 const size_t BUF_SIZE = 128;
00174 char buf[ BUF_SIZE ];
00175
00176 lineType ty;
00177
00178 while ((ty = getLine(buf, sizeof(buf), fp, boundary )) != MailBody::EndOfFile) {
00179 if (ty == BOUNDARY) {
00180 break;
00181 }
00182 }
00183
00184 return ty;
00185 }
00186
00187
00188
00204 SpamUtil::contentType MailBody::classifyMailSection(FILE *fp)
00205 {
00206 SpamUtil::contentType type = SpamUtil::UNKNOWN;
00207 char buf[128];
00208 char *bufPtr;
00209
00210
00211 if ((bufPtr = fgets(buf, sizeof(buf), fp)) != 0) {
00212 if (SpamUtil().isBlankLine(buf)) {
00213 type = SpamUtil::BLANK;
00214 }
00215 else {
00216 static const char *CONTENT_TYPE = "Content-Type:";
00217 static const size_t content_typeLen = strlen( CONTENT_TYPE );
00218 const char *ptr;
00219
00220
00221 if ((ptr = strstr(buf, CONTENT_TYPE)) != 0) {
00222 type = SpamUtil().classifySection( ptr+content_typeLen );
00223
00224 if ((bufPtr = fgets(buf, sizeof(buf), fp)) != 0) {
00225
00226 if (SpamUtil().findColon( bufPtr ) == 0) {
00227
00228 if (type == SpamUtil::TEXT) {
00229 static const char *CHARSET = "charset";
00230 static const size_t charsetLen = strlen( CHARSET );
00231 if ((ptr = strstr(buf, "charset")) != 0) {
00232 if (strstr(ptr+charsetLen, "Windows") != 0) {
00233 type = SpamUtil::WINDOZ;
00234 }
00235 }
00236 }
00237 bufPtr = fgets(buf, sizeof(buf), fp);
00238 }
00239 if (bufPtr != 0) {
00240
00241 if (strstr(buf, "base64") != 0) {
00242 type = SpamUtil::BASE64;
00243 }
00244 }
00245 }
00246 }
00247 }
00248 }
00249 else {
00250 log.log(Logger::ERROR, "classifyMailSection", "line expected after boundary");
00251 }
00252
00253 return type;
00254 }
00255
00256
00257
00262 void MailBody::mailBodyMsg( MailFilter::classification klass,
00263 const char *foundStr,
00264 const char *funcName )
00265 {
00266 char msg[256];
00267 char startMsg[64];
00268 const char *pMsg = msg;
00269
00270 const char *klassStr = "email";
00271 if (klass == MailFilter::SUSPECT) {
00272 klassStr = "suspect";
00273 }
00274 else if (klass == MailFilter::GARBAGE) {
00275 klassStr = "garbage";
00276 }
00277 sprintf(startMsg, "email classified as %s", klassStr);
00278 if (foundStr[0] != '\0') {
00279 sprintf(msg, "%s, found \"%s\"", startMsg, foundStr );
00280 }
00281 else {
00282 pMsg = startMsg;
00283 }
00284 log.log(Logger::DEBUG, funcName, pMsg );
00285 }
00286
00287
00288
00346 MailFilter::classification MailBody::processBySection(const char *boundary,
00347 FILE *fp)
00348 {
00349 MailFilter::classification klass = MailFilter::UNKNOWN;
00350 log.log(Logger::DEBUG, "processBySection", "enter");
00351
00352 log.log(Logger::DEBUG, "processBySection", "processing first section");
00353
00354 if (findSection(boundary, fp) == BOUNDARY) {
00355 char foundStr[128];
00356 foundStr[0] = '\0';
00357
00358 log.log(Logger::DEBUG, "processBySection", "found boundary");
00359
00360 SpamUtil::contentType type = classifyMailSection(fp);
00361 if (type == SpamUtil::TEXT) {
00362 char buf[4096];
00363
00364 bool foundNonBlankLine = false;
00365 MailBody::lineType lineTy;
00366 while ((lineTy = getLine(buf, sizeof(buf), fp, boundary)) == LINE) {
00367 klass = SpamUtil().checkLine(buf,
00368 mParams,
00369 foundStr,
00370 sizeof(foundStr));
00371 foundNonBlankLine = true;
00372 if (klass != MailFilter::UNKNOWN) {
00373 mHeadInfo.reason(foundStr);
00374 break;
00375 }
00376 }
00377
00378
00379
00380
00381 if (! foundNonBlankLine) {
00382 log.log(Logger::DEBUG, "processBySection", "found blank text section");
00383 klass = MailFilter::SUSPECT;
00384 }
00385 else {
00386 char msg[128];
00387 sprintf(msg, "first section type = %s", SpamUtil().typeToStr( type ));
00388 log.log(Logger::DEBUG, "processBySection", msg );
00389 }
00390
00391
00392
00393
00394 if (lineTy == BOUNDARY) {
00395 log.log(Logger::DEBUG, "processBySection", "found second section boundary" );
00396 SpamUtil::contentType secondSecType = classifyMailSection(fp);
00397 type = secondSecType;
00398 char msg[128];
00399 sprintf(msg, "second section type = %s", SpamUtil().typeToStr( secondSecType ));
00400 log.log(Logger::DEBUG, "processBySection", msg );
00401
00402 if (secondSecType == SpamUtil::HTML && (foundNonBlankLine)) {
00403
00404
00405
00406
00407 log.log(Logger::DEBUG, "processBySection", "processing HTML section" );
00408 while ((lineTy = getHtmlLine(buf, sizeof(buf), fp, boundary)) == LINE) {
00409 klass = SpamUtil().checkLine(buf,
00410 mParams,
00411 foundStr,
00412 sizeof(foundStr));
00413 if (klass != MailFilter::UNKNOWN) {
00414 mHeadInfo.reason(foundStr);
00415 break;
00416 }
00417 }
00418 }
00419
00420 }
00421 }
00422 else if (type == SpamUtil::HTML ||
00423 type == SpamUtil::BLANK ||
00424 type == SpamUtil::WINDOZ) {
00425
00426 klass = MailFilter::SUSPECT;
00427 if (type == SpamUtil::HTML) {
00428 log.log(Logger::DEBUG, "processBySection", "begins with HTML section");
00429 }
00430 else if (type == SpamUtil::BLANK) {
00431 log.log(Logger::DEBUG, "processBySection", "found boundary, no Content-Type");
00432 }
00433 if (findSection(boundary, fp) == BOUNDARY) {
00434 type = classifyMailSection(fp);
00435 }
00436 }
00437
00438 if (type == SpamUtil::WINDOZ ||
00439 type == SpamUtil::IMAGE ||
00440 type == SpamUtil::AUDIO ||
00441 type == SpamUtil::MULTIPART) {
00442 char msg[128];
00443 sprintf(msg, "found section type %s", SpamUtil().typeToStr( type ) );
00444 log.log(Logger::DEBUG, "processBySection", msg );
00445 klass = MailFilter::SUSPECT;
00446 }
00447 else if (type == SpamUtil::BASE64) {
00448 log.log(Logger::DEBUG, "processBySection", "found base64 section");
00449 if (mParams.hasFlag("kill_base64")) {
00450 klass = MailFilter::GARBAGE;
00451 mHeadInfo.reason("found base64 encoding");
00452 }
00453 else {
00454 klass = MailFilter::SUSPECT;
00455 }
00456 }
00457
00458
00459 if (klass == MailFilter::UNKNOWN) {
00460 klass = MailFilter::EMAIL;
00461 }
00462
00463 mailBodyMsg( klass, foundStr, "processBySection");
00464 }
00465 else {
00466 log.log(Logger::ERROR, "processBySection", "boundary not found");
00467 }
00468
00469 log.log(Logger::DEBUG, "processBySection", "exit");
00470 return klass;
00471 }
00472
00473
00480 MailFilter::classification MailBody::processTextBody( FILE *fp )
00481 {
00482 MailFilter::classification klass = MailFilter::UNKNOWN;
00483 log.log(Logger::DEBUG, "processTextBody", "enter");
00484 char buf[256];
00485 char foundStr[128];
00486
00487 foundStr[0] = '\0';
00488 MailBody::lineType lineTy;
00489 while ((lineTy = getLine(buf, sizeof(buf), fp, 0)) == LINE) {
00490 klass = SpamUtil().checkLine(buf,
00491 mParams,
00492 foundStr,
00493 sizeof(foundStr));
00494 if (klass != MailFilter::UNKNOWN) {
00495 break;
00496 }
00497 }
00498
00499 if (klass != MailFilter::UNKNOWN) {
00500 mHeadInfo.reason( foundStr );
00501 }
00502
00503
00504
00505 if (klass == MailFilter::UNKNOWN) {
00506 klass = MailFilter::EMAIL;
00507 }
00508 mailBodyMsg( klass, foundStr, "processTextBody");
00509
00510 log.log(Logger::DEBUG, "processTextBody", "exit");
00511 return klass;
00512 }
00513
00514
00521 MailFilter::classification MailBody::checkBody(const char *boundary, FILE *fp)
00522 {
00523 MailFilter::classification klass = MailFilter::UNKNOWN;
00524 if (! feof(fp)) {
00525 if (boundary != 0 && boundary[0] != '\0') {
00526 klass = processBySection( boundary, fp );
00527 }
00528 else {
00529 klass = processTextBody( fp );
00530 }
00531 }
00532 return klass;
00533 }