00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026 #include <assert.h>
00027 #include <stdio.h>
00028
00029 #include <string.h>
00030 #include <ctype.h>
00031
00032 #include <sys/types.h>
00033 #include <sys/stat.h>
00034 #ifndef _WIN32
00035 #include <unistd.h>
00036 #include <errno.h>
00037 #endif
00038
00039 #include "SpamUtil.h"
00040
00041
00045 const int SpamUtil::getFileSize( FILE *fp, const char *fileName, char *msgbuf )
00046 {
00047 int fileSize = -1;
00048 if (fp != 0) {
00049 struct stat s;
00050 int fd = fileno( fp );
00051 if (fstat( fd, &s) == 0) {
00052 fileSize = s.st_size;
00053 }
00054 else {
00055 char *err_reason = strerror( errno);
00056 sprintf( msgbuf, "SpamUtil::getFileSize: could not open %s. Reason: %s",
00057 fileName, err_reason );
00058 }
00059 }
00060 return fileSize;
00061 }
00062
00063
00074 void SpamUtil::strncpy(char *pDest, const char *pSrc, size_t destSize )
00075 {
00076 const size_t maxCopy = destSize-1;
00077 size_t ix;
00078 for (ix = 0; ix < maxCopy && pSrc[ix] != '\0'; ix++) {
00079 pDest[ix] = pSrc[ix];
00080 }
00081 pDest[ix] = '\0';
00082 }
00083
00084
00089 const char *SpamUtil::skipWhiteSpace( const char *pBuf )
00090 {
00091 while (*pBuf != '\0' && isspace( *pBuf )) {
00092 pBuf++;
00093 }
00094 return pBuf;
00095 }
00096
00097
00101 const char *SpamUtil::trimEnd(char *str )
00102 {
00103 if (str != 0 && str[0] != '\0') {
00104 int len = strlen( str );
00105 int i;
00106 for (i = len-1; i >=0 && isspace(str[i]); i--)
00107 ;
00108 str[i+1] = '\0';
00109 }
00110 return str;
00111 }
00112
00113
00119 void SpamUtil::trim(const char *str )
00120 {
00121 if (str != 0) {
00122 char *start = (char *)skipWhiteSpace( str );
00123
00124 char *ptr = (char *)str;
00125 if (*start != '\0') {
00126 size_t len = strlen( str );
00127 char *end = (char *)&str[len-1];
00128
00129 while (end > start && isspace( *end )) {
00130 end--;
00131 }
00132 size_t trimLen = (end - start) + 1;
00133 for (size_t i = 0; i < trimLen; i++) {
00134 *ptr = *start;
00135 start++;
00136 ptr++;
00137 }
00138 }
00139 *ptr = '\0';
00140 }
00141 }
00142
00143
00144
00149 bool SpamUtil::isBlankLine(const char *buf)
00150 {
00151 bool blankLine = true;
00152 if (buf != 0) {
00153 for (; *buf != '\0'; buf++) {
00154 if (! isspace( *buf ) ) {
00155 blankLine = false;
00156 break;
00157 }
00158 }
00159 }
00160 return blankLine;
00161 }
00162
00163
00164
00170 bool SpamUtil::match(const char *bufStart,
00171 const size_t len,
00172 const char *word )
00173 {
00174 assert( bufStart != 0 && word != 0);
00175
00176 bool wordsMatch = false;
00177 if (len > 0) {
00178 wordsMatch = true;
00179 for (size_t i = 0; i < len; i++) {
00180 if (tolower(bufStart[i]) != word[i]) {
00181 wordsMatch = false;
00182 break;
00183 }
00184 }
00185 }
00186
00187 return wordsMatch;
00188 }
00189
00190
00196 bool SpamUtil::match(const char *bufStart,
00197 const char *bufEnd,
00198 const char *word )
00199 {
00200 assert( bufStart != 0 && word != 0);
00201
00202 bool wordsMatch = false;
00203 int regionLen = bufEnd - bufStart;
00204 int wordLen = strlen( word );
00205 if (wordLen > 0 && wordLen == regionLen) {
00206 wordsMatch = true;
00207 for (size_t i = 0; i < wordLen; i++) {
00208 if (tolower(bufStart[i]) != word[i]) {
00209 wordsMatch = false;
00210 break;
00211 }
00212 }
00213 }
00214
00215 return wordsMatch;
00216 }
00217
00218
00219
00220 const char *SpamUtil::findColon( const char *buf )
00221 {
00222 const char *pColon = 0;
00223 if (buf != 0) {
00224 for (; *buf != '\0'; buf++) {
00225 if (*buf == ':') {
00226 pColon = buf;
00227 break;
00228 }
00229 }
00230 }
00231 return pColon;
00232 }
00233
00234
00235
00242 void SpamUtil::toLower(char *dest, const char *src, size_t size)
00243 {
00244
00245 size_t j;
00246 for (j = 0; j < size-1 && src[j] != '\0'; j++) {
00247 dest[j] = tolower(src[j]);
00248 }
00249 dest[j] = '\0';
00250 }
00251
00252
00258 void SpamUtil::toLower(char *dest, const char *src)
00259 {
00260
00261 size_t j;
00262 for (j = 0; src[j] != '\0'; j++) {
00263 dest[j] = tolower(src[j]);
00264 }
00265 dest[j] = '\0';
00266 }
00267
00268
00272 SpamUtil::contentType SpamUtil::classifySection( const char *buf )
00273 {
00274 contentType type = UNKNOWN;
00275 char tempBuf[128];
00276
00277 SpamUtil().toLower(tempBuf, buf, sizeof(tempBuf));
00278
00279 const char *typeName = "unknown";
00280 if (strstr(buf, "multipart") != 0) {
00281 type = MULTIPART;
00282 }
00283 else if (strstr(buf, "html") != 0) {
00284 type = HTML;
00285 }
00286 else if (strstr(buf, "text") != 0) {
00287 type = TEXT;
00288 }
00289 else if (strstr(buf, "image") != 0) {
00290 type = IMAGE;
00291 }
00292 else if (strstr(buf, "audio") != 0) {
00293 type = AUDIO;
00294 }
00295 else if (strstr(buf, "application") != 0) {
00296 type = APPLICATION;
00297 }
00298
00299 return type;
00300 }
00301
00302
00307 const char *SpamUtil::typeToStr( contentType type )
00308 {
00309 const char *typeName;
00310
00311 switch (type) {
00312 case BAD_CONT:
00313 typeName = "bad content";
00314 break;
00315 case UNKNOWN:
00316 typeName = "unknown";
00317 break;
00318 case BLANK:
00319 typeName = "blank section";
00320 break;
00321 case HTML:
00322 typeName = "html";
00323 break;
00324 case TEXT:
00325 typeName = "text";
00326 break;
00327 case MULTIPART:
00328 typeName = "multipart";
00329 break;
00330 case IMAGE:
00331 typeName = "image";
00332 break;
00333 case APPLICATION:
00334 typeName = "application";
00335 break;
00336 case AUDIO:
00337 typeName = "audio";
00338 break;
00339 case WINDOZ:
00340 typeName = "windows chars";
00341 break;
00342 case BASE64:
00343 typeName = "base64";
00344 break;
00345 default:
00346 typeName = "bad content type";
00347 break;
00348 }
00349 return typeName;
00350 }
00351
00352
00353
00354 const char *SpamUtil::classificationToStr( MailFilter::classification klass )
00355 {
00356 const char *str;
00357
00358 switch (klass) {
00359 case MailFilter::UNKNOWN:
00360 str = "UNKNOWN";
00361 break;
00362 case MailFilter::EMAIL:
00363 str = "EMAIL";
00364 break;
00365 case MailFilter::SUSPECT:
00366 str = "SUSPECT";
00367 break;
00368 case MailFilter::GARBAGE:
00369 str = "GARBAGE";
00370 break;
00371 default:
00372 str = "bad value";
00373 break;
00374 }
00375
00376 return str;
00377 }
00378
00379
00380
00406 MailFilter::classification SpamUtil::checkLine(const char *buf,
00407 SpamParameters ¶ms,
00408 char *foundStr,
00409 const size_t foundStrSize )
00410 {
00411 const char *HTMLtag = "<html>";
00412 const char *BODYtag = "<body";
00413
00414 MailFilter::classification klass = MailFilter::UNKNOWN;
00415 std::vector<const char *> spamWords = params.getSection(SpamParameters::spam_words);
00416 std::vector<const char *> killWords = params.getSection(SpamParameters::kill_words);
00417
00418 foundStr[0] = '\0';
00419 char *pHtmlTag = 0;
00420
00421 if ((pHtmlTag = strstr(buf, HTMLtag)) == 0) {
00422 pHtmlTag = strstr(buf, BODYtag);
00423 }
00424
00425 if (pHtmlTag != 0) {
00426 strncpy(foundStr, pHtmlTag, foundStrSize);
00427 klass = MailFilter::SUSPECT;
00428 }
00429 else {
00430 const size_t len = spamWords.size();
00431 for (size_t i = 0; i < len; i++) {
00432 if (strstr(buf, spamWords[i]) != 0) {
00433 strncpy(foundStr, spamWords[i], foundStrSize);
00434 klass = MailFilter::SUSPECT;
00435 break;
00436 }
00437 }
00438
00439 if (klass == MailFilter::UNKNOWN) {
00440 const size_t len = killWords.size();
00441 for (size_t i = 0; i < len; i++) {
00442 if (strstr(buf, killWords[i]) != 0) {
00443 strncpy(foundStr, killWords[i], foundStrSize);
00444 klass = MailFilter::GARBAGE;
00445 break;
00446 }
00447 }
00448 }
00449 }
00450 return klass;
00451 }
00452