Index: mergelog-4.5/configure.in =================================================================== --- mergelog-4.5.orig/configure.in +++ mergelog-4.5/configure.in @@ -2,7 +2,7 @@ dnl Process this file with autoconf to p AC_INIT(src/mergelog.c) PACKAGE=mergelog -VERSION=4.5 +VERSION=4.5-split AM_INIT_AUTOMAKE($PACKAGE,$VERSION,nosubst) AC_ARG_PROGRAM Index: mergelog-4.5/man/mergelog.1 =================================================================== --- mergelog-4.5.orig/man/mergelog.1 +++ mergelog-4.5/man/mergelog.1 @@ -1,8 +1,9 @@ -.TH MERGELOG 1 "22 Jan 2001" +.TH MERGELOG 1 "20 Jun 2001" .SH NAME mergelog \- a fast tool to merge http log files by date .SH SYNOPSIS .B mergelog +.IR [-vh]\ [-o\ outfmt] .IR logfile1 .IR logfile2 \ ... .SH DESCRIPTION Index: mergelog-4.5/man/zmergelog.1 =================================================================== --- mergelog-4.5.orig/man/zmergelog.1 +++ mergelog-4.5/man/zmergelog.1 @@ -1,8 +1,9 @@ -.TH ZMERGELOG 1 "22 Jan 2001" +.TH ZMERGELOG 1 "20 Jun 2001" .SH NAME zmergelog \- a fast tool to merge gzipped http log files by date .SH SYNOPSIS .B zmergelog +.IR [-vh]\ [-o\ outfmt] .IR logfile1 .IR logfile2 \ ... .SH DESCRIPTION Index: mergelog-4.5/src/mergelog.c =================================================================== --- mergelog-4.5.orig/src/mergelog.c +++ mergelog-4.5/src/mergelog.c @@ -45,12 +45,16 @@ #define mygets(a,b,c,d) fast_gzgets(a,b,c,d) #define myrewind gzrewind #define myclose gzclose +#define mywrite(f,b,l) gzwrite((f),(b),(l)) +#define MYSUFFIX ".gz" #else #define myFH FILE #define myopen fopen #define mygets(a,b,c,d) fgets(a,b,c) #define myrewind rewind #define myclose fclose +#define mywrite(f,b,l) fwrite((b),1,(l),(f)) +#define MYSUFFIX #endif @@ -112,20 +116,141 @@ int main (int argc, char *argv[]) { char *trans_digits[60]; char *trans_year[200]; char months[24]="anebarprayunulugepctovec"; + int goc; + int goUsage = 0, goVerbose = 0; + const char* goOutput = NULL; + const char* argv0 = *argv; + struct tm gotmFlags; + const char* goOutPtr; + myFH *outFile = NULL; + char outFileName[512]; + struct tm outFileTM; + char outFileNewName[512]; + size_t outBytes; + + while((goc=getopt(argc,argv,"vho:"))!=-1) { + switch(goc) { + case 'h': /* -h print usage */ + goUsage++; + break; + case 'v': /* -v verbose */ + goVerbose++; + break; + case 'o': /* -o fmt output specification */ + goOutput = optarg; + break; + default: /* whatever.. but we better print out usage */ + goUsage++; + break; + } + } + argv = &argv[optind]; + argc -= optind; - /* - print usage if necessary - */ - if (argc == 1) { - fprintf(stderr,"usage: %s logfile1 logfile2 ...\nmergelog %s Copyright (C) 2000-2001 Bertrand Demiddelaer\n",argv[0],VERSION); + if(argc<1) + goUsage++; + if(goUsage) { + printf( + "mergelog " VERSION " Copyright (c) 2000-2001 Bertrand Demiddelaer\n\n" + "I have at least %d reason(s) for lending you a helping hand on using the program.\n\n" + "Usage: %s [-vh] [-o fmt] logfile1 logfile2 ..\n\n" + " -h Print this message\n" + " -v Increase verbosity\n" + " -o Give template for output file name to write log to\n" + " instead of standard output in strftime(3) form.\n" + " ( eg. -o %%Y-%%m-%%d-access.log" MYSUFFIX ")\n", + goUsage,argv0); exit(1); } + /* Analyze output format if given. It's better than trying to + * regenerate file name and compare it to the filename currently being + * written each time we want to dump one line. */ + if(goOutput) { + memset(&gotmFlags,0,sizeof(gotmFlags)); + for(goOutPtr=goOutput;*goOutPtr;goOutPtr++) { + if( (*goOutPtr=='%') && goOutPtr[1]) { + switch(*(++goOutPtr)) { + case 'a': /* abbreviated weekday */ + case 'A': /* full weekday */ + case 'u': /* day of week number */ + case 'w': /* zerobased weekday number */ + gotmFlags.tm_wday = 1; + break; + case 'b': /* abbreviated month */ + case 'h': /* %b */ + case 'B': /* full month */ + case 'm': /* month number */ + gotmFlags.tm_mon = 1; + break; + case 'C': /* century */ + case 'y': /* centuryless year */ + case 'Y': /* year with a century */ + gotmFlags.tm_year = 1; + break; + case 'd': /* day of month */ + case 'e': /* day of month space padded */ + gotmFlags.tm_mday = 1; + break; + case 'D': /* %m/%d/%y americanism */ + gotmFlags.tm_mon = gotmFlags.tm_mday = gotmFlags.tm_year = 1; + break; + case 'G': /* ISO 8601 year repsesentation. depends on week */ + case 'g': /* ISO 8601 year repsesentation without century. depends on week */ + case 'U': /* sunday-based week number */ + case 'V': /* ISO 8601 week number */ + case 'W': /* monday-based week number */ + case 'x': /* preferred date representation for locale without the time */ + gotmFlags.tm_yday = gotmFlags.tm_year = 1; + break; + case 'H': /* 24h format hour */ + case 'I': /* 12h format hour */ + case 'k': /* 24h format hour space padded */ + case 'l': /* 12h format hour space padded */ + case 'p': /* AM/PM indicator */ + case 'P': /* am/pm indicator */ + gotmFlags.tm_hour = 1; + break; + case 'j': /* day of year */ + gotmFlags.tm_yday = 1; + break; + case 'M': /* minute */ + gotmFlags.tm_min = 1; + break; + case 'n': /* newline character */ + case 't': /* tab character */ + case 'z': /* time zone as an offset from GMT */ + case 'Z': /* timezone name or abbreviation */ + case '%': /* % character */ + break; + case 'r': /* %I:%M:%S %p time in am/pm notation*/ + case 'T': /* %H:%M:%S time in 24h notation */ + case 'X': /* preferred time representation for locale without the date */ + gotmFlags.tm_hour = gotmFlags.tm_min = gotmFlags.tm_sec = 1; + break; + case 'R': /* %H:%M time in 24h notation without seconds */ + gotmFlags.tm_hour = gotmFlags.tm_min = 1; + break; + case 'S': /* second */ + gotmFlags.tm_sec = 1; + break; + case 'c': /* preferred date and time representation for locale */ + case 'E': /* weird modifier */ + case 'O': /* weird modifier */ + case 's': /* number of seconds since epoch */ + case '+': /* date and time in date(1) format */ + memset(&gotmFlags,0xFF,sizeof(gotmFlags)); + break; + } + } + } + } + #ifdef USE_ZLIB /* check if there are enough gunzip buffers */ - if(argc>MAX_FILES) { + if(argc>=MAX_FILES) { fputs("too many gzipped log files, aborting\n",stderr); exit(1); } @@ -134,9 +259,9 @@ int main (int argc, char *argv[]) { /* open log files */ - for (i=1;i<argc;i++) { - log_file[i-1]=myopen(argv[i],"r"); - if (log_file[i-1] == NULL) { + for (i=0;i<argc;i++) { + log_file[i]=myopen(argv[i],"r"); + if (log_file[i] == NULL) { fprintf(stderr,"can't open %s, aborting\n",argv[i]); exit(1); } @@ -174,8 +299,8 @@ int main (int argc, char *argv[]) { /* init things for each log file and get the older date to start with */ - nb_files=argc-1; - for (i=0;i<argc-1;i++) { + nb_files=argc; + for (i=0;i<argc;i++) { #ifdef USE_ZLIB /* @@ -230,7 +355,7 @@ int main (int argc, char *argv[]) { */ log_date=memchr(log_scan[i],'[',SCAN_SIZE); if (log_date == NULL) { - fprintf(stderr,"abort due to a problem with %s:\n%s\n",argv[i+1],log_buffer[i]); + fprintf(stderr,"abort due to a problem with %s:\n%s\n",argv[i],log_buffer[i]); exit(1); } @@ -239,7 +364,7 @@ int main (int argc, char *argv[]) { */ for (j=0;((j == 12)&&(memcmp(months+2*j,log_date+5,2) != 0));j++); if (j == 12) { - fprintf(stderr,"abort due to a problem with %s:\n%s\n",argv[i+1],log_buffer[i]); + fprintf(stderr,"abort due to a problem with %s:\n%s\n",argv[i],log_buffer[i]); exit(1); } memcpy(log_month[i],trans_digits[j],2); @@ -255,7 +380,7 @@ int main (int argc, char *argv[]) { extract the date of this first line */ if (sscanf(log_date+1,"%d/%3c/%d:%d:%d:%d",&day,month,&year,&hour,&minut,&second) < 6) { - fprintf(stderr,"abort due to a problem with %s:\n%s\n",argv[i+1],log_buffer[i]); + fprintf(stderr,"abort due to a problem with %s:\n%s\n",argv[i],log_buffer[i]); exit(1); } @@ -270,7 +395,7 @@ int main (int argc, char *argv[]) { date->tm_isdst=-1; for (j=0;((j<12)&&(memcmp(months+2*j,month+1,2) != 0));j++); if (j == 12) { - fprintf(stderr,"abort due to a problem with %s:\n%s\n",argv[i+1],log_buffer[i]); + fprintf(stderr,"abort due to a problem with %s:\n%s\n",argv[i],log_buffer[i]); exit(1); } date->tm_mon=j; @@ -303,6 +428,8 @@ int main (int argc, char *argv[]) { exit if we have only empty files */ if (nb_files == 0) { + if(outFile) + myclose(outFile); exit(0); } @@ -323,7 +450,7 @@ int main (int argc, char *argv[]) { /* start to compute since this date */ - nb_files_orig=argc-1; + nb_files_orig=argc; for(;;) { /* @@ -377,7 +504,55 @@ int main (int argc, char *argv[]) { write the log line faster than a puts and we are sure to find a '\0' in log_buffer[i] */ - write(1,log_buffer[i],(size_t)((char *)memchr(log_buffer[i],0,BUFFER_SIZE)-log_buffer[i])); + if(goOutput) { + /* Check whether any of the date/time components concerned + * have changes or if we have no output file opened. It + * doesn't make much sense to do all the comparinsons in case + * we have no output file opened, but it makes even less sense + * to check for file handle each time we evaluate date. */ + if( +# define TMCOMPARE(tmpart) ( gotmFlags.tm_##tmpart && (outFileTM.tm_##tmpart!=date->tm_##tmpart) ) + TMCOMPARE(sec) || TMCOMPARE(min) || TMCOMPARE(hour) + || TMCOMPARE(mday) || TMCOMPARE(mon) || TMCOMPARE(year) + || TMCOMPARE(wday) || TMCOMPARE(yday) + || !outFile +# undef TMCOMPARE + ) { + if(!strftime(outFileNewName,sizeof(outFileNewName)-1,goOutput,date)) { + fprintf(stderr,"abort due to a failed attempt to generate output file name\n"); + if(outFile) + myclose(outFile); + exit(1); + } + if( (!outFile) || strcmp(outFileName,outFileNewName) ) { + /* okay, we're about to change output file or start + * writing first file */ + if(outFile) { + myclose(outFile); + } + if(goVerbose) + fprintf(stderr, "Writing to \"%s\"..\n",outFileNewName); + outFile = myopen(outFileNewName,"a"); + if(!outFile) { + fprintf(stderr, "abort due to a failed attempt to open/create output file %s: %m\n",outFileNewName); + exit(1); + } + memmove(outFileName,outFileNewName,sizeof(outFileName)); + memmove(&outFileTM,date,sizeof(outFileTM)); + } + } + /* %Y-%m-%d_%H-%M-%S-access.log */ + outBytes = (size_t)((char *)memchr(log_buffer[i],0,BUFFER_SIZE)-log_buffer[i]); + if( mywrite(outFile,log_buffer[i],outBytes) != outBytes) { + fprintf(stderr,"abort due to a failed write operation on output file %s: %m\n",outFileName); + if(outFile) + myclose(outFile); + exit(1); + } + }else{ + /* do it the way we would do without */ + write(1,log_buffer[i],(size_t)((char *)memchr(log_buffer[i],0,BUFFER_SIZE)-log_buffer[i])); + } /* is it an end of file ? @@ -388,7 +563,9 @@ int main (int argc, char *argv[]) { close all log files and exit if all end of files are reached */ if (--nb_files == 0) { - for (j=0;j<argc-1;j++) { + if(outFile) + myclose(outFile); + for (j=0;j<argc;j++) { myclose(log_file[j]); } exit(0); @@ -425,7 +602,7 @@ int main (int argc, char *argv[]) { } else { for (j=0;((j<12)&&(memcmp(months+2*j,log_date+5,2) != 0));j++); if (j == 12) { - fprintf(stderr,"problem with %s:\n%s\ncontinuing...\n",argv[i+1],log_buffer[i]); + fprintf(stderr,"problem with %s:\n%s\ncontinuing...\n",argv[i],log_buffer[i]); } else { memcpy(log_month[i],trans_digits[j],2); memcpy(log_month[i]+2,months+2*j,2); @@ -439,7 +616,7 @@ int main (int argc, char *argv[]) { } } } else { - fprintf(stderr,"problem with %s:\n%s\ncontinuing...\n",argv[i+1],log_buffer[i]); + fprintf(stderr,"problem with %s:\n%s\ncontinuing...\n",argv[i],log_buffer[i]); } } } @@ -451,3 +628,7 @@ int main (int argc, char *argv[]) { */ exit(1); } +/* vim:set textwidth=72: */ +/* vim:set cindent smartindent: */ +/* vim:set formatoptions-=t formatoptions+=croql: */ +/* vim:set expandtab shiftwidth=2: */