file provides buffered, filtered text file input and output, and supports the manipulation of linked lists of lines of text.
Copyright © 2003, 2004 Dave Bayer. Subject to the terms and conditions of the MIT License.
BUFLEN is the maximum number of characters allowed for a single line of text, not including the terminating null.
/* Capacity, in characters, passed to lineAlloc for the working buffers used by fileRead, fileWrite, and lineListFilter. */
#define BUFLEN 4096
The line struct is used for lines of text.
Return characters '\n' are explicitly stored in s, on input and optionally for output. User data should refer to s by index, not by pointer, to remain valid after copying.
/* One line of text, linkable into a singly linked list. */
typedef struct line
{
    char *s;            /* null terminated text of the line */
    unsigned len;       /* number of characters in s before the terminating null */
    unsigned max;       /* number of characters s can hold before the terminating null */
    void *data;         /* optional user data pointer; released through a lineFreeFn */
    struct line *next;  /* next line in the list, or null */
} line;
lineFreeFn is the type of a user function for freeing the data pointer, used by lineFree, lineListFree, lineReset, and lineDelete.
/* User callback that receives a line's data pointer so it can be released. */
typedef void (*lineFreeFn)( void *data );
fileRead, fileWrite, and lineListFilter accept optional lists of filters, of type lineFilter. A filter routine filt transforms buf1 to buf2, creating a null terminated string in buf2->s, setting buf2->len to the number of characters in buf2->s before the terminating null, and optionally setting buf2->data to point to user data. data is passed to filt as an argument. next provides for linked lists.
/* One stage in a linked list of line filters. */
typedef struct lineFilter
{
    /* Transforms buf1 into buf2; receives this stage's data pointer as its third argument. */
    void (*filt)( line *buf1, line *buf2, void *data );

    /* User data handed to filt on every call. */
    void *data;

    /* Next filter stage, or null at the end of the list. */
    struct lineFilter *next;
} lineFilter;
Function prototypes:
FILE *fileOpen( const char* fileName, char *mode, BOOL abort ); BOOL fileClose( FILE *fp, BOOL abort ); BOOL checkEOF( FILE *fin, BOOL abort ); line *lineAlloc( unsigned max ); void lineFree( line *l, lineFreeFn freedata ); void lineListFree( line *l, lineFreeFn freedata ); line *lineCopy( line *l, unsigned max ); void lineReset( line *l, lineFreeFn freedata ); line *lineFromStr( const char *s ); void lineAdvance( line ***ppl ); void lineInsert( line *l, line ***ppl ); void lineListInsert( line *l, line **pl, line ***ppl); void lineRemove( line **pl ); void lineListRemove( line **pl, line ***ppm ); void lineDelete( line **pl, lineFreeFn freedata ); line *lineListCopy( line *l, unsigned max ); BOOL lineRead( FILE *fin, line *l, unsigned detab ); void lineWrite( FILE *fout, line *l ); line *fileRead( FILE *fin, lineFilter *pfilter, unsigned detab ); void fileWrite( FILE *fout, line *l, lineFilter *pfilter ); line *lineListFilter( line *l, lineFilter *pfilter ); #ifdef TESTCODE void fileTest( void ); #endif
Copyright © 2003, 2004 Dave Bayer. Subject to the terms and conditions of the MIT License.
#include "root.h" #include "file.h"
fileOpen, fileClose, and checkEOF are wrappers around the corresponding library routines. They each take a boolean argument abort, which if YES terminates program execution on an error.
Otherwise, fileOpen returns a file pointer if successful, null on an error.
/*
 * Open fileName with the given fopen mode.
 * Returns the open stream. If fopen fails, a diagnostic is printed to stderr,
 * executionError is called with the abort flag, and null is returned.
 */
FILE *fileOpen( const char* fileName, char *mode, BOOL abort )
{
    FILE *stream = fopen( fileName, mode );

    if ( stream != 0 )
        return stream;

    /* The message is printed here, so executionError is given no message of its own. */
    fprintf( stderr, "fileOpen: error opening file \"%s\" with mode \"%s\"\n", fileName, mode );
    executionError( 0, abort );
    return stream;
}
fileClose returns YES if successful, NO on an error.
/*
 * Close fp.
 * Returns YES when fclose succeeds; otherwise reports the failure through
 * executionError with the abort flag and returns NO.
 */
BOOL fileClose( FILE *fp, BOOL abort )
{
    if ( fclose( fp ) != EOF )
        return YES;

    executionError( "fileClose: error closing file", abort );
    return NO;
}
checkEOF returns YES if successful, NO on an error.
/*
 * Confirm that fin has reached end of file.
 * Returns YES when the end-of-file indicator is set; otherwise reports a
 * read error through executionError with the abort flag and returns NO.
 */
BOOL checkEOF( FILE *fin, BOOL abort )
{
    if ( feof( fin ))
        return YES;

    executionError( "checkEOF: error reading file", abort );
    return NO;
}
lineAlloc allocates a single, initialized line, capable of holding max characters before the terminating null.
line *lineAlloc( unsigned max )
{
char *s;
line *l;
s = malloc( (max + 1) * sizeof( *s ));
*s = '\0';
l = malloc( sizeof( *l ));
l->next = 0;
l->len = 0;
l->max = max;
l->s = s;
l->data = 0;
return l;
}
lineFree frees a single line l, calling freedata if not null to free the data pointer. The value of l->next is ignored.
/*
 * Release a single line: its text buffer, its data pointer (through freedata,
 * when freedata is not null), and the line itself. l->next is not followed.
 */
void lineFree( line *l, lineFreeFn freedata )
{
    free( l->s );

    if ( freedata != 0 )
    {
        freedata( l->data );
    }

    free( l );
}
lineListFree frees a list of lines l, calling freedata if not null to free the data pointers.
/*
 * Release every line in the list starting at l, passing freedata on to
 * lineFree for each one.
 */
void lineListFree( line *l, lineFreeFn freedata )
{
    line *following;

    for ( ; l != 0; l = following )
    {
        /* Read the link before the node is released. */
        following = l->next;
        lineFree( l, freedata );
    }
}
lineCopy copies a single line l. The contents of s are copied to new memory of capacity at least max, and the pointer data is reused.
Take care not to free data twice. Note that strncpy copies exactly len+1 characters, including the terminating null.
/*
 * Duplicate the single line l.
 * The text is copied into a new buffer of capacity MAX( max, l->len ); the
 * data pointer is shared with the original, not duplicated. The copy's next
 * pointer is whatever lineAlloc sets.
 */
line *lineCopy( line *l, unsigned max )
{
    unsigned count = l->len;
    line *copy;

    assert( l->len == strlen( l->s ));

    copy = lineAlloc( MAX( max, count ));
    /* count + 1 characters: the text and its terminating null. */
    strncpy( copy->s, l->s, count + 1 );
    copy->len = count;
    copy->data = l->data;
    return copy;
}
lineReset resets l for reuse, calling freedata if not null to free the data pointer.
/*
 * Return l to the empty state for reuse: empty string, len 0, null next.
 * The data pointer is passed to freedata when freedata is not null, and is
 * then cleared.
 */
void lineReset( line *l, lineFreeFn freedata )
{
    l->s[0] = '\0';
    l->len = 0;
    l->next = 0;

    if ( freedata != 0 )
    {
        freedata( l->data );
    }
    l->data = 0;
}
lineFromStr creates a line from the string s, allocating new memory of capacity to hold s.
/*
 * Build a new line holding a copy of the string s. The line's capacity is
 * exactly strlen( s ).
 */
line *lineFromStr( const char *s )
{
    unsigned count = strlen( s );
    line *result = lineAlloc( count );

    /* count + 1 characters: the text and its terminating null. */
    strncpy( result->s, s, count + 1 );
    result->len = count;
    return result;
}
The following routines manipulate handles into lists of lines. lineAdvance, lineInsert, lineListInsert, and lineListRemove take as an argument a pointer to a handle, in order to be able to update the handle in the calling procedure.
lineAdvance advances *ppl by one line if possible.
/*
 * Move the handle *ppl forward by one line. When the handle already refers
 * to a null link, it is left unchanged.
 */
void lineAdvance( line ***ppl )
{
    line *current = **ppl;

    if ( current == 0 )
        return;

    *ppl = &current->next;
}
lineInsert inserts the single line l into the location ppl, and advances ppl past the inserted line l. The old value of l->next is ignored.
/*
 * Splice the single line l into the list at the handle *ppl, then move the
 * handle to l's next link. Any previous value of l->next is overwritten.
 */
void lineInsert( line *l, line ***ppl )
{
    line **handle = *ppl;

    l->next = *handle;   /* l now precedes whatever the handle referred to */
    *handle = l;
    *ppl = &l->next;
}
lineListInsert inserts the list of lines l terminated at pl into the location ppl, and advances ppl past the inserted lines l. The old value of *pl is ignored.
/*
 * Splice the list that starts at l and ends at the link pl into the list at
 * the handle *ppl, then move the handle to pl. Any previous value of *pl is
 * overwritten. l must not be null.
 */
void lineListInsert( line *l, line **pl, line ***ppl)
{
    line **handle;

    assert( l != 0 );

    handle = *ppl;
    *pl = *handle;   /* tail of the inserted list continues into the old list */
    *handle = l;
    *ppl = pl;
}
lineRemove removes a single line from the location pl in a list of lines. The removed line is terminated, and is not freed.
/*
 * Unlink the line at *pl from its list. The unlinked line gets a null next
 * pointer and is not freed.
 */
void lineRemove( line **pl )
{
    line *removed = *pl;

    *pl = removed->next;
    removed->next = 0;
}
lineListRemove removes the nonempty list of lines from the location pl up to the location ppm in a list of lines. The removed lines are terminated, and are not freed. ppm is reset to continue to point to the remainder of the list.
/*
 * Unlink the nonempty run of lines that begins at *pl and ends just before
 * the line referred to by the handle *ppm. The run is null terminated and
 * not freed; *pl is overwritten, so the caller needs its own pointer to the
 * first removed line. On return *ppm equals pl.
 */
void lineListRemove( line **pl, line ***ppm )
{
    line **tail = *ppm;

    assert( *pl != *tail );

    *pl = *tail;   /* the list now continues with the remainder */
    *tail = 0;     /* terminate the removed run */
    *ppm = pl;
}
lineDelete deletes a single line from the location pl in a list of lines. The deleted line is freed, calling freedata if not null to free the data pointer.
/*
 * Unlink the line at *pl from its list and release it with lineFree,
 * passing freedata along.
 */
void lineDelete( line **pl, lineFreeFn freedata )
{
    line *victim = *pl;

    *pl = victim->next;
    lineFree( victim, freedata );
}
lineListCopy copies a list of lines l, calling lineCopy for each line.
/*
 * Duplicate the list starting at l by calling lineCopy( line, max ) on each
 * line in order. Returns the head of the new list, or null for an empty list.
 */
line *lineListCopy( line *l, unsigned max )
{
    line *head = 0;
    line **tail = &head;

    while ( l != 0 )
    {
        *tail = lineCopy( l, max );
        lineAdvance( &tail );
        l = l->next;
    }
    return head;
}
lineRead reads a single line of text from fin into the line l, accepting any or missing return characters and replacing with '\n'. lineRead returns YES if any characters are read, NO on end of file. If detab is nonzero, detab using a tab stop every detab spaces.
/*
 * Read one line of text from fin into l.
 *
 * A line ends at '\n', '\r', "\r\n", or end of file. Trailing white space is
 * removed and the stored text is closed with a single '\n' and a terminating
 * null; l->len counts the '\n'. When detab is nonzero, each tab is replaced
 * by spaces up to the next multiple of detab columns.
 *
 * Returns NO when end of file is reached before any character is read, YES
 * otherwise. A read error, or a line that fills all l->max characters, is
 * reported through checkEOF / executionError with abort set to YES.
 */
BOOL lineRead( FILE *fin, line *l, unsigned detab )
{
    int c;
    unsigned n;          /* characters stored so far; also the next index into s */
    char *s = l->s;

    for ( n = 0; n < l->max; ++n )
    {
        c = getc( fin );
        switch ( c )
        {
        case EOF:
            /* getc returns EOF for both end of file and errors; checkEOF tells them apart. */
            checkEOF( fin, YES );
            if ( n == 0 ) return NO;
            goto end;

        /* Accept Unix \n, Mac \r, or DOS \r\n line breaks. */
        case '\r':
            c = getc( fin );
            if ( c != '\n' && c != EOF ) ungetc( c, fin );
            /* fall through */
        case '\n':
            goto end;

        /* Tabs are stored as ordinary characters unless detab is nonzero. */
        case '\t':
            if ( detab )
            {
                s[n] = ' ';
                /* Pad to the next tab stop without writing past index max - 1. */
                while (( n + 1 ) % detab != 0 && n + 1 < l->max )
                    s[++n] = ' ';
                break;
            }
            /* fall through */
        default:
            s[n] = (char) c;
            break;
        }
    }

    /* Reached only when max characters were stored without finding a line end. */
    executionError( "lineRead: line length exceeded", YES );

end:
    /*
     * n counts the characters stored, not including return characters.
     * Drop trailing white space; the cast keeps isspace defined for
     * characters with negative char values.
     */
    while ( n > 0 && isspace( (unsigned char) s[n - 1] ))
        --n;

    /* Close the line with '\n' and terminate the string. */
    s[n] = '\n';
    s[n + 1] = '\0';
    l->len = n + 1;
    return YES;
}
lineWrite writes a single line of text l to the file fout.
/*
 * Write the text of line l to fout with fputs. A write failure is reported
 * through executionError with abort set to YES.
 */
void lineWrite( FILE *fout, line *l )
{
    int status;

    assert( l->len == strlen( l->s ));

    status = fputs( l->s, fout );
    if ( status == EOF )
    {
        executionError( "lineWrite: error writing file", YES );
    }
}
fileRead reads the input text file fin, returning a list of lines. If the filter list pfilter is not null, each of its filters is applied in turn to every line. If detab is nonzero, detab using a tab stop every detab spaces.
/*
 * Read every line of fin with lineRead( fin, ..., detab ) and return them as
 * a new list. When pfilter is not null, each filter in the list is applied
 * in order to every line before it is stored.
 */
line *fileRead( FILE *fin, lineFilter *pfilter, unsigned detab )
{
    line *in, *out, *tmp;
    line *head, **tail;
    lineFilter *stage;

    in = lineAlloc( BUFLEN );
    out = 0;
    if ( pfilter != 0 )
        out = lineAlloc( BUFLEN );

    head = 0;
    tail = &head;

    while ( lineRead( fin, in, detab ))
    {
        stage = pfilter;
        while ( stage != 0 )
        {
            lineReset( out, 0 );
            stage->filt( in, out, stage->data );
            assert( out->len <= out->max && out->len == strlen( out->s ));

            /* The output of this stage becomes the input of the next. */
            tmp = in;
            in = out;
            out = tmp;

            stage = stage->next;
        }

        /* Store a copy sized to the text; the working buffers are reused. */
        *tail = lineCopy( in, 0 );
        lineAdvance( &tail );
    }

    lineFree( in, 0 );
    if ( pfilter != 0 )
        lineFree( out, 0 );
    return head;
}
fileWrite writes the list of lines l to the output text file fout. If the filter list pfilter is not null, each of its filters is applied in turn to every line before it is written.
/*
 * Write every line of the list l to fout with lineWrite. When pfilter is not
 * null, each filter in the list is applied in order to a line before it is
 * written; the lines in l are only ever used as filter input.
 */
void fileWrite( FILE *fout, line *l, lineFilter *pfilter )
{
    line *cur, *src, *dst, *spare;
    line *bufA = 0, *bufB = 0;
    lineFilter *stage;

    if ( pfilter != 0 )
    {
        bufA = lineAlloc( BUFLEN );
        bufB = lineAlloc( BUFLEN );
    }

    cur = l;
    while ( cur != 0 )
    {
        /*
         * To avoid unnecessary copying, the pair src, dst slides through the
         * sequence cur, bufA, bufB, bufA, bufB, and so forth. spare holds
         * the next value for dst.
         */
        src = cur;
        dst = bufA;
        spare = bufB;

        stage = pfilter;
        while ( stage != 0 )
        {
            lineReset( dst, 0 );
            stage->filt( src, dst, stage->data );
            assert( dst->len <= dst->max && dst->len == strlen( dst->s ));

            src = dst;
            dst = spare;
            spare = src;

            stage = stage->next;
        }

        lineWrite( fout, src );
        cur = cur->next;
    }

    if ( pfilter != 0 )
    {
        lineFree( bufA, 0 );
        lineFree( bufB, 0 );
    }
}
lineListFilter applies each filter in the list pfilter, in turn, to every line of the list l, returning a new list of lines. This is less efficient than filtering during input or output, because of the extra copying that is required.
/*
 * Apply each filter in pfilter, in order, to every line of the list l and
 * return the results as a new list built with lineCopy. The lines in l are
 * only ever used as filter input.
 */
line *lineListFilter( line *l, lineFilter *pfilter )
{
    line *cur, *src, *dst, *spare;
    line *bufA, *bufB;
    line *head, **tail;
    lineFilter *stage;

    bufA = lineAlloc( BUFLEN );
    bufB = lineAlloc( BUFLEN );

    head = 0;
    tail = &head;

    cur = l;
    while ( cur != 0 )
    {
        /* src, dst slide through cur, bufA, bufB, bufA, ...; spare is the next dst. */
        src = cur;
        dst = bufA;
        spare = bufB;

        stage = pfilter;
        while ( stage != 0 )
        {
            lineReset( dst, 0 );
            stage->filt( src, dst, stage->data );
            assert( dst->len <= dst->max && dst->len == strlen( dst->s ));

            src = dst;
            dst = spare;
            spare = src;

            stage = stage->next;
        }

        *tail = lineCopy( src, 0 );
        lineAdvance( &tail );
        cur = cur->next;
    }

    lineFree( bufA, 0 );
    lineFree( bufB, 0 );
    return head;
}
Test code:
#ifdef TESTCODE
identityLineFilter is an example filter, which copies the string s from buf1 to buf2, and sets len. Note that strncpy copies exactly len+1 characters, including the terminating null.
/*
 * Example filter: copies the text of buf1 into buf2 unchanged and sets
 * buf2->len to match. The data argument is not used.
 */
static void identityLineFilter( line *buf1, line *buf2, void *data )
{
    unsigned count = buf1->len;

    (void) data;

    /* count + 1 characters: the text and its terminating null. */
    strncpy( buf2->s, buf1->s, count + 1 );
    buf2->len = count;
}
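fileTest writes sample text to the file TempFileTest, reads it back with fileRead through a chain of three identity filters, passes the resulting list through lineListFilter, and writes the result to stdout with fileWrite using the same filters.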
/*
 * Round-trip test: write sample text to a file, read it back through three
 * chained identity filters, filter the list again, and write it to stdout.
 */
void fileTest( void )
{
    const char name[] = "TempFileTest";
    const char text[] =
        "Sample line\n"
        "\n"
        "Another sample line\n"
        ;
    lineFilter filter, filter2, filter3;
    FILE *fin, *fout;
    line *l, *m;

    /* Chain: filter -> filter2 -> filter3, all identity filters without user data. */
    filter3.filt = identityLineFilter;
    filter3.data = 0;
    filter3.next = 0;

    filter2.filt = identityLineFilter;
    filter2.data = 0;
    filter2.next = &filter3;

    filter.filt = identityLineFilter;
    filter.data = 0;
    filter.next = &filter2;

    /* Create the input file. */
    fout = fileOpen( name, "w", YES );
    fputs( text, fout );
    fileClose( fout, YES );

    /* Read it back with filtering, filter the list again, and write it out. */
    fin = fileOpen( name, "r", YES );
    l = fileRead( fin, &filter, 0 );

    m = lineListFilter( l, &filter );
    lineListFree( l, 0 );

    fileWrite( stdout, m, &filter );
    lineListFree( m, 0 );

    fileClose( fin, YES );
}
#endif