211 lines
4.8 KiB
C
211 lines
4.8 KiB
C
/* $Id$ */
|
|
/* Copyright (c) 2007 Pierre Pronchery <khorben@defora.org> */
|
|
/* This file is part of DeforaOS Unix utils */
|
|
/* utils is not free software; you can redistribute it and/or modify it under
|
|
* the terms of the Creative Commons Attribution-NonCommercial-ShareAlike 3.0
|
|
* Unported as published by the Creative Commons organization.
|
|
*
|
|
* utils is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
|
|
* A PARTICULAR PURPOSE. See the Creative Commons Attribution-NonCommercial-
|
|
* ShareAlike 3.0 Unported license for more details.
|
|
*
|
|
* You should have received a copy of the Creative Commons Attribution-
|
|
* NonCommercial-ShareAlike 3.0 along with utils; if not, browse to
|
|
* http://creativecommons.org/licenses/by-nc-sa/3.0/ */
|
|
|
|
|
|
|
|
#include <unistd.h>
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
|
|
|
|
#define OPTS_c 1
|
|
#define OPTS_d 2
|
|
#define OPTS_u 4
|
|
|
|
|
|
/* uniq */
|
|
/* PRE if in == NULL then out == NULL too
|
|
* POST
|
|
* 0 success
|
|
* else error(s) occured */
|
|
static int _uniq_error(char const * message, int ret);
|
|
static int _uniq_do(int opts, char const * fields, unsigned int skip,
|
|
FILE * infp, FILE * outfp);
|
|
static int _uniq(int opts, char const * fields, unsigned int skip,
|
|
char const * in, char const * out)
|
|
{
|
|
FILE * infp = stdin;
|
|
FILE * outfp = stdout;
|
|
int ret;
|
|
|
|
if(in != NULL && (infp = fopen(in, "r")) == NULL)
|
|
return _uniq_error(in, 1);
|
|
if(out != NULL && (outfp = fopen(out, "w")) == NULL)
|
|
{
|
|
fclose(infp);
|
|
return _uniq_error(out, 1);
|
|
}
|
|
ret = _uniq_do(opts, fields, skip, infp, outfp);
|
|
if(in != NULL)
|
|
{
|
|
if(fclose(infp) != 0)
|
|
ret = _uniq_error(in, 1);
|
|
if(out != NULL && fclose(outfp) != 0)
|
|
return _uniq_error(out, 1);
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
static int _uniq_error(char const * message, int ret)
|
|
{
|
|
fputs("uniq: ", stderr);
|
|
perror(message);
|
|
return ret;
|
|
}
|
|
|
|
static void _do_count(int opts, unsigned int skip, char * line, FILE * fp);
|
|
static int _uniq_do(int opts, char const * fields, unsigned int skip,
|
|
FILE * infp, FILE * outfp)
|
|
{
|
|
int ret = 0;
|
|
#define BUF 80
|
|
char * line = NULL;
|
|
int len = 0;
|
|
char * p;
|
|
|
|
for(;;)
|
|
{
|
|
if((p = realloc(line, len + BUF + 1)) == NULL)
|
|
{
|
|
free(line);
|
|
ret = _uniq_error("malloc", 1);
|
|
break;
|
|
}
|
|
line = p;
|
|
if(fgets(&line[len], BUF + 1, infp) == NULL)
|
|
{
|
|
if(!feof(infp))
|
|
ret = _uniq_error("fread", 1);
|
|
break;
|
|
}
|
|
for(p = &line[len]; *p != '\0' && *p != '\n'; p++);
|
|
len += BUF;
|
|
if(p == line + BUF)
|
|
continue;
|
|
if(*p == '\n')
|
|
*p = '\0';
|
|
#ifdef DEBUG
|
|
fprintf(stderr, "%s%s%s", "DEBUG: Got line \"", line, "\"\n");
|
|
#endif
|
|
_do_count(opts, skip, line, outfp);
|
|
line = NULL;
|
|
len = 0;
|
|
}
|
|
_do_count(opts, skip, NULL, outfp);
|
|
return ret;
|
|
}
|
|
|
|
static int _count_repeated(char * lastline, char * line, unsigned int skip);
|
|
static void _do_count(int opts, unsigned int skip, char * line, FILE * fp)
|
|
{
|
|
static char * lastline = NULL;
|
|
static unsigned int cnt = 1;
|
|
|
|
if(lastline == NULL)
|
|
{
|
|
lastline = line;
|
|
cnt = 1;
|
|
return;
|
|
}
|
|
if(line != NULL && _count_repeated(lastline, line, skip))
|
|
{
|
|
free(line);
|
|
cnt++;
|
|
return;
|
|
}
|
|
if(cnt == 1 && !(opts & OPTS_d)) /* line is not repeated */
|
|
fprintf(fp, "%s%s\n", opts & OPTS_c ? "1 " : "", lastline);
|
|
else if(cnt > 1 && !(opts & OPTS_u)) /* line is repeated */
|
|
{
|
|
if(opts & OPTS_c)
|
|
fprintf(fp, "%d ", cnt);
|
|
fprintf(fp, "%s\n", lastline);
|
|
}
|
|
free(lastline);
|
|
lastline = line;
|
|
cnt = 1;
|
|
}
|
|
|
|
/* PRE line and lastline are valid strings
|
|
* POST */
|
|
static int _count_repeated(char * lastline, char * line, unsigned int skip)
|
|
{
|
|
if(strlen(lastline) < skip)
|
|
return strlen(line) < skip;
|
|
if(strlen(line) < skip)
|
|
return 0;
|
|
if(strcmp(&lastline[skip], &line[skip]) == 0)
|
|
return 1;
|
|
return 0;
|
|
}
|
|
|
|
|
|
/* usage */
|
|
static int _usage(void)
|
|
{
|
|
fputs("Usage: uniq [-c|-d|-u][-f fields][-s char][input_file\
|
|
[output_file]]\n\
|
|
-c precede each output line with a count of the repetitions for the line\n\
|
|
-d suppress the writing of lines that are not repeated\n\
|
|
-s ignore the first char characters when doing comparisons\n\
|
|
-u suppress the writing of lines that are repeated\n", stderr);
|
|
return 1;
|
|
}
|
|
|
|
int main(int argc, char * argv[])
|
|
{
|
|
int opts = 0;
|
|
char * fields = NULL;
|
|
int skip = 0;
|
|
char * p;
|
|
char * in = NULL;
|
|
char * out = NULL;
|
|
int o;
|
|
|
|
while((o = getopt(argc, argv, "cduf:s:")) != -1)
|
|
switch(o)
|
|
{
|
|
case 'c':
|
|
opts |= OPTS_c;
|
|
break;
|
|
case 'd':
|
|
opts |= OPTS_d;
|
|
break;
|
|
case 's':
|
|
skip = strtol(optarg, &p, 10);
|
|
if(*optarg == '\0' || *p != '\0' || skip < 0)
|
|
return _usage();
|
|
break;
|
|
case 'u':
|
|
opts |= OPTS_u;
|
|
break;
|
|
case 'f':
|
|
/* FIXME */
|
|
default:
|
|
return _usage();
|
|
}
|
|
if(argc - optind >= 1)
|
|
{
|
|
in = argv[optind];
|
|
if(argc - optind == 2)
|
|
out = argv[optind+1];
|
|
else if(argc - optind > 2)
|
|
return _usage();
|
|
}
|
|
return _uniq(opts, fields, skip, in, out) == 0 ? 0 : 2;
|
|
}
|