/*

   dups.c

   Find duplicate files.

   Date:	1998-02-22 13:46:36
   Last Change:	1999-02-07 02:10:52

   Copyright (C) 1998 Sander van Malssen <svm@kozmix.ow.nl>

   This software is released under the GNU Public Licence. See the
   file `COPYING' for details.

*/

#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/types.h>


#undef	BUFSIZ
#define	BUFSIZ 512

#define FCMP_SAME	 2
#define FCMP_DUPL	 1
#define FCMP_DIFF	 0
#define FCMP_ERR	-1


const char *moi;

struct entry {
    struct entry *prev;
    struct entry *next;
    char *path;
    dev_t dev;
    ino_t ino;
    size_t size;
};

struct entry *list = NULL;


char *
prettify_path (const char *path)
{
    static char ret[NAME_MAX + 1], *q;
    int i;

    q = ret;
    for (i = 0; path[i]; i++) {
	if (i && path[i] == '/' && path[i-1] == '/')
	    continue;
	*q++ = path[i];
    }
    *q = '\0';
    return ret;
}


int
fcmp (const struct entry *e1, const struct entry *e2)
{
    int fd1, fd2, ret;
    char buf1[BUFSIZ], buf2[BUFSIZ];

    if (e1->dev == e2->dev && e1->ino == e2->ino)
	return FCMP_SAME;

    if (e1->size != e2->size)
	return FCMP_DIFF;

    /* OK, if we've come to this point we're going to have to find out
       the hard way. */

    if ((fd1 = open (e1->path, O_RDONLY)) < 0) {
	fprintf (stderr, "%s: fcmp: fopen %s: %s\n", moi, e1->path, strerror (errno));
	return FCMP_ERR;
    }
    if ((fd2 = open (e2->path, O_RDONLY)) < 0) {
	fprintf (stderr, "%s: fcmp: fopen %s: %s\n", moi, e2->path, strerror (errno));
	return FCMP_ERR;
    }

    while (1) {
	ssize_t r1, r2;

	r1 = read (fd1, buf1, BUFSIZ);
	if (r1 == -1) {
	    fprintf (stderr, "%s: fcmp: read %s: %s\n", moi, e1->path, strerror (errno));
	    ret = FCMP_ERR;
	    break;
	}
	r2 = read (fd2, buf2, BUFSIZ);
	if (r2 == -1) {
	    fprintf (stderr, "%s: fcmp: read %s: %s\n", moi, e2->path, strerror (errno));
	    ret = FCMP_ERR;
	    break;
	}

	if (r1 != r2) {
	    /* Can't normally happen since we'd have bailed out
               earlier if the files had different lengths, but perhaps
               some other process is busy writing to one of our files.
               Anyway, they're not the same. */
	    ret = FCMP_DIFF;
	    break;
	}

	if (r1 == 0) {
	    /* We've gotten to EOF without encountering differences.
               We have a duplicate. */
	    ret = FCMP_DUPL;
	    break;
	}

	if (memcmp (buf1, buf2, r1)) {
	    ret = FCMP_DIFF;
	    break;
	}
    }

    close (fd2);
    close (fd1);
    return ret;
}


void
find_same (void)
{
    struct entry *this;

    for (this = list; this; this = this->next) {
	struct entry *that;
	int count = 0;

	that = this->next;
	if (!that)
	    return;

	while (that) {
	    if (fcmp (this, that) == FCMP_DUPL) {
		struct entry *tmp;

		if (count == 0)
		    printf ("%s", prettify_path(this->path));
		printf (" %s", prettify_path(that->path));

		/* Save ourselves both some time and duplicate entries
                   further down the road by pulling this entry from
                   the list. */

		that->prev->next = that->next;
		if (that->next)
		    that->next->prev = that->prev;

		tmp = that;
		that = that->next;
		free (tmp->path);
		free (tmp);

		count++;
	    } else
		that = that->next;
	}
	if (count)
	    putchar ('\n');
    }
}


void
add_entry (char *path, struct stat st)
{
    struct entry *entry;

    if (!(entry = (struct entry *) malloc (sizeof(struct entry)))) {
	fprintf (stderr, "%s: add_entry: malloc: out of memory!\n", moi);
	exit (1);
    }

    entry->path = strdup (path);
    entry->dev = st.st_dev;
    entry->ino = st.st_ino;
    entry->size = st.st_size;

    entry->next = list;
    entry->prev = NULL;
    if (list)
	list->prev = entry;
    list = entry;
}


void
do_dir (const char *path)
{
    DIR *dir;
    struct dirent *dentry;

    if ((dir = opendir (path)) == NULL) {
	fprintf (stderr, "%s: do_dir: opendir %s: %s\n", moi, path, strerror (errno));
	return;
    }

    while (1) {
	struct stat st;
	char fullpath[NAME_MAX + 1];

	errno = 0;
	dentry = readdir (dir);
	if (!dentry) {
	    if (errno) {
		fprintf (stderr, "%s: do_dir: readdir %s: %s\n",
			 moi, dentry->d_name, strerror (errno));
		continue;
	    }
	    else /* Simply EOF on this directory. */
		break;
	}

	if (!strcmp (dentry->d_name, ".") || !strcmp (dentry->d_name, ".."))
	    continue;

	strncpy (fullpath, path, NAME_MAX);
	strncat (fullpath, "/", NAME_MAX);
	strncat (fullpath, dentry->d_name, NAME_MAX);

	if (lstat (fullpath, &st) == -1) {
	    fprintf (stderr, "%s: do_dir: lstat %s: %s\n", moi, fullpath, strerror (errno));
	    continue;
	}
	if (S_ISDIR (st.st_mode))
	    do_dir (fullpath);
	else if (S_ISREG (st.st_mode))
	    add_entry(fullpath, st);
    }
    closedir (dir);
}


int
main (int ac, char **av)
{
    if ((moi = strrchr (*av, '/')))
	moi++;
    else
	moi = *av;

    if (ac == 1) {
	do_dir (".");
    } else {
	for (av++; *av; av++) {
	    struct stat st;

	    if (stat (*av, &st) == -1) {
		fprintf (stderr, "%s: main: stat %s: %s\n", moi, *av, strerror (errno));
		continue;
	    }
	    if (!S_ISDIR (st.st_mode)) {
		fprintf (stderr, "%s: main: %s not a directory\n", moi, *av);
		continue;
	    }

	    do_dir (*av);
	}
    }

    find_same();

    exit (0);
}


/*
   Local variables:
   rm-trailing-spaces: t
   End:
*/
