From 8efee09aa7e1bab3657eb6741a02096cc36d2147 Mon Sep 17 00:00:00 2001 From: Aleksey Date: Wed, 15 Jun 2011 22:23:25 +0700 Subject: [PATCH] version 1.2.6 --- ChangeLog | 482 ++++++++++++++++++ Makefile | 249 ++++++++++ calc_sums.c | 852 ++++++++++++++++++++++++++++++++ calc_sums.h | 80 +++ common_func.c | 284 +++++++++++ common_func.h | 93 ++++ crc_print.c | 553 +++++++++++++++++++++ crc_print.h | 40 ++ crc_update.c | 381 +++++++++++++++ crc_update.h | 15 + dist/MD5.bat | 2 + dist/magnet.bat | 2 + dist/rhashrc.sample | 25 + file_mask.c | 84 ++++ file_mask.h | 25 + file_set.c | 638 ++++++++++++++++++++++++ file_set.h | 41 ++ find_file.c | 263 ++++++++++ find_file.h | 35 ++ librhash/Makefile | 30 +- librhash/crc32.c | 6 +- librhash/md4.c | 10 +- librhash/timing.c | 12 +- librhash/timing.h | 6 +- librhash/util.h | 12 +- output.c | 451 +++++++++++++++++ output.h | 49 ++ parse_cmdline.c | 947 ++++++++++++++++++++++++++++++++++++ parse_cmdline.h | 101 ++++ rhash.1 | 277 +++++++++++ rhash.1.win.sed | 11 + rhash.spec.in | 374 ++++++++++++++ rhash_main.c | 306 ++++++++++++ rhash_main.h | 41 ++ tests/test_rhash.sh | 175 +++++++ version.h | 1 + win32/dirent.h | 5 + win32/platform-dependent.h | 44 ++ win32/stdint.h | 13 + win32/unistd.h | 7 + win32/vc-2010/rhash.vcxproj | 306 ++++++++++++ win_utils.c | 503 +++++++++++++++++++ win_utils.h | 66 +++ 43 files changed, 7862 insertions(+), 35 deletions(-) create mode 100644 ChangeLog create mode 100644 Makefile create mode 100644 calc_sums.c create mode 100644 calc_sums.h create mode 100644 common_func.c create mode 100644 common_func.h create mode 100644 crc_print.c create mode 100644 crc_print.h create mode 100644 crc_update.c create mode 100644 crc_update.h create mode 100644 dist/MD5.bat create mode 100644 dist/magnet.bat create mode 100644 dist/rhashrc.sample create mode 100644 file_mask.c create mode 100644 file_mask.h create mode 100644 file_set.c create mode 100644 file_set.h create mode 100644 find_file.c create mode 100644 
find_file.h create mode 100644 output.c create mode 100644 output.h create mode 100644 parse_cmdline.c create mode 100644 parse_cmdline.h create mode 100644 rhash.1 create mode 100644 rhash.1.win.sed create mode 100644 rhash.spec.in create mode 100644 rhash_main.c create mode 100644 rhash_main.h create mode 100644 tests/test_rhash.sh create mode 100644 version.h create mode 100644 win32/dirent.h create mode 100644 win32/platform-dependent.h create mode 100644 win32/stdint.h create mode 100644 win32/unistd.h create mode 100644 win32/vc-2010/rhash.vcxproj create mode 100644 win_utils.c create mode 100644 win_utils.h diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 00000000..16ffefb7 --- /dev/null +++ b/ChangeLog @@ -0,0 +1,482 @@ +Tue Jun 14 2011 Aleksey + * === Version 1.2.6 === + +Sat Jun 11 2011 Aleksey + * allowed options to be intermixed with file names in arbitrary order + * switched option -G and the '%G' printf pattern to print GOST hash + * Bugfix: --output failed for cyrillic file name + +Wed Jun 8 2011 Aleksey + * librhash: better shared library compilation/testing support + +Mon Jun 6 2011 Aleksey + * librhash: exported benchmarking functions in the shared library + * librhash: added prefix to all functions to avoid poluting linker namespace + * librhash: fixed rare alignment bugs in rhash_print and EDON-R 512 + +Sat May 28 2011 Aleksey + * librhash: loading openssl at runtime if it is present + * Bugfix: LLVM GOST amd64 asm compilation error + +Wed May 18 2011 Aleksey + * === Version 1.2.5 === + * option --openssl allows to replace some algorithms by the OpenSSL ones + * Bugfix: incorrect recursive traversing of very long UTF-8 filepaths + +Wed Apr 27 2011 Aleksey + * Bugfix: corrected calculation of BTIH hash and torrent files + +Fri Apr 15 2011 Aleksey + * === Version 1.2.4 === + * option --benchmark-raw for machine-readable benchmark output format + * on Intel/AMD CPUs benchmark now prints the clocks-per-byte value + +Tue Apr 5 2011 
Aleksey + * changed config file locations + +Fri Apr 1 2011 Aleksey + * Bugfix: repaired --path-separator on linux/unix + +Sun Mar 27 2011 Aleksey + * === Version 1.2.3 === + +Fri Mar 25 2011 Aleksey + * one-line percent for linux/unix + +Mon Mar 14 2011 Aleksey + * added printf modificator %{mtime} to print the last modified date of a file + +Thu Feb 17 2011 Aleksey + * Bugfix: verification of base2-like formated md5 hash sums + +Fri Jan 14 2011 Aleksey + * === Version 1.2.2 === + * one-line percents (windows only) + +Tue Jan 11 2011 Aleksey + * supported EDON-R 256/512 hash sums + +Sun Dec 19 2010 Aleksey + * increased process priority when benchmarking on windows + +Thu Dec 16 2010 Aleksey + * Bugfix: eight hash sums were broken on PowerPC + * Bugfix: --accept/--crc-accept were not working since 1.1.9 + +Tue Dec 14 2010 Aleksey + * === Version 1.2.1 === + * Bugfix: GOST broken on OpenSolaris since 1.2.0 + * option --list-hashes: list names of all supported hashes, one per line + +Mon Nov 29 2010 Aleksey + * SHA 224/256/384/512 hash functions supported + * Bugfix: broken asm compilation on openbsd and freebsd + +Wed Nov 24 2010 Aleksey + * option --path-separator= for directories scanning + +Sun Nov 14 2010 Aleksey + * === Version 1.2.0 === + * --gost-cryptopro option: calculate GOST with CryptoPro parameters + * --gost-reverse option: reverse bytes in GOST hash sum + * Bugfix: btih/gost/ripemd/has160/snefru were not verified correctly in bsd and magnet formats + +Fri Oct 29 2010 Aleksey + * Bugfix: rhash compiled by MS VC skipped files of size >4Gb + +Fri Oct 15 2010 Aleksey + * === Version 1.1.9 === + * new interface for internal library librhash + +Mon Jul 5 2010 Ruslan Nikolaev + * GOST algorithm x86-64 assembler optimization + +Sun Apr 25 2010 Aleksey + * new options --uppercase and --lowercase + * Bugfix: GOST worked incorrectly when compiled by GCC with `-O0' + +Wed Apr 21 2010 Aleksey + * windows distribution updated + +Fri Apr 16 2010 Aleksey + * BugFix: 
options with string values were incorrectly loaded from config + +Wed Apr 14 2010 Aleksey + * === Version 1.1.8 === + * option --template=<file> to read printf-like template from <file> + +Mon Apr 12 2010 Xiangli Huang + * BugFix: `--recursive *' traversed parent directory .. under windows + * BugFix: `--check ' reported strange warning for dirs + +Mon Apr 12 2010 Aleksey + * printf-directives starting with capital letter print upper-cased hashes, e.g. %{Gost} + * %u directive switched to print url-encoded filename (alias for %{urlname}) + * ed2k links verification supported + +Fri Apr 9 2010 Aleksey + * BugFix: linking problem on OpenSolaris + * filenames in urls are now always utf8-encoded (Windows only fix) + +Wed Apr 7 2010 Aleksey + * '%B','%@' modifiers to print base64/raw representation of any hash (e.g. '%BM') + +Wed Mar 31 2010 Aleksey + * === Version 1.1.7 === + * option --btih to print BitTorrent infohash + * option --torrent to create torrent file + * option --bt-private for private torrents + * option --bt-piece-length to change torrent piece length + * option --bt-announce to set torrent announcement url + +Tue Mar 30 2010 Aleksey + * the -m option made to be an alias for --magnet + +Mon Mar 29 2010 Xiangli Huang + * print program version, when benchmarking + +Fri Mar 26 2010 Aleksey + * Bugfix: infinite loop while recursively updating hash files under Windows + +Thu Mar 4 2010 Aleksey + * maxdepth parameter now is checked to be a number + +Thu Feb 25 2010 Aleksey + * output tiger hash in the big-endian order + +Wed Feb 24 2010 Aleksey + * === Version 1.1.6 === + * man page updated + * now all supported hashes are verified when checking magnet links + * benchmark now reports the size of the hashed message + +Fri Feb 19 2010 Aleksey + * Bugfix: fixed GOST for big-endian systems + +Wed Feb 17 2010 Aleksey + * Bugfix: buffer overflow while parsing long command line under Windows + +Sun Feb 14 2010 Aleksey + * Bugfix: fixed HAS-160 for big-endian systems + +Wed Feb 3 
2010 Aleksey + * Bugfix: crash while printing sfv header for files modified before 1970 + +Fri Jan 29 2010 Aleksey + * Bugfix: sfv-hash symlink now sets default print format to SFV + * Bugfix: ed2k-link symlink did not work as expected + +Thu Jan 28 2010 Aleksey + * === Version 1.1.5 === + * option --utf8 under Windows, also UTF8 now is the default encoding + * Bugfix: non-existing files were reported twice in `-r --sfv' mode + +Wed Jan 27 2010 Aleksey + * option --embed-crc-delimiter= to insert before a crc sum in -e mode + * alias -e for --embed-crc + * alias -B for --benchmark + +Mon Jan 11 2010 Aleksey + * Bugfix: percents output fixed for files of more than 4Gb of data + +Fri Dec 18 2009 Aleksey + * AICH algorithm has been re-written to process files of unknown size like stdin, pipes, sockets + * ED2K switched to use eMule algorithm when filesize is an exact multiple of 9728000 bytes + +Thu Dec 17 2009 Aleksey + * Bugfix: buffer overflow when printing eDonkey links for 0-sized files + * Bugfix: --ripemd160 and --md4 option were broken + * added `%R' printf entity for RIPEMD-160 + +Mon Dec 14 2009 Aleksey + * === Version 1.1.4 === + * supported algorithms: RIPEMD-160, HAS-160, GOST, MD4, SNEFRU-128, SNEFRU-256 + * long printf format entities, e.g. %{md4}, %{has160}, %{gost}, %{snefru256} + * `u' printf modifier for uppercase hashes, e.g. %u{gost} + * switched to %{urlname} printf-entity for url-encoded file name + * useful symlinks are installed by `make install-symlinks' + +Sun Dec 6 2009 Aleksey + * WHIRLPOOL hash function supported, option --whirlpool + +Wed Dec 2 2009 Aleksey + * print file checking statistics when --check-embedded specified + +Sun Nov 29 2009 Aleksey + * === Version 1.1.3 === + * forbid simultaneous usage of --check, --update and --check-embedded options + +Sun Nov 22 2009 Aleksey + * Bugfix: Checking of md5 file always reported OK + * --check-embedded option to verify files by crc32 sum embedded in their names. 
+ * --embed-crc option to rename processed files by embedding crc32 sum into name. + +Mon Nov 9 2009 Aleksey + * --benchmark option now prints names of tested hash sums + * use magnet format as default if the program name contains 'magnet' + +Wed Jun 24 2009 Aleksey + * supported checking of files containing a single hash sum without a filename + +Mon Jun 15 2009 Aleksey + * === Version 1.1.2 === + * verification of files with magnet links supported + +Wed May 20 2009 Aleksey + * Bugfix: --skip-ok was broken since 1.1.0 + +Fri May 15 2009 Aleksey + * Bugfix: checking of ed2k hashes was broken since version 1.1.0 + * Bugfix: --verbose lead to crash under OpenSolaris when config file not present + +Mon Mar 23 2009 Aleksey + * === Version 1.1.1 === + * config file described in the man page + * Bugfix: buffer owerflow when printing tiger hash + +Sat Mar 21 2009 Aleksey + * Bugfix: some options couldn't be read from config file + +Sat Mar 14 2009 Aleksey + * === Version 1.1.0 === + * various small changes and refactoring + +Tue Mar 10 2009 Aleksey + * option --speed to print per-file and total speed statistics + +Thu Mar 5 2009 Aleksey + * option --output to write calculation and check results to a file + * option --log to log percents, speed and verbose messages + +Wed Mar 4 2009 Aleksey + * option --percents to show wget-like percents + +Tue Feb 26 2009 Aleksey + * Bugfix: fixed processing of unaligned messages in the get_crc32() function + +Sat Feb 14 2009 Aleksey + * === Version 1.0.8 === + * --magnet option supported to format sums as a magnet link + * Bugfix: printf option from config conflicted with command line + +Sun Dec 14 2008 Aleksey + * === Version 1.0.7 === + * config file supported to load default options values + * if --verbose, report verification errors as "sum is XXXXXXXX, should be YYYYYYYY" + * '%h' modifier changed to '%x' + +Fri Nov 14 2008 Aleksey + * === Version 1.0.6 === + * reg-file for FAR user menu + +Thu Oct 9 2008 Aleksey + * interpret '#' 
symbol as a comment + +Sat Sep 20 2008 ivan386 + * under windows skip files openned for writing + * Bugfix: printf arguments %p and %f corrected + +Sun Sep 14 2008 Aleksey + * === Version 1.0.5 === + +Wed Aug 6 2008 Aleksey + * '%b','%h' modifiers to print base32/hex representation of any hash (e.g. '%bH') + * supported -p '\0' symbol + * supported setting width for filesizes (e.g. -p '%12s') + +Tue Jul 22 2008 Aleksey + * --verbose prints read speed statistics to stderr after each file + * read buffer increased to 2 MiB + +Wed Jul 9 2008 Aleksey + * === Version 1.0.4 === + * '%u' prints URL-encoded filename + * EDonkey links now have URL-encoded filename and contain AICH hash + +Mon Jul 7 2008 Aleksey + * AICH hashsums supported, option --aich + +Sat Jun 28 2008 Aleksey + * === Version 1.0.3 === + * ed2k calculation fixed for files with 9728000 < filesize <= 9732096 + * Big-endian processors supported for all sums + +Sat Jun 14 2008 Aleksey + * === Version 1.0.2 === + +Fri Jun 6 2008 Aleksey + * --benchmark option added + * skip locked files under win32 when calculating 'em sums + +Tue May 20 2008 Aleksey + * Bugfix: updating of md5 files was broken + * Bugfix: more rigid parsing of base32/hex hash sums + +Wed May 15 2008 Aleksey + * === Version 1.0.1 === + * Bugfix: last line without '\n' couldn't be parsed + +Wed May 14 2008 Aleksey + * Bugfix: empty lines were not skipped, when verifying a crc file + * option '--skip-ok' to skip OK messages for successfuly verified files + +Tue Jan 22 2008 Aleksey + * option '-a' to print all supported hash sums + * Changed default behavior: if no formating option are set, sfv header is printed only for --crc32 + +Wed Dec 19 2007 Aleksey + * Bugfix: fixed buffer overflow for command line -p '%%%%d' + * Bugfix: fixed size calculation for stdin (rhash -p '%s' - = 56 + * Tiger hash optimised to be 5% faster + +Wed May 02 2007 Aleksey + * === Version 0.8.8 === + +Sun Apr 22 2007 Aleksey + * added options --accept and --crc-accept + 
* added --verbose option + * added --maxdepth option + * added check before verifying a crc file that it isn't a binary file + +Mon Apr 16 2007 Aleksey + * === Version 0.8.7 === + * Tiger hash sum optimised for IA32 + +Tue Apr 10 2007 Aleksey + * Bugfix: --update of sfv files worked incorrectly under windows + +Mon Apr 09 2007 Aleksey + * implemented Tiger hash function + +Sun Apr 01 2007 Aleksey + * added check before updating a crc file that it isn't a binary file + +Mon Mar 26 2007 Aleksey + * === Version 0.8.6 === + * Ctrl+C now prints a message and partial statistics + +Sat Mar 24 2007 Aleksey + * default format changed to SFV + +Mon Mar 19 2007 Aleksey + * updating of crc files supported + +Wed Jan 31 2007 Aleksey + * === Version 0.8.5 === + * supported many short options as one argument, e.g. '-MCEr' + * option -S (--sha1) changed to -H + * Bugfix: program crashed under BSD while printing SFV file header + +Sun Nov 05 2006 Aleksey + * === Version 0.8.4 === + * Bugfix: errors/miss stats calculation corrected + +Sun Oct 29 2006 Aleksey + * supported "-c -" option to check hash sums from stdin + * added stdout flushing after each processed file + * the program returns exit code 0 on success and 1 if an error occurred + +Fri Sep 08 2006 Aleksey + * corrected parsing of md5-like files with star-prepended filenames + +Wed Apr 19 2006 Aleksey + * checking of md5/sha1 files in *BSD format supported + * improved I/O errors handling + +Mon Apr 10 2006 Aleksey + * === Version 0.8.3 === + * checking of files in standard md5sum/sha1sum format supported + * default output format for md5/sha1/ed2k sums changed + * man page rewritten + +Thu Mar 30 2006 Aleksey + * === Version 0.8.2 === + * GCC 2.96 supported + +Thu Feb 23 2006 Aleksey + * Bugfix: files with .SFV extension (in uppercase) were skipped while recursive checking + +Wed Jan 25 2006 Aleksey + * === Version 0.8.1 === + * option --check now works with --recursive + * Bugfix: corrected output format when checking 
files + * Bugfix: files wasn't opened as binary on Windows when checking sums + +Mon Jan 23 2006 Aleksey + * === Version 0.8 === + * documentation now distributed with windows version + * some *.bat files added to windows version + +Sun Jan 22 2006 Aleksey + * --check option added, to check hash sums files + * --ansi option added (for Windows version only) + * program name is parsed now to specify default sums to compute + +Sat Jan 14 2006 Aleksey + * Bugfix: console windows version now uses OEM (DOS) character set for output + + * === Version 0.7 === + * some fixes in sfv format output + +Fri Sep 16 2005 Aleksey + * --recursive option added + * --ed2k-link option added + +Fri Sep 02 2005 Aleksey + * === Version 0.6 === + +Sun Aug 28 2005 Aleksey + * Bugfix: files wasn't opened as binary on win32 + * --sfv format now implies uppercase hashes + +Wed Aug 24 2005 Aleksey + * added .spec file and Makefile 'rpm' target + +Sun Aug 14 2005 Aleksey + * === Version 0.5 === + * the first public version + * win32 platform supported + +Mon Aug 08 2005 Aleksey + * Bugfix: fixed calculation of md5/ed2k hashes for AMD64 + +Fri Aug 05 2005 Aleksey + * === Version 0.06 === + * initial linux version supporting crc32, md5, ed2k and sha1 diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..98583a50 --- /dev/null +++ b/Makefile @@ -0,0 +1,249 @@ +# Samples of usage: +# compile with debug info: make OPTFLAGS=-g OPTLDFLAGS= TARGET=rhash.debug +# compile for pentiumpro: make OPTFLAGS="-O2 -DNDEBUG -march=i586 -mcpu=pentiumpro -fomit-frame-pointer" +# create rpm with statically linked program: make rpm LDFLAGS="-Llibrhash -lrhash -static -s -Wl,--gc-sections" +CC = gcc +VERSION = 1.2.6 +PREFIX = /usr/local +OPTFLAGS = -O2 -DNDEBUG -fomit-frame-pointer -ffunction-sections -fdata-sections +#DEFFLAGS = -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -ffunction-sections -fdata-sections +ADDCFLAGS = +ADDLDFLAGS = +CFLAGS := -pipe $(OPTFLAGS) $(ADDCFLAGS) \ + -Wall -W 
-Wstrict-prototypes -Wnested-externs -Winline -Wpointer-arith \ + -Wbad-function-cast -Wmissing-prototypes -Wmissing-declarations +OPTLDFLAGS = -s +LDFLAGS = -Llibrhash -lrhash $(OPTLDFLAGS) $(ADDLDFLAGS) +HEADERS = calc_sums.h crc_print.h common_func.h crc_update.h file_mask.h file_set.h find_file.h output.h parse_cmdline.h rhash_main.h win_utils.h version.h +SOURCES = calc_sums.c crc_print.c common_func.c crc_update.c file_mask.c file_set.c find_file.c output.c parse_cmdline.c rhash_main.c win_utils.c +OBJECTS = calc_sums.o crc_print.o common_func.o crc_update.o file_mask.o file_set.o find_file.o output.o parse_cmdline.o rhash_main.o win_utils.o +OUTDIR = +PROGNAME = rhash +TARGET = $(OUTDIR)$(PROGNAME) +SYMLINKS = sfv-hash tiger-hash tth-hash whirlpool-hash has160-hash gost-hash ed2k-link magnet-link +SPECFILE = $(PROGNAME).spec +LIN_DIST_FILES = Makefile ChangeLog INSTALL COPYING README $(SPECFILE) $(SPECFILE).in \ + $(SOURCES) $(HEADERS) tests/test_rhash.sh rhash.1 rhash.1.win.sed rhash.1.html rhash.1.txt +WIN_DIST_FILES = dist/MD5.bat dist/magnet.bat dist/rhashrc.sample +WIN_SRC_FILES = win32/dirent.h win32/stdint.h win32/unistd.h win32/platform-dependent.h \ + win32/vc-2010/rhash.vcxproj +LIBRHASH_FILES = librhash/algorithms.c librhash/algorithms.h \ + librhash/byte_order.c librhash/byte_order.h librhash/timing.c librhash/timing.h \ + librhash/plug_openssl.c librhash/plug_openssl.h librhash/rhash.c librhash/rhash.h \ + librhash/aich.c librhash/aich.h librhash/crc32.c librhash/crc32.h \ + librhash/ed2k.c librhash/ed2k.h librhash/edonr.c librhash/edonr.h \ + librhash/gost.c librhash/gost.h librhash/has160.c librhash/has160.h \ + librhash/hex.c librhash/hex.h librhash/md4.c librhash/md4.h librhash/md5.c librhash/md5.h \ + librhash/ripemd-160.c librhash/ripemd-160.h librhash/sha1.c librhash/sha1.h \ + librhash/sha256.c librhash/sha256.h librhash/sha512.c librhash/sha512.h \ + librhash/snefru.c librhash/snefru.h librhash/tiger.c librhash/tiger.h \ + 
librhash/tiger_sbox.c librhash/tth.c librhash/tth.h librhash/whirlpool.c \ + librhash/whirlpool.h librhash/whirlpool_sbox.c librhash/test_sums.c \ + librhash/test_sums.h librhash/torrent.h librhash/torrent.c \ + librhash/util.c librhash/util.h librhash/config.h librhash/Makefile +DIST_FILES = $(LIN_DIST_FILES) $(LIBRHASH_FILES) $(WIN_DIST_FILES) $(WIN_SRC_FILES) +WIN_SUFFIX = win32 +ARCHIVE_BZIP = rhash-$(VERSION)-src.tar.bz2 +ARCHIVE_GZIP = rhash-$(VERSION)-src.tar.gz +ARCHIVE_DEB_GZ = ../rhash_$(VERSION).orig.tar.gz +ARCHIVE_7Z = rhash-$(VERSION)-src.tar.7z +ARCHIVE_ZIP = rhash-$(VERSION)-$(WIN_SUFFIX).zip +WIN_ZIP_DIR = RHash-$(VERSION)-$(WIN_SUFFIX) +DESTDIR = +BINDIR = $(PREFIX)/bin +MANDIR = $(PREFIX)/share/man +RPMTOP = rpms +RPMDIRS = SOURCES SPECS BUILD SRPMS RPMS +LIBRHASH = librhash/librhash.a +# Set variables according to GNU coding standard +INSTALL = install +INSTALL_PROGRAM = $(INSTALL) -m 755 +INSTALL_DATA = $(INSTALL) -m 644 + +all: $(TARGET) +install: install-program install-symlinks +uninstall: uninstall-program uninstall-symlinks +check: check-version + +# creating archives +dist: gzip +gzip: check-version $(ARCHIVE_GZIP) +bzip: check-version $(ARCHIVE_BZIP) +7z: check-version $(ARCHIVE_7Z) +zip : $(ARCHIVE_ZIP) +win-dist : $(ARCHIVE_ZIP) + +install-program: all + $(INSTALL) -d $(DESTDIR)$(BINDIR) $(DESTDIR)$(MANDIR)/man1 $(DESTDIR)/etc + $(INSTALL_PROGRAM) $(TARGET) $(DESTDIR)$(BINDIR) + $(INSTALL_DATA) rhash.1 $(DESTDIR)$(MANDIR)/man1/rhash.1 + sed -e 's/\x0D//g' dist/rhashrc.sample > rhashrc && $(INSTALL_DATA) rhashrc $(DESTDIR)/etc/rhashrc + rm -f rhashrc + +install-symlinks: + test -x $(DESTDIR)$(BINDIR)/$(PROGNAME) + for f in $(SYMLINKS); do ln -s rhash $(DESTDIR)$(BINDIR)/$$f; done + cd $(DESTDIR)$(MANDIR)/man1 && for f in $(SYMLINKS); do ln -s rhash.1* $$f.1; done + +uninstall-program: + rm -f $(DESTDIR)$(BINDIR)/$(PROGNAME) + rm -f $(DESTDIR)$(MANDIR)/man1/rhash.1 + +uninstall-symlinks: + for f in $(SYMLINKS); do rm -f 
$(DESTDIR)$(BINDIR)/$$f; done + +install-lib-static: + cd librhash && make install-lib-static + +install-lib-shared: + cd librhash && make install-lib-shared + +lib-static: $(LIBRHASH) + +lib-shared : + cd librhash && make lib-shared + +test-hashes: + cd librhash && make test + +test: $(TARGET) test-hashes + chmod +x tests/test_$(PROGNAME).sh + tests/test_$(PROGNAME).sh + +version.h : Makefile + echo "#define VERSION \"$(VERSION)\"" > version.h + +check-version: version.h + grep -q '\* === Version $(VERSION) ===' ChangeLog + grep -q "^- === Version $(VERSION) ===" rhash.spec.in +# [ ! -f debian/changelog ] || grep -q '^rhash ($(VERSION)-' debian/changelog + grep -q '^#define VERSION "$(VERSION)"' version.h + [ -s rhash.1.txt -a -s rhash.1.html ] + +$(LIBRHASH): $(LIBRHASH_FILES) + cd librhash && make lib-static + +$(TARGET): $(OBJECTS) $(LIBRHASH) + $(CC) $(OBJECTS) -o $(TARGET) $(LDFLAGS) + +# NOTE: dependences were generated by 'gcc -Ilibrhash -MM *.c' +# we are using plain old makefile style to support BSD make +calc_sums.o: calc_sums.c librhash/hex.h librhash/rhash.h \ + librhash/torrent.h common_func.h librhash/util.h parse_cmdline.h \ + rhash_main.h file_set.h calc_sums.h crc_print.h output.h win_utils.h \ + version.h + $(CC) -c $(CFLAGS) $< -o $@ + +common_func.o: common_func.c librhash/hex.h win_utils.h parse_cmdline.h \ + common_func.h librhash/util.h + $(CC) -c $(CFLAGS) $< -o $@ + +crc_print.o: crc_print.c librhash/hex.h librhash/byte_order.h \ + common_func.h librhash/util.h calc_sums.h librhash/rhash.h \ + parse_cmdline.h crc_print.h + $(CC) -c $(CFLAGS) $< -o $@ + +crc_update.o: crc_update.c common_func.h librhash/util.h win_utils.h \ + parse_cmdline.h output.h rhash_main.h file_set.h calc_sums.h \ + librhash/rhash.h file_mask.h crc_update.h + $(CC) -c $(CFLAGS) $< -o $@ + +file_mask.o: file_mask.c common_func.h librhash/util.h file_mask.h + $(CC) -c $(CFLAGS) $< -o $@ + +file_set.o: file_set.c librhash/hex.h librhash/crc32.h common_func.h \ + 
librhash/util.h crc_print.h parse_cmdline.h rhash_main.h output.h \ + file_set.h calc_sums.h librhash/rhash.h + $(CC) -c $(CFLAGS) $< -o $@ + +find_file.o: find_file.c common_func.h librhash/util.h win_utils.h \ + find_file.h + $(CC) -c $(CFLAGS) $< -o $@ + +# output.o gets the same explicit recipe as every other object instead of relying on make's built-in .c.o rule +output.o: output.c librhash/rhash.h common_func.h librhash/util.h \ + calc_sums.h parse_cmdline.h rhash_main.h output.h + $(CC) -c $(CFLAGS) $< -o $@ + +parse_cmdline.o: parse_cmdline.c librhash/rhash.h common_func.h \ + librhash/util.h win_utils.h file_mask.h output.h rhash_main.h version.h \ + parse_cmdline.h + $(CC) -c $(CFLAGS) $< -o $@ + +rhash_main.o: rhash_main.c librhash/torrent.h common_func.h \ + librhash/util.h win_utils.h find_file.h file_set.h calc_sums.h \ + librhash/rhash.h crc_update.h file_mask.h crc_print.h parse_cmdline.h \ + output.h rhash_main.h + $(CC) -c $(CFLAGS) $< -o $@ + +win_utils.o: win_utils.c common_func.h librhash/util.h version.h \ + parse_cmdline.h rhash_main.h win_utils.h + $(CC) -c $(CFLAGS) $< -o $@ + +dist/rhash.1.html: rhash.1 rhash.1.win.sed + sed -f rhash.1.win.sed rhash.1 | rman -fHTML -roff | \ + sed -e '/ dist/rhash.1.html + +rhash.1.html: rhash.1 + -which rman &>/dev/null && (rman -fHTML -roff rhash.1 | sed -e '/ rhash.1.html) + +rhash.1.txt: rhash.1 + -which groff &>/dev/null && (groff -t -e -mandoc -Tascii rhash.1 | sed -e 's/.\[[0-9]*m//g' > rhash.1.txt) + +cpp-doc: + cppdoc_cmd -title=RHash -company=Animegorodok -classdir=classdoc -module="cppdoc-standard" -overwrite -extensions="c,h" -languages="c=cpp,h=cpp" -generate-deprecations-list=false $(SOURCES) $(HEADERS) ./Documentation/CppDoc/index.html + +permissions: + chmod -x $(DIST_FILES) + chmod +x tests/test_$(PROGNAME).sh + +$(ARCHIVE_GZIP): $(DIST_FILES) + make permissions + rm -rf 
$(PROGNAME)-$(VERSION) + mkdir $(PROGNAME)-$(VERSION) + cp -rl --parents $(DIST_FILES) $(PROGNAME)-$(VERSION)/ + tar cjf $(ARCHIVE_BZIP) $(PROGNAME)-$(VERSION)/ + rm -rf $(PROGNAME)-$(VERSION) + +$(ARCHIVE_7Z): $(DIST_FILES) + make permissions + rm -rf $(PROGNAME)-$(VERSION) + mkdir $(PROGNAME)-$(VERSION) + cp -rl --parents $(DIST_FILES) $(PROGNAME)-$(VERSION)/ + tar cf - $(PROGNAME)-$(VERSION)/ | 7zr a -si $(ARCHIVE_7Z) + rm -rf $(PROGNAME)-$(VERSION) + +$(ARCHIVE_ZIP): $(WIN_DIST_FILES) dist/rhash.1.html rhash.1.txt + [ -s dist/rhash.1.html -a -x $(TARGET) ] + -rm -rf $(WIN_ZIP_DIR) + mkdir $(WIN_ZIP_DIR) + cp $(TARGET).exe dist/rhash.1.html rhash.1.txt ChangeLog $(WIN_DIST_FILES) $(WIN_ZIP_DIR)/ + -[ -f $(OUTDIR)libeay32.dll ] && cp $(OUTDIR)libeay32.dll $(WIN_ZIP_DIR)/ + zip -9r $(ARCHIVE_ZIP) $(WIN_ZIP_DIR) + rm -rf $(WIN_ZIP_DIR) + +$(ARCHIVE_DEB_GZ) : $(DIST_FILES) + make $(ARCHIVE_GZIP) + mv -f $(ARCHIVE_GZIP) $(ARCHIVE_DEB_GZ) + +# packaging +$(SPECFILE): $(SPECFILE).in Makefile + sed -e 's/@VERSION@/$(VERSION)/' $(SPECFILE).in > $(SPECFILE) + +rpm: gzip + -for i in $(RPMDIRS); do mkdir -p $(RPMTOP)/$$i; done + cp -f $(ARCHIVE_GZIP) $(RPMTOP)/SOURCES + rpmbuild -ba --clean --define "_topdir `pwd`/$(RPMTOP)" $(SPECFILE) + mv -f `find $(RPMTOP) -name "rhash-*$(VERSION)*.rpm"` . 
+ rm -rf $(RPMTOP) + +dist-clean: clean + +clean: + cd librhash && make clean + rm -f *.o $(TARGET) diff --git a/calc_sums.c b/calc_sums.c new file mode 100644 index 00000000..a3472502 --- /dev/null +++ b/calc_sums.c @@ -0,0 +1,852 @@
+/* calc_sums.c - crc calculating and printing functions */
+
+#include "common_func.h" /* should be included before the C library files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h> /* free() */
+#include <unistd.h> /* read() */
+#include <fcntl.h> /* open() */
+#include <time.h> /* localtime(), time() */
+#include <sys/stat.h> /* stat() */
+#include <errno.h>
+#include <assert.h>
+
+#include "librhash/hex.h"
+#include "librhash/rhash.h"
+#include "librhash/timing.h"
+#include "parse_cmdline.h"
+#include "rhash_main.h"
+#include "file_set.h"
+#include "crc_print.h"
+#include "output.h"
+#include "win_utils.h"
+#include "version.h"
+
+#include "calc_sums.h"
+
+/**
+ * Initialize BTIH hash function. Unlike other algorithms BTIH
+ * requires more data for correct computation.
+ *
+ * The function sends to the hash context the base name and the size of
+ * the file, the program name with version, and the torrent related
+ * settings found in the global options (private flag, announce url,
+ * piece length).
+ *
+ * @param info the file data; info->rctx must already be created
+ *             with the RHASH_BTIH bit set
+ */
+static void init_btih_data(struct file_info *info)
+{
+    assert((info->rctx->hash_id & RHASH_BTIH) != 0);
+
+    /* NOTE(review): file_info_get_utf8_print_path() is documented in this file
+     * to return NULL when the conversion fails - confirm that get_basename()
+     * tolerates a NULL argument */
+    rhash_transmit(RMSG_BT_ADD_FILE, info->rctx, RHASH_STR2UPTR((char*)get_basename(file_info_get_utf8_print_path(info))), (rhash_uptr_t)&info->size);
+    rhash_transmit(RMSG_BT_SET_PROGRAM_NAME, info->rctx, RHASH_STR2UPTR(PROGRAM_NAME "/" VERSION), 0);
+
+    if(opt.flags & OPT_BT_PRIVATE) {
+        rhash_transmit(RMSG_BT_SET_OPTIONS, info->rctx, RHASH_BT_OPT_PRIVATE, 0);
+    }
+
+    if(opt.bt_announce) {
+        rhash_transmit(RMSG_BT_SET_ANNOUNCE, info->rctx, RHASH_STR2UPTR(opt.bt_announce), 0);
+    }
+
+    if(opt.bt_piece_length) {
+        /* NOTE(review): the piece length goes through the string-to-uptr macro;
+         * confirm this matches the declared type of opt.bt_piece_length */
+        rhash_transmit(RMSG_BT_SET_PIECE_LENGTH, info->rctx, RHASH_STR2UPTR(opt.bt_piece_length), 0);
+    }
+}
+
+/**
+ * Calculate hash sums simultaneously, according to the info->sums.flags.
+ * Calculated hashes are stored in info->rctx.
+ *
+ * @param info file data. 
The info->full_path can be "-" to denote stdin
+ * @return 0 on success, -1 on fail with error code stored in errno
+ */
+static int calc_sums(struct file_info *info)
+{
+    FILE* fd = stdin; /* stays stdin when the path is "-" */
+    int res;
+
+    if(IS_DASH_STR(info->full_path)) {
+        info->print_path = "(stdin)";
+
+#ifdef _WIN32
+        /* using 0 instead of _fileno(stdin). _fileno() is undefined under 'gcc -ansi' */
+        if(setmode(0, _O_BINARY) < 0) {
+            return -1;
+        }
+#endif
+    } else {
+        struct rsh_stat_struct stat_buf;
+        /* skip non-existing files */
+        if(rsh_stat(info->full_path, &stat_buf) < 0) {
+            return -1;
+        }
+
+        /* a directory can't be verified as a file */
+        if((opt.mode & (MODE_CHECK | MODE_CHECK_EMBEDDED)) && S_ISDIR(stat_buf.st_mode)) {
+            errno = EISDIR;
+            return -1;
+        }
+
+        info->size = stat_buf.st_size; /* total size, in bytes */
+        IF_WINDOWS(win32_set_filesize64(info->full_path, &info->size)); /* set correct filesize for large files under win32 */
+
+        /* nothing to hash: the size is the only thing requested */
+        if(!info->sums.flags) return 0;
+
+        /* skip files opened with exclusive rights without reporting an error */
+        fd = rsh_fopen_bin(info->full_path, "rb");
+        if(!fd) {
+            return -1;
+        }
+    }
+
+    assert(info->rctx == 0);
+    info->rctx = rhash_init(info->sums.flags);
+    if(!info->rctx) {
+        /* the context could not be created: release the file, but report
+         * the error code left by rhash_init(), not the one of fclose() */
+        int saved_errno = errno;
+        if(fd != stdin) fclose(fd);
+        errno = saved_errno;
+        return -1;
+    }
+
+    /* initialize bittorrent data */
+    if(info->sums.flags & RHASH_BTIH) {
+        init_btih_data(info);
+    }
+
+    /* report hashing progress if percents output is enabled */
+    if(percents_output->update != 0) {
+        rhash_set_callback(info->rctx, (rhash_callback_t)percents_output->update, info);
+    }
+
+    /* read and hash file content */
+    if((res = rhash_file_update(info->rctx, fd)) != -1) {
+        rhash_final(info->rctx, 0); /* finalize hashing */
+
+        /* the hashed length is authoritative, e.g. for stdin the size is unknown beforehand */
+        info->size = info->rctx->msg_size;
+        rhash_data.total_size += info->size;
+    }
+
+    if(fd != stdin) {
+        /* fclose() may overwrite errno, which the caller reads on failure */
+        int saved_errno = errno;
+        fclose(fd);
+        if(res == -1) errno = saved_errno;
+    }
+    return res;
+}
+
+/**
+ * Free memory allocated by given file_info structure. 
+ *
+ * @param info pointer to the structure to de-initialize
+ */
+void file_info_destroy(struct file_info* info)
+{
+    free(info->utf8_print_path);
+    free(info->allocated_ptr);
+    rhash_free(info->rctx);
+}
+
+/**
+ * Store print_path in a file_info struct, replacing if needed
+ * system path separators with the one specified by the user
+ * command line option.
+ *
+ * When a replacement is needed the path is duplicated and the copy is
+ * owned by info->allocated_ptr (released by file_info_destroy()).
+ * A copy stored by a previous call is freed, so the function can be
+ * called repeatedly for the same structure.
+ * Otherwise print_path itself is stored and must outlive the structure.
+ *
+ * @param info pointer to the file_info structure to change
+ * @param print_path the print path to store
+ */
+static void file_info_set_print_path(struct file_info* info, const char* print_path)
+{
+    /* check if path separator was specified by command line options */
+    if(opt.path_separator) {
+        const char wrong_sep = (opt.path_separator == '/' ? '\\' : '/');
+        const char* first = strchr(print_path, wrong_sep);
+
+        if(first != NULL) {
+            /* duplicate before releasing the old copy: print_path may point into it */
+            char* copy = rsh_strdup(print_path);
+            char* p = copy + (first - print_path);
+
+            /* replace wrong_sep in the copy with separator defined by options */
+            for(; *p; p++) {
+                if(*p == wrong_sep) *p = opt.path_separator;
+            }
+
+            free(info->allocated_ptr);
+            info->allocated_ptr = copy;
+            info->print_path = copy;
+            return;
+        }
+    }
+
+    /* if the path was not changed, then just store the value */
+    info->print_path = print_path;
+}
+
+/**
+ * Return utf8 version of print_path.
+ * The converted string is cached in info->utf8_print_path; when the
+ * program already works in utf8 the print_path itself is returned.
+ *
+ * @param info file information
+ * @return utf8 string on success, NULL if couldn't convert.
+ */
+const char* file_info_get_utf8_print_path(struct file_info* info)
+{
+    if(info->utf8_print_path != NULL) {
+        return info->utf8_print_path;
+    }
+    if(is_utf8()) {
+        return info->print_path;
+    }
+    info->utf8_print_path = to_utf8(info->print_path);
+    return info->utf8_print_path;
+}
+
+/* functions to calculate and print file sums */
+
+/**
+ * Search for a crc32 hash sum in the given file name.
+ *
+ * @param filepath the path to the file.
+ * @param crc32_be pointer to integer to receive parsed hash sum.
+ * @return non zero if crc32 was found, zero otherwise. 
+ */ +static int find_embedded_crc32(const char* filepath, unsigned* crc32_be) +{ + const char* e = filepath + strlen(filepath) - 10; + + /* search for the sum enclosed in brackets */ + for(; e >= filepath && !IS_PATH_SEPARATOR(*e); e--) { + if((*e == '[' && e[9] == ']') || (*e == '(' && e[9] == ')')) { + const char *p = e + 8; + for(; p > e && IS_HEX(*p); p--); + if(p == e) { + rhash_hex_to_byte(e + 1, (char unsigned*)crc32_be, 8); + return 1; + } + e -= 9; + } + } + return 0; +} + +/** + * Rename given file inserting its crc32 sum enclosed in braces just before + * the file extension. + * + * @param info pointer to the data of the file to rename. + */ +int rename_file_to_embed_crc32(struct file_info *info) +{ + size_t len = strlen(info->full_path); + const char* p = info->full_path + len; + const char* c = p - 1; + char* new_path; + char* insertion_point; + unsigned crc32_be; + + /* check that filename doesn't end with this sum */ + if(find_embedded_crc32(info->print_path, &crc32_be)) { + char calculated_crc32[9]; + rhash_print(calculated_crc32, info->rctx, RHASH_CRC32, RHPR_UPPERCASE); + + if(crc32_be == info->sums.crc32.be) { + return 0; + } else { + log_msg("warning: wrong embedded sum, should be %s\n", calculated_crc32); + } + } + + /* find file extension (the point to insert the hash sum) */ + for(; c >= info->full_path && !IS_PATH_SEPARATOR(*c); c--) { + if(*c == '.') { + p = c; + break; + } + } + + /* now p is the point to insert the 10-bytes hash string */ + new_path = rsh_malloc(len + 11); + insertion_point = new_path + (p - info->full_path); + memcpy(new_path, info->full_path, p - info->full_path); + if(opt.embed_crc_delimiter && *opt.embed_crc_delimiter) *(insertion_point++) = *opt.embed_crc_delimiter; + rhash_print(insertion_point+1, info->rctx, RHASH_CRC32, RHPR_UPPERCASE); + insertion_point[0] = '['; + insertion_point[9] = ']'; /* note: overrides '\0' inserted by rhash_print_sum() */ + strcpy(insertion_point + 10, p); + + /* try to rename */ + 
if(rename(info->full_path, new_path) < 0) { + fprintf(rhash_data.log, PROGRAM_NAME ": can't move %s to %s: %s\n", info->full_path, new_path, strerror(errno)); + free(new_path); + return -1; + } + + /* change file name in the file info structure */ + if(info->print_path >= info->full_path && info->print_path < p) { + file_info_set_print_path(info, new_path + len - strlen(info->print_path)); + } else { + file_info_set_print_path(info, new_path); + } + + free(info->full_path); + info->full_path = new_path; + return 0; +} + +/** + * Save torrent file. + * + * @param info information about the hashed file + */ +static void save_torrent(struct file_info* info) +{ + char *str; + FILE* fd; + struct rsh_stat_struct stat_buf; + size_t path_len = strlen(info->full_path); + size_t text_len; + char* path = (char*)rsh_malloc(path_len + 9); + + /* append .torrent extension to the file path */ + memcpy(path, info->full_path, path_len); + memcpy(path + path_len, ".torrent", 9); + + /* get torrent file content */ + text_len = rhash_transmit(RMSG_BT_GET_TEXT, info->rctx, (unsigned long)&str, 0); + assert(text_len != RHASH_ERROR); + + if(rsh_stat(path, &stat_buf) >= 0) { + errno = EEXIST; + log_file_error(path); + } else { + fd = rsh_fopen_bin(path, "wb"); + if(fd && text_len == fwrite(str, 1, text_len, fd) && !ferror(fd)) { + log_msg("%s saved\n", path); + } else { + log_file_error(path); + } + if(fd) fclose(fd); + } + free(path); +} + +/** + * Calculate and print file hash sums using printf format. 
+ * + * @param out a stream to print to + * @param filepath path to the file to calculate sums for + * @param fullpath fullpath to the file relative to the current directory + * @return 0 on success, -1 on fail + */ +int calculate_and_print_sums(FILE* out, const char *print_path, const char *full_path, struct rsh_stat_struct* stat_buf) +{ + struct file_info info; + timedelta_t timer; + int res = 0; + + memset(&info, 0, sizeof(info)); + info.full_path = rsh_strdup(full_path); + file_info_set_print_path(&info, print_path); + info.size = 0; + + info.sums.flags = opt.sum_flags; + + if( IS_DASH_STR(full_path) ) { + print_path = "(stdin)"; + memset(&info.stat_buf, 0, sizeof(info.stat_buf)); + } else { + if(stat_buf != NULL) { + memcpy(&info.stat_buf, stat_buf, sizeof(info.stat_buf)); + } else { + if(rsh_stat(full_path, (stat_buf = &info.stat_buf)) < 0) { + log_file_error(full_path); + free(info.full_path); + file_info_destroy(&info); + return -1; + } + } + if(S_ISDIR(stat_buf->st_mode)) return 0; /* don't handle directories */ + + info.size = stat_buf->st_size; /* total size, in bytes */ + IF_WINDOWS(win32_set_filesize64(info.full_path, &info.size)); /* set correct filesize for large files under win32 */ + } + + /* initialize percents output */ + init_percents(&info); + rhash_timer_start(&timer); + + if(info.sums.flags) { + /* calculate sums */ + if(calc_sums(&info) < 0) { + /* print error unless sharing access error occurred */ + if(errno == EACCES) return 0; + log_file_error(full_path); + res = -1; + } + } + + info.time = rhash_timer_stop(&timer); + finish_percents(&info, res); + + if(opt.mode & MODE_TORRENT) { + save_torrent(&info); + } + + if(opt.flags & OPT_EMBED_CRC) { + /* rename the file */ + rename_file_to_embed_crc32(&info); + } + + if((opt.mode & MODE_UPDATE) && opt.fmt == FMT_SFV) { + print_sfv_header_line(rhash_data.upd_fd, info.print_path, info.full_path); + if(opt.flags & OPT_VERBOSE) { + print_sfv_header_line(rhash_data.log, info.print_path, 
info.full_path); + fflush(rhash_data.log); + } + } + + if(rhash_data.print_list && res >= 0) { + print_line(out, rhash_data.print_list, &info); + fflush(out); + + /* duplicate calculated line to stderr or log file if verbose */ + if( (opt.mode & MODE_UPDATE) && (opt.flags & OPT_VERBOSE) ) { + print_line(rhash_data.log, rhash_data.print_list, &info); + fflush(rhash_data.log); + } + + if((opt.flags & OPT_SPEED) && info.sums.flags) { + print_file_time_stats(&info); + } + } + free(info.full_path); + file_info_destroy(&info); + return res; +} + +/** + * Return pointer to the bynary hash by hash sum id. + * + * @param sums rhash_sums_t structure holding hashes for all supported algorithms + * @param hash_id hash sum id + * @return pointer to the hash sum digest + */ +unsigned char* rhash_get_digest_ptr(struct rhash_sums_t *sums, unsigned hash_id) +{ + switch(hash_id) { + case RHASH_CRC32: + return sums->crc32.digest; + case RHASH_MD4: + return sums->md4_digest; + case RHASH_MD5: + return sums->md5_digest; + case RHASH_SHA1: + return sums->sha1_digest; + case RHASH_TIGER: + return sums->tiger_digest; + case RHASH_TTH: + return sums->tth_digest; + case RHASH_ED2K: + return sums->ed2k_digest; + case RHASH_AICH: + return sums->aich_digest; + case RHASH_WHIRLPOOL: + return sums->whirlpool_digest; + case RHASH_RIPEMD160: + return sums->ripemd160_digest; + case RHASH_GOST: + return sums->gost_digest; + break; + case RHASH_GOST_CRYPTOPRO: + return sums->gost_cryptopro_digest; + break; + case RHASH_SNEFRU256: + return sums->snefru256_digest; + break; + case RHASH_SNEFRU128: + return sums->snefru128_digest; + break; + case RHASH_HAS160: + return sums->has160_digest; + break; + case RHASH_BTIH: + return sums->btih_digest; + break; + case RHASH_SHA224: + return sums->sha224_digest; + break; + case RHASH_SHA256: + return sums->sha256_digest; + break; + case RHASH_SHA384: + return sums->sha384_digest; + break; + case RHASH_SHA512: + return sums->sha512_digest; + break; + case 
RHASH_EDONR256: + return sums->edonr256_digest; + break; + case RHASH_EDONR512: + return sums->edonr512_digest; + break; + default: + assert(0); /* impossible hash_id */ + } + return 0; +} + +/** + * Retrive binary values of all caculated hash sums from the info->rctx + * RHash context and put them into the info->sums structure. + * + * @param info information about the file to process + */ +static void fill_sums_struct(struct file_info *info) +{ + unsigned hash_ids = (info->sums.flags & RHASH_ALL_HASHES); + unsigned id = hash_ids & -(int)hash_ids; + assert(info->rctx != NULL); + assert(info->sums.flags != 0); + if(!id) return; /* protection, do nothing */ + + for(; id <= hash_ids; id <<= 1) { + if(id & hash_ids) { + char* digest = (char*)rhash_get_digest_ptr(&info->sums, id); + rhash_print(digest, info->rctx, id, RHPR_RAW); + } + } +} + +/** + * Forward and reverse compare. Compares two byte strings using + * directed and reversed byte order. The function is used to compare + * GOST hashes which can be reversed, because byte order of + * an output string is not specified by GOST standart. + * The function acts almost the same way as memcmp, by returning + * always 1 for different strings. + * + * @param mem1 the first byte string + * @param mem2 the second byte string + * @param size the length of byte strings to much + * 0 if strings are matched, 1 otherwise. + */ +static int fr_cmp(const void* mem1, const void* mem2, size_t size) +{ + const char *p1, *p2, *pe; + if(memcmp(mem1, mem2, size) == 0) return 0; + p1 = (const char*)mem1, p2 = ((const char*)mem2) + size - 1; + for(pe = ((const char*)mem1) + size / 2; p1 < pe; p1++, p2--) { + if(*p1 != *p2) return 1; + } + return 0; +} + +/** + * Verify hash sums of the file. 
+ * + * @param info structure file path to process + * @return zero on success, -1 on file error, -2 if hash sums are different + */ +static int verify_sums(struct file_info *info) +{ + struct rhash_sums_t orig_sums; + timedelta_t timer; + int res = 0; + + memcpy(&orig_sums, &info->sums, sizeof(orig_sums)); + info->orig_sums = &orig_sums; + info->wrong_sums = 0; + errno = 0; + + if(info->sums.flags & RHASH_IS_MIXED) { + info->sums.flags |= RHASH_MD5 | RHASH_ED2K; + } + + /* initialize percents output */ + init_percents(info); + rhash_timer_start(&timer); + + if(calc_sums(info) < 0) { + finish_percents(info, -1); + return -1; + } + fill_sums_struct(info); + info->time = rhash_timer_stop(&timer); + + /* compare the sums and fill info->wrong_sums flags */ + if((orig_sums.flags & RHASH_CRC32) && info->sums.crc32.be != orig_sums.crc32.be) { + info->wrong_sums |= RHASH_CRC32; + } + if((orig_sums.flags & RHASH_SHA1) && memcmp(info->sums.sha1_digest, orig_sums.sha1_digest, 20) != 0) { + info->wrong_sums |= RHASH_SHA1; + } + if((orig_sums.flags & RHASH_TIGER) && memcmp(info->sums.tiger_digest, orig_sums.tiger_digest, 24) != 0) { + info->wrong_sums |= RHASH_TIGER; + } + if((orig_sums.flags & RHASH_TTH) && memcmp(info->sums.tth_digest, orig_sums.tth_digest, 24) != 0) { + info->wrong_sums |= RHASH_TTH; + } + if((orig_sums.flags & RHASH_AICH) && memcmp(info->sums.aich_digest, orig_sums.aich_digest, 20) != 0) { + info->wrong_sums |= RHASH_AICH; + } + if((orig_sums.flags & RHASH_WHIRLPOOL) && memcmp(info->sums.whirlpool_digest, orig_sums.whirlpool_digest, 20) != 0) { + info->wrong_sums |= RHASH_WHIRLPOOL; + } + if((orig_sums.flags & RHASH_GOST) && fr_cmp(info->sums.gost_digest, orig_sums.gost_digest, 32) != 0) { + info->wrong_sums |= RHASH_GOST; + } + if((orig_sums.flags & RHASH_GOST_CRYPTOPRO) && fr_cmp(info->sums.gost_cryptopro_digest, orig_sums.gost_cryptopro_digest, 32) != 0) { + info->wrong_sums |= RHASH_GOST_CRYPTOPRO; + } + if((orig_sums.flags & RHASH_BTIH) && 
memcmp(info->sums.btih_digest, orig_sums.btih_digest, 20) != 0) { + info->wrong_sums |= RHASH_BTIH; + } + if((orig_sums.flags & RHASH_RIPEMD160) && memcmp(info->sums.ripemd160_digest, orig_sums.ripemd160_digest, 20) != 0) { + info->wrong_sums |= RHASH_RIPEMD160; + } + if((orig_sums.flags & RHASH_HAS160) && memcmp(info->sums.has160_digest, orig_sums.has160_digest, 20) != 0) { + info->wrong_sums |= RHASH_HAS160; + } + if((orig_sums.flags & RHASH_SNEFRU128) && memcmp(info->sums.snefru128_digest, orig_sums.snefru128_digest, 16) != 0) { + info->wrong_sums |= RHASH_SNEFRU128; + } + if((orig_sums.flags & RHASH_SNEFRU256) && memcmp(info->sums.snefru256_digest, orig_sums.snefru256_digest, 32) != 0) { + info->wrong_sums |= RHASH_SNEFRU256; + } + if((orig_sums.flags & RHASH_SHA224) && memcmp(info->sums.sha224_digest, orig_sums.sha224_digest, 28) != 0) { + info->wrong_sums |= RHASH_SHA224; + } + if((orig_sums.flags & RHASH_SHA256) && memcmp(info->sums.sha256_digest, orig_sums.sha256_digest, 32) != 0) { + info->wrong_sums |= RHASH_SHA256; + } + if((orig_sums.flags & RHASH_SHA384) && memcmp(info->sums.sha384_digest, orig_sums.sha384_digest, 48) != 0) { + info->wrong_sums |= RHASH_SHA384; + } + if((orig_sums.flags & RHASH_SHA512) && memcmp(info->sums.sha512_digest, orig_sums.sha512_digest, 64) != 0) { + info->wrong_sums |= RHASH_SHA512; + } + if((orig_sums.flags & RHASH_EDONR256) && memcmp(info->sums.edonr256_digest, orig_sums.edonr256_digest, 32) != 0) { + info->wrong_sums |= RHASH_EDONR256; + } + if((orig_sums.flags & RHASH_EDONR512) && memcmp(info->sums.edonr512_digest, orig_sums.edonr512_digest, 64) != 0) { + info->wrong_sums |= RHASH_EDONR512; + } + + if((orig_sums.flags & RHASH_MD4) && memcmp(info->sums.md4_digest, orig_sums.md4_digest, 16) != 0) { + info->wrong_sums |= RHASH_MD4; + } + if(orig_sums.flags & RHASH_MD5) { + if(memcmp(info->sums.md5_digest, orig_sums.md5_digest, 16) != 0 && + ((orig_sums.flags & RHASH_MD5_ED2K_MIXED_UP) == 0 || + memcmp(info->sums.ed2k_digest, 
orig_sums.md5_digest, 16) != 0) ) { + info->wrong_sums |= RHASH_MD5; + } + } + if(orig_sums.flags & RHASH_ED2K) { + if(memcmp(info->sums.ed2k_digest, orig_sums.ed2k_digest, 16) != 0 && + ((orig_sums.flags & RHASH_MD5_ED2K_MIXED_UP) == 0 || + memcmp(info->sums.md5_digest, orig_sums.ed2k_digest, 16) != 0) ) { + info->wrong_sums |= RHASH_ED2K; + } + } + + if(opt.flags & OPT_EMBED_CRC) { + unsigned crc32_be; + if(find_embedded_crc32(info->print_path, &crc32_be)) { + if(crc32_be != info->sums.crc32.be) + info->wrong_sums |= RHASH_EMBEDDED_CRC32; + } + } + + if(info->wrong_sums) { + res = -2; + } + + finish_percents(info, res); + + if((opt.flags & OPT_SPEED) && info->sums.flags) { + print_file_time_stats(info); + } + return res; +} + +/** + * Check hash sums in crc file. + * Lines beginning with ';' and '#' are ignored. + * + * @param crc_file_path - the path of the file with hash sums to verify. + * @param chdir - true if function should emulate chdir to directory of filepath before checking it. 
+ * @return zero on success, -1 on fail + */ +int check_crc_file(const char* crc_file_path, int chdir) +{ + FILE *fd; + char buf[2048]; + size_t pos; + const char *ralign; + timedelta_t timer; + struct file_info info; + int res = 0, line_num = 0; + double time; + + /* process --check-embedded option */ + if(opt.mode & MODE_CHECK_EMBEDDED) { + unsigned crc32_be; + if(find_embedded_crc32(crc_file_path, &crc32_be)) { + /* initialize file_info structure */ + memset(&info, 0, sizeof(info)); + info.full_path = rsh_strdup(crc_file_path); + file_info_set_print_path(&info, info.full_path); + info.sums.flags = RHASH_CRC32; + info.sums.crc32.be = crc32_be; + res = verify_sums(&info); + fflush(rhash_data.out); + + if(res == 0) rhash_data.ok++; + else if(res == -1 && errno == ENOENT) rhash_data.miss++; + rhash_data.processed++; + + free(info.full_path); + file_info_destroy(&info); + } else { + log_msg("warning: file name doesn't contain a crc: %s\n", crc_file_path); + return -1; + } + return 0; + } + + /* initialize statistics */ + rhash_data.processed = rhash_data.ok = rhash_data.miss = 0; + rhash_data.total_size = 0; + + if( IS_DASH_STR(crc_file_path) ) { + fd = stdin; + crc_file_path = ""; + } else if( !(fd = rsh_fopen_bin(crc_file_path, "rb") )) { + log_file_error(crc_file_path); + return -1; + } + + pos = strlen(crc_file_path)+16; + ralign = str_set(buf, '-', (pos < 80 ? 
80 - (int)pos : 2)); + fprintf(rhash_data.out, "\n--( Verifying %s )%s\n", crc_file_path, ralign); + fflush(rhash_data.out); + rhash_timer_start(&timer); + + /* mark dirname part of the path, by setting pos */ + if(chdir) { + pos = strlen(crc_file_path); + for(; pos > 0 && !IS_PATH_SEPARATOR(crc_file_path[pos]); pos--); + if(IS_PATH_SEPARATOR(crc_file_path[pos])) pos++; + } else pos = 0; + + /* read crc file line by line */ + for(line_num = 0; fgets(buf, 2048, fd); line_num++) + { + char* line = buf; + char* path_without_ext = NULL; + + /* skip unicode BOM */ + if(line_num == 0 && buf[0] == (char)0xEF && buf[1] == (char)0xBB && buf[2] == (char)0xBF) line += 3; + + if(*line == 0) continue; /* skip empty lines */ + + if(is_binary_string(line)) { + fprintf(rhash_data.log, PROGRAM_NAME ": error: file is binary: %s\n", crc_file_path); + if(fd != stdin) fclose(fd); + return -1; + } + + /* skip comments and empty lines */ + if(IS_COMMENT(*line) || *line == '\r' || *line == '\n') continue; + + memset(&info, 0, sizeof(info)); + parse_crc_file_line(line, &info.print_path, &info.sums, !feof(fd)); + + /* see if crc file contains a hash sum without a filename */ + if(!info.print_path && info.sums.flags) { + char* point; + path_without_ext = rsh_strdup(crc_file_path); + point = strrchr(path_without_ext, '.'); + + if(point) { + *point = '\0'; + file_info_set_print_path(&info, path_without_ext); + } + } + + if(!info.print_path || !info.sums.flags) { + log_msg("warning: can't parse line: %s\n", buf); + } else { + int is_absolute = IS_PATH_SEPARATOR(info.print_path[0]); + IF_WINDOWS(is_absolute = is_absolute || (info.print_path[0] && info.print_path[1] == ':')); + + /* if filename shall be prepent by directory path */ + if(pos && !is_absolute) { + size_t len = strlen(info.print_path); + info.full_path = (char*)rsh_malloc(pos+len+1); + memcpy(info.full_path, crc_file_path, pos); + strcpy(info.full_path+pos, info.print_path); + } else { + info.full_path = rsh_strdup(info.print_path); 
+ } + + /* verify hash sums of the file */ + res = verify_sums(&info); + fflush(rhash_data.out); + free(info.full_path); + file_info_destroy(&info); + + /* update statistics */ + if(res == 0) rhash_data.ok++; + else if(res == -1 && errno == ENOENT) rhash_data.miss++; + } + rhash_data.processed++; + free(path_without_ext); + } + time = rhash_timer_stop(&timer); + + fprintf(rhash_data.out, "%s\n", str_set(buf, '-', 80)); + print_check_stats(); + + if(rhash_data.processed != rhash_data.ok) rhash_data.error_flag = 1; + + if(opt.flags & OPT_SPEED && rhash_data.processed > 1) { + print_time_stats(time, rhash_data.total_size, 1); + } + + rhash_data.processed = 0; + res = ferror(fd); /* check that crc file has been read without errors */ + if(fd != stdin) fclose(fd); + return (res == 0 ? 0 : -1); +} + +/** + * Print a file info line in SFV header format. + * + * @param out a stream to print info to + * @param printpath relative file path to print + * @param fullpath a path to the file relative to the current directory. + * @return 0 on success, -1 on fail with error code stored in errno + */ +int print_sfv_header_line(FILE* out, const char* printpath, const char* fullpath) +{ + struct rsh_stat_struct stat_buf; + uint64_t filesize; + char buf[24]; + + if( (rsh_stat(fullpath, &stat_buf)) < 0 ) { + return -1; /* not reporting an error here */ + } + if(S_ISDIR(stat_buf.st_mode)) return 0; /* don't handle directories */ + + filesize = stat_buf.st_size; /* total size, in bytes */ + IF_WINDOWS(win32_set_filesize64(fullpath, &filesize)); /* set correct filesize for large files under win32 */ + +#ifdef _WIN32 + /* skip file if it can't be opened with exclusive sharing rights */ + if(!can_open_exclusive(fullpath)) { + return 0; + } +#endif + + sprintI64(buf, filesize, 12); + fprintf(out, "; %s ", buf); + print_time(out, stat_buf.st_mtime); + fprintf(out, " %s\n", printpath); + return 0; +} + +/** + * Print an SFV header banner. 
The banner consist of 3 comment lines, + * with the program description and current time. + * + * @param out a stream to print to + */ +void print_sfv_banner(FILE* out) +{ + time_t cur_time = time(NULL); + struct tm *t = localtime(&cur_time); + if(t) { + fprintf(out, "; Generated by " PROGRAM_NAME " v" VERSION " on %4u-%02u-%02u at %02u:%02u.%02u\n" + "; Written by Aleksey (Akademgorodok) - http://rhash.sourceforge.net/\n;\n", + (1900+t->tm_year), t->tm_mon+1, t->tm_mday, t->tm_hour, t->tm_min, t->tm_sec); + } +} diff --git a/calc_sums.h b/calc_sums.h new file mode 100644 index 00000000..4864739f --- /dev/null +++ b/calc_sums.h @@ -0,0 +1,80 @@ +/* calc_sums.h */ +#ifndef CALC_SUMS_H +#define CALC_SUMS_H + +#include +#include "librhash/rhash.h" +#include "common_func.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* binary result of calculations */ +typedef struct rhash_sums_t +{ + unsigned flags; + union { + unsigned char digest[4]; + unsigned be; + } crc32; + unsigned char md4_digest[16]; + unsigned char md5_digest[16]; + unsigned char ed2k_digest[16]; + unsigned char sha1_digest[20]; + unsigned char tiger_digest[24]; + unsigned char tth_digest[24]; + unsigned char aich_digest[20]; + unsigned char whirlpool_digest[64]; + unsigned char ripemd160_digest[20]; + unsigned char gost_digest[32]; + unsigned char gost_cryptopro_digest[32]; + unsigned char snefru256_digest[32]; + unsigned char snefru128_digest[16]; + unsigned char has160_digest[20]; + unsigned char btih_digest[20]; + unsigned char sha224_digest[28]; + unsigned char sha256_digest[32]; + unsigned char sha384_digest[48]; + unsigned char sha512_digest[64]; + unsigned char edonr256_digest[32]; + unsigned char edonr512_digest[64]; +} rhash_sums_t; + +#include /* struct stat */ +#include /* stat() */ + +/* information about currently processed file */ +struct file_info { + char* full_path; + const char* print_path; + char* utf8_print_path; + uint64_t size; /* the size of the file */ + double time; /* file 
processing time in seconds */ + struct infohash_ctx *infohash; + struct rhash_sums_t *orig_sums; /* sums from a crc file */ + rhash rctx; /* state of hash algorithms */ + unsigned wrong_sums; /* sum comparison results */ + int error; /* -1 for i/o error, -2 for wrong sum, 0 on success */ + char* allocated_ptr; + + /* note: rsh_stat_struct size depends on _FILE_OFFSET_BITS */ + struct rsh_stat_struct stat_buf; /* file attributes */ + struct rhash_sums_t sums; /* sums of the file */ +}; + +void file_info_destroy(struct file_info*); /* free allocated memory */ +const char* file_info_get_utf8_print_path(struct file_info*); +unsigned char* rhash_get_digest_ptr(struct rhash_sums_t *sums, unsigned hash_id); + +int calculate_and_print_sums(FILE* out, const char *print_path, const char *full_path, struct rsh_stat_struct* stat_buf); +int check_crc_file(const char* crc_file_path, int chdir); +int rename_file_to_embed_crc32(struct file_info *info); +void print_sfv_banner(FILE* out); +int print_sfv_header_line(FILE* out, const char* printpath, const char* fullpath); + +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ + +#endif /* CALC_SUMS_H */ diff --git a/common_func.c b/common_func.c new file mode 100644 index 00000000..a6c86949 --- /dev/null +++ b/common_func.c @@ -0,0 +1,284 @@ +/* common_func.c */ + +#include "common_func.h" /* should be included before the C library files */ +#include +#include +#include +#include +#include +#include +#include + +#include "librhash/hex.h" +#include "win_utils.h" +#include "parse_cmdline.h" + +/** + * Print a 0-terminated string representation of a 64-bit number to char buffer. 
+ */ +void sprintI64(char *dst, uint64_t number, int max_width) +{ + char buf[24]; + size_t len; + char *p = buf + 23; + *(p--) = 0; /* last symbol should be '\0' */ + if(number == 0) { + *(p--) = '0'; + } else { + for(; p >= buf && number != 0; p--, number /= 10) { + *p = '0' + (char)(number % 10); + } + } + len = buf + 22 - p; + if((size_t)max_width > len) { + memset(dst, 0x20, max_width - len); + dst += max_width - len; + } + memcpy(dst, p+1, len+1); +} + +/** + * Calculate length of decimal representation of given 64-bit integer. + * + * @param num integer to calculate the length for + * @return length of decimal representation + */ +int int_len(uint64_t num) +{ + int len; + for(len = 0; num; len++, num /= 10); + return (len == 0 ? 1 : len); /* note: int_len(0) == 1 */ +} + +/* unsafe characters are "<>{}[]%#/|\^~`@:;?=&+ */ +#define IS_GOOD_URL_CHAR(c) (((unsigned char)(c) < 128 && isalnum(c)) || strchr("$-_.!*'(),", c)) + +/** + * URL-encode given string. + * + * @param dst buffer to recieve result or NULL to calculate encoded string size + * @param filename the file name + * @return the length of the result string + */ +int urlencode(char *dst, const char *name) +{ + const char *start; + if(!dst) { + int len; + for(len = 0; *name; name++) len += (IS_GOOD_URL_CHAR(*name) ? 1 : 3); + /* ed2k://|file|||2E398E5533AE4A83475B1AF001C6CEE6|h=RKLBEXT4O2H4RZER676WAVWGACIHQ56Z|/ */ + return len; + } + /* encode URL as specified by RFC 1738 */ + for(start = dst; *name; name++) { + if( IS_GOOD_URL_CHAR(*name) ) { + *dst++ = *name; + } else { + *dst++ = '%'; + dst = rhash_print_hex_byte(dst, *name, 'A'); + } + } + *dst = 0; + return (int)(dst - start); +} + +/** + * Convert given string to lower case. + * The result string will be allocated by malloc. + * The allocated memory should be freed by calling free(). 
+ * + * @param str a string to convert + * @return converted string allocated by malloc + */ +char* str_tolower(const char* str) +{ + char* buf = rsh_strdup(str); + char* p; + if(buf) { + for(p = buf; *p; p++) *p = tolower(*p); + } + return buf; +} + +/** + * Remove spaces from the begin and the end of the string. + * + * @param str the modifiable buffer with the string + * @return trimmed string + */ +char* str_trim(char* str) +{ + char* last = str + strlen(str) - 1; + while(isspace(*str)) str++; + while(isspace(*last) && last > str) *(last--) = 0; + return str; +} + +/** + * Fill a buffer with NULL-terminated string consisting + * solely of a given repeated character. + * + * @param buf the modifiable buffer to fill + * @param ch the character to fill string with + * @param length the length of the string to construct + * @return the buffer + */ +char* str_set(char* buf, int ch, int length) +{ + memset(buf, ch, length); + buf[length] = '\0'; + return buf; +} + +/** + * Check if a string is a binary string, which means the string contain + * a character with ACII code below 0x20 other than '\r', '\n', '\t'. + * + * @param str a string to check + * @return non zero if string is binary + */ +int is_binary_string(const char* str) +{ + for(; *str; str++) { + if(((unsigned char)*str) < 32 && ((1 << (unsigned char)*str) & ~0x2600)) { + return 1; + } + } + return 0; +} + +/** + * Count number of utf8 characters in a 0-terminated string + * + * @param str the string to measure + * @return number of utf8 characters in the string + */ +size_t strlen_utf8_c(const char *str) +{ + size_t length = 0; + for(; *str; str++) { + if((*str & 0xc0) != 0x80) length++; + } + return length; +} + +/** +* Exit the program, with restoring console state. +* +* @param code the program exit code +*/ +void rhash_exit(int code) +{ + IF_WINDOWS(restore_console()); + exit(code); +} + +/* FILE FUNCTIONS */ + +/** + * Return filename without path. 
+ * + * @param path file path + * @return filename + */ +const char* get_basename(const char* path) +{ + const char *p = path + strlen(path) - 1; + for(; p >= path && !IS_PATH_SEPARATOR(*p); p--); + return (p+1); +} + +/** + * Return allocated buffer with the directory part of the path. + * The buffer must be freed by calling free(). + * + * @param path file path + * @return directory + */ +char* get_dirname(const char* path) +{ + const char *p = path + strlen(path) - 1; + char *res; + for(; p > path && !IS_PATH_SEPARATOR(*p); p--); + if((p - path) > 1) { + res = (char*)rsh_malloc(p-path+1); + memcpy(res, path, p-path); + res[p-path] = 0; + return res; + } else { + return rsh_strdup("."); + } +} + +/** + * Assemble a filepath from its directory and filename. + * + * @param dir_path directory path + * @param filename file name + * @return filepath + */ +char* make_path(const char* dir_path, const char* filename) +{ + char* buf; + size_t len; + assert(dir_path); + assert(filename); + + /* remove leading path separators from filename */ + while(IS_PATH_SEPARATOR(*filename)) filename++; + + /* copy directory path */ + len = strlen(dir_path); + buf = (char*)rsh_malloc(len + strlen(filename) + 2); + strcpy(buf, dir_path); + + /* separate directory from filename */ + if(len > 0 && !IS_PATH_SEPARATOR(buf[len-1])) + buf[len++] = SYS_PATH_SEPARATOR; + + /* append filename */ + strcpy(buf+len, filename); + return buf; +} + +/** + * Print time formated as hh:mm.ss YYYY-MM-DD to a file stream. 
+ * + * @param out the stream to print time to + * @param time the time to print + */ +void print_time(FILE *out, time_t time) +{ + struct tm *t = localtime(&time); + static struct tm zero_tm; + if(t == NULL) { + /* if strange day, then print `00:00.00 1900-01-00' */ + t = &zero_tm; + t->tm_hour = t->tm_min = t->tm_sec = + t->tm_year = t->tm_mon = t->tm_mday = 0; + } + fprintf(out, "%02u:%02u.%02u %4u-%02u-%02u", t->tm_hour, t->tm_min, + t->tm_sec, (1900+t->tm_year), t->tm_mon+1, t->tm_mday); +} + +#ifdef _WIN32 +#include +#endif + +/** + * Return ticks in milliseconds for time intervals measurement. + * This function should be not precise but the fastest one + * to retrive internal clock value. + * + * @return ticks count in milliseconds + */ +unsigned rhash_get_ticks(void) +{ +#ifdef _WIN32 + return GetTickCount(); +#else + struct timeval tv; + gettimeofday(&tv, NULL); + return (tv.tv_sec * 1000 + tv.tv_usec / 1000); +#endif +} diff --git a/common_func.h b/common_func.h new file mode 100644 index 00000000..20672001 --- /dev/null +++ b/common_func.h @@ -0,0 +1,93 @@ +/* common_func.h */ +#ifndef COMMON_FUNC_H +#define COMMON_FUNC_H + +/* use 64-bit off_t */ +#define _LARGEFILE64_SOURCE +#define _FILE_OFFSET_BITS 64 + +#include +#include +#include /* for time_t */ +#include "librhash/util.h" + +#ifndef _WIN32 +#include /* for timeval */ +/*#else +#include */ +#elif _MSC_VER > 1300 +#include "win32/platform-dependent.h" +#endif + + +#ifdef __cplusplus +extern "C" { +#endif + +/* string function */ +void sprintI64(char *dst, uint64_t number, int max_width); +int int_len(uint64_t num); + +int urlencode(char *dst, const char *name); +int is_binary_string(const char* str); +char* str_tolower(const char* str); +char* str_trim(char* str); +char* str_set(char* buf, int ch, int size); +size_t strlen_utf8_c(const char *str); + +#define IS_DASH_STR(s) ((s)[0] == '-' && (s)[1] == '\0') +#define IS_COMMENT(c) ((c) == ';' || (c) == '#') + +/* file function */ +const char* 
get_basename(const char* path); +char* get_dirname(const char* path); +char* make_path(const char* dir, const char* filename); + +#ifdef _WIN32 +# define IF_WINDOWS(code) code +# define SYS_PATH_SEPARATOR '\\' +# define IS_PATH_SEPARATOR(c) ((c) == '\\' || (c) == '/') +# define IS_PATH_SEPARATOR_W(c) ((c) == L'\\' || (c) == L'/') +# define rsh_fopen_bin(path, mode) win_fopen_bin(path, mode) +# define is_utf8() win_is_utf8() +# define to_utf8(str) win_to_utf8(str) +#else /* non _WIN32 part */ +# define IF_WINDOWS(code) +# define SYS_PATH_SEPARATOR '/' +# define IS_PATH_SEPARATOR(c) ((c) == '/') +# define rsh_fopen_bin(path, mode) fopen(path, mode) + /* stub for utf8 */ +# define is_utf8() 1 +# define to_utf8(str) NULL +#endif /* _WIN32 */ + +/* rhash stat function */ +#if (__MSVCRT_VERSION__ >= 0x0601) || (_MSC_VER >= 1400) +# define rsh_stat_struct __stat64 +# define rsh_time_struct __time64_t +# define rsh_stat(path, st) win_stat(path, st) +# define clib_wstat(path, st) _wstat64(path, st) +#elif defined(_WIN32) && (defined(__MSVCRT__) || defined(_MSC_VER)) +# define rsh_stat_struct _stati64 +# define rsh_time_struct __time64_t +# define rsh_stat(path, st) win_stat(path, st) +# define clib_wstat(path, st) _wstati64(path, st) +#else +# define rsh_stat_struct stat +# define rsh_time_struct time_t +# define rsh_stat(path, st) stat(path, st) +/* # define clib_wstat(path, st) _wstat32(path, st) */ +#endif + +typedef struct rsh_stat_struct rsh_stat_buf; + +void print_time(FILE *out, time_t time); +unsigned rhash_get_ticks(void); + +void rhash_exit(int code); + +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ + +#endif /* COMMON_FUNC_H */ diff --git a/crc_print.c b/crc_print.c new file mode 100644 index 00000000..4f0d3ddf --- /dev/null +++ b/crc_print.c @@ -0,0 +1,553 @@ +/* crc_print.c - functions to output hash sums using printf-like format */ + +#include "common_func.h" /* should be included before the C library files */ +#include +#include +#include 
+#include +#include + +#include "librhash/hex.h" +#include "librhash/byte_order.h" +#include "calc_sums.h" +#include "parse_cmdline.h" +#include "crc_print.h" + +/* table with information about hashes */ +print_hash_info hash_info_table[32]; + +/* print_item types */ +enum { + PRINT_ED2K_LINK = 0x100000, + PRINT_FLAG_UPPERCASE = 0x200000, + PRINT_FLAG_RAW = 0x0400000, + PRINT_FLAG_HEX = 0x0800000, + PRINT_FLAG_BASE32 = 0x1000000, + PRINT_FLAG_BASE64 = 0x2000000, + PRINT_FLAG_PAD_WITH_ZERO = 0x4000000, + PRINT_FLAGS_ALL = PRINT_FLAG_UPPERCASE | PRINT_FLAG_PAD_WITH_ZERO | PRINT_FLAG_RAW + | PRINT_FLAG_HEX | PRINT_FLAG_BASE32 | PRINT_FLAG_BASE64, + PRINT_STR = 0x10000000, + PRINT_ZERO, + PRINT_FILEPATH, + PRINT_BASENAME, + PRINT_URLNAME, + PRINT_SIZE, + PRINT_MTIME /*PRINT_ATIME, PRINT_CTIME*/ +}; + +/* internal function used when parsing format string */ +static print_item* parse_percent_item(const char** str); + +/** + * Allocate new print_item. + * + * @param flags the print_item flags + * @param hash_id optional hash_id + * @param data optional string to strore + * @return allocated print_item + */ +static print_item* new_print_item(unsigned flags, unsigned hash_id, const char *data) +{ + print_item* item = (print_item*)rsh_malloc(sizeof(print_item)); + item->flags = flags; + item->hash_id = hash_id; + item->width = 0; + item->data = (data ? rsh_strdup(data) : NULL); + item->next = NULL; + return item; +} + +/** + * Parse an esaped sequence in a printf-like format string. 
+ * + * @param pformat pointer to the sequence, the pointer + * is changed to point to the next symbol after parsed sequence + * @return result character + */ +static char parse_escaped_char(const char **pformat) +{ + const char* start = *pformat; + switch( *((*pformat)++) ) { + case 't': return '\t'; + case 'r': return '\r'; + case 'n': return '\n'; + case '\\': return '\\'; + case 'x': + /* \xNN byte with hexadecimal value NN (1 to 2 digits) */ + if( IS_HEX(**pformat) ) { + int ch; + ch = (**pformat <= '9' ? **pformat & 15 : (**pformat + 9) & 15); + (*pformat) ++; + if(IS_HEX(**pformat)) { + /* read the second digit */ + ch = 16 * ch + (**pformat <= '9' ? **pformat & 15 : (**pformat + 9) & 15); + (*pformat)++; + } + if(ch) return ch; + } + break; + default: + (*pformat)--; + /* \NNN - character with octal value NNN (1 to 3 digits) */ + if('0' < **pformat && **pformat <= '7') { + int ch = *((*pformat)++) - '0'; + if('0' <= **pformat && **pformat <= '7') { + ch = ch * 8 + *((*pformat)++) - '0'; + if('0' <= **pformat && **pformat <= '7') + ch = ch * 8 + *((*pformat)++) - '0'; + } + return (char)ch; + } + } + *pformat = start; + return '\\'; +} + +/** + * Parse format string. 
+ * + * @return a print_item list with parsed information + */ +print_item* parse_print_string(const char* format, unsigned *sum_mask) +{ + char *buf, *p; + print_item *list = NULL, **tail, *item = NULL; + + buf = p = (char*)rsh_malloc( strlen(format) + 1 ); + tail = &list; + *sum_mask = 0; + + for(;;) { + while(*format && *format != '%' && *format != '\\') + *(p++) = *(format++); + + if(*format == '\\') { + if(*(++format) == '0') { + item = new_print_item(PRINT_ZERO, 0, NULL); + format++; + } else { + *p++ = parse_escaped_char(&format); + continue; + } + } else if(*format == '%') { + if( *(++format) == '%' ) { + *(p++) = *format++; + continue; + } else { + item = parse_percent_item(&format); + if(!item) { + *(p++) = '%'; + continue; + } + if(item->hash_id) + *sum_mask |= item->hash_id; + } + } + if(p > buf || (!*format && list == NULL && item == NULL)) { + *p = '\0'; + *tail = new_print_item(PRINT_STR, 0, buf); + tail = &(*tail)->next; + p = buf; + } + if(item) { + *tail = item; + tail = &item->next; + item = NULL; + } + if(!*format) + break; + }; + free(buf); + return list; +} + +/** + * Convert given case-insensitive name to a printf directive id + * + * @param name printf directive name (not a 0-terminamted) + * @param length name length + * @return directive id on success, 0 on fail + */ +static unsigned printf_name_to_id(const char* name, size_t length, unsigned *flags) +{ + char buf[20]; + size_t i; + print_hash_info *info = hash_info_table; + unsigned bit; + + if(length > (sizeof(buf)-1)) return 0; + for(i = 0; i < length; i++) buf[i] = tolower(name[i]); + + /* check for urlname for compatibility */ + if(length == 7 && memcmp(buf, "urlname", 7) == 0) { + *flags = PRINT_URLNAME; + return 0; + } else if(length == 5 && memcmp(buf, "mtime", 5) == 0) { + *flags = PRINT_MTIME; + return 0; + } + + for(bit = 1; bit <= RHASH_ALL_HASHES; bit = bit << 1, info++) { + if(memcmp(buf, info->short_name, length) == 0 && + info->short_name[length] == 0) return bit; + } + 
return 0; +} + +/** + * Parse a string followed by a percent sign in a printf-like format string. + * + * @return a print_item with parsed information + */ +print_item* parse_percent_item(const char** str) +{ + const char* format = *str; + const char* p = NULL; + unsigned hash_id = 0; + unsigned modifier_flags = 0; + int id_found = 0; + int width = 0; + int pad_with_zero_bit = 0; + print_item* item = NULL; + + static const char *short_hash = "CMHTGWRAE"; + static const char *short_other = "Llpfus"; + static const unsigned hash_ids[] = { + RHASH_CRC32, RHASH_MD5, RHASH_SHA1, RHASH_TTH, RHASH_GOST, + RHASH_WHIRLPOOL, RHASH_RIPEMD160, RHASH_AICH, RHASH_ED2K + }; + static const unsigned other_flags[] = { + PRINT_ED2K_LINK, PRINT_ED2K_LINK, PRINT_FILEPATH, PRINT_BASENAME, + PRINT_URLNAME, PRINT_SIZE + }; + /* detect pad with zero flag */ + if(*format == '0') { + pad_with_zero_bit = PRINT_FLAG_PAD_WITH_ZERO; + format++; + } + + /* parse b,x and u flags */ + if(*format == 'x') { + modifier_flags |= PRINT_FLAG_HEX; + format++; + } else if(*format == 'b') { + modifier_flags |= PRINT_FLAG_BASE32; + format++; + } else if(*format == 'B') { + modifier_flags |= PRINT_FLAG_BASE64; + format++; + } else if(*format == '@') { + modifier_flags |= PRINT_FLAG_RAW; + format++; + } + for(; isdigit(*format); format++) width = 10 * width + (*format - '0'); + + if(*format == '{') { + /* parse %{printf-entity} substring */ + const char* p = ++format; + for(; isalnum(*p) || (*p == '-'); p++); + if(*p == '}') { + hash_id = printf_name_to_id(format, (int)(p - (format)), &modifier_flags); + format--; + if(hash_id || modifier_flags == PRINT_URLNAME || modifier_flags == PRINT_MTIME) { + /* set uppercase flag if the first letter of printf-entity is uppercase */ + modifier_flags |= (format[1] & 0x20 ? 
0 : PRINT_FLAG_UPPERCASE); + format = p; + id_found = 1; + } + } else --format; + } + + if(!id_found) { + const char upper = *format & ~0x20; + if( *format == '\0' ) return NULL; + if( (p = strchr(short_hash, upper)) ) { + assert( (p - short_hash) < (int)(sizeof(hash_ids) / sizeof(unsigned)) ); + hash_id = hash_ids[p - short_hash]; + modifier_flags |= (*format & 0x20 ? 0 : PRINT_FLAG_UPPERCASE); + } else if( (p = strchr(short_other, *format)) ) { + assert( (p - short_other) < (int)(sizeof(other_flags) / sizeof(unsigned)) ); + modifier_flags = other_flags[p - short_other]; + + if(modifier_flags == PRINT_ED2K_LINK) { + modifier_flags |= (*p & 0x20 ? 0 : PRINT_FLAG_UPPERCASE); + hash_id = RHASH_ED2K | RHASH_AICH; + } + } else { + return 0; /* no valid print id found */ + } + } + modifier_flags |= pad_with_zero_bit; + + item = new_print_item(modifier_flags, hash_id, NULL); + item->width = width; + *str = ++format; + return item; +} + +/** + * Print EDonkey 2000 url for given file to a stream. + * + * @param out the stream where to print url to + * @param filename the file name + * @param filesize the file size + * @param sums the file hash sums + */ +static void fprint_ed2k_url(FILE* out, struct file_info *info, int print_type) +{ + const char *filename = get_basename(file_info_get_utf8_print_path(info)); + int upper_case = (print_type & PRINT_FLAG_UPPERCASE ? RHPR_UPPERCASE : 0); + int len = urlencode(NULL, filename) + int_len(info->size) + (info->sums.flags&RHASH_AICH ? 
84 : 49); + char* buf = (char*)rsh_malloc( len + 1 ); + char* dst = buf; + + assert(info->sums.flags & (RHASH_ED2K|RHASH_AICH)); + assert(info->rctx); + + strcpy(dst, "ed2k://|file|"); + dst += 13; + dst += urlencode(dst, filename); + *dst++ = '|'; + sprintI64(dst, info->size, 0); + dst += strlen(dst); + *dst++ = '|'; + rhash_print(dst, info->rctx, RHASH_ED2K, upper_case); + dst += 32; + if((info->sums.flags & RHASH_AICH) != 0) { + strcpy(dst, "|h="); + rhash_print(dst += 3, info->rctx, RHASH_AICH, RHPR_BASE32 | upper_case); + dst += 32; + } + strcpy(dst, "|/"); + fprintf(out, "%s", buf); + free(buf); +} + +/** + * Output aligned uint64_t number to specified output stream. + * + * @param out the stream to output to + * @param filesize the 64-bit integer to output, usually a file size + * @param width minimal width of integer to output + * @param flag =1 if the integer shall be prepent by zeroes + */ +static void fprintI64(FILE* out, uint64_t filesize, int width, int zero_pad) +{ + char *buf = (char*)rsh_malloc(width > 40 ? width + 1 : 41); + int len = int_len(filesize); + sprintI64(buf, filesize, width); + if(len < width && zero_pad) { + memset(buf, '0', width-len); + } + fprintf(out, "%s", buf); + free(buf); +} + +/** + * Print formated file information to given output stream. + * + * @param out the stream to print information to + * @param list the format according to which information shall be printed + * @param info the file information + */ +void print_line(FILE* out, print_item* list, struct file_info *info) +{ + const char* basename = get_basename(info->print_path), *tmp; + char *url = NULL, *ed2k_url = NULL; + char buffer[130]; + + for(; list; list = list->next) { + int print_type = list->flags & ~(PRINT_FLAGS_ALL); + size_t len; + + /* output a hash function digest */ + if(list->hash_id && print_type != PRINT_ED2K_LINK) { + unsigned hash_id = list->hash_id; + int print_flags = (list->flags & PRINT_FLAG_UPPERCASE ? 
RHPR_UPPERCASE : 0) + | (list->flags & PRINT_FLAG_RAW ? RHPR_RAW : 0) + | (list->flags & PRINT_FLAG_BASE32 ? RHPR_BASE32 : 0) + | (list->flags & PRINT_FLAG_BASE64 ? RHPR_BASE64 : 0) + | (list->flags & PRINT_FLAG_HEX ? RHPR_HEX : 0); + if((hash_id == RHASH_GOST || hash_id == RHASH_GOST_CRYPTOPRO) && (opt.flags & OPT_GOST_REVERSE)) + print_flags |= RHPR_REVERSE; + + len = rhash_print(buffer, info->rctx, hash_id, print_flags); + assert(len < sizeof(buffer)); + fwrite(buffer, 1, len, out); + continue; + } + + /* output other special items: filepath, urlname e.t.c. */ + switch(print_type) { + case PRINT_STR: + fprintf(out, "%s", list->data); + break; + case PRINT_ZERO: + fprintf(out, "%c", 0); + break; + case PRINT_FILEPATH: + fprintf(out, "%s", info->print_path); + break; + case PRINT_BASENAME: + fprintf(out, "%s", basename); + break; + case PRINT_URLNAME: + if(!url) { + tmp = get_basename(file_info_get_utf8_print_path(info)); + url = (char*)rsh_malloc(urlencode(NULL, tmp) + 1); + urlencode(url, tmp); + } + fprintf(out, "%s", url); + break; + case PRINT_MTIME: + print_time(out, info->stat_buf.st_mtime); + break; + case PRINT_SIZE: + fprintI64(out, info->size, list->width, (list->flags & PRINT_FLAG_PAD_WITH_ZERO)); + break; + case PRINT_ED2K_LINK: + fprint_ed2k_url(out, info, list->flags); + break; + } + } + free(url); + free(ed2k_url); +} + +/** + * Release memory allocated by given print_item list. + * + * @param list the list to free + */ +void free_print_list(print_item* list) +{ + while(list) { + print_item* next = list->next; + free(list); + list = next; + } +} + +/** + * Initialize information about hashes, stored in the + * hash_info_table global variable. 
+ */ +void init_hash_info_table(void) +{ + unsigned index, bit; + unsigned short_opt_mask = RHASH_CRC32 | RHASH_MD5 | RHASH_SHA1 | RHASH_TTH | RHASH_ED2K | + RHASH_AICH | RHASH_WHIRLPOOL | RHASH_RIPEMD160 | RHASH_GOST | OPT_ED2K_LINK; + char* short_opt = "cmhteawrgl"; + print_hash_info *info = hash_info_table; + unsigned fullmask = RHASH_ALL_HASHES | OPT_ED2K_LINK; + + memset(hash_info_table, 0, sizeof(hash_info_table)); + + for(index = 0, bit = 1; bit <= fullmask; index++, bit = bit << 1, info++) { + const char *p; + char *e, *d; + + info->short_char = ((bit & short_opt_mask) != 0 && *short_opt ? + *(short_opt++) : 0); + + info->name = (bit & RHASH_ALL_HASHES ? rhash_get_name(bit) : "ED2K_LINK"); + d = info->short_name; + e = info->short_name + 15; /* buffer overflow protection */ + assert(strlen(info->name) < (size_t)(e-d)); + for(p = info->name; *p && d < e; p++) if(*p != '-' || p[1] >= '9') *(d++) = (*p | 0x20); + *d = 0; + + info->urn = (bit != RHASH_TTH ? info->short_name : "tree:tiger"); + } +} + +/** + * Initialize printf string according to program options. + * The function is called only when a printf format string is not specified + * from command line, so it sould be constructed from other options. + * + * @param out a string buffer to place the resulting format string into. + */ +void init_printf_format(strbuf_t* out) +{ + const char* fmt, *tail = 0; + unsigned bit, index = rhash_ctz(opt.sum_flags); + int uppercase; + char up_flag; + + if(!opt.fmt) { + /* print sfv header for crc32 or if no sums options specified */ + opt.fmt = (opt.sum_flags == RHASH_CRC32 || !opt.sum_flags ? FMT_SFV : FMT_SIMPLE); + } + uppercase = ((opt.flags & OPT_UPPERCASE) || + (!(opt.flags & OPT_LOWERCASE) && (opt.fmt & FMT_SFV))); + up_flag = (uppercase ? 
~0x20 : 0xFF); + + rsh_str_ensure_size(out, 1024); /* allocate big enough buffer */ + + if(opt.sum_flags & OPT_ED2K_LINK) { + rsh_str_append_n(out, "%l", 2); + out->str[1] &= up_flag; + return; + } + + if(opt.sum_flags == 0) return; + + if(opt.fmt == FMT_BSD) { + fmt = "\003(%p) = \001\\n"; + } else if(opt.fmt == FMT_MAGNET) { + rsh_str_append(out, "magnet:?xl=%s&dn=%{urlname}"); + fmt = "&xt=urn:\002:\001"; + tail = "\\n"; + } else if(opt.fmt == FMT_SIMPLE && 0 == (opt.sum_flags & (opt.sum_flags - 1))) { + fmt = "\001 %p\\n"; + } else { + rsh_str_append_n(out, "%p", 2); + fmt = (opt.fmt == FMT_SFV ? " \001" : " \001"); + tail = "\\n"; + } + + /* loop by hashes */ + for(bit = 1 << index; bit <= opt.sum_flags; bit = bit << 1, index++) { + int base32 = (opt.fmt == FMT_MAGNET && (bit & (RHASH_SHA1 | RHASH_BTIH))); + const char *p = fmt; + print_hash_info *info = &hash_info_table[index]; + if((bit & opt.sum_flags) == 0) continue; + + rsh_str_ensure_size(out, out->len + 256); /* allocate big enough buffer */ + + for(;;) { + int i; + while(*p >= 0x20) out->str[out->len++] = *(p++); + if(*p == 0) break; + switch((int)*(p++)) { + case 1: + out->str[out->len++] = '%'; + if(base32) out->str[out->len++] = 'b'; + if(info->short_char) out->str[out->len++] = info->short_char & up_flag; + else { + char *letter; + out->str[out->len++] = '{'; + letter = out->str + out->len; + rsh_str_append(out, info->short_name); + *letter &= up_flag; + out->str[out->len++] = '}'; + } + break; + case 2: + rsh_str_append(out, info->urn); + break; + case 3: + rsh_str_append(out, info->name); + i = (int)strlen(info->name); + for(i = (i < 5 ? 
6 - i : 1); i > 0; i--) out->str[out->len++] = ' '; + break; + } + } + } + if(tail) { + rsh_str_append(out, tail); + } + out->str[out->len] = '\0'; +} diff --git a/crc_print.h b/crc_print.h new file mode 100644 index 00000000..bc28ddfb --- /dev/null +++ b/crc_print.h @@ -0,0 +1,40 @@ +/* crc_printf.h - functions to print hash sums */ +#ifndef CRC_PRINT_H +#define CRC_PRINT_H + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct print_item { + struct print_item *next; + unsigned flags; + unsigned hash_id; + unsigned width; + const char *data; +} print_item; + +typedef struct print_hash_info +{ + char short_name[16]; + char short_char; + const char *name; + const char *urn; +} print_hash_info; + +extern print_hash_info hash_info_table[]; + +struct file_info; + +print_item* parse_print_string(const char* format, unsigned *sum_mask); +void print_line(FILE* out, print_item* list, struct file_info *info); +void free_print_list(print_item* list); + +void init_hash_info_table(void); +void init_printf_format(strbuf_t* out); + +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ + +#endif /* CRC_PRINT_H */ diff --git a/crc_update.c b/crc_update.c new file mode 100644 index 00000000..2d33686a --- /dev/null +++ b/crc_update.c @@ -0,0 +1,381 @@ +/* crc_update.c functions to update a crc file */ + +#include "common_func.h" /* should be included before the C library files */ +#include +#include /* for qsort */ +#include +#include +#include +#include + +#include "librhash/timing.h" +#include "win_utils.h" +#include "parse_cmdline.h" +#include "output.h" +#include "rhash_main.h" +#include "file_set.h" +#include "file_mask.h" +#include "calc_sums.h" +#include "crc_update.h" + +/* first define some internal functions, implemented later in this file */ +static int add_new_crc_entries(const char* filepath, file_set *crc_entries); +static int file_set_load_from_crc_file(file_set *set, const char* crc_file_path); +static int fix_sfv_header(const char* crc_filepath); 
+ +/** + * Update given crc file, by adding to it hashes of files from the same + * directory, but which the crc file doesn't contain yet. + * + * @param crc_file_path the path to the crc file + */ +int update_crc_file(const char* crc_file_path) +{ + file_set* crc_entries; + timedelta_t timer; + int res; + + if(opt.flags & OPT_VERBOSE) { + log_msg("Updating: %s\n", crc_file_path); + } + + crc_entries = file_set_new(); + res = file_set_load_from_crc_file(crc_entries, crc_file_path); + + if(opt.flags & OPT_SPEED) rhash_timer_start(&timer); + rhash_data.total_size = 0; + rhash_data.processed = 0; + + if(res == 0) { + /* add the crc file itself to the set of excluded from re-calculation files */ + file_set_add_name(crc_entries, get_basename(crc_file_path)); + file_set_sort(crc_entries); + + /* update crc file with sums of files not present in the crc_entries */ + res = add_new_crc_entries(crc_file_path, crc_entries); + } + file_set_free(crc_entries); + + if(opt.flags & OPT_SPEED && rhash_data.processed > 0) { + double time = rhash_timer_stop(&timer); + print_time_stats(time, rhash_data.total_size, 1); + } + + return res; +} + +/** + * Load a set of files from given crc file. + * + * @param set the file set to store loaded files + * @param crc_file_path the crc file to load + */ +static int file_set_load_from_crc_file(file_set *set, const char* crc_file_path) +{ + FILE *fd; + int line_num; + char buf[2048]; + if( !(fd = rsh_fopen_bin(crc_file_path, "rb") )) { + /* if file not exist, it will be created */ + return (errno == ENOENT ? 
0 : -1); + } + for(line_num = 0; fgets(buf, 2048, fd); line_num++) { + char* line = buf; + const char *filepath; + file_item *item; + + /* skip unicode BOM */ + if(line_num == 0 && buf[0] == (char)0xEF && buf[1] == (char)0xBB && buf[2] == (char)0xBF) line += 3; + + if(*line == 0) continue; /* skip empty lines */ + + if(is_binary_string(line)) { + log_msg("error: skipping binary file %s\n", crc_file_path); + fclose(fd); + return -1; + } + + if(IS_COMMENT(*line) || *line == '\r' || *line == '\n') continue; + + item = file_item_new(NULL); + parse_crc_file_line(line, &filepath, &item->sums, !feof(fd)); + + /* store file info to the file set */ + if(filepath) { + file_item_set_filepath(item, filepath); + file_set_add(set, item); + } else { + log_msg("warning: can't parse line: %s\n", buf); + file_item_free(item); + } + } + fclose(fd); + return 0; +} + +/** + * Add hash sums of files from given file-set to a specified hash-file. + * A specified directory path will be prepended to the path of added files, + * if it is not a current directory. 
+ * + * @param crc_file_path the hash file to add the sums to + * @param dir_path the directory path to prepend + * @param files_to_add the set of files to hash and add + * @return 0 on success, -1 on error + */ +static int add_sums_to_file(const char* crc_file_path, char* dir_path, file_set *files_to_add) +{ + struct rsh_stat_struct st; + FILE* fd; + unsigned i; + int ch; + + /* sfv banner will be printed only in sfv mode and only for empty crc files */ + int print_banner = (opt.fmt == FMT_SFV); + st.st_size = 0; + if(rsh_stat(crc_file_path, &st) == 0) { + if(print_banner && st.st_size > 0) print_banner = 0; + } + + /* open crc_file_path for writing */ + if( !(fd = fopen(crc_file_path, "r+") )) { + log_file_error(crc_file_path); + return -1; + } + rhash_data.upd_fd = fd; + + if(st.st_size > 0) { + /* read the last character of the file to check if it is EOL */ + if(fseek(fd, -1, SEEK_END) != 0) { + log_file_error(crc_file_path); + return -1; + } + ch = fgetc(fd); + + /* somehow writing doesn't work without seeking */ + if(fseek(fd, 0, SEEK_END) != 0) { + log_file_error(crc_file_path); + return -1; + } + + /* write EOL if it wasn't present */ + if(ch != '\n' && ch != '\r') { + /* fputc('\n', fd); */ + fprintf(fd, "\n"); + } + } + + /* append hash sums to the updated crc file */ + for(i = 0; i < files_to_add->size; i++, rhash_data.processed++) { + char *allocated = 0; + char *fullpath = file_set_get(files_to_add, i)->filepath; + if(dir_path[0] != '.' || dir_path[1] != 0) { + /* prepend the file path by directory path */ + fullpath = allocated = make_path(dir_path, fullpath); + } + if(opt.fmt == FMT_SFV) { + if(print_banner) { + print_sfv_banner(fd); + print_banner = 0; + } + } + + /* print hash sums to the crc file */ + calculate_and_print_sums(fd, file_set_get(files_to_add, i)->filepath, fullpath, NULL); + + free(allocated); + } + fclose(fd); + log_msg("Updated: %s\n", crc_file_path); + return 0; +} + +/** + * Compare two file items by filepath. 
+ * + * @param rec1 pointer to the first file_item structure + * @param rec2 pointer to the second file_item structure + * @return 0 if files have the same filepath, and -1 or 1 (strcmp result) if not + */ +static int name_compare(const void *rec1, const void *rec2) +{ + return strcmp((*(file_item *const *)rec1)->filepath, (*(file_item *const *)rec2)->filepath); +} + +/** + * Sort files in the specified fileset by file path. + * + * @param set the file-set to sort + */ +static void sort_file_set_by_path(file_set *set) +{ + qsort(set->array, set->size, sizeof(file_item*), name_compare); +} + +/** + * Read a directory and load files not present in the crc_entries file-set + * into the files_to_add file-set. + * + * @param dir_path the path of the directory to load files from + * @param crc_entries file-set of files which should be skept + * @param files_to_add file-set to load the list of files into + * @return 0 on success, -1 on error (and errno is set) + */ +static int load_filtered_dir(const char* dir_path, file_set *crc_entries, file_set *files_to_add) +{ + DIR *dp; + struct dirent *de; + struct rsh_stat_struct st; + + /* read directory */ + dp = opendir(dir_path); + if(!dp) return -1; + + while((de = readdir(dp)) != NULL) { + char *path; + file_item* item; + int res; + + /* skip "." and ".." directories */ + if(de->d_name[0] == '.' && (de->d_name[1] == 0 || + (de->d_name[1] == '.' 
&& de->d_name[2] == 0))) { + continue; + } + + /* retrive stat info of the given file */ + path = make_path(dir_path, de->d_name); + res = rsh_stat(path, &st); + free(path); + + /* skip unstat-able files and directories + * as well as files not accepted by current file filter + * and files already present in the crc_entries file set */ + if( res < 0 || S_ISDIR(st.st_mode) || + !file_mask_match(opt.files_accept, de->d_name) || + file_set_find(crc_entries, de->d_name) ) { + continue; + } + + item = file_item_new(de->d_name); + file_set_add(files_to_add, item); + } + return 0; +} + +/** + * Calculate and add to the given hash-file the hash-sums for all files + * from the same diriector as the hash-file, but absent from given + * crc_entries file-set. + * + *

If SFV format was specified by a command line switch, the after adding + * hash sums SFV header of the file is fixed by moving all lines starting + * with a semicolon before other lines. So an SFV-formated hash-file + * will remain correct. + * + * @param crc_file_path the hash-file to add sums into + * @param crc_entries file-set of files to omit from adding + * @return 0 on success, -1 on error + */ +static int add_new_crc_entries(const char* crc_file_path, file_set *crc_entries) +{ + file_set* files_to_add; + char* dir_path; + int res = 0; + + dir_path = get_dirname(crc_file_path); + files_to_add = file_set_new(); + + /* load into files_to_add files from directory not present in the crc_entries */ + load_filtered_dir(dir_path, crc_entries, files_to_add); + + if(files_to_add->size > 0) { + /* sort files by path */ + sort_file_set_by_path(files_to_add); + + /* calculate and write crc sums to the file */ + res = add_sums_to_file(crc_file_path, dir_path, files_to_add); + + if(res == 0 && opt.fmt == FMT_SFV) { + /* move sfv header from the end of updated file to its head */ + res = fix_sfv_header(crc_file_path); + } + } + + file_set_free(files_to_add); + free(dir_path); + return res; +} + +/** + * Move all SFV header lines (i.e. all lines starting with a semicolon) + * from the end of updated file to its head. 
+ */ +static int fix_sfv_header(const char* crc_file_path) +{ + FILE* in; + FILE* out; + char line[2048]; + size_t len; + char* tmp_file; + int err = 0; + + if( !(in = fopen(crc_file_path, "r") )) { + log_file_error(crc_file_path); + return -1; + } + + /* open another file for writing */ + len = strlen(crc_file_path); + tmp_file = (char*)rsh_malloc(len+8); + memcpy(tmp_file, crc_file_path, len); + strcpy(tmp_file+len, ".new"); + + /* open the temporary file */ + if( !(out = fopen(tmp_file, "w") )) { + log_file_error(tmp_file); + free(tmp_file); + fclose(in); + return -1; + } + + /* The first, output all commented lines to the file header */ + while(fgets(line, 2048, in)) { + if(*line == ';') { + if(fputs(line, out) < 0) break; + } + } + if(!ferror(out) && !ferror(in)) { + fseek(in, 0, SEEK_SET); + /* The second, output non-commented lines */ + while(fgets(line, 2048, in)) { + if(*line != ';') { + if(fputs(line, out) < 0) break; + } + } + } + if(ferror(in)) { + log_file_error(crc_file_path); + err = 1; + } + if(ferror(out)) { + log_file_error(tmp_file); + err = 1; + } + + fclose(in); + fclose(out); + + /* overwrite crc file with a new one */ + if( !err ) { +#ifdef _WIN32 + /* under win32 crc_file must be removed before overwriting it */ + unlink(crc_file_path); +#endif + if(rename(tmp_file, crc_file_path) < 0) { + fprintf(rhash_data.log, PROGRAM_NAME ": can't move %s to %s: %s\n", tmp_file, crc_file_path, strerror(errno)); + err = 1; + } + } + free(tmp_file); + return (err ? 
-1 : 0); +} diff --git a/crc_update.h b/crc_update.h new file mode 100644 index 00000000..d8809168 --- /dev/null +++ b/crc_update.h @@ -0,0 +1,15 @@ +/* crc-update.h - functions to update a crc file */ +#ifndef CRC_UPDATE_H +#define CRC_UPDATE_H + +#ifdef __cplusplus +extern "C" { +#endif + +int update_crc_file(const char* filepath); + +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ + +#endif /* CRC_UPDATE_H */ diff --git a/dist/MD5.bat b/dist/MD5.bat new file mode 100644 index 00000000..88523e6a --- /dev/null +++ b/dist/MD5.bat @@ -0,0 +1,2 @@ +@REM generate md5 file +@rhash.exe --md5 %1 %2 %3 %4 %5 %6 %7 %8 %9 diff --git a/dist/magnet.bat b/dist/magnet.bat new file mode 100644 index 00000000..2595c72c --- /dev/null +++ b/dist/magnet.bat @@ -0,0 +1,2 @@ +@REM generate magnet links +@rhash.exe --magnet %1 %2 %3 %4 %5 %6 %7 %8 %9 diff --git a/dist/rhashrc.sample b/dist/rhashrc.sample new file mode 100644 index 00000000..b2053e92 --- /dev/null +++ b/dist/rhashrc.sample @@ -0,0 +1,25 @@ +; The RHash config file config + +; crc32 is the default hash sum +#crc32=on +#md5=on +#sha1=on + +; use windows code page (useful only under windows) +#ansi=on + +; ignore case in filenames, when verifying crc files +#ignore-case=on + +; directories are not scanned recursively by default +#recursive=on + +; OK messages are printed by default +#skip-ok=on + +; accept only *.sfv files while recursively checking directories +#crc-accept=.sfv +#crc-accept=.sfv,.md5,.sha1,.tiger,.tth,.aich + +; percents are switched off by default +#percents=on diff --git a/file_mask.c b/file_mask.c new file mode 100644 index 00000000..d90fd470 --- /dev/null +++ b/file_mask.c @@ -0,0 +1,84 @@ +/* file_mask.c */ +#include +#include +#include +#include + +#include "common_func.h" +#include "file_mask.h" + +/** + * Convert a string to a lower-case and put it into array of file-masks. 
+ * + * @param arr array of file masks + * @param mask a string to add + */ +static void file_mask_add(file_mask_array* arr, const char* mask) +{ + rsh_vector_add_ptr(arr, str_tolower(mask)); +} + +/** + * Construct array from a comma-separated list of strings. + * + * @param comma_separated_list the comma-separated list of strings + * @return constructed array + */ +file_mask_array* file_mask_new_from_list(const char* comma_separated_list) +{ + file_mask_array* array = file_mask_new(); + file_mask_add_list(array, comma_separated_list); + return array; +} + +/** + * Parse string consisting of comma-delimited list of elements an + * add them to array. + * + * @param array the array to put parsed elements to + * @param comma_separated_list tre string to parse + */ +void file_mask_add_list(file_mask_array* array, const char* comma_separated_list) +{ + char *buf, *cur, *next; + if(!comma_separated_list || !*comma_separated_list) { + return; + } + buf = rsh_strdup(comma_separated_list); + for(cur = buf; cur && *cur; cur = next) { + next = strchr(cur, ','); + if(next) *(next++) = '\0'; + if(*cur != '\0') file_mask_add(array, cur); + } + free(buf); +} + +/** + * Match a given name against a list of string trailers. + * Usually used to match a filename against list of file extensions. 
+ * + * @param arr the array of string trailers + * @param name the name to match + */ +int file_mask_match(file_mask_array* arr, const char* name) +{ + unsigned i; + int res = 0; + size_t len, namelen; + char* buf; + /* all names should match against an empty array */ + if(!arr || !arr->size) return 1; + + /* get a lowercase name version to ignore case when matching */ + buf = str_tolower(name); + namelen = strlen(buf); + for(i = 0; isize; i++) { + len = strlen((char*)arr->array[i]); + if(namelen >= len && memcmp(buf + namelen - len, arr->array[i], len) == 0) { + res = 1; /* matched */ + break; + } + } + free(buf); + return res; +} diff --git a/file_mask.h b/file_mask.h new file mode 100644 index 00000000..b35fe141 --- /dev/null +++ b/file_mask.h @@ -0,0 +1,25 @@ +/* file_mask.h */ +#ifndef FILE_MASK_H +#define FILE_MASK_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "common_func.h" + +/* an array to store rules for file acceptance */ +typedef struct vector_t file_mask_array; + +#define file_mask_new rsh_vector_new_simple +#define file_mask_free(array) rsh_vector_free(array); + +file_mask_array* file_mask_new_from_list(const char* comma_separated_list); +void file_mask_add_list(file_mask_array*, const char* comma_separated_list); +int file_mask_match(file_mask_array*, const char* name); + +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ + +#endif /* FILE_MASK_H */ diff --git a/file_set.c b/file_set.c new file mode 100644 index 00000000..f0bf4411 --- /dev/null +++ b/file_set.c @@ -0,0 +1,638 @@ +/* file_set.c */ +#include /* qsort */ +#include /* fopen */ +#include /* ptrdiff_t */ +#include +#include /* isspace */ +#include + +#include "librhash/hex.h" +#include "librhash/crc32.h" +#include "common_func.h" +#include "crc_print.h" +#include "parse_cmdline.h" +#include "rhash_main.h" +#include "output.h" +#include "file_set.h" + +/** + * Allocate a file_item structure and initialize it with a filepath. 
+ *
+ * @param filepath a filepath to initialize the file_item, can be NULL
+ * @return allocated file_item structure, NULL if the filepath can't be stored
+ */
+file_item* file_item_new(const char* filepath)
+{
+	file_item *item = (file_item*)rsh_malloc(sizeof(file_item));
+	memset(item, 0, sizeof(file_item));
+
+	if(filepath) {
+		if(!file_item_set_filepath(item, filepath)) {
+			free(item);
+			return NULL;
+		}
+	}
+	return item;
+}
+
+/**
+ * Free memory allocated by file_item.
+ *
+ * @param item the item to delete
+ */
+void file_item_free(file_item *item)
+{
+	/* search_filepath is a separate buffer only when it differs from filepath */
+	if(item->search_filepath != item->filepath) {
+		free(item->search_filepath);
+	}
+	free(item->filepath);
+	free(item);
+}
+
+/**
+ * Set file path of the given item.
+ *
+ * @param item pointer to the item to change
+ * @param filepath the file path to set
+ * @return 1 on success, 0 if the path can't be duplicated
+ */
+int file_item_set_filepath(file_item* item, const char* filepath)
+{
+	if(item->search_filepath != item->filepath)
+		free(item->search_filepath);
+	free(item->filepath);
+
+	/* forget the freed buffers, so that a failure below can't leave
+	 * a dangling pointer to be freed again by file_item_free() */
+	item->search_filepath = NULL;
+	item->filepath = rsh_strdup(filepath);
+	if(!item->filepath) return 0;
+
+	/* apply str_tolower if CASE_INSENSITIVE */
+	/* Note: strcasecmp() is not used instead of search_filepath due to portability issue */
+	/* Note: item->search_filepath is always correctly freed by file_item_free() */
+	item->search_filepath = (opt.flags&OPT_IGNORE_CASE ? str_tolower(item->filepath) : item->filepath);
+	item->hash = rhash_get_crc32_str(0, item->search_filepath);
+	return 1;
+}
+
+/**
+ * Call-back function to compare two file items by search_filepath, using hashes
+ *
+ * @param pp_rec1 the first item to compare
+ * @param pp_rec2 the second item to compare
+ * @return 0 if items are equal, a negative value if pp_rec1 < pp_rec2,
+ *         a positive value otherwise
+ */
+static int crc_pp_rec_compare(const void *pp_rec1, const void *pp_rec2)
+{
+	const file_item *rec1 = *(file_item *const *)pp_rec1, *rec2 = *(file_item *const *)pp_rec2;
+	/* order by hash first, the strings are compared only on hash collision */
+	if(rec1->hash != rec2->hash) return (rec1->hash < rec2->hash ? -1 : 1);
+	return strcmp(rec1->search_filepath, rec2->search_filepath);
+}
+
+/**
+ * Sort given file_set using hashes of search_filepath for fast binary search.
+ *
+ * @param set the file_set to sort
+ */
+void file_set_sort(file_set *set)
+{
+	if(set->array) qsort(set->array, set->size, sizeof(file_item*), crc_pp_rec_compare);
+}
+
+/**
+ * Create and add a file_item with given filepath to given file_set
+ *
+ * @param set the file_set to add the item to
+ * @param filepath the item file path
+ */
+void file_set_add_name(file_set *set, const char* filepath)
+{
+	file_item* item = file_item_new(filepath);
+	if(item) file_set_add(set, item);
+}
+
+/**
+ * Find given file path in the file_set.
+ * The binary search relies on the order made by file_set_sort().
+ *
+ * @param set the file_set to search
+ * @param filepath the file path to search for
+ * @return the found file_item or NULL if it was not found
+ */
+file_item* file_set_find(file_set *set, const char* filepath)
+{
+	int a, b, c;
+	unsigned hash;
+	char* search_filepath;
+
+	if(!set->size) return NULL;
+	/*assert(set->array);*/
+
+	/* apply str_tolower if case shall be ignored */
+	search_filepath =
+		( opt.flags&OPT_IGNORE_CASE ? str_tolower(filepath) : (char*)filepath );
+
+	/* generate hash to speedup the search */
+	hash = rhash_get_crc32_str(0, search_filepath);
+
+	/* fast binary search */
+	for(a = -1, b = set->size; (a + 1) < b;) {
+		file_item *item;
+		int cmp;
+
+		c = (a + b) / 2;
+		/*assert(0 <= c && c < (int)set->size);*/
+
+		item = (file_item*)set->array[c];
+		if(hash != item->hash) {
+			cmp = (hash < item->hash ? -1 : 1);
+		} else {
+			cmp = strcmp(search_filepath, item->search_filepath);
+			if(cmp == 0) {
+				if(search_filepath != filepath) free(search_filepath);
+				return item; /* file was found */
+			}
+		}
+		if(cmp < 0) b = c;
+		else a = c;
+	}
+	if(search_filepath != filepath) free(search_filepath);
+	return NULL;
+}
+
+/* bit flags to denote type of hexadecimal/base32 hash string */
+#define F_HEX 1
+#define F_BASE32 2
+
+/**
+ * Test if a character is a hexadecimal/base32 digit.
+ *
+ * @param c the character to test
+ * @return result of the test, a combination of flags F_HEX and F_BASE32
+ */
+static int test_hash_char(char c)
+{
+	return (IS_HEX(c) ? F_HEX : 0) | (IS_BASE32(c) ? F_BASE32 : 0);
+}
+
+/**
+ * Detect if given string contains a hexadecimal or base32 hash.
+ * The string is scanned forward if *ptr < end and backward otherwise;
+ * on return *ptr points to the character which stopped the scan.
+ *
+ * @param ptr the pointer to start scanning from
+ * @param end pointer to scan to (the character at end is still examined)
+ * @param p_len pointer to a number to store length of detected hash string
+ * @return type of detected hash as combination of F_HEX and F_BASE32 flags
+ */
+static int get_hash_type(char **ptr, char *end, int *p_len)
+{
+	int len = 0;
+	int char_type = 0, next_type = (F_HEX | F_BASE32);
+
+	/* the bound is tested before the character is read, so nothing
+	 * beyond 'end' is ever dereferenced */
+	if(*ptr < end) {
+		/* search forward (but no more then 129 symbols) */
+		if((end - *ptr) >= 129) end = *ptr + 129;
+		for(; *ptr <= end && (next_type &= test_hash_char(**ptr)); len++, (*ptr)++) {
+			char_type = next_type;
+		}
+	} else {
+		/* search backward (but no more then 129 symbols) */
+		if((*ptr-end) >= 129) end = *ptr - 129;
+		for(; *ptr >= end && (next_type &= test_hash_char(**ptr)); len++, (*ptr)--) {
+			char_type = next_type;
+		}
+	}
+	*p_len = len;
+	return char_type;
+}
+
+/**
+ * Test that the given string contain a hexadecimal or base32 hash string
+ * of one of supported hash sums.
+ *
+ * @param ptr the pointer to start scanning from
+ * @param end pointer to scan to
+ * @param p_len pointer to a number to store length of detected hash string
+ *              (written only when a hash type was detected)
+ * @return possible type of detected hash as algorithm RHASH id, 0 if not detected
+ */
+static int test_hash_string(char **ptr, char *end, int *p_len)
+{
+	int len = 0;
+	int hash_type = 0;
+	int char_type = get_hash_type(ptr, end, &len);
+
+
+	/* 32 characters are ambiguous: hexadecimal MD5/ED2K or base32 AICH */
+	if(len == 32 && char_type) {
+		hash_type = (char_type == F_BASE32 ? RHASH_AICH :
+			char_type == F_HEX ? RHASH_MD5_ED2K_MIXED_UP : RHASH_MD5_AICH_MIXED_UP | RHASH_MD5);
+	} else if((char_type & F_BASE32) != 0 && len == 39) {
+		hash_type = RHASH_TTH;
+	} else if((char_type & F_HEX) != 0) {
+		/* the algorithm of a hexadecimal sum is guessed by its length */
+		hash_type = (len == 8 ? RHASH_CRC32 : len == 32 ? RHASH_MD5_ED2K_MIXED_UP :
+			len == 40 ? RHASH_SHA1 : len == 48 ? RHASH_TIGER : len == 56 ? RHASH_SHA224 :
+			len == 64 ? RHASH_GOST : len == 96 ? RHASH_SHA384 : len == 128 ? RHASH_WHIRLPOOL : 0);
+	}
+
+	if(hash_type != 0) *p_len = len;
+	return hash_type;
+}
+
+/**
+ * Store a hash sum into the sums structure and mark it in sums->flags.
+ *
+ * @param sums the structure to store hash sum to
+ * @param str hexadecimal or base32 string representation of the sum
+ * @param hash_type id of the hash algorithm, can be one of the MIXED_UP masks
+ * @param length length of the string representation of the sum
+ */
+static void put_hash_sum(struct rhash_sums_t* sums, const char* str, unsigned hash_type, size_t length)
+{
+	unsigned char *psum = 0;
+	unsigned sum_id = hash_type;
+
+	/* if hash_type contains single bit */
+	if(0 == (hash_type & (hash_type - 1)) && (hash_type & RHASH_ALL_HASHES)) {
+		psum = rhash_get_digest_ptr(sums, hash_type);
+	}
+	if(!psum) {
+		if(hash_type == RHASH_TTH || (hash_type & RHASH_AICH) != 0) {
+			psum = (hash_type == RHASH_TTH ? sums->tth_digest : sums->aich_digest);
+			if(hash_type != RHASH_TTH) sum_id = RHASH_AICH;
+		}
+		else if( hash_type & (RHASH_MD5 | RHASH_ED2K | RHASH_IS_MIXED) ) {
+			/* an ambiguous sum goes to the md5 slot first, then to the ed2k one */
+			if( (hash_type & RHASH_IS_MIXED) != 0 ) {
+				hash_type |= ((sums->flags&RHASH_MD5) == 0 ? RHASH_MD5 : RHASH_ED2K);
+			}
+			sum_id = RHASH_MD5;
+			psum = (hash_type&RHASH_MD5 ? sums->md5_digest : hash_type&RHASH_ED2K ? sums->ed2k_digest : 0);
+		}
+	}
+
+	if(psum) {
+		int len = rhash_get_digest_size(sum_id) * 2;
+		assert(len >= 0);
+
+		/* parse hexadecimal or base32 formated hash sum */
+		/* a string twice as long as the digest is treated as hexadecimal */
+		if( (int)length == len ) {
+			rhash_hex_to_byte(str, psum, (int)length);
+		} else {
+			rhash_base32_to_byte(str, psum, (int)length);
+		}
+	}
+
+	/* note: no checking is done for repeated sums */
+	sums->flags |= hash_type;
+}
+
+#ifndef _WIN32
+/**
+ * Convert a windows file path to a unix one, replacing backslashes
+ * by slashes. The path is modified in place.
+ *
+ * @param path the path to convert
+ */
+static void process_backslashes(char* path)
+{
+	for(;*path;path++) {
+		if(*path == '\\') *path = '/';
+	}
+}
+#else /* _WIN32 */
+#define process_backslashes(path)
+#endif /* _WIN32 */
+
+/**
+ * Try to parse a bsd-formated line, which looks like
+ *   NAME (filename) = hash
+ * where NAME is a hash function name as returned by rhash_get_name().
+ * On success the buffer is modified: the filename gets terminated by '\0'.
+ *
+ * @param line the line to parse
+ * @param filename pointer to receive the parsed file name
+ * @param sums the rhash_sums_t structure to store parsed hash sums
+ * @return 1 on success, 0 if the line doesn't start with a hash function
+ *         name followed by '(', -1 if the rest of the line is malformed
+ */
+static int parse_bsd_format(char* line, const char** filename, struct rhash_sums_t* sums)
+{
+	/* NOTE: Starting and trailing spaces must be already removed from the line. */
+	char* e;
+	char* sum_ptr;
+	const char* func_name;
+	int sum_flag, parsed_length;
+	size_t len;
+	unsigned hash_type;
+
+	sum_flag = 0;
+	len = strlen(line);
+	/* make a cheap guess of the algorithm by a few distinctive characters,
+	 * the full name is verified below */
+	switch(tolower(*line)) {
+	case 'a':
+		sum_flag = RHASH_AICH;
+		break;
+	case 'b':
+		sum_flag = RHASH_BTIH;
+		break;
+	case 'c':
+		sum_flag = RHASH_CRC32;
+		break;
+	case 'm':
+		if(len >=3 && line[2] == '5') {
+			sum_flag = RHASH_MD5;
+		} else {
+			sum_flag = RHASH_MD4;
+		}
+		break;
+	case 's':
+		if(tolower(line[1]) == 'h') {
+			if(len >= 4 && line[3] == '1') sum_flag = RHASH_SHA1;
+			else if(len >= 7) {
+				sum_flag = (line[5] == '2' ? RHASH_SHA224 : line[5] == '5' ? RHASH_SHA256 :
+					line[5] == '8' ? RHASH_SHA384 : RHASH_SHA512);
+			}
+		} else if(len >= 10) {
+			sum_flag = (line[7] == '1' ? RHASH_SNEFRU128 : RHASH_SNEFRU256);
+		}
+		break;
+	case 't':
+		if(tolower(line[1]) == 'i') {
+			sum_flag = RHASH_TIGER;
+		} else {
+			sum_flag = RHASH_TTH;
+		}
+		break;
+	case 'w':
+		sum_flag = RHASH_WHIRLPOOL;
+		break;
+	case 'r':
+		sum_flag = RHASH_RIPEMD160;
+		break;
+	case 'h':
+		sum_flag = RHASH_HAS160;
+		break;
+	case 'g':
+		if(len >= 14 && line[4] == '-') {
+			sum_flag = RHASH_GOST_CRYPTOPRO;
+		} else {
+			sum_flag = RHASH_GOST;
+		}
+		break;
+	case 'e':
+		sum_flag = (len < 9 || line[2] == '2' ? RHASH_ED2K :
+			line[6] == '2' ? RHASH_EDONR256 : RHASH_EDONR512);
+		break;
+	}
+	if(sum_flag == 0) return 0;
+
+	func_name = rhash_get_name(sum_flag);
+	/* from here 'len' is the length of the hash string, not of the line */
+	len = rhash_get_hash_length(sum_flag);
+	assert(func_name != 0 && len > 0);
+
+	/* verify the guessed name ignoring case of the line */
+	for(; *func_name; line++, func_name++)
+		if(toupper(*line) != *func_name) return 0;
+
+	/* skip whitespaces */
+	while(isspace(*line)) line++;
+
+	/* check for '(' */
+	if(*(line++) != '(') return 0;
+
+	/* skip whitespaces */
+	while(isspace(*line)) line++;
+
+	/* the hash sum is expected to occupy the last 'len' characters */
+	e = line + strlen(line)-1;
+	sum_ptr = e-len+1;
+	if(sum_ptr <= line) return 0;
+
+	/* check for hash sum */
+	/* scanning backward one character beyond sum_ptr rejects longer sums */
+	hash_type = test_hash_string(&e, sum_ptr-1, &parsed_length);
+	if(hash_type == 0 || (int)len != parsed_length) return -1;
+
+	/* skip whitespaces */
+	while(isspace(*e)) e--;
+
+	/* check for '=' from the end */
+	if(*(e--) != '=') return -1;
+
+	/* skip whitespaces */
+	while(isspace(*e)) e--;
+
+	/* check for ')' from the end */
+	if(*(e--) != ')') return -1;
+
+	/* set the filename and terminate it with '\0' */
+	*filename = line;
+	e[1] = '\0';
+	process_backslashes(line);
+
+	/* store parsed hash sum */
+	put_hash_sum(sums, sum_ptr, sum_flag, len);
+	return 1;
+}
+
+/**
+ * Try to parse given line as a magnet-link.
+ *
+ * Recognized parameters are xl= (size, ignored), dn= (file name) and
+ * xt=urn: (hash sum); parameters of any other type are skipped.
+ * The buffer is modified only on success: the file name gets terminated by '\0'.
+ *
+ * @param line the magnet link to parse
+ * @param filename pointer to receive the file name found in the link
+ * @param sums the rhash_sums_t structure to store parsed hash sums
+ * @return 1 on success, 0 if the line is not a magnet link or has no file name
+ */
+static int parse_magnet_link(char* line, const char** filename, struct rhash_sums_t* sums)
+{
+	const char* prefix[] = { "xl=", "dn=", "xt=urn:" };
+	char* ptr = line;
+	char* filename_end = 0;
+
+	if(strncmp(ptr, "magnet:?", 8) != 0) {
+		return 0;
+	}
+	ptr += 8;
+
+	/* parse urn substrings */
+	while(*ptr) {
+		int index, hash_id;
+		size_t length;
+		char* param;
+
+		for(index = 0; index < 3; index++) {
+			length = strlen(prefix[index]);
+			if(strncmp(ptr, prefix[index], length) == 0) {
+				ptr += length;
+				break;
+			}
+		}
+
+		param = ptr;
+
+		/* switch to the next parameter */
+		for(; *ptr && *ptr != '&'; ptr++);
+		length = (ptr - param);
+		if(*ptr == '&') ptr++;
+
+		/* skip unknown type of parameter; ptr must be already advanced
+		 * here, otherwise the loop would never leave this parameter */
+		if(index >= 3) continue;
+
+		if(index == 1) {
+			*filename = param;
+			filename_end = param + length; /* don't modify buffer until it is correctly parsed */
+		}
+
+		/* note: file size (xl=...) is not verified */
+		if(index <= 1) continue;
+		assert(index == 2);
+
+		/* detect parameter sum */
+		for(index = 0; index < RHASH_HASH_COUNT; index++) {
+			const char* urn = hash_info_table[index].urn;
+			size_t len = strlen(urn);
+			if(strncmp(param, urn, len) == 0 && param[len] == ':') {
+				param += len + 1;
+				length -= len + 1;
+				break;
+			}
+		}
+		if(index >= RHASH_HASH_COUNT) {
+			if(opt.flags & OPT_VERBOSE) {
+				log_msg("warning: unknown hash in magnet link: %s\n", param);
+			}
+			continue;
+		}
+		/* the index in hash_info_table is the bit number of the hash id */
+		hash_id = 1 << index;
+		put_hash_sum(sums, param, hash_id, (unsigned)length);
+	}
+	if(filename_end) *filename_end = 0;
+	return (filename_end ? 1 : 0);
+}
+
+/**
+ * Try to parse the given ed2k-link.
+ *
+ * The parsed fields are separated by '|': file name, file size (ignored),
+ * hexadecimal ed2k hash and an optional base32 AICH hash prefixed by "h=".
+ * The buffer is modified only on success: the file name gets terminated by '\0'.
+ *
+ * @param line the ed2k link to parse
+ * @param filename pointer to receive the file name found in the link
+ * @param sums the rhash_sums_t structure to store parsed hash sums
+ * @return 1 on success, 0 otherwise
+ */
+static int parse_ed2k_link(char* line, const char** filename, struct rhash_sums_t* sums)
+{
+	char* ptr = line;
+	char* filename_end = 0;
+	int index;
+
+	if(strncmp(ptr, "ed2k://|file|", 13) != 0) {
+		return 0;
+	}
+	ptr += 13;
+
+	/* parse urn substrings */
+	for(index = 0; *ptr && *ptr != '/' && index <= 3; index++) {
+		int hash_id = 0, char_type;
+		int hlen;
+		ptrdiff_t length;
+		char *param, *end;
+
+		if(index == 3 && ptr[0] == 'h' && ptr[1] == '=') ptr += 2;
+		end = param = ptr;
+
+		/* switch to the next parameter */
+		for(; *ptr && *ptr != '|'; ptr++);
+		length = (ptr - param);
+		if(*ptr != '|') break;
+		ptr++;
+
+
+		if(index == 0) {
+			*filename = param;
+			filename_end = param + length; /* don't modify buffer until it is correctly parsed */
+		}
+
+		/* note: file size is not parsed or verified */
+		if(index <= 1) continue;
+
+		/* hlen is the number of leading valid hash characters, it must cover
+		 * the whole 32-character field, not only its beginning */
+		char_type = (get_hash_type(&end, ptr, &hlen) & (index == 2 ? F_HEX : F_BASE32));
+		if(length != 32 || hlen != 32 || !char_type) {
+			if(opt.flags & OPT_VERBOSE) {
+				log_msg("warning: can't parse hash in ed2k link: %s\n", param);
+			}
+			return 0;
+		}
+
+		hash_id = (index == 2 ? RHASH_ED2K : RHASH_AICH);
+		put_hash_sum(sums, param, hash_id, (unsigned)length);
+	}
+	if((*ptr != '/' && *ptr != '\0') || !filename_end) return 0;
+	*filename_end = 0;
+	return 1;
+}
+
+/**
+ * Parse a line of a generic crc file to extract crc sums and filename.
+ *
+ * The BSD format, magnet and ed2k links are tried first, then lines with
+ * hash sums following a file name, then lines with hash sums preceding it.
+ *
+ * @param line a modifiable buffer containing the line to parse
+ * @param filename pointer to receive parsed filename
+ * @param sums buffer to receive parsed sums
+ * @param check_eol true if check for trailing '\n' required
+ * @return 0 on success, -2 if the line is binary, -1 on other errors
+ */
+int parse_crc_file_line(char* line, const char** filename, struct rhash_sums_t* sums, int check_eol)
+{
+	int count;
+	int res;
+	size_t line_length = strlen(line);
+	char* p = line;
+	char* e;
+	*filename = NULL;
+
+	if(is_binary_string(line)) {
+		return -2;
+	}
+	/* lines shorter than two characters are rejected before the last
+	 * character is examined, so line[-1] of an empty line is never read */
+	if(line_length < 2) return -1;
+	e = line + line_length - 1;
+
+	/* return if EOL not found at the end of the line */
+	if(*e != '\n' && check_eol) return -1;
+
+	/* note: it's simpler then to use str_tim, cause 'e' is used below */
+	while(isspace(*e) && e > line) *(e--) = 0; /* remove trailing white spaces */
+	while(isspace(*p)) p++; /* skip white spaces at the start of the line */
+
+	/* try to parse BSD formated line or magnet/ed2k links */
+	if( (res = parse_bsd_format(p, filename, sums)) ||
+		(res = parse_magnet_link(p, filename, sums)) ||
+		(res = parse_ed2k_link(p, filename, sums)) ) {
+		return (res > 0 ? 0 : -1);
+	}
+
+	/* parse lines with filename preceding hash sums */
+	for(count = 0; e > p;) {
+		int len;
+
+		/* search for hash sum from the end of the line */
+		unsigned hash_type = test_hash_string(&e, p, &len);
+		int stop = (!hash_type || e <= p || !isspace(*e));
+
+		if(hash_type && (!stop || count == 0)) {
+			put_hash_sum(sums, e + 1, hash_type, len);
+			count++;
+		}
+		if(stop) break;
+
+		/* skip hash sum and preceding white spaces */
+		while(isspace(*e) && e > p) *(e--) = 0;
+
+		*filename = line;
+	}
+
+	if(count > 0) {
+		if(*filename) {
+			process_backslashes((char*)*filename);
+		}
+		return 0;
+	}
+
+	/* parse lines with hash sums preceding a filename */
+	e = p + strlen(p) - 1;
+	for(count = 0; p < e; count++) {
+		int len;
+
+		/* search for hash sum */
+		unsigned hash_type = test_hash_string(&p, e, &len);
+
+		if(!hash_type || p >= e || !isspace(*p)) break;
+		put_hash_sum(sums, p - len, hash_type, len);
+
+		/* skip processed hash sum and following white spaces */
+		while(isspace(*p) && p < e) p++;
+
+		/* remove preceding star '*' from filename */
+		if(p && *p == '*') p++;
+		*filename = p;
+	}
+	if(*filename) {
+		process_backslashes((char*)*filename);
+	}
+	return 0;
+}
diff --git a/file_set.h b/file_set.h
new file mode 100644
index 00000000..26125224
--- /dev/null
+++ b/file_set.h
@@ -0,0 +1,41 @@
+/* file_set.h - functions to manipulate a set of files with their hash sums */
+#ifndef FILE_SET_H
+#define FILE_SET_H
+
+#include "calc_sums.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct file_item {
+	unsigned hash;
+	char* filepath;
+	char* search_filepath; /* for case-insensitive comparison */
+	struct rhash_sums_t sums;
+} file_item;
+
+/* array to store parsed crc sums */
+struct vector_t;
+typedef struct vector_t file_set;
+
+int parse_crc_file_line(char* line, const char** filename, struct rhash_sums_t* sums, int check_eol);
+
+file_item* file_item_new(const char* filepath);
+void file_item_free(file_item *item);
+int 
file_item_set_filepath(file_item* item, const char* filepath);
+
+#define file_set_new() rsh_vector_new((void(*)(void*))file_item_free) /* allocate new file set */
+/* NOTE(review): the expansion ends with ';', kept for compatibility with existing callers */
+#define file_set_free(set) rsh_vector_free(set); /* free memory */
+/* the macro must use its own 'index' argument, not a variable 'i' of the caller */
+#define file_set_get(set, index) ((file_item*)((set)->array[(index)])) /* get index-th element */
+#define file_set_add(set, item) rsh_vector_add_ptr(set, item) /* add a file_item to file_set */
+
+void file_set_add_name(file_set *set, const char* filename);
+void file_set_sort(file_set *set);
+file_item* file_set_find(file_set *set, const char* filename);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif /* __cplusplus */
+
+#endif /* FILE_SET_H */
diff --git a/find_file.c b/find_file.c
new file mode 100644
index 00000000..7cc339bd
--- /dev/null
+++ b/find_file.c
@@ -0,0 +1,263 @@
+/* find_file.c
+ *
+ * find_file function for searching through directory trees doing work
+ * on each file found similar to the Unix find command.
+ */
+
+#include "common_func.h" /* should be included before the C library files */
+#include
+#include
+#include
+#include
+#include /* ino_t */
+#include /* opendir/readdir */
+#include
+#include
+#include
+
+#include "win_utils.h"
+#include "find_file.h"
+
+#if !defined(_WIN32) && (defined(_BSD_SOURCE) || _XOPEN_SOURCE >= 500)
+#define USE_LSTAT_FOR_SYMLINKS
+#endif
+
+/* from perl manual File::Find
+ * These are functions for searching through directory trees doing work on
+ * each file found, similarly to the Unix find command.
+ * File::Find exports two functions, "find" and "finddepth".
+ * They work similarly but have subtle differences.
+ * 1. find() does a breadth-first search over the given @directories in the order they are given.
+ *    In essence, it works from the top down.
+ * 2. finddepth() works just like find() except it does a depth-first search.
+ *    It works from the bottom of the directory tree up. 
*/ + +typedef struct dir_entry +{ + struct dir_entry *next; + char* filename; + /*unsigned short level; nesting level */ + unsigned type; /* dir,link, e.t.c. */ + /*bool operator < (struct dir_entry &right) { return (name < right.name); }*/ +} dir_entry; + +/** + * Allocate and intialize a dir_entry. + * + * @param next next dir_entry in list + * @param filename a filename to store in the dir_entry + * @param type type of dir_entry + * @return allocated dir_entry + */ +static dir_entry* dir_entry_new(dir_entry *next, char* filename, unsigned type) +{ + dir_entry* e = (dir_entry*)malloc(sizeof(dir_entry)); + if(!e) return NULL; + if(filename) { + e->filename = rsh_strdup(filename); + if(!e->filename) { + free(e); + return NULL; + } + } else { + e->filename = NULL; + } + e->next = next; + e->type = type; + return e; +} + +/** + * Insert a dir_entry with given filename and type in list. + * + * @param at the position before which the entry will be inserted + * @param filename file name + * @param type file type + * @return pointer to the inserted dir_entry + */ +static dir_entry* dir_entry_insert(dir_entry **at, char* filename, unsigned type) +{ + dir_entry* e = dir_entry_new(*at, filename, type); + if(e) *at = e; + return e; +} + +/** + * Free memory allocated by a dir_entry object. + * + * @param e pointer to object to deallocate + */ +static void dir_entry_free(dir_entry* e) +{ + free(e->filename); + free(e); +} + +/** + * Directory iterator. + */ +typedef struct dir_iterator +{ + int left; + char* prev_dir; +} dir_iterator; +#define MAX_DIRS_DEPTH 64 + +/** + * Walk directory tree and call given callback function to process each file/directory. 
+ *
+ * @param start_dir path to the directory to walk recursively
+ * @param call_back the function to call on each file/directory; a zero
+ *                  result for a directory means it shall not be walked
+ * @param options bit flags FIND_WALK_DEPTH_FIRST and FIND_FOLLOW_LINKS,
+ *                specifying how to walk the directory tree
+ * @param max_depth maximum nesting level of walked directories; 0 means
+ *                  nothing is walked, a negative or too big value is
+ *                  replaced by MAX_DIRS_DEPTH
+ * @param call_back_data a pointer to pass to callback
+ * @return 0 on success, -1 on error with the error code stored in errno
+ */
+int find_file(const char* start_dir,
+	int (*call_back)(const char* filepath, int type, void* data),
+	int options, int max_depth, void* call_back_data)
+{
+	dir_entry *dirs_stack = NULL; /* root of the dir_list */
+	dir_iterator* it;
+	int level = 1;
+	int i;
+	dir_entry* entry;
+	struct rsh_stat_struct st;
+
+	if(max_depth < 0 || max_depth >= MAX_DIRS_DEPTH) {
+		max_depth = MAX_DIRS_DEPTH;
+	}
+	/* skip the directory if max_depth == 0 */
+	if(max_depth == 0) {
+		return 0;
+	}
+
+	/* check that start_dir is a directory */
+	if(rsh_stat(start_dir, &st) < 0) {
+		return -1; /* errno is already set by stat */
+	}
+	if( !S_ISDIR(st.st_mode) ) {
+		errno = ENOTDIR;
+		return -1;
+	}
+
+	/* check if we should descend into the root directory */
+	if( !(options&FIND_WALK_DEPTH_FIRST)
+		&& !call_back(start_dir, FIND_IFDIR | FIND_IFFIRST, call_back_data)) {
+		return 0;
+	}
+
+	/* levels 0..MAX_DIRS_DEPTH are in use, so one extra slot is required */
+	it = (dir_iterator*)malloc((MAX_DIRS_DEPTH + 1) * sizeof(dir_iterator));
+	if(!it) {
+		errno = ENOMEM;
+		return -1;
+	}
+	for(i = 0; i <= MAX_DIRS_DEPTH; i++) {
+		it[i].left = 0;
+		it[i].prev_dir = NULL;
+	}
+
+	/* push root directory into dirs_stack */
+	it[0].left = 1;
+	it[0].prev_dir = rsh_strdup(start_dir);
+	if(!it[0].prev_dir) {
+		free(it);
+		errno = ENOMEM;
+		return -1;
+	}
+	entry = dir_entry_insert(&dirs_stack, NULL, 0);
+	if(!entry) {
+		free(it[0].prev_dir);
+		free(it);
+		errno = ENOMEM;
+		return -1;
+	}
+
+	for(;;) {
+		dir_entry *dir, **insert_at;
+		char* dir_path;
+		DIR *dp;
+		struct dirent *de;
+		int type;
+		/* walk back */
+		while((--level) >= 0 && it[level].left <= 0) {
+			free(it[level+1].prev_dir);
+			it[level+1].prev_dir = NULL;
+		}
+		if(level < 0) break;
+		assert(dirs_stack != NULL);
+		/* on the first cycle: level == 0, stack[0] == 0; */
+
+		dir = dirs_stack; /* take last dir from the list */
+		dirs_stack = dirs_stack->next; /* remove last dir from the list */
+		it[level].left--;
+
+		dir_path = (!dir->filename ? rsh_strdup(it[level].prev_dir) :
+			make_path(it[level].prev_dir, dir->filename) );
+		dir_entry_free(dir);
+
+		/* the level is advanced even if the path can't be built, so the
+		 * walk back step above returns to the level of the parent */
+		level++;
+		if(!dir_path) continue;
+
+		it[level].left = 0;
+		/* release the path of the previously walked directory of this level */
+		free(it[level].prev_dir);
+		it[level].prev_dir = dir_path;
+
+		if( options&FIND_WALK_DEPTH_FIRST ) {
+			/* check if we should skip the directory */
+			if( !call_back(dir_path, FIND_IFDIR, call_back_data) )
+				continue;
+		}
+
+		/* read dir */
+		dp = opendir(dir_path);
+		if(dp == NULL) continue;
+		type = FIND_IFFIRST;
+		insert_at = &dirs_stack;
+
+		while((de = readdir(dp)) != NULL) {
+			int res;
+			char* path;
+			/* skip "." and ".." dirs */
+			if(de->d_name[0] == '.' && (de->d_name[1] == 0 ||
+				(de->d_name[1] == '.' && de->d_name[2] == 0 )))
+				continue;
+
+			if( !(path = make_path(dir_path, de->d_name)) ) continue;
+
+#ifndef USE_LSTAT_FOR_SYMLINKS
+			if(rsh_stat(path, &st) < 0) {
+				free(path);
+				continue;
+			}
+#else
+			res = (options & FIND_FOLLOW_LINKS ? rsh_stat(path, &st) : lstat(path, &st));
+			/*if((st.st_mode&S_IFMT) == S_IFLNK) type |= FIND_IFLNK;*/
+			if(res < 0 || (!(options & FIND_FOLLOW_LINKS) && S_ISLNK(st.st_mode)) ) {
+				free(path);
+				continue;
+			}
+#endif
+
+/* check bits (the check fails for gcc -ansi) */
+/*#if( (S_IFMT >> 12) != 0x0f || (S_IFDIR >> 12) != FIND_IFDIR )
+# error wrong bits for S_IFMT and S_IFDIR
+#endif*/
+
+			/*type |= (S_ISDIR(st.st_mode) ? FIND_IFDIR : 0);*/
+			type |= ((st.st_mode >> 12) & 0x0f);
+
+			if((type & FIND_IFDIR) && (options & FIND_WALK_DEPTH_FIRST)) res = 1;
+			else {
+				/* handle file by callback function */
+				res = call_back(path, type, call_back_data);
+			}
+			free(path);
+
+			/* if file is a directory and we need to walk it */
+			if((type & FIND_IFDIR) && res && level < max_depth) {
+				/* don't go deeper if max_depth reached */
+
+				/* add directory to dirs_stack */
+				if( dir_entry_insert(insert_at, de->d_name, type) ) {
+					/* if really added */
+					insert_at = &((*insert_at)->next);
+					it[level].left++;
+				}
+			}
+			type = 0; /* clear FIND_IFFIRST flag */
+		}
+		closedir(dp);
+
+		if(it[level].left > 0) level++;
+	}
+	assert(dirs_stack == NULL);
+
+	/* free the path of the root directory and any other remaining path */
+	for(i = 0; i <= MAX_DIRS_DEPTH; i++) {
+		free(it[i].prev_dir);
+	}
+	free(it);
+	return 0;
+}
diff --git a/find_file.h b/find_file.h
new file mode 100644
index 00000000..4d68cacc
--- /dev/null
+++ b/find_file.h
@@ -0,0 +1,35 @@
+/* find_file.h - declaration of find_file function.
+ *
+ * find_file function searches through a directory tree calling a call_back on
+ * each file.
+ */
+#ifndef FIND_FILE_H
+#define FIND_FILE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* find_file options */
+#define FIND_WALK_DEPTH_FIRST 1
+#define FIND_FOLLOW_LINKS 2
+
+/* masks for file flags passed to the call_back function */
+#define FIND_IFDIR 0x04
+#define FIND_IFLNK 0x0a
+#define FIND_IFFIRST 0x10
+
+/*struct find_file_options {
+	unsigned flags;
+	int max_depth;
+};*/
+
+int find_file(const char* start_dir,
+	int (*call_back)(const char* filepath, int type, void* data),
+	int options, int max_depth, void* call_back_data);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif /* __cplusplus */
+
+#endif /* FIND_FILE_H */
diff --git a/librhash/Makefile b/librhash/Makefile
index 1aff2b12..8a077c7c 100644
--- a/librhash/Makefile
+++ b/librhash/Makefile
@@ -18,6 +18,7 @@ LDFLAGS = -L. 
-lrhash $(OPTLDFLAGS) $(ADDLDFLAGS) HEADERS = algorithms.h aich.h timing.h byte_order.h plug_openssl.h rhash.h crc32.h util.h ed2k.h edonr.h hex.h md4.h md5.h sha1.h sha256.h sha512.h ripemd-160.h gost.h has160.h snefru.h tiger.h tth.h torrent.h whirlpool.h SOURCES = algorithms.c aich.c timing.c byte_order.c plug_openssl.c rhash.c crc32.c util.c ed2k.c edonr.c hex.c md4.c md5.c sha1.c sha256.c sha512.c ripemd-160.c gost.c has160.c snefru.c tiger.c tiger_sbox.c tth.c torrent.c whirlpool.c whirlpool_sbox.c OBJECTS = algorithms.o aich.o timing.o byte_order.o plug_openssl.o rhash.o crc32.o util.o ed2k.o edonr.o hex.o md4.o md5.o sha1.o sha256.o sha512.o ripemd-160.o gost.o has160.o snefru.o tiger.o tiger_sbox.o tth.o torrent.o whirlpool.o whirlpool_sbox.o +HEADERS_LIB = rhash.h timing.h # installation directories and names DESTDIR = PREFIX = /usr/local @@ -41,7 +42,7 @@ dist-clean: clean install-lib-static: $(LIBRARY) $(INSTALL) -d $(DESTDIR)$(LIBDIR) $(DESTDIR)$(INCDIR) $(INSTALL_DATA) $(LIBRARY) $(DESTDIR)$(LIBDIR)/ - $(INSTALL_DATA) $(HEADERS) $(DESTDIR)$(INCDIR)/ + $(INSTALL_DATA) $(HEADERS_LIB) $(DESTDIR)$(INCDIR)/ ln -s $(SONAME) $(DESTDIR)$(LIBDIR)/$(SOLINK) install-lib-shared: $(SONAME) @@ -56,13 +57,16 @@ uninstall-lib-static: uninstall-lib-shared: rm -f $(DESTDIR)$(LIBDIR)/$(SONAME) +install-all-headers: + $(INSTALL_DATA) $(HEADERS) $(DESTDIR)$(INCDIR)/ + # not using GNU make extensions for compatibility with Unix/*BSD make #%.o: %.c # $(CC) -c $(CFLAGS) $< -o $@ # NOTE: dependences were generated by 'gcc -MM -DIN_RHASH *.c' # we are using plain old makefile style to support BSD make -aich.o: aich.c byte_order.h config.h util.h aich.h sha1.h +aich.o: aich.c byte_order.h config.h algorithms.h rhash.h aich.h sha1.h $(CC) -c $(CFLAGS) $< -o $@ algorithms.o: algorithms.c byte_order.h config.h rhash.h algorithms.h \ @@ -98,11 +102,12 @@ md4.o: md4.c byte_order.h config.h md4.h md5.o: md5.c byte_order.h config.h md5.h $(CC) -c $(CFLAGS) $< -o $@ -plug_openssl.o: 
plug_openssl.c config.h plug_openssl.h +plug_openssl.o: plug_openssl.c algorithms.h rhash.h byte_order.h config.h \ + plug_openssl.h $(CC) -c $(CFLAGS) $< -o $@ -rhash.o: rhash.c byte_order.h config.h algorithms.h rhash.h \ - plug_openssl.h util.h hex.h +rhash.o: rhash.c byte_order.h config.h algorithms.h rhash.h torrent.h \ + util.h sha1.h plug_openssl.h hex.h $(CC) -c $(CFLAGS) $< -o $@ ripemd-160.o: ripemd-160.c byte_order.h config.h ripemd-160.h @@ -133,7 +138,8 @@ tiger_sbox.o: tiger_sbox.c byte_order.h config.h timing.o: timing.c byte_order.h config.h rhash.h timing.h $(CC) -c $(CFLAGS) $< -o $@ -torrent.o: torrent.c byte_order.h config.h sha1.h util.h torrent.h +torrent.o: torrent.c byte_order.h config.h algorithms.h rhash.h torrent.h \ + util.h sha1.h $(CC) -c $(CFLAGS) $< -o $@ tth.o: tth.c byte_order.h config.h tth.h tiger.h @@ -153,7 +159,7 @@ DLLNAME = librhash.dll dll: $(DLLNAME) $(DLLNAME): $(SOURCES) - rm -f $@ +# rm -f $@ sed -n '1s/.*/{ global:/p; s/^RHASH_API.* \([a-z0-9_]\+\)(.*/ \1;/p; $$s/.*/local: *; };/p' rhash.h timing.h > exports.sym $(CC) -shared -DRHASH_EXPORTS $(CFLAGS) -Wl,--version-script,exports.sym,-soname,$@ $(OPTLDFLAGS) $(SOURCES) -o $@ @@ -162,8 +168,7 @@ test-dll: $(DLLNAME) test_sums.o # shared and static libraries $(SONAME): $(SOURCES) - rm -f $@ - # note: not using \n and \t symbols in sed script to increase portability (for MSYS and unixes with old sed) +# rm -f $@ sed -n '1s/.*/{ global:/p; s/^RHASH_API.* \([a-z0-9_]\+\)(.*/ \1;/p; $$s/.*/local: *; };/p' rhash.h timing.h > exports.sym $(CC) -fpic -shared $(CFLAGS) -Wl,--version-script,exports.sym,-soname,$@ $(OPTLDFLAGS) $(SOURCES) -o $@ # use 'nm -Cg --defined-only $@' to view exported symbols @@ -175,10 +180,11 @@ $(LIBRARY): $(OBJECTS) $(TEST_TARGET): $(LIBRARY) test_sums.o $(CC) test_sums.o $(LDFLAGS) -o $@ -$(TEST_SHARED): $(DLLNAME) test_sums.o - $(CC) test_sums.o $(DLLNAME) -o $@ +#$(TEST_SHARED): $(DLLNAME) test_sums.o +# $(CC) test_sums.o $(DLLNAME) -o $@ 
-test-shared: $(TEST_SHARED) +test-shared: $(SONAME) test_sums.o + $(CC) test_sums.o $(SONAME) $(LDFLAGS) -o $(TEST_SHARED) LD_LIBRARY_PATH=. ./$(TEST_SHARED) test: $(TEST_TARGET) diff --git a/librhash/crc32.c b/librhash/crc32.c index b9d7d11f..916199a0 100644 --- a/librhash/crc32.c +++ b/librhash/crc32.c @@ -27,10 +27,10 @@ void rhash_crc32_init_table(void) int i, j; poly = 0xEDB88320; - for(i=0; i<256; i++) { + for(i = 0; i < 256; i++) { crc = i; - for(j=8; j>0; j--) { - if(crc&1) crc = (crc >> 1) ^ poly; + for(j = 8; j > 0; j--) { + if(crc & 1) crc = (crc >> 1) ^ poly; else crc >>= 1; } rhash_crc32_table[i] = crc; diff --git a/librhash/md4.c b/librhash/md4.c index 32f3ea3b..c28b9862 100644 --- a/librhash/md4.c +++ b/librhash/md4.c @@ -64,10 +64,7 @@ void rhash_md4_init(md4_ctx *ctx) static void rhash_md4_process_block(unsigned state[4], const unsigned* x) { register unsigned a, b, c, d; - a = state[0]; - b = state[1]; - c = state[2]; - d = state[3]; + a = state[0], b = state[1], c = state[2], d = state[3]; MD4_ROUND1(a, b, c, d, x[ 0], 3); MD4_ROUND1(d, a, b, c, x[ 1], 7); @@ -120,10 +117,7 @@ static void rhash_md4_process_block(unsigned state[4], const unsigned* x) MD4_ROUND3(c, d, a, b, x[ 7], 11); MD4_ROUND3(b, c, d, a, x[15], 15); - state[0] += a; - state[1] += b; - state[2] += c; - state[3] += d; + state[0] += a, state[1] += b, state[2] += c, state[3] += d; } /** diff --git a/librhash/timing.c b/librhash/timing.c index 78887614..2cb5cc1f 100644 --- a/librhash/timing.c +++ b/librhash/timing.c @@ -159,7 +159,7 @@ static int hash_in_loop(unsigned hash_id, const unsigned char* message, size_t m * Benchmark a hash algorithm. 
* * @param hash_id hash algorithm identifier - * @param flags benchmark flags, can be BENCHMARK_QUIET and BENCHMARK_CPB + * @param flags benchmark flags, can be RHASH_BENCHMARK_QUIET and RHASH_BENCHMARK_CPB * @param output the stream to print results */ void rhash_run_benchmark(unsigned hash_id, unsigned flags, FILE* output) @@ -205,7 +205,7 @@ void rhash_run_benchmark(unsigned hash_id, unsigned flags, FILE* output) time = rhash_timer_stop(&timer); total_time += time; - if((flags & (BENCHMARK_QUIET|BENCHMARK_RAW)) == 0) { + if((flags & (RHASH_BENCHMARK_QUIET | RHASH_BENCHMARK_RAW)) == 0) { fprintf(output, "%s %u MiB calculated in %.3f sec, %.3f MBps\n", hash_name, (unsigned)sz_mb, time, (double)sz_mb / time); fflush(output); } @@ -213,7 +213,7 @@ void rhash_run_benchmark(unsigned hash_id, unsigned flags, FILE* output) #if defined(HAVE_TSC) /* measure the CPU "clocks per byte" speed */ - if(flags & BENCHMARK_CPB) { + if(flags & RHASH_BENCHMARK_CPB) { unsigned int c1 = -1, c2 = -1; unsigned volatile long long cy0, cy1, cy2; int msg_size = 128*1024; @@ -237,17 +237,17 @@ void rhash_run_benchmark(unsigned hash_id, unsigned flags, FILE* output) } #endif /* HAVE_TSC */ - if(flags & BENCHMARK_RAW) { + if(flags & RHASH_BENCHMARK_RAW) { /* output result in a "raw" machine-readable format */ fprintf(output, "%s\t%u\t%.3f\t%.3f", hash_name, ((unsigned)sz_mb * rounds), total_time, (double)(sz_mb * rounds) / total_time); #if defined(HAVE_TSC) - if(flags & BENCHMARK_CPB) fprintf(output, "\t%.2f", cpb); + if(flags & RHASH_BENCHMARK_CPB) fprintf(output, "\t%.2f", cpb); #endif /* HAVE_TSC */ fprintf(output, "\n"); } else { fprintf(output, "%s %u MiB total in %.3f sec, %.3f MBps", hash_name, ((unsigned)sz_mb * rounds), total_time, (double)(sz_mb * rounds) / total_time); #if defined(HAVE_TSC) - if(flags & BENCHMARK_CPB) fprintf(output, ", CPB=%.2f", cpb); + if(flags & RHASH_BENCHMARK_CPB) fprintf(output, ", CPB=%.2f", cpb); #endif /* HAVE_TSC */ fprintf(output, "\n"); } diff --git 
a/librhash/timing.h b/librhash/timing.h index 0f0d5d24..513ed6d7 100644 --- a/librhash/timing.h +++ b/librhash/timing.h @@ -27,9 +27,9 @@ RHASH_API void rhash_timer_start(timedelta_t* timer); RHASH_API double rhash_timer_stop(timedelta_t* timer); /* flags for running a benchmark */ -#define BENCHMARK_QUIET 1 -#define BENCHMARK_CPB 2 -#define BENCHMARK_RAW 4 +#define RHASH_BENCHMARK_QUIET 1 +#define RHASH_BENCHMARK_CPB 2 +#define RHASH_BENCHMARK_RAW 4 RHASH_API void rhash_run_benchmark(unsigned hash_id, unsigned flags, FILE* output); diff --git a/librhash/util.h b/librhash/util.h index 54bdaedc..3fca611a 100644 --- a/librhash/util.h +++ b/librhash/util.h @@ -30,8 +30,8 @@ struct vector_t* rsh_vector_new(void (*destructor)(void*)); struct vector_t* rsh_vector_new_simple(void); void rsh_vector_free(struct vector_t* vect); void rsh_vector_destroy(struct vector_t* vect); -void rsh_vector_add_ptr (struct vector_t* vect, void *item); -void rsh_vector_sort(struct vector_t* vect, int (*compare)(const void *rec1, const void *rec2)); +void rsh_vector_add_ptr(struct vector_t* vect, void *item); +/*void rsh_vector_sort(struct vector_t* vect, int (*compare)(const void *rec1, const void *rec2));*/ void rsh_vector_item_add_empty(struct vector_t* vect, size_t item_size); #define rsh_vector_add_uint32(vect, item) { \ rsh_vector_item_add_empty(vect, item_size); \ @@ -58,10 +58,16 @@ void rsh_blocks_vector_destroy(struct blocks_vector_t* vect); (&((unsigned char*)((bvector)->blocks.array[(index) / (blocksize)]))[(item_size) * ((index) % (blocksize))]) #define rsh_blocks_vector_add(bvector, item, blocksize, item_size) { \ if(((bvector)->size % (blocksize)) == 0) \ - rsh_vector_add_ptr(&((bvector)->blocks), rsh_malloc((item_size) * (blocksize))); \ + rsh_vector_add_ptr(&((bvector)->blocks), rsh_malloc((item_size) * (blocksize))); \ memcpy(rsh_blocks_vector_get_ptr((bvector), (bvector)->size, (blocksize), (item_size)), (item), (item_size)); \ (bvector)->size++; \ } +#define 
rsh_blocks_vector_add_ptr(bvector, ptr, blocksize) { \
+	if(((bvector)->size % (blocksize)) == 0) \
+		rsh_vector_add_ptr(&((bvector)->blocks), rsh_malloc(sizeof(void*) * (blocksize))); \
+	((void***)(bvector)->blocks.array)[(bvector)->size / (blocksize)][(bvector)->size % (blocksize)] = (void*)(ptr); \
+	(bvector)->size++; \
+}
 #define rsh_blocks_vector_add_empty(bvector, blocksize, item_size) { \
 	if( (((bvector)->size++) % (blocksize)) == 0) \
 		rsh_vector_add_ptr(&((bvector)->blocks), rsh_malloc((item_size) * (blocksize))); \
diff --git a/output.c b/output.c
new file mode 100644
index 00000000..ce8d8383
--- /dev/null
+++ b/output.c
@@ -0,0 +1,451 @@
+/* output.c */
+
+#include "common_func.h" /* should be included before the C library files */
+#include
+#include
+#include
+#include
+#include /* exit() */
+#include
+#include
+
+#include "librhash/rhash.h"
+#include "calc_sums.h"
+#include "parse_cmdline.h"
+#include "rhash_main.h"
+#include "output.h"
+
+/*#ifdef _WIN32
+#define WIN32_USE_CURSOR
+#endif*/
+
+#ifdef _WIN32
+#include
+#include /* for _SH_DENYNO */
+#endif
+
+#ifdef WIN32_USE_CURSOR
+#include
+#endif
+
+/* global pointer to the selected method of percents output */
+struct percents_output_info_t *percents_output = NULL;
+
+
+/**
+ * Print a formatted message to the program log and flush the log stream.
+ * The message goes to stderr while rhash_data.log is not set.
+ *
+ * @param format printf-style format string
+ * @param ... the arguments consumed by the format string
+ */
+void log_msg(const char* format, ...)
+{
+	FILE* log = (rhash_data.log ? rhash_data.log : stderr);
+	va_list ap;
+	va_start(ap, format);
+	vfprintf(log, format, ap);
+	va_end(ap); /* each va_start() must be matched by va_end() before returning */
+	fflush(log);
+}
+
+/**
+ * Print a file error to program log.
+ *
+ * The message is built from the current errno value. Like log_msg(),
+ * the function writes to stderr while rhash_data.log is not set.
+ *
+ * @param filepath the path to the file that caused the error
+ */
+void log_file_error(const char* filepath)
+{
+	FILE* log = (rhash_data.log ? rhash_data.log : stderr);
+	fprintf(log, PROGRAM_NAME " error: %s: %s\n", filepath, strerror(errno));
+	fflush(log);
+}
+
+/* state of the percents output for the file being processed */
+struct percents_t {
+	int points;      /* printed dots, or the last printed percent / rotation step */
+	int use_cursor;  /* non-zero to rewind the console cursor (WIN32_USE_CURSOR only) */
+	int same_output; /* non-zero if check results share the terminal line with percents */
+	unsigned ticks;  /* tick count of the last percents update */
+
+#ifdef WIN32_USE_CURSOR
+	HANDLE hOut;
+	unsigned short cur_x, cur_y; /* cursor position, where to print percents */
+#endif
+};
+static struct percents_t percents;
+
+/**
+ * Print file path and result of its verification by hash.
+ * If an error occurred, print the error message instead of the result.
+ *
+ * @param info pointer to the file-info structure
+ * @param print_name set to non-zero to print file path
+ * @param print_result set to non-zero to print hash verification result
+ */
+static void print_check_result(struct file_info *info, int print_name, int print_result)
+{
+	if(print_name) {
+		fprintf(rhash_data.out, "%-51s ", info->print_path);
+	}
+	if(print_result) {
+		if(info->error == -1) {
+			/* print the error text to the output stream */
+			fprintf(rhash_data.out, "%s\n", strerror(errno));
+		} else if(info->wrong_sums == 0 || !(opt.flags & OPT_VERBOSE)) {
+			/* using 4 characters to overwrite percent;
+			   fputs() keeps the text out of the printf format position */
+			fputs((info->wrong_sums == 0 ? "OK \n" : "ERR\n"), rhash_data.out);
+		} else {
+			int id;
+			char actual[130], expected[130];
+
+			/* print verbose info about wrong sums */
+			fprintf(rhash_data.out, "ERROR");
+			for(id = 1; id < RHASH_ALL_HASHES; id <<= 1) {
+				if(id & info->wrong_sums) {
+					int pflags = (rhash_is_base32(id) ? RHPR_BASE32 | RHPR_UPPERCASE : RHPR_HEX | RHPR_UPPERCASE);
+					rhash_print_bytes(expected, rhash_get_digest_ptr(info->orig_sums, id), rhash_get_digest_size(id), pflags);
+
+					rhash_print(actual, info->rctx, id, RHPR_UPPERCASE);
+					fprintf(rhash_data.out, ", %s is %s should be %s", rhash_get_name(id), actual, expected);
+				}
+			}
+			if(RHASH_EMBEDDED_CRC32 & info->wrong_sums) {
+				rhash_print(expected, info->rctx, RHASH_CRC32, RHPR_UPPERCASE);
+				fprintf(rhash_data.out, ", embedded sum should be %s", expected);
+			}
+			fprintf(rhash_data.out, "\n");
+		}
+	}
+	fflush(rhash_data.out);
+}
+
+/**
+ * Prepare or print result of file processing.
+ * Does nothing unless one of the hash checking modes is active.
+ *
+ * @param info pointer to the file-info structure
+ * @param init non-zero on initialization before hash calculation,
+ *             and zero after hash calculation finished.
+ */
+static void print_results_on_check(struct file_info *info, int init)
+{
+	if(opt.mode & (MODE_CHECK | MODE_CHECK_EMBEDDED)) {
+		/* with percents or skip-ok the name is printed together with the result */
+		int print_name = (opt.flags & (OPT_PERCENTS | OPT_SKIP_OK) ? !init : init);
+
+		/* NOTE(review): errno is not reset in this file; presumably the caller
+		   clears it before processing each file - confirm */
+		if(!init && (opt.flags & OPT_SKIP_OK) && errno == 0 && info->wrong_sums == 0) {
+			return; /* skip OK message */
+		}
+
+		print_check_result(info, print_name, !init);
+	}
+}
+
+/* functions to output file info without percents */
+
+/**
+ * Print file name in hash checking mode.
+ * No information is printed in other modes.
+ *
+ * @param info pointer to the file-info structure
+ * @return non-zero, indicating that the output method successfully initialized
+ */
+static int dummy_init_percents(struct file_info *info)
+{
+	print_results_on_check(info, 1);
+	return 1;
+}
+
+/**
+ * Print file check results without printing percents.
+ * Information is printed only in hash verification mode.
+ *
+ * @param info pointer to the file-info structure
+ * @param process_res non-zero if error occurred while hashing/checking
+ */
+static void dummy_finish_percents(struct file_info *info, int process_res)
+{
+	info->error = process_res;
+	print_results_on_check(info, 0);
+}
+
+/* functions to output file info with simple multi-line wget-like percents */
+
+/* the number of dots printed on one line of the dots percent mode */
+enum { DOTS_PER_LINE = 74 };
+
+/**
+ * Initialize dots percent mode.
+ *
+ * @param info pointer to the file-info structure
+ * @return non-zero, indicating that the output method successfully initialized
+ */
+static int dots_init_percents(struct file_info *info)
+{
+	fflush(rhash_data.out);
+	fflush(rhash_data.log);
+	percents.points = 0;
+	print_results_on_check(info, 1);
+	return 1;
+}
+
+/**
+ * Finish dots percent mode. If in hash verification mode,
+ * then print the results of file check.
+ *
+ * @param info pointer to the file-info structure
+ * @param process_res non-zero if error occurred while hashing/checking
+ */
+static void dots_finish_percents(struct file_info *info, int process_res)
+{
+	char buf[80];
+	info->error = process_res;
+
+	/* pad an unfinished line of dots, so "100%" lines up with other percents */
+	if((percents.points % DOTS_PER_LINE) != 0) {
+		log_msg("%s 100%%\n", str_set(buf, ' ', DOTS_PER_LINE - (percents.points % DOTS_PER_LINE)));
+	}
+	print_results_on_check(info, 0);
+}
+
+/**
+ * Output percents by printing one dot per each processed 1MiB.
+ * Offsets which are not a multiple of 1MiB are ignored.
+ *
+ * @param info pointer to the file-info structure
+ * @param offset current file offset in bytes
+ */
+static void dots_update_percents(struct file_info *info, uint64_t offset)
+{
+	const int pt_size = 1024*1024; /* 1MiB */
+	if( (offset % pt_size) != 0 ) return;
+
+	if(percents.points == 0) {
+		/* the first dot of a file: print the header line */
+		fprintf(rhash_data.log, "\n%s %s\n",
+			(opt.mode & (MODE_CHECK | MODE_CHECK_EMBEDDED) ? "Checking" : "Processing"),
+			info->print_path);
+		fflush(rhash_data.log);
+	}
+	putc('.', rhash_data.log);
+
+	if(((++percents.points) % DOTS_PER_LINE) == 0) {
+		if(info->size > 0) {
+			int perc = (int)( offset * 100.0 / (uint64_t)info->size + 0.5 );
+			/* perc is an int, so it is printed with a signed conversion */
+			fprintf(rhash_data.log, " %2d%%\n", perc);
+			fflush(rhash_data.log);
+		} else {
+			putc('\n', rhash_data.log);
+		}
+	}
+}
+
+/* console one-line percents */
+
+/**
+ * Initialize one-line percent mode.
+ *
+ * @param info pointer to the file-info structure
+ * @return non-zero if the output method successfully initialized
+ */
+static int p_init_percents(struct file_info *info)
+{
+#ifdef WIN32_USE_CURSOR
+	CONSOLE_SCREEN_BUFFER_INFO csbInfo;
+	percents.hOut = NULL;
+#endif
+
+	percents.points = 0;
+	percents.same_output = 0;
+	percents.use_cursor = 0;
+
+	fflush(rhash_data.out);
+	fflush(rhash_data.log);
+	assert(rhash_data.log == stderr);
+
+	/* note: this output differs from print_check_result() by file handle */
+	fprintf(rhash_data.log, "%-51s ", info->print_path);
+
+#ifdef WIN32_USE_CURSOR
+	if(percents.use_cursor) {
+		/* store cursor coordinates */
+		percents.hOut = GetStdHandle(STD_ERROR_HANDLE);
+		if(percents.hOut == INVALID_HANDLE_VALUE ||
+				!GetConsoleScreenBufferInfo(percents.hOut, &csbInfo)) {
+			percents.hOut = NULL;
+			return 0;
+		} else {
+			percents.cur_x = csbInfo.dwCursorPosition.X;
+			percents.cur_y = csbInfo.dwCursorPosition.Y;
+		}
+	}
+#endif
+
+	/* check results share the line with percents only if stdout itself is
+	   a terminal; descriptor 1 is stdout (descriptor 0 is stdin) */
+	percents.same_output = (rhash_data.out == stdout && isatty(1));
+	percents.ticks = rhash_get_ticks();
+	return 1;
+}
+
+/**
+ * Output one-line percents by printing them after file path.
+ * In the case the total file length is unknown (i.e. hashing stdin)
+ * output a rotating stick.
+ *
+ * The output is skipped when the displayed percent value did not change
+ * or when the previous update was printed less than 50 ticks ago.
+ *
+ * @param info pointer to the file-info structure
+ * @param offset current file offset in bytes
+ */
+static void p_update_percents(struct file_info *info, uint64_t offset)
+{
+	static const char rot[4] = {'-', '\\', '|', '/'};
+	const unsigned min_interval = 50; /* in ticks; allows 20 updates per second */
+	int perc = 0;
+	unsigned ticks;
+
+#ifdef WIN32_USE_CURSOR
+	COORD dwCursorPosition;
+	if(percents.use_cursor && percents.hOut == NULL) return;
+#endif
+
+	if(info->size > 0) {
+		/* use only two digits to display percents: 0%-99% */
+		perc = (int)( offset * 99.9 / (uint64_t)info->size );
+		if(percents.points == perc) return;
+	}
+
+	/* update percents no more than 20 times per second */
+	ticks = rhash_get_ticks(); /* clock ticks count in milliseconds */
+	if((unsigned)(ticks - percents.ticks) < min_interval) return;
+
+	/* output percents or rotated bar */
+	if(info->size > 0) {
+		/* perc is an int, so it is printed with a signed conversion */
+		fprintf(rhash_data.log, "%d%%", perc);
+		percents.points = perc;
+	} else {
+		fprintf(rhash_data.log, "%c", rot[(percents.points++) & 3]);
+	}
+
+#ifdef WIN32_USE_CURSOR
+	if(percents.use_cursor) {
+		fflush(rhash_data.log);
+
+		/* rewind the cursor position */
+		dwCursorPosition.X = percents.cur_x;
+		dwCursorPosition.Y = percents.cur_y;
+		SetConsoleCursorPosition(percents.hOut, dwCursorPosition);
+	} else
+#endif
+	{
+		/* return to the line start and reprint the path, so that the next
+		   update overwrites the percents printed above */
+		fprintf(rhash_data.log, "\r%-51s ", info->print_path);
+		fflush(rhash_data.log);
+	}
+	percents.ticks = ticks;
+}
+
+/**
+ * Finish one-line percent mode. If in hash verification mode,
+ * then print the results of file check.
+ *
+ * The check result is printed unless OK messages are skipped
+ * and the file has no wrong sums.
+ *
+ * @param info pointer to the file-info structure
+ * @param process_res non-zero if error occurred while hashing/checking
+ */
+static void p_finish_percents(struct file_info *info, int process_res)
+{
+	int in_check_mode, ok_is_hidden, show_result;
+
+#ifdef WIN32_USE_CURSOR
+	if(percents.use_cursor && percents.hOut == NULL) return;
+#endif
+
+	in_check_mode = ((opt.mode & (MODE_CHECK | MODE_CHECK_EMBEDDED)) != 0);
+	ok_is_hidden = ((opt.flags & OPT_SKIP_OK) && errno == 0 && info->wrong_sums == 0);
+	show_result = (in_check_mode && !ok_is_hidden);
+	info->error = process_res;
+
+	if(show_result && percents.same_output) {
+		/* the path was already printed by p_init_percents(): add only the result */
+		print_check_result(info, 0, 1);
+		return;
+	}
+
+	fprintf(rhash_data.log, "100%%\n");
+	fflush(rhash_data.log);
+	if(show_result) {
+		print_check_result(info, 1, 1);
+	}
+}
+
+/* three methods of percents output: { init, update, finish, name } */
+struct percents_output_info_t dummy_perc = {
+	dummy_init_percents, NULL /* no update callback */, dummy_finish_percents, "dummy"
+};
+struct percents_output_info_t dots_perc = {
+	dots_init_percents, dots_update_percents, dots_finish_percents, "dots"
+};
+struct percents_output_info_t p_perc = {
+	p_init_percents, p_update_percents, p_finish_percents, "digits"
+};
+
+/**
+ * Initialize pointers to output functions.
+ */
+void setup_output(void)
+{
+	rhash_data.out = stdout;
+	rhash_data.log = stderr;
+
+	if(opt.flags & OPT_PERCENTS) {
+		/* we don't use _fileno() cause it is not in ISO C90, and so undefined
+		   when compiling with the GCC -ansi option */
+		/* The one-line mode asserts that the log is stderr, so it must not
+		   be selected when the log is redirected to a file by opt.log below. */
+		if(!opt.log && isatty(2)) {
+			percents_output = &p_perc;
+		} else {
+			percents_output = &dots_perc;
+		}
+	} else {
+		percents_output = &dummy_perc;
+	}
+
+	if(opt.output) {
+#ifdef _WIN32
+		if( !(rhash_data.out = _wfsopen((wchar_t*)opt.output, L"w", _SH_DENYNO)) ) {
+#else
+		if( !(rhash_data.out = fopen(opt.output, "w")) ) {
+#endif
+			fprintf(stderr, PROGRAM_NAME ": %s: %s\n", opt.output, strerror(errno));
+			rsh_exit(-1);
+		}
+	}
+
+	if(opt.log) {
+#ifdef _WIN32
+		if( !(rhash_data.log = _wfsopen((wchar_t*)opt.log, L"w", _SH_DENYNO)) ) {
+#else
+		if( !(rhash_data.log = fopen(opt.log, "w")) ) {
+#endif
+			fprintf(stderr, PROGRAM_NAME ": %s: %s\n", opt.log, strerror(errno));
+			rsh_exit(-1);
+		}
+	}
+}
+
+/* misc output functions */
+
+/**
+ * Print total statistics of crc file checking.
+ */
+void print_check_stats(void)
+{
+	if(rhash_data.processed == rhash_data.ok) {
+		/* NOTE: don't use puts() here cause it mess with printf stdout buffering */
+		fprintf(rhash_data.out, "Everything OK\n");
+	} else {
+		fprintf(rhash_data.out, "Errors Occurred: Errors:%-3u Miss:%-3u Success:%-3u Total:%-3u\n", rhash_data.processed-rhash_data.ok-rhash_data.miss, rhash_data.miss, rhash_data.ok, rhash_data.processed);
+	}
+	fflush(rhash_data.out);
+}
+
+/**
+ * Print file processing times.
+ *
+ * @param info pointer to the file-info structure holding the time and size
+ */
+void print_file_time_stats(struct file_info* info)
+{
+	print_time_stats(info->time, info->size, 0);
+}
+
+/**
+ * Print processing time statistics.
+ *
+ * @param time the processing time in seconds
+ * @param size the number of processed bytes
+ * @param total non-zero to label the line "Total" instead of "Calculated in"
+ */
+void print_time_stats(double time, uint64_t size, int total)
+{
+	/* a zero time gives a zero speed instead of a division by zero */
+	double speed = (time == 0 ? 0 : (double)(int64_t)size / 1048576.0 / time);
+	fprintf(rhash_data.log, "%s %.3f sec, %4.2f MBps\n",
+		(total ? "Total" : "Calculated in"), time, speed);
+	fflush(rhash_data.log);
+}
diff --git a/output.h b/output.h
new file mode 100644
index 00000000..2ecd5f85
--- /dev/null
+++ b/output.h
@@ -0,0 +1,49 @@
+/* output.h */
+#ifndef OUTPUT_H
+#define OUTPUT_H
+
+#include
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct file_info;
+struct timeval;
+
+struct percents_output_info_t {
+	/* methods to output percents */
+	int (*init)(struct file_info *info);
+	void (*update)(struct file_info *info, uint64_t offset);
+	void (*finish)(struct file_info *info, int process_res);
+	const char* name;
+};
+
+/* pointer to the selected percents output method */
+extern struct percents_output_info_t *percents_output;
+#define init_percents(info) percents_output->init(info)
+#define update_percents(info, offset) percents_output->update(info, offset)
+#define finish_percents(info, process_res) percents_output->finish(info, process_res)
+
+/* pointers to functions to print percents and file info */
+#if 0
+extern int (*init_percents)(struct file_info *info);
+extern void (*update_percents)(struct file_info *info, uint64_t offset);
+extern void (*finish_percents)(struct file_info *info, int process_res);
+#endif
+
+/* method to initialize pointers to output methods */
+void setup_output(void);
+
+void log_msg(const char* format, ...);
+void log_file_error(const char* filepath);
+void print_check_stats(void);
+
+void print_time_stats(double time, uint64_t size, int total);
+void print_file_time_stats(struct file_info* info);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif /* __cplusplus */
+
+#endif /* OUTPUT_H */
diff --git a/parse_cmdline.c b/parse_cmdline.c
new file mode 100644
index 00000000..ac202d54
--- /dev/null
+++ b/parse_cmdline.c
@@ -0,0 +1,947 @@
+/* parse_cmdline.c - parsing of command line options */
+
+#include "common_func.h" /* should be included before the C library files */
+#include
+#include
+#include
+#include
+#include
+#include /* stat() */
+#ifdef _WIN32
+#include
/* for SetFileApisToOEM(), CharToOem() */ +#endif + +#include "librhash/rhash.h" +#include "librhash/plug_openssl.h" +#include "win_utils.h" +#include "file_mask.h" +#include "crc_print.h" +#include "output.h" +#include "rhash_main.h" +#include "version.h" +#include "parse_cmdline.h" + +#define VERSION_STRING PROGRAM_NAME " v" VERSION "\n" + +typedef struct options_t options_t; +struct options_t conf_opt; /* config file parsed options */ +struct options_t opt; /* command line options */ + +/** + * Print program help. + */ +static void print_help(void) +{ + assert(rhash_data.out != NULL); + fprintf(rhash_data.out, "%s\n%s", VERSION_STRING, + "Usage: " CMD_FILENAME " [