By the way, the VMAC implementation includes several sets of macros that improve portability:

Code:

/* --------------------------------------------------------------------------
* This implementation uses uint32_t and uint64_t as names for unsigned 32-
* and 64-bit integer types, and UINT64_C(v) to spell 64-bit constants.
* These are defined in C99 stdint.h. The following may need adaptation if
* you are not running a C99 or Microsoft C environment.
*
* Selection order:
*   1. <stdint.h>, when VMAC_USE_STDINT is non-zero and the compiler is
*      either non-Microsoft or Visual Studio 2010 (_MSC_VER 1600) or later,
*      which is the first Microsoft release that ships stdint.h.
*   2. Microsoft's sized built-in integer types, for any other Microsoft
*      compiler.
*   3. A plain-C guess (unsigned int / unsigned long long) otherwise.
* ----------------------------------------------------------------------- */
#ifndef VMAC_USE_STDINT
#define VMAC_USE_STDINT 1 /* Set to zero if system has no stdint.h */
#endif
#if VMAC_USE_STDINT && (!defined(_MSC_VER) || _MSC_VER >= 1600)
/* Older C++ library headers only expose UINT64_C when this macro is defined
 * before stdint.h is included. The guard avoids a redefinition diagnostic
 * when the build system already passes -D__STDC_CONSTANT_MACROS. */
#if defined(__cplusplus) && !defined(__STDC_CONSTANT_MACROS)
#define __STDC_CONSTANT_MACROS
#endif
#include <stdint.h>
#ifndef UINT64_C /* stdint.h did not provide it; supply a fallback */
#define UINT64_C(v) v ## ULL
#endif
#elif defined(_MSC_VER) /* Microsoft C without a usable stdint.h */
typedef unsigned __int32 uint32_t;
typedef unsigned __int64 uint64_t;
#define UINT64_C(v) v ## UI64
#else /* Guess sensibly - may need adaptation */
typedef unsigned int uint32_t;
typedef unsigned long long uint64_t;
#define UINT64_C(v) v ## ULL
#endif

Code:

/* Compiler-specific spellings for alignment and inlining control.
 * Each macro expands to nothing when the compiler is neither GNU-compatible
 * nor Microsoft, so code using them still compiles there (without the
 * requested alignment or inlining guarantee). */

/* ALIGN(n): request n-byte alignment for the declaration it decorates. */
#if __GNUC__
#  define ALIGN(n) __attribute__ ((aligned(n)))
#elif _MSC_VER
#  define ALIGN(n) __declspec(align(n))
#else
#  define ALIGN(n)
#endif

/* NOINLINE: ask the compiler not to inline the decorated function. */
#if __GNUC__
#  define NOINLINE __attribute__ ((noinline))
#elif _MSC_VER
#  define NOINLINE __declspec(noinline)
#else
#  define NOINLINE
#endif

and even a portable inline assembler!

Code:

// macros from Crypto++ for sharing inline assembly code between MSVC and GNU C
//
// One instruction is written once as AS1/AS2/AS3(...) and expands to:
//   - GNU C: a string literal "op, op;" suitable for concatenation inside
//     an asm("...") statement;
//   - otherwise: a Microsoft-style __asm { ... } block.
//
//   AS1(x)          one token group, e.g. an instruction without a comma
//   AS2(x, y)       instruction with two comma-separated parts
//   AS3(x, y, z)    instruction with three comma-separated parts
//   ASS(x, y, a, b, c, d)
//                   like AS3, where the last part is a shuffle immediate
//                   built from four 2-bit fields (a*64 + b*16 + c*4 + d,
//                   or _MM_SHUFFLE(a, b, c, d) on the __asm side)
//   ASL(x)          defines label x
//   ASJ(x, y, z)    jump instruction x to label y; on GNU C z is appended
//                   directly to the label (presumably the f/b direction
//                   suffix of numeric local labels - confirm against
//                   callers), on the __asm side z is ignored
#if defined(__GNUC__)
// define these in two steps to allow arguments to be expanded
#define GNU_AS1(x) #x ";"
#define GNU_AS2(x, y) #x ", " #y ";"
#define GNU_AS3(x, y, z) #x ", " #y ", " #z ";"
#define GNU_ASL(x) "\n" #x ":"
#define GNU_ASJ(x, y, z) #x " " #y #z ";"
#define AS1(x) GNU_AS1(x)
#define AS2(x, y) GNU_AS2(x, y)
#define AS3(x, y, z) GNU_AS3(x, y, z)
// ASS stringifies directly, so its arguments are NOT macro-expanded first
#define ASS(x, y, a, b, c, d) #x ", " #y ", " #a "*64+" #b "*16+" #c "*4+" #d ";"
#define ASL(x) GNU_ASL(x)
#define ASJ(x, y, z) GNU_ASJ(x, y, z)
#else
// AS1 is provided here as well so both branches expose the same macro set
#define AS1(x) __asm {x}
#define AS2(x, y) __asm {x, y}
#define AS3(x, y, z) __asm {x, y, z}
#define ASS(x, y, a, b, c, d) __asm {x, y, _MM_SHUFFLE(a, b, c, d)}
// labels get a "label" prefix via token pasting, so ASL(1) defines label1
#define ASL(x) __asm {label##x:}
#define ASJ(x, y, z) __asm {x label##y}
#endif

By the way, the speed of many cryptographic hashes (BLAKE and VMAC, at least) depends greatly on the availability and version of SSE, which they use through assembler in various forms (intrinsics, inline asm, external asm files).