/* Very primitive kernel space crc benchmark */

#include <linux/crc32.h>
#include <linux/loop.h>
#include <linux/module.h>
#include <linux/kernel.h>


/* Minimal shims so the ffmpeg code below compiles without
 * modifications. */

/* One CRC table entry, as used by av_crc_init() and av_crc(). */
typedef uint32_t AVCRC;

/* Reverse the byte order of a 32-bit value.  av_crc_init() applies this
 * to every table entry when it is called with le == 0, so it has to be a
 * real swap rather than a no-op. */
static inline uint32_t bswap_32(uint32_t x)
{
    return (x >> 24) |
           ((x >> 8) & 0x0000ff00U) |
           ((x << 8) & 0x00ff0000U) |
           (x << 24);
}

/* Convert a 32-bit word that was loaded from little-endian byte order into
 * host order.  Builds whose compiler reports a big-endian target through
 * __BYTE_ORDER__ swap the bytes; every other build returns the value
 * unchanged. */
#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
    (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#define le2me_32(x) bswap_32(x)
#else
#define le2me_32(x) (x)
#endif

/* Following code is from ffmpeg/libavutil/crc.c */

/**
 * Initialises a CRC lookup table.
 *
 * The first 256 entries hold the CRC of each possible byte value.  When
 * ctx_size is sizeof(AVCRC)*1024 (and CONFIG_SMALL is not defined) three
 * further 256-entry slices are derived from the first one; av_crc() uses
 * them to consume four input bytes per step.
 *
 * ctx[256] doubles as a marker: it is first set to 1, and only the
 * 1024-entry fill overwrites it (with a value derived from ctx[0], which is
 * always 0), so av_crc() can tell the two table sizes apart.
 *
 * @param ctx      table to fill; must have room for ctx_size bytes
 * @param le       non-zero: entries are built by shifting right, least
 *                 significant bit first; zero: entries are built by shifting
 *                 left from the top byte and then passed through bswap_32()
 * @param bits     width of the polynomial in bits, 8..32
 * @param poly     generator polynomial, must be smaller than 1 << bits
 * @param ctx_size size of ctx in bytes, either sizeof(AVCRC)*257 or
 *                 sizeof(AVCRC)*1024
 * @return 0 on success, <0 on failure (NULL table or invalid argument)
 */
int av_crc_init(AVCRC *ctx, int le, int bits, uint32_t poly, int ctx_size){
    int i, j;
    uint32_t c;

    if (!ctx)
        return -1;
    if (bits < 8 || bits > 32 || poly >= (1LL<<bits))
        return -1;
    if (ctx_size != sizeof(AVCRC)*257 && ctx_size != sizeof(AVCRC)*1024)
        return -1;

    for (i = 0; i < 256; i++) {
        if (le) {
            /* -(c&1) is all-ones when the low bit is set, zero otherwise. */
            for (c = i, j = 0; j < 8; j++)
                c = (c>>1) ^ (poly & (-(c&1)));
            ctx[i] = c;
        } else {
            /* Widen before shifting: i << 24 overflows a signed int for
             * i >= 128.  -(c>>31) is all-ones when the top bit is set,
             * without relying on an arithmetic right shift of a negative
             * value. */
            for (c = (uint32_t)i << 24, j = 0; j < 8; j++)
                c = (c<<1) ^ ((poly<<(32-bits)) & (-(c>>31)));
            ctx[i] = bswap_32(c);
        }
    }

    /* Marks a 257-entry table; overwritten with 0 by the fill below. */
    ctx[256] = 1;
#ifndef CONFIG_SMALL
    if (ctx_size >= sizeof(AVCRC)*1024) {
        for (i = 0; i < 256; i++) {
            for (j = 0; j < 3; j++) {
                /* Slice j+1 is slice j advanced by one more zero byte. */
                ctx[256*(j+1) + i] =
                    (ctx[256*j + i]>>8) ^ ctx[ ctx[256*j + i]&0xFF ];
            }
        }
    }
#endif

    return 0;
}

/**
 * Updates a CRC over a buffer using a table filled in by av_crc_init().
 *
 * When ctx[256] is 0 (the marker av_crc_init() leaves in a 1024-entry
 * table) and CONFIG_SMALL is not defined, the bulk of the buffer is
 * consumed four bytes per step using ctx[0..1023]; otherwise, and for any
 * leading or trailing bytes, one byte is consumed per step using
 * ctx[0..255].
 *
 * @param ctx    lookup table
 * @param crc    starting CRC value, or the result of a previous call
 * @param buffer bytes to process; not modified
 * @param length number of bytes at buffer
 * @return the updated CRC value
 */
uint32_t av_crc(const AVCRC *ctx, uint32_t crc, const uint8_t *buffer, size_t length){
    const uint8_t *end = buffer + length;

#ifndef CONFIG_SMALL
    if (!ctx[256]) {
        /* Go byte-wise until the pointer is 4-byte aligned so the 32-bit
         * loads below never hit a misaligned address. */
        while (((uintptr_t)buffer & 3) && buffer < end)
            crc = ctx[((uint8_t)crc) ^ *buffer++] ^ (crc >> 8);

        /* Compare the remaining length rather than computing end-3, which
         * would point before the buffer when fewer than 3 bytes are left.
         * NOTE(review): the uint32_t load through a uint8_t pointer assumes
         * the build does not enforce strict aliasing -- confirm. */
        while (end - buffer >= 4) {
            crc ^= le2me_32(*(const uint32_t *)buffer);
            buffer += 4;
            crc =  ctx[3*256 + ( crc     &0xFF)]
                  ^ctx[2*256 + ((crc>>8 )&0xFF)]
                  ^ctx[1*256 + ((crc>>16)&0xFF)]
                  ^ctx[0*256 + ((crc>>24)     )];
        }
    }
#endif
    /* Remaining bytes (or the whole buffer for a 257-entry table). */
    while (buffer < end)
        crc = ctx[((uint8_t)crc) ^ *buffer++] ^ (crc >> 8);

    return crc;
}

/* END ffmpeg */

/* Following code is from
 * http://80.101.89.68/~erik/git/blob/blob.tar.gz src/lib/crc32.c
 * where crc32 was renamed to blob_crc32 */

/* Polynomial constant that blob_crc32() XORs into the running CRC after
 * every right shift whose shifted-out low bit was 1. */
#define POLYNOME        (0xedb88320)



/* Calculate a CRC32 over len bytes pointed to by data, one bit at a time
 * and without a lookup table.
 *
 * crc is the starting value. The usual initialisation is 0xffffffff, but
 * supplying the value returned by a previous call continues the
 * calculation, so the CRC of a stream of data can be computed piecewise.
 * No inversion is applied to the input or the result.
 *
 * data is only read. A len of zero or less processes nothing and returns
 * crc unchanged.
 */
u32 blob_crc32(u32 crc, const void *data, int len)
{
    const u8 *tmp = data;
    u32 polynome = POLYNOME;
    /* The above might look stupid, but it is a kludge to get
     * better code. With the -Os flag, the compiler moves the
     * POLYNOME into the .rodata segment and reloads it on every
     * run. This kinda defeats the purpose of doing CRC32 without
     * a lookup table. By moving the polynome explicitly out of
     * the loop, the compiler will allocate a register for it
     * making the resulting code faster. JDB commented that the
     * CSE module of GCC is broken, but it works perfectly well
     * with -O2. It's just that memory size is more important than
     * speed with -Os. -- Erik
     */

    /* One bit step: shift right, XORing in the polynomial when the bit
     * shifted out was 1.  Wrapped in do/while(0) so it expands to exactly
     * one statement. */
#define DO_CRC \
    do { \
        if (crc & 1) { \
            crc >>= 1; \
            crc ^= polynome; \
        } else { \
            crc >>= 1; \
        } \
    } while (0)

    /* "len > 0" rather than "len--" so a negative length cannot run the
     * loop past the end of the buffer. */
    for (; len > 0; len--) {
        crc ^= *tmp++;

        /* Eight bit steps per input byte, kept unrolled by hand. */
        DO_CRC;
        DO_CRC;
        DO_CRC;
        DO_CRC;
        DO_CRC;
        DO_CRC;
        DO_CRC;
        DO_CRC;
    }

#undef DO_CRC

    return crc;
}



/* END crc32.c */



/*
 * Module init: run the benchmark once at load time.
 *
 * Times crc32(), av_crc() and blob_crc32() in turn.  Each one starts from
 * ~0 and is chained over the same 4096-byte buffer 65536 times; the elapsed
 * jiffies and the final CRC value are logged with printk.  Both allocations
 * are released before returning on every path.
 *
 * Returns 0 on success, -ENOMEM if an allocation fails, or -EINVAL if
 * av_crc_init() rejects its arguments.
 */
static int __init init_crcbenchmark(void) {
    enum {
        CRCBENCH_TABLE_ENTRIES = 1024,   /* full av_crc table */
        CRCBENCH_BUF_BYTES     = 4096,   /* bytes hashed per call */
        CRCBENCH_ROUNDS        = 65536   /* calls per implementation */
    };
    AVCRC *av_ctx;
    uint8_t *buf;
    uint32_t crc;
    /* jiffies is an unsigned long; keeping that type avoids truncation and
     * makes stop - start well defined across a counter wrap. */
    unsigned long start, stop;
    int i, ret;

    av_ctx = kmalloc(sizeof(AVCRC) * CRCBENCH_TABLE_ENTRIES, GFP_KERNEL);
    if (!av_ctx) {
        ret = -ENOMEM;
        goto out1;
    }

    buf = kmalloc(CRCBENCH_BUF_BYTES, GFP_KERNEL);
    if (!buf) {
        ret = -ENOMEM;
        goto out2;
    }

    /* kmalloc() memory is uninitialised: fill it with a fixed pattern so
     * the logged CRCs are reproducible and no stale kernel memory is
     * summarised into the log. */
    for (i = 0; i < CRCBENCH_BUF_BYTES; i++)
        buf[i] = (uint8_t)i;

    ret = av_crc_init(av_ctx, 1, 32, 0xedb88320,
                      sizeof(AVCRC) * CRCBENCH_TABLE_ENTRIES);
    if (ret) {
        /* av_crc_init() only reports "<0"; map it to a real errno. */
        ret = -EINVAL;
        goto out3;
    }

    start = jiffies;
    crc = ~0;
    for (i = 0; i < CRCBENCH_ROUNDS; i++) {
        crc = crc32(crc, buf, CRCBENCH_BUF_BYTES);
    }
    stop = jiffies;

    printk(KERN_INFO "crc32: %lu jiffies, crc %u\n", stop - start, crc);

    start = jiffies;
    crc = ~0;
    for (i = 0; i < CRCBENCH_ROUNDS; i++) {
        crc = av_crc(av_ctx, crc, buf, CRCBENCH_BUF_BYTES);
    }
    stop = jiffies;

    printk(KERN_INFO "av_crc: %lu jiffies, crc %u\n", stop - start, crc);

    start = jiffies;
    crc = ~0;
    for (i = 0; i < CRCBENCH_ROUNDS; i++) {
        crc = blob_crc32(crc, buf, CRCBENCH_BUF_BYTES);
    }
    stop = jiffies;

    printk(KERN_INFO "blob_crc32: %lu jiffies, crc %u\n", stop - start, crc);

    ret = 0;

 out3:
    kfree(buf);
 out2:
    kfree(av_ctx);
 out1:
    return ret;
}

/* Module exit hook.  There is nothing to undo: init_crcbenchmark() frees
 * both of its allocations before it returns. */
static void __exit cleanup_crcbenchmark(void)
{
}

/* Module metadata. */
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("simple crc benchmark");
MODULE_AUTHOR("Timo Juhani Lindfors <timo.lindfors@iki.fi>");
/* Register the benchmark as the load hook and the empty function as the
 * unload hook. */
module_init(init_crcbenchmark);
module_exit(cleanup_crcbenchmark);
