Revision 13 (by moose, 2008/11/12 09:27:00) * add testing framework
* fix encoding/decoding bugs
#include "cseproto/cseproto.h"
#include "cseproto/cseproto_encode.h"
#include "cseproto/cseproto_zigzag.h"

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

static size_t _encode_fixed32(uint32_t val, size_t len, uint8_t *data)
{
	uint8_t *ptr = data;

	if(len < sizeof(val))
		return 0;

	ptr[0] = (uint8_t)(val      );
	ptr[1] = (uint8_t)(val >>  8);
	ptr[2] = (uint8_t)(val >> 16);
	ptr[3] = (uint8_t)(val >> 24);

	return sizeof(val);
}

static size_t _encode_fixed64(uint64_t val, size_t len, uint8_t *data)
{
	uint8_t *ptr = data;
	uint32_t part0 = (uint32_t)(val);
	uint32_t part1 = (uint32_t)(val >> 32);

	if(len < sizeof(val))
		return 0;

	ptr[0] = (uint8_t)(part0      );
	ptr[1] = (uint8_t)(part0 >>  8);
	ptr[2] = (uint8_t)(part0 >> 16);
	ptr[3] = (uint8_t)(part0 >> 24);
	ptr[4] = (uint8_t)(part1      );
	ptr[5] = (uint8_t)(part1 >>  8);
	ptr[6] = (uint8_t)(part1 >> 16);
	ptr[7] = (uint8_t)(part1 >> 24);

	return sizeof(val);
}

// is inlined by _encode_varint to optimize for smaller numbers
// _encode_varint is optimized for larger numbers.
static size_t _encode_varint32(uint32_t val, size_t len, uint8_t *data)
{
	uint8_t *ptr = data;
	uint32_t llen = 0;

	if(len < 1)
		return 0;

	*ptr = (uint8_t)(val | 0x80); ++ptr;
	if (val >= (1 << 7)) {
		if(len < 2)
			return 0;

		*ptr = (uint8_t)((val >> 7) | 0x80); ++ptr;
		if (val >= (1 << 14)) {
			if(len < 3)
				return 0;

			*ptr = (uint8_t)((val >> 14) | 0x80); ++ptr;
			if (val >= (1 << 21)) {
				if(len < 4)
					return 0;

				*ptr = (uint8_t)((val >> 21) | 0x80); ++ptr;
				if (val >= (1 << 28)) {
					if(len < 5)
						return 0;

					*ptr = (uint8_t)(val >> 28); ++ptr;
					llen += 5;
				} else {
					if(len < 4)
						return 0;

					*ptr &= 0x7F; ++ptr;
					llen += 4;
				}
			} else {
				if(len < 3)
					return 0;

				*ptr &= 0x7F; ++ptr;
				llen += 3;
			}
		} else {
			if(len < 2)
				return 0;

			*ptr &= 0x7F; ++ptr;
			llen += 2;
		}
	} else {
		if(len < 1)
			return 0;

		*ptr &= 0x7F; ++ptr;
		llen += 1;
	}

	return ptr - data;
}

static size_t _encode_varint(uint64_t val, size_t len, uint8_t *data)
{
	uint8_t *ptr = data;
	uint32_t i = 0;

	// Splitting into 32-bit pieces gives better performance on 32-bit
	// processors.
	uint32_t part0 = (uint32_t)(val      );
	uint32_t part1 = (uint32_t)(val >> 28);
	uint32_t part2 = (uint32_t)(val >> 56);
	uint32_t size = 0;

	#ifdef CSEPROTO_VARINT32_OPT
	if(val <= 0xFFFFFFFF) {
		return _encode_varint32(val, len, data);
	}
	#endif

	// Here we can't really optimize for small numbers, since the val is
	// split into three parts.  Cheking for numbers < 128, for instance,
	// would require three comparisons, since you'd have to make sure part1
	// and part2 are zero.  However, if the caller is using 64-bit integers,
	// it is likely that they expect the numbers to often be very large, so
	// we probably don't want to optimize for small numbers anyway.  Thus,
	// we end up with a hardcoded binary search tree...
	#define do_size(s) do { \
		if(len < (s)) { cselog("ERR", "do_size s[%i]>len[%i]", s, len); return(-1); } \
		size = (s); \
		goto size##s; \
	} while(0)

	if (part2 == 0) {
		if (part1 == 0) {
			if (part0 < (1 << 14)) {
				if (part0 < (1 << 7)) {
					do_size(1);
				} else {
					do_size(2);
				}
			} else {
				if (part0 < (1 << 21)) {
					do_size(3);
				} else {
					do_size(4);
				}
			}
		} else {
			if (part1 < (1 << 14)) {
				if (part1 < (1 << 7)) {
					do_size(5);
				} else {
					do_size(6);
				}
			} else {
				if (part1 < (1 << 21)) {
					do_size(7);
				} else {
					do_size(8);
				}
			}
		}
	} else {
		if (part2 < (1 << 7)) {
			do_size(9);
		} else {
			do_size(10);
		}
	}

	printf("Can't get here!\n");
	abort();

	size10: ptr[9] = (uint8_t)((part2 >>  7) | 0x80);
	size9 : ptr[8] = (uint8_t)((part2      ) | 0x80);
	size8 : ptr[7] = (uint8_t)((part1 >> 21) | 0x80);
	size7 : ptr[6] = (uint8_t)((part1 >> 14) | 0x80);
	size6 : ptr[5] = (uint8_t)((part1 >>  7) | 0x80);
	size5 : ptr[4] = (uint8_t)((part1      ) | 0x80);
	size4 : ptr[3] = (uint8_t)((part0 >> 21) | 0x80);
	size3 : ptr[2] = (uint8_t)((part0 >> 14) | 0x80);
	size2 : ptr[1] = (uint8_t)((part0 >>  7) | 0x80);
	size1 : ptr[0] = (uint8_t)((part0      ) | 0x80);

	//cselog("WTF?", "...");
	
	ptr[size-1] &= 0x7F;

	return size;
}

size_t _encode_lendelim(size_t data_len, uint8_t *data, size_t out_len, uint8_t *out)
{
	size_t len_len = 0;

	if(out_len < data_len)
		return 0;

	cselog("LOG", "data_len: %i", data_len);
	len_len = _encode_varint(data_len, out_len, out);
	if(!len_len)
		return 0;

	memcpy(out+len_len, data, data_len);

	return len_len + data_len;
}

static size_t _item_size(cseproto_item_t *item)
{
	size_t item_len = 0;
	size_t key_len = _varint_size((item->key << CSEPROTO_KEYTYPEBITS) | (item->wiretype & CSEPROTO_KEYTYPEMASK));
	
	switch(item->wiretype) {
		case CSE_WIRETYPE_SVARINT: { // int[32,64]_t
			item_len += _varint_size(ZigZagEncode64(item->v.int64));
		}  break;

		case CSE_WIRETYPE_VARINT: { // uint[32,64]_t, bool, enum
			item_len += _varint_size(item->v.uint64);
		}  break;

		case CSE_WIRETYPE_64BIT: { // double, [su]fixed64
			item_len += sizeof(uint64_t);
		}  break;

		case CSE_WIRETYPE_32BIT: { // float, [su]fixed32
			item_len += sizeof(uint32_t);
		}  break;

		case CSE_WIRETYPE_LENDELIM:
			item_len += _varint_size(item->data_len);
			item_len += item->data_len;
			break;
	}

	cselog("LOG", "keylen: %i, itemlen: %i", key_len, item_len);

	return key_len + item_len;
}

size_t cseproto_message_size(cseproto_t *proto)
{
	uint32_t i = 0;
	size_t len = 0;

	for(i = 0; i < proto->bucket_count; ++i) {
		cseproto_item_t *ptr = proto->buckets[i].head;
		while(ptr) {
			len += _item_size(ptr);
			ptr = ptr->next;
		}
	}

	//printf("message size: %i\n", len);

	return len;
}

size_t cseproto_encode_item(cseproto_item_t *item, size_t data_len, uint8_t *data)
{
	size_t item_len = _item_size(item), key_len = 0;

	if(data_len < item_len)
		return 0;

	key_len += _encode_varint((item->key << CSEPROTO_KEYTYPEBITS) | (item->wiretype & CSEPROTO_KEYTYPEMASK), data_len, data);
	data += key_len;
	data_len -= key_len;
	
	// encode item
	switch(item->wiretype) {
		case CSE_WIRETYPE_SVARINT: // int[32,64]_t
			cselog("LOG", "svarint: %llx", item->v.int64);
			{ uint64_t sv = ZigZagEncode64(item->v.int64); cselog("LOG", "zze: %llx", sv); }
			_encode_varint(ZigZagEncode64(item->v.int64), data_len, data);
			//cselog("LOG", "svarint: %llx", (uint64_t)*data);
			break;
      
		case CSE_WIRETYPE_VARINT: // uint[32,64]_t, bool, enum
			cselog("LOG", "varint");
			_encode_varint(item->v.uint64, data_len, data);
			break;
      
		case CSE_WIRETYPE_64BIT: // double, [su]fixed64
			cselog("LOG", "64bit");
			_encode_fixed64(item->v.uint64, sizeof(uint64_t), data);
			break;
      
		case CSE_WIRETYPE_32BIT: // float, [su]fixed32
			cselog("LOG", "32bit");
			_encode_fixed32(item->v.uint32, sizeof(uint32_t), data);
			break;
      
		case CSE_WIRETYPE_LENDELIM: // strings, embeded messages
			cselog("LOG", "lendelim");
			_encode_lendelim(item->data_len, item->v.data, data_len, data);
			break;
	}

	return item_len;
}

size_t cseproto_encode(cseproto_t *cp, uint8_t **data)
{
	uint8_t *ptr = NULL;
	size_t size = 0;
	uint32_t i = 0;
	uint32_t len = 0;
	
	size = cseproto_message_size(cp);
	cselog("LOG", "message size: %i", size);
	ptr = calloc(1, size);
	if(!ptr)
		return 0;
	
	for(i = 0; i < cp->bucket_count; ++i) {
		cseproto_item_t *p = cp->buckets[i].head;
		while(p) {
			uint32_t j = 0;
			size_t item_len = cseproto_encode_item(p, size - len, ptr + len);
			if(!item_len)
				return 0;

			cselog("LOG", "item[%i]: len:%i\n", i, item_len);
			//for(j = 0; j < item_len; ++j) {
			//	printf("%c ", *(ptr+len+j));
			//}
			//printf("\n");

			len += item_len;
			p = p->next;
		}
	}

	*data = ptr;
	return size;
}