/* Written 2005 By Brendan G Bohannon I release this under the public domain. However, I do requst mention in any derived source if applicable. Thought: LZBSS: a modified, vaguely LZSS like algo. Normal extension 'lzb'. Will use a 64kB rotating buffer, and a rotating "codespace" for encoding. 0..127: direct references into the rotating codespace 128: escape code, 8 bit char value 129: 8 bit length, 8 bit relative offset 130..191: lengths 2..63, 8 bit offset 192: reserved 193: 8 bit length, 16 bit offset 194..255: 2..63, 16 bit offset Worst case: 50% inflation. The codespace will rotate, each time an escape is used all codes in the codespace will be shifteded down by 1 value, and the new value (127) will refer to the escaped character. Implementation: Ring buffer; Linked lists; Incremental state. */ #include #include #include #include #define LZBSS_IOBUFSZ 262144 typedef unsigned char byte; typedef unsigned short ushort; typedef struct { byte *winbuf; ushort winpos; ushort *linkbuf; ushort *linkidx; short codebuf[256]; //buffer reflecting the current codespace short rcodebuf[256]; //reverse codespace byte codebufpos; byte *ibuf, *obuf; //current buffer pointers byte *ibufe, *obufe; //current buffer limits }LZBSS_State; LZBSS_State *LZBSS_NewEncodeState() { LZBSS_State *tmp; int i; tmp=malloc(sizeof(LZBSS_State)); memset(tmp, 0, sizeof(LZBSS_State)); tmp->winbuf=malloc(65536); tmp->linkbuf=malloc(65536*sizeof(ushort)); tmp->linkidx=malloc(4096*sizeof(ushort)); memset(tmp->winbuf, 0, 65536); memset(tmp->linkbuf, 0, 65536*sizeof(ushort)); memset(tmp->linkidx, 0, 4096*sizeof(ushort)); for(i=0; i<128; i++) { tmp->codebuf[i]=i; tmp->rcodebuf[i]=i; } for(; i<256; i++) { tmp->codebuf[i]=-1; tmp->rcodebuf[i]=-1; } return(tmp); } LZBSS_State *LZBSS_NewDecodeState() { LZBSS_State *tmp; int i; tmp=malloc(sizeof(LZBSS_State)); memset(tmp, 0, sizeof(LZBSS_State)); tmp->winbuf=malloc(65536); memset(tmp->winbuf, 0, 65536); for(i=0; i<256; i++) tmp->codebuf[i]=i; return(tmp); } int LZBSS_LookupString(LZBSS_State *state, int *offs, int *len) { int i, j, k, l, w, d; int bl, bi, bw; l=state->ibufe-state->ibuf; if(l>319)l=319; if(l<3)return(0); j=state->ibuf[0]^(state->ibuf[1]<<4); i=state->linkidx[j]; w=2; //max wraparounds bl=0; bi=0; bw=0; while(w) { d=state->winpos-i; if(d<0)d+=65536; if(d>=(65536-320))break; if(!d) { j=state->linkbuf[i]; if(j>=i)w--; i=j; continue; } for(j=0; jibuf[j]!=state->winbuf[(i+j)&65535]) break; k=j-2; if(d>255)k--; if(j>63)k--; if(k>bw) { bl=j; bi=i; bw=k; } j=state->linkbuf[i]; if(j>=i)w--; i=j; } if(bl<2)return(0); j=state->winpos-bi; if(j<0)j+=65536; *offs=j; *len=bl; return(1); } void LZBSS_UpdateIndexByte(LZBSS_State *state) { int i; // i=*state->ibuf; i=state->ibuf[0]^(state->ibuf[1]<<4); state->linkbuf[state->winpos]=state->linkidx[i]; state->linkidx[i]=state->winpos; } void LZBSS_UpdateIndexString(LZBSS_State *state, int l) { int i, j, k; for(i=0; iibuf[i]; j=state->ibuf[i+0]^(state->ibuf[i+1]<<4); k=(state->winpos+i)&65535; state->linkbuf[k]=state->linkidx[j]; state->linkidx[j]=k; } } void LZBSS_UpdateWindowByte(LZBSS_State *state) { LZBSS_UpdateIndexByte(state); state->winbuf[state->winpos]=*state->ibuf++; state->winpos=(state->winpos+1)&65535; } void LZBSS_UpdateWindowString(LZBSS_State *state, int l) { int i, j, k; for(i=0; iibuf; k=(state->winpos+i)&65535; state->winbuf[k]=j; j=state->ibuf[0]^(state->ibuf[1]<<4); state->linkbuf[k]=state->linkidx[j]; state->linkidx[j]=k; state->ibuf++; } state->winpos=(state->winpos+l)&65535; } //encode data using current state: //return value: 0=OK, 1=need input, 2=need output int LZBSS_EncodeState(LZBSS_State *state, int end) { int bi, bl, ec, en, gl, tl, tn; int i, j, k, l; en=0; gl=0; tl=0; tn=0; ec=0; while(1) { j=state->ibufe-state->ibuf; if((j<320) && (!end)) { ec=1; break; } if(!j)break; k=state->obufe-state->obuf; if(k<4) { ec=2; break; } //copy run into window bi=(320winbuf[(state->winpos+i)&65535]= state->ibuf[i]; } i=LZBSS_LookupString(state, &bi, &bl); // i=0; if(i) { if(bl>gl)gl=bl; tl+=bl; tn++; if(bl<64) { if(bi<256) { *state->obuf++=128+bl; *state->obuf++=bi; }else { *state->obuf++=192+bl; *state->obuf++=bi&0xFF; *state->obuf++=(bi>>8)&0xFF; } }else { if(bi<256) { *state->obuf++=129; *state->obuf++=bl-64; *state->obuf++=bi; }else { *state->obuf++=193; *state->obuf++=bl-64; *state->obuf++=bi&0xFF; *state->obuf++=(bi>>8)&0xFF; } } LZBSS_UpdateWindowString(state, bl); }else { j=state->codebuf[*state->ibuf]; if(j>=0) { j=(j-state->codebufpos)&0xFF; if(j<128) { *state->obuf++=j; LZBSS_UpdateWindowByte(state); continue; } } en++; j=state->codebufpos&0xFF; k=state->rcodebuf[j]; state->codebuf[k]=-1; state->rcodebuf[j]=-1; j=*state->ibuf; state->codebufpos++; k=(state->codebufpos+127)&0xFF; state->codebuf[j]=k; state->rcodebuf[k]=j; *state->obuf++=128; *state->obuf++=j; LZBSS_UpdateWindowByte(state); } } if(!tn)tn++; printf("escapes %d, pos %d, max len %d, avg len %d\n", en, state->codebufpos, gl, tl/tn); return(ec); } //encode data using current state: //return value: 0=OK, 1=need input, 2=need output, -1=stream error int LZBSS_DecodeState(LZBSS_State *state, int end) { int i, j, k, bi, bl; ushort si; while(1) { i=state->ibufe-state->ibuf; if(i<=8) { if(!end)return(1); if(!i)return(0); } if((state->obuf+320)>=state->obufe) return(2); i=*state->ibuf++; if(i>128) { if(i>=194) { bl=i-192; bi=*state->ibuf++; bi|=(*state->ibuf++)<<8; // bi=state->ibuf[0]|(state->ibuf[1]<<8); // state->ibuf+=2; }else if(i>=192) { if(i==193) { bl=(*state->ibuf++)+64; bi=*state->ibuf++; bi|=(*state->ibuf++)<<8; }else { return(-1); } }else if(i>=130) { bl=i-128; bi=*state->ibuf++; }else if(i==129) { bl=(*state->ibuf++)+64; bi=*state->ibuf++; } si=state->winpos-bi; while(bl--) { k=state->winbuf[si++]; state->winbuf[state->winpos++]=k; *state->obuf++=k; } }else { if(i==128) { j=*state->ibuf++; state->codebufpos++; state->codebuf[(byte) (state->codebufpos+127)]=j; *state->obuf++=j; state->winbuf[state->winpos++]=j; continue; } j=state->codebuf[(byte)(state->codebufpos+i)]; // j=state->codebuf[(state->codebufpos+i)&0xFF]; *state->obuf++=j; state->winbuf[state->winpos++]=j; } } return(0); } unsigned int PDRLELZ_DataAdler32(void *buf, int sz, unsigned int lcrc) { byte *s; int i, c, s1, s2; s=buf; s1=lcrc&0xFFFF; s2=(lcrc>>16)&0xFFFF; for(i=0; iibuf=ibuf; state->obuf=obuf; state->ibufe=ibuf+i; state->obufe=obuf+LZBSS_IOBUFSZ; crc=PDRLELZ_DataAdler32(ibuf, i, 1); sz=i; sz2=0; while(1) { i=LZBSS_EncodeState(state, eof); if(i==1) { j=state->ibufe-state->ibuf; if(j)memcpy(ibuf, state->ibuf, j); k=fread(ibuf+j, 1, LZBSS_IOBUFSZ-j, ifd); eof=(j+k)ibuf=ibuf; state->ibufe=ibuf+j+k; crc=PDRLELZ_DataAdler32(ibuf+j, k, crc); sz+=k; continue; } if(i==2) { j=state->obuf-obuf; fwrite(obuf, 1, j, ofd); sz2+=j; state->obuf=obuf; state->obufe=obuf+LZBSS_IOBUFSZ; continue; } if(!i)break; } j=state->obuf-obuf; if(j)fwrite(obuf, 1, j, ofd); sz2+=j; printf("input size %d csize %d crc %08X\n", sz, sz2, crc); printf("wpos %d cspos %d\n", state->winpos, state->codebufpos); ft=(clock()-t)/(float)CLOCKS_PER_SEC; printf("time %fs %fkB/s\n", ft, sz/(ft*1000)); return(0); } if(!strcmp(argv[1], "-d")) { ifd=fopen(argv[2], "rb"); ofd=fopen(argv[3], "wb"); state=LZBSS_NewDecodeState(); ibuf=malloc(LZBSS_IOBUFSZ); obuf=malloc(LZBSS_IOBUFSZ); tt=0; t=clock(); i=fread(ibuf, 1, LZBSS_IOBUFSZ, ifd); eof=iibuf=ibuf; state->obuf=obuf; state->ibufe=ibuf+i; state->obufe=obuf+LZBSS_IOBUFSZ; crc=1; sz=0; sz2=i; while(1) { ct=clock(); i=LZBSS_DecodeState(state, eof); tt+=clock()-ct; if(i==0)break; if(i==1) { if(eof)break; j=state->ibufe-state->ibuf; if(j)memcpy(ibuf, state->ibuf, j); k=fread(ibuf+j, 1, LZBSS_IOBUFSZ-j, ifd); eof=(j+k)ibuf=ibuf; state->ibufe=ibuf+j+k; sz2+=k; continue; } if(i==2) { j=state->obuf-obuf; fwrite(obuf, 1, j, ofd); crc=PDRLELZ_DataAdler32(obuf, j, crc); sz+=j; state->obuf=obuf; state->obufe=obuf+LZBSS_IOBUFSZ; continue; } if(!i)break; } j=state->obuf-obuf; if(j) { fwrite(obuf, 1, j, ofd); crc=PDRLELZ_DataAdler32(obuf, j, crc); sz+=j; } printf("output size %d csize %d crc %08X\n", sz, sz2, crc); printf("wpos %d cspos %d\n", state->winpos, state->codebufpos); ft=(clock()-t)/(float)CLOCKS_PER_SEC; ft1=tt/(float)CLOCKS_PER_SEC; printf("total: time %fs, %fkB/s\n", ft, sz/(ft*1000)); printf("core: time %fs, %fkB/s\n", ft1, sz/(ft1*1000)); } return(0); }