Jump to content
xisto Community
Sign in to follow this  
Anduril

Interested In Compression Algorithms? Here you have a cute code

Recommended Posts

That's a compressor i made during my university practical lessons on information theory, it's a text compressor, it's only usefull for texts containing only vocals, but it can easily be adapted for the whole alphabet ^^...

 

Good luck with the comment translation!! (Catalan and Spanish...)

 

 

#include <stdio.h>#include <math.h>#include <string.h>//En argv tenemos: (1)selector de compresion/descompresion (2)Tabla de frecuencias (3)Fichero de simbolos// (4)Fichero de salidaint treureDigitsCodificacio (int*,int*,FILE*,FILE*,int*,int*);void calcularAltBaix (int*,int*,int*,int,FILE*,FILE*,int*);int codifica(char *argv[]);int descodifica(char *argv[]);int treureDigitsDescodificacio (int*,int*,int*,FILE*,FILE*);void calcularSimbol(int*,int*,int*,int,int*,FILE*,FILE*);int main (int argc,char *argv[]){	int a;	if (strcmp (argv[1],"-c") == 0)	{		a=codifica(argv);	}	else if (strcmp(argv[1],"-d") == 0)	{		a=descodifica(argv);	}return a;}	int treureDigitsCodificacio (int *al,int *ba,FILE *f_out, FILE *f_control,int *ca,int *uf)	{		//procedemos a eliminar las cifras comunes		int cont=0;		int i=0;		while ((*al/10000) == (*ba/10000))		{						fprintf(f_out,"%d",(*al/10000));			if(*uf!=0)			{				if(*ca==*al/10000)//Afegim 0s				{					for(i=0;i<*uf;i++)					{						fprintf(f_out,"%d",0);					}					i=0;					*uf=0;				}				else//Afegim 9s				{					for(i=0;i<*uf;i++)					{						fprintf(f_out,"%d",9);					}					i=0;					*uf=0;				}			}			//printf("Afegirem %d al fitxer de sortida.\n",(*al/10000));			cont++;			*al = ((*al-((*al/10000)*10000))*10)+9;			*ba = ((*ba-((*ba/10000)*10000))*10);			//printf("El nou Alt es %d.\nEl nou Baix es %d.\n",*al,*ba);			fprintf (f_control,"Alt: %d, Baix: %d\n",*al,*ba);				}		if(((*al/10000)-(*ba/10000))==1)//Calculem underflow del nombre resultant		{			*ca=*al/10000;			int at,bt;			int segundo_a=(*al-(*al/10000)*10000)/1000;			int segundo_b=(*ba-(*ba/10000)*10000)/1000;			while(segundo_a == 0 && segundo_b == 9)			{				*uf= *uf +1;				at=(*al-((*al/10000)*10000))-(segundo_a*1000);				bt=(*ba-((*ba/10000)*10000))-(segundo_b*1000);				*al=(((*al/10000)*10000)+at*10)+9;				*ba=((*ba/10000)*10000)+(bt*10);				segundo_a=(*al-(*al/10000)*10000)/1000;				segundo_b=(*ba-(*ba/10000)*10000)/1000;			}		}		return cont;	}	void calcularAltBaix (int *ra, int *al, int *ba,int N, FILE *f_simbol, FILE *f_control,int v_F[])	{		int k;		char sim;		int alt = *al;		int baix = *ba;		int rang = *ra;		sim=fgetc(f_simbol);		if (sim == 'a') k=0;		else if (sim == 'e') k=1;		else if (sim == 'i') k=2;		else if (sim == 'o') k=3;		else if  (sim == 'u') k=4;		rang = alt - baix +1;		alt = (baix + (rang * v_F[k])/ N)  -1;		if(k!=0) baix = (baix + (rang * v_F[k-1])/ N);		//printf ("Com que el simbol es %c, k val: %d\n",sim, k);		*al = alt;		*ba = baix;		*ra = rang;		fprintf (f_control,"Caracter: %c, Alt: %d, Baix: %d\n",sim,*al,*ba);	}	int codifica(char *argv[])	{		FILE *f_taula;		FILE *f_simbol;		FILE *f_out;		FILE *f_control;		f_taula = fopen (argv[2], "r");		f_simbol = fopen (argv[3], "r");		f_out = fopen (argv[4], "w");		f_control = fopen ("code.txt","w");		int alt = 99999;		int baix = 0;		int rang;		char simbol;		int cont=0;		int underflow=0,control_alt;	/*Obrir fitxers*/		simbol = fgetc(f_simbol);		/*Comprovacio de validesa del fitxer d'entrada:*/		while (simbol!=EOF)		{			cont++;		if (!(simbol == 'a' || simbol == 'e' || simbol == 'i' || simbol == 'o' || simbol == 	'u'))			{				//printf ("Fitxer d'entrada erroni, simbol %c incorrecte\n",simbol);				fclose(f_simbol);				return -1;			}				simbol = fgetc(f_simbol);		}//printf ("\n%d\n%d\n",alt,baix);		rewind (f_simbol);		//printf("Tenim un total de %d simbols per codificar.",cont);		//Escrivim Capçalera		fprintf(f_out,"TI2005-06\n");		fprintf(f_out,"%d\n",cont);		fflush(f_out);		/*Fi de la comprovacio*/		/*Tractament de la taula de freqĂÂźencies*/		int v_taula[5];		fscanf (f_taula,"%d",&v_taula[0]);		int i = 1;		while (!feof(f_taula))		{			fscanf (f_taula,"%d",&v_taula[i]);			i++;		}		/*Tractament del vector taula*/		int v_F [5];		//int v_P [5];		v_F [0]= v_taula [0];		for (i=1;i<5;i++)		{			v_F [i] = v_F [i-1] + v_taula [i];		}		int N = v_F [4];		int caracters_codificats = 0;		int ind;		for(ind = 0; ind<cont; ind++)		{			//printf("\n----------------Simbol %d-------------------\n",(cont-ind));			calcularAltBaix (〉, &alt, &baix,N, f_simbol,f_control, v_F);			//printf ("\n%d\n%d\n",alt,baix);			//procedemos a eliminar las cifras comunescaracters_codificats = caracters_codificats + treureDigitsCodificacio(&alt,&baix,f_out,f_control,&control_alt,&underflow);		}		//Introduim al fitxer de sortida la xifra mes significativa de l'ultim baix.		fprintf(f_out,"%d",(baix/10000));		//cal introduir-la al fitxer control?		caracters_codificats++;		//printf("Hem codificat %d caracters.",caracters_codificats);		//printf("Tancant fitxers...");		fclose (f_taula);		fclose (f_simbol);		fclose (f_control);		float RaoCompressioTeorica = (cont * 8) /( (caracters_codificats) * (log (10) / log (2)));		printf ("Rao de Compressio Teorica: %f\n",RaoCompressioTeorica);		fclose (f_out);		return 0;	}//////////////////////////////////////////////////////////////////////////////////////////////	int descodifica(char *argv[])	{		FILE *f_taula;		FILE *f_out;		FILE *f_control;		FILE *f_desc;		f_taula = fopen (argv[2], "r");		f_desc = fopen (argv[3], "r");		f_out = fopen (argv[4], "w");		f_control = fopen ("decode.txt","w");		int alt = 99999;		int baix = 0;		int valor=0;		int valor_prov;		char texto[10];		int num_caracteres;		int i;		/*Tractament de la taula de freqĂÂźencies*/		int v_taula[5];		fscanf (f_taula,"%d",&v_taula[0]);		i = 1;		while (!feof(f_taula))		{			fscanf (f_taula,"%d",&v_taula[i]);			i++;		}		/*Tractament del vector taula*/		int v_F [5];		//int v_P [5];		v_F [0]= v_taula [0];		printf("%d\n",v_F[0]);		for (i=1;i<5;i++)		{			v_F [i] = v_F [i-1] + v_taula [i];			printf("%d\n",v_F[i]);		}		int N = v_F [4];		fscanf(f_desc,"%s",texto);		if (strcmp (texto,"TI2005-06")==0)		{			fscanf(f_desc,"%d",&num_caracteres);			printf("el num de vueltas es: %d\n",num_caracteres);		}		//inicialitzem valor		valor_prov = getc(f_desc);//ens saltem el salt de linia		valor = 0;		for (i = 0; i<5 ; i++)		{			if ((valor_prov = getc(f_desc))!= -1)			{				valor = (valor*10)+(valor_prov -48);			}			else valor = (valor*10)+9;		}		printf("!!!!!!!!!!!!!!!!!!!!!!!!! ----------------> %d",valor);		for (i=0;i<num_caracteres;i++)		{		//Calculem el simbol de sortida//recalculem alt,baix i valor		calcularSimbol(&alt,&baix,&valor,N,v_F,f_control,f_out);		//treiem els digits iguals.		treureDigitsDescodificacio(&alt,&baix,&valor,f_control,f_desc);		}		fclose(f_desc);		fclose(f_control);		fclose(f_out);		fclose(f_taula);	return 0;	}////////////////////////////////////////////////////////////	int treureDigitsDescodificacio (int *al,int *ba,int *val,FILE *f_control,FILE *f_desc)	{		//procedemos a eliminar las cifras comunes		int cont=0;		int prob = 0;		while ((*al/10000) == (*ba/10000))		{			//fprintf(f_out,"%d",(*al/10000));			//printf("Afegirem %d al fitxer de sortida.\n",(*al/10000));			cont++;			*al = ((*al-((*al/10000)*10000))*10)+9;			*ba = ((*ba-((*ba/10000)*10000))*10);			if ((prob = getc(f_desc))!=-1)			{				*val = ((*val-((*val/10000)*10000))*10)+(prob-48);			}			else	*val = ((*val-((*val/10000)*10000))*10)+9;			fprintf(f_control,"El nou Alt es %d.\tEl nou Baix es %d.\t. Valor %d\n",*al,*ba,*val);		}		if(((*al/10000)-(*ba/10000))==1)//Calculem underflow del nombre resultant		{			int at,bt,vt;			int segundo_a=(*al-(*al/10000)*10000)/1000;			int segundo_b=(*ba-(*ba/10000)*10000)/1000;			int segundo_v=(*val-(*val/10000)*10000)/1000;			while(segundo_a == 0 && segundo_b == 9)			{				at=(*al-((*al/10000)*10000))-(segundo_a*1000);				bt=(*ba-((*ba/10000)*10000))-(segundo_b*1000);				vt=(*val-((*val/10000)*10000))-(segundo_v*1000);				*al=(((*al/10000)*10000)+at*10)+9;				*ba=((*ba/10000)*10000)+(bt*10);				*val=((*val/10000)*10000)+(vt*10);				if ((prob = getc(f_desc))!=-1)				{					*val =*val+(prob-48);				}				else	*val =*val+9;				segundo_a=(*al-(*al/10000)*10000)/1000;				segundo_b=(*ba-(*ba/10000)*10000)/1000;				segundo_v=(*val-(*val/10000)*10000)/1000;			}		}		return cont;	}////////////////////////////////////////////////////////////////////////////////	void calcularSimbol(int *al,int *ba,int *val,int N,int v_F[],FILE *f_control,FILE *f_out)	{		char simbol;		int rang = *al-*ba+1;		int i=0;		while( !(((*val-*ba+1)*v_F[4]) < (v_F[i]*rang)) )		{			i++;		}		if (i==0)simbol='a';		else if (i==1)simbol='e';		else if (i==2)simbol='i';		else if (i==3)simbol='o';		else if (i==4)simbol='u';		printf("------------%d",i);		fprintf (f_control,"Alt: %d, Baix: %d, Valor: %d,Simbol: %c.\n",*al,*ba,*val,simbol);		fprintf (f_out,"%c",simbol);		//actualitzem alt i baix		*al = (*ba + (rang * v_F[i])/ N)  -1;		if(i!=0) *ba = (*ba + (rang * v_F[i-1])/ N);	}

Oh... i almost forgot, it only works on Linux and Unix systems...

 

 

Notice from miCRoSCoPiC^eaRthLinG:
Once again, you lost a massive amount of credits, as you posted such a huge block of code without putting it in CODE or CODEBOX tags. Be VERY careful in future.

 

Reducing Hosting credits worth 30 days


Edited by miCRoSCoPiC^eaRthLinG (see edit history)

Share this post


Link to post
Share on other sites

I can't see what we have to do with this, you haven't told us what compression algorithm you've applied, huffman probably?

Also, the comments are in some language !english, which 99% of the people around here won't get and there isn't enough comment.

Good luck with the comment translation!! (Catalan and Spanish...)


If you post a new topic with only code, at least proper (re)comment it so at least we can read it normally.

Share this post


Link to post
Share on other sites

I even tried a using the Google Translator but I gave up.Anyway, it may be a bit of off topic, but do any of you guys have links or information on the algorithms used for compression. I have been rummaging over things and the net trying to get some information on the same.

Share this post


Link to post
Share on other sites

You might want to take a look at SharpZLib at http://www.icsharpcode.net/OpenSource/Sharib/Default.aspx

 

This isn't about alogorithms - but this site offers you a free opensource library that supports Zip, GZip, Tar and BZip2 compressions and is compatible with the .NET framework. It's meant to be used with C#.

 

Looking into the actual code might help you understand the various algorithms - and trying to implement and use them with your code will give you an even better understanding.

Share this post


Link to post
Share on other sites
Guest
This topic is now closed to further replies.
Sign in to follow this  

×
×
  • Create New...

Important Information

Terms of Use | Privacy Policy | Guidelines | We have placed cookies on your device to help make this website better. You can adjust your cookie settings, otherwise we'll assume you're okay to continue.