#include <malloc.h>
#include <stdio.h>
#include <stdlib.h>

/* Uint8: one colour component byte as stored in the image buffers. */
typedef unsigned char Uint8;
/* Sint32: signed working type for component sums and diffused errors.
 * NOTE(review): `long` is guaranteed to be at least 32 bits but is 64 bits
 * wide on LP64 targets, so the name states a minimum, not an exact width. */
typedef long          Sint32;

/* Pixel layout: bytes per pixel and the byte offset of each of the three
 * processed components inside a pixel. */
#define C_WIDTH   3  /* Bytes per pixel (e.g. 3 for RGB, 4 for RGBA) */
#define C0_INDEX  0  /* Index of first component (e.g. 0 for RGBA or RGB, 1 for ARGB) */
#define C1_INDEX  1  /* Index of second component (e.g. 1 for RGBA or RGB, 2 for ARGB) */
#define C2_INDEX  2  /* Index of third component (e.g. 2 for RGBA or RGB, 3 for ARGB) */

/* Number of most significant bits kept for each component (5-6-5 here). */
#define C0_BITS   5
#define C1_BITS   6
#define C2_BITS   5

/* Mask that keeps the top `bits` bits of an 8-bit value, e.g. C_MASK(5) == 0xF8. */
#define C_MASK(bits) ((0xFF00 >> (bits)) & 0xFF)
/* Half of the step between two kept levels, e.g. C_ADD(5) == 4. */
#define C_ADD(bits)  ((1 << (8 - (bits))) >> 1)

/* Bytes per image row including padding. Row alignment, e.g. (((n)+3)&~3)
 * rounds the row size up to a multiple of four bytes (double word alignment).
 * Arguments are parenthesised so that expressions such as `w + 1` expand
 * correctly. */
#define DATA_ROW_BYTE_WIDTH(image_width) (((image_width) * C_WIDTH + 3) & ~3)

/* Dither applies Floyd-Steinberg dithering on the image data. pInData
 * and pOutData can point to the same memory block
 */

void Dither (Uint8* pInData, Uint8* pOutData, int width, int height) {
   Uint8  clampTab[3 * 256]; 
   Sint32 errorClampTab[2 * 256];

   {  int i;
      for (i = 0; i < 256; i++) {
         clampTab[i] = 0;                         
         clampTab[i + 256] = (Uint8)i;          
         clampTab[i + 2 * 256] = 255;
         }
      }

   {  Sint32 clampVal = 0;
      int    clampIdx = 0;
      for (; clampIdx < 256 / 16; clampIdx++) {
         errorClampTab[clampIdx + 256] = clampVal; 
         errorClampTab[-clampIdx + 256] = -clampVal;
         clampVal++;
         }   
      for (; clampIdx < 3 * 256 / 16; clampIdx++) {
         errorClampTab[clampIdx + 256] = clampVal; 
         errorClampTab[-clampIdx + 256] = -clampVal;
         clampVal += clampIdx & 1;
         }
      for (; clampIdx < 256; clampIdx++) {
         errorClampTab[clampIdx + 256] = clampVal;
         errorClampTab[-clampIdx + 256] = -clampVal;
         }
      }

   {  Sint32* floydSteinbergError = (Sint32*)calloc ((width + 2) * 3, sizeof (Sint32)); /* Memory must be cleared to zero! Use calloc or clear buffer with code! */
      int   dirC = C_WIDTH;
      int   dirE = 3;
      int   row  = height;
      
      while (row--) {
         Uint8* pIn  = pInData  + DATA_ROW_BYTE_WIDTH (width) * row;
         Uint8* pOut = pOutData + DATA_ROW_BYTE_WIDTH (width) * row;
         Sint32* pError = floydSteinbergError;

         Sint32 c0 = 0;
         Sint32 c1 = 0;
         Sint32 c2 = 0;
         Sint32 errBelow0 = 0;
         Sint32 errBelow1 = 0;
         Sint32 errBelow2 = 0;
         Sint32 errPrev0 = 0;
         Sint32 errPrev1 = 0;
         Sint32 errPrev2 = 0;
         int  col = width;

         if (dirC < 0) {
            pIn    += C_WIDTH * (width - 1);
            pOut   += C_WIDTH * (width - 1);
            pError += C_WIDTH * (width + 1);
            }
         
         while (col--) {
            c0 = (c0 + pError[dirC + 0] + 8) >> 4;
            c1 = (c1 + pError[dirC + 1] + 8) >> 4;
            c2 = (c2 + pError[dirC + 2] + 8) >> 4;
            c0 = errorClampTab[c0 + 256];
            c1 = errorClampTab[c1 + 256];
            c2 = errorClampTab[c2 + 256];
            c0 += pIn[C0_INDEX];  
            c1 += pIn[C1_INDEX];
            c2 += pIn[C2_INDEX];
            c0 += C_ADD(C0_BITS);
            c1 += C_ADD(C1_BITS);
            c2 += C_ADD(C2_BITS);
            c0 = clampTab[c0 + 256];
            c1 = clampTab[c1 + 256];
            c2 = clampTab[c2 + 256];

            c0 -= (pOut[C0_INDEX] = (c0 & C_MASK(C0_BITS)));
            c1 -= (pOut[C1_INDEX] = (c1 & C_MASK(C1_BITS)));
            c2 -= (pOut[C2_INDEX] = (c2 & C_MASK(C2_BITS)));

            {
            Sint32 errNext = c0;          /* Process component 0 */
            Sint32 delta = c0 * 2;
            c0 += delta;                 /* form error * 3 */
            pError[0] = errPrev0 + c0;
            c0 += delta;                 /* form error * 5 */
            errPrev0 = errBelow0 + c0;
            errBelow0 = errNext;
            c0 += delta;                 /* form error * 7 */
            }

            {
            Sint32 errNext = c1;          /* Process component 1 */
            Sint32 delta = c1 * 2;
            c1 += delta;                 /* form error * 3 */
            pError[1] = errPrev1 + c1;   
            c1 += delta;                 /* form error * 5 */
            errPrev1 = errBelow1 + c1;
            errBelow1 = errNext;
            c1 += delta;                 /* form error * 7 */
            }

            {
            Sint32 errNext = c2;          /* Process component 2 */
            Sint32 delta = c2 * 2;
            c2 += delta;                 /* form error * 3 */
            pError[2] = errPrev2 + c2;
            c2 += delta;                 /* form error * 5 */
            errPrev2 = errBelow2 + c2;
            errBelow2 = errNext;
            c2 += delta;                 /* form error * 7 */
            }

            pIn    += dirC;
            pOut   += dirC;
            pError += dirE;
            }
         pError[0] = errPrev0;
         pError[1] = errPrev1;
         pError[2] = errPrev2;

         dirC = -dirC;
         dirE = -dirE;
         }

      free ((void*)floydSteinbergError);
      }
   }

/* Truncate masks every component down to its C0_BITS/C1_BITS/C2_BITS most
 * significant bits, producing image data that simulates the truncation done
 * by the display. Rows are DATA_ROW_BYTE_WIDTH (width) bytes apart and are
 * visited from the last row to the first. */

void Truncate (Uint8* pInData, Uint8* pOutData, int width, int height) {
   int y;
   for (y = height; y-- != 0; ) {
      const Uint8* src = pInData  + DATA_ROW_BYTE_WIDTH (width) * y;
      Uint8*       dst = pOutData + DATA_ROW_BYTE_WIDTH (width) * y;
      int x;
      for (x = width; x-- != 0; src += C_WIDTH, dst += C_WIDTH) {
         /* Keep only the high bits of each component */
         dst[C0_INDEX] = src[C0_INDEX] & C_MASK(C0_BITS);
         dst[C1_INDEX] = src[C1_INDEX] & C_MASK(C1_BITS);
         dst[C2_INDEX] = src[C2_INDEX] & C_MASK(C2_BITS);
         }
      }
   }

/* Read a raw PBMPlus (PPM/P6) file.
 *
 *   filename  path of the file to read
 *   pData     receives a newly allocated pixel buffer; the caller releases
 *             it with free ()
 *   width     receives the image width in pixels
 *   height    receives the image height in pixels
 *
 * The buffer holds `height` rows of DATA_ROW_BYTE_WIDTH (width) bytes each,
 * i.e. the row padding is part of the allocation (zero filled by calloc).
 *
 * The header is read line by line: the "P6" magic, the dimensions and the
 * maximum sample value are each expected on a line of their own, and lines
 * starting with '#' are skipped. Samples are stored exactly as read; they
 * are not rescaled when the maximum value is below 255.
 *
 * On any error a message is printed and the program exits with -1.
 */

void ReadPPM (const char* filename, Uint8** pData, int* width, int* height) {
   char   linebuffer[256] = { '\0' };
   int    step = 0, levels = 0, row, col;
   size_t pixelsPerRow;
   size_t rowBytes = 0;
   FILE*  f = fopen (filename, "rb");
   if (f == NULL) {
      printf ("Error opening file %s for reading\n", filename); exit (-1);
      }
   while (step < 3) {
      if (fgets (linebuffer, sizeof (linebuffer), f) == NULL) {
         printf ("Unexpected end of file %s\n", filename); exit (-1);
         }
      if (linebuffer[0] == '#') {
         continue;                                /* comment line */
         }
      switch (step) {
         case 0:                                  /* magic number */
            if ((linebuffer[0] != 'P') || (linebuffer[1] != '6')) {
               printf ("File %s does not appear to be a raw PPM file (P6)\n", filename); exit (-1);
               }
            step++;
            break;
         case 1:                                  /* dimensions */
            if ((sscanf (linebuffer, "%d %d", width, height) != 2) || (*width <= 0) || (*height <= 0)) {
               printf ("Unable to determine width and height of file %s\n", filename); exit (-1);
               }
            step++;
            break;
         case 2:                                  /* maximum sample value */
            if ((sscanf (linebuffer, "%d", &levels) != 1) || (levels <= 0) || (levels > 255)) {
               printf ("Unsupported file %s. Levels=%d\n", filename, levels); exit (-1);
               }
            step++;
            break;
         default:
            break;
         }
      }

   /* Allocate whole padded rows: the pixels are addressed below with the
    * DATA_ROW_BYTE_WIDTH stride, which can exceed width * C_WIDTH. The size
    * is computed in size_t; calloc itself rejects an overflowing product. */
   *pData = NULL;
   pixelsPerRow = (size_t)*width;
   if (pixelsPerRow <= ((size_t)-1 - 3) / C_WIDTH) {
      rowBytes = DATA_ROW_BYTE_WIDTH (pixelsPerRow);
      *pData = calloc ((size_t)*height, rowBytes);
      }
   if (*pData == NULL) {
      printf ("Could not allocate memory for file %s data buffer\n", filename); exit (-1);
      }

   for (row = 0; row < *height; row++) {
      Uint8* p = *pData + rowBytes * (size_t)row;
      for (col = 0; col < *width; col++) {
         int c0, c1, c2;
         if (((c0 = fgetc (f)) < 0) || ((c1 = fgetc (f)) < 0) || ((c2 = fgetc (f)) < 0)) {
            printf ("Unexpected end of file %s\n", filename); exit (-1);
            }
         p[C0_INDEX] = (Uint8)c0;
         p[C1_INDEX] = (Uint8)c1;
         p[C2_INDEX] = (Uint8)c2;
         p += C_WIDTH;
         }
      }
   fclose (f);
   }
                          
/* Write a raw PBMPlus (PPM/P6) file.
 *
 *   filename  path of the file to create or overwrite
 *   pData     pixel buffer, C_WIDTH bytes per pixel, rows
 *             DATA_ROW_BYTE_WIDTH (width) bytes apart
 *   width     image width in pixels
 *   height    image height in pixels
 *
 * The header always declares a maximum sample value of 255. On any error,
 * including a failure reported when the file is closed, a message is
 * printed and the program exits with -1.
 */

void WritePPM (const char* filename, Uint8* pData, int width, int height) {
   const size_t rowBytes = DATA_ROW_BYTE_WIDTH ((size_t)(width > 0 ? width : 0));
   int row, col;
   FILE* f = fopen (filename, "wb");
   if (f == NULL) {
      printf ("Error opening file %s for writing\n", filename); exit (-1);
      }
   if (fprintf (f, "P6\n%d %d\n255\n", width, height) <= 0) {
      printf ("Error writing file %s\n", filename); exit (-1);
      }
   for (row = 0; row < height; row++) {
      const Uint8* p = pData + rowBytes * (size_t)row;
      for (col = 0; col < width; col++) {
         if ((fputc (p[C0_INDEX], f) < 0) || (fputc (p[C1_INDEX], f) < 0) || (fputc (p[C2_INDEX], f) < 0)) {
            printf ("Error writing file %s\n", filename); exit (-1);
            }
         p += C_WIDTH;
         }
      }
   /* fclose flushes the stream, so buffered write failures surface here */
   if (fclose (f) != 0) {
      printf ("Error writing file %s\n", filename); exit (-1);
      }
   }
    
/* Loads input888.ppm, dithers it in place and saves the result as
 * dither565.ppm. */
void DitherTest (void) {
   static const char sourceFile[] = "input888.ppm";
   static const char targetFile[] = "dither565.ppm";
   int    imageWidth  = 0;
   int    imageHeight = 0;
   Uint8* pixels      = NULL;

   ReadPPM (sourceFile, &pixels, &imageWidth, &imageHeight);
   Dither (pixels, pixels, imageWidth, imageHeight);      /* input and output share one buffer */
   WritePPM (targetFile, pixels, imageWidth, imageHeight);

   free (pixels);
   }

/* Loads input888.ppm, truncates the components in place and saves the
 * result as trunc565.ppm. */
void TruncTest (void) {
   int    w = 0;
   int    h = 0;
   Uint8* image = NULL;

   ReadPPM ("input888.ppm", &image, &w, &h);

   /* Source and destination are the same buffer */
   Truncate (image, image, w, h);

   WritePPM ("trunc565.ppm", image, w, h);
   free (image);
   }


/* Program entry point: runs the dithering demo followed by the truncation
 * demo. Returns 0; the routines called here terminate the program
 * themselves with exit (-1) when a file error occurs. */
int main (void) {
   DitherTest ();
   TruncTest ();
   return 0;
   }