try this Effective gif/image color quantization? its also histogram color quantization based, very similar to your approach but it create the histogram from 15 bit colors directly to spare space and do not use bins instead it sort colors by occurrence and use min distance to already used colors in palette thresholding to avoid almost duplicate colors... I developed it for my GIF encoder lib some years back...
If I take this as input (converted to jpg):

And use mine algo on it without dithering I got this result:

With dithering enabled I got this result:

as you can see on the cat ear the dithering is much better but even without dithering the result is way better than yours.
However over the years the palette computation code evolves a bit (from the one posted in linked answer) into this (C++):
void gif::compute_palette0()
{
int x,y,r0,g0,b0,r,g,b,a,aa,c,e,hists;
DWORD i,i0,cc;
union { DWORD dd; BYTE db[4]; } c0,c1;
DWORD his[32768];
DWORD idx[32768];
// 15bit histogram
for (x=0;x<32768;x++) { his[x]=0; idx[x]=x; }
for (y=0;y<ys;y++)
for (x=0;x<xs;x++)
{
cc=pic.pyx[y][x];
cc=((cc>>3)&0x1F)|((cc>>6)&0x3E0)|((cc>>9)&0x7C00);
if (his[cc]<0xFFFFFFFF) his[cc]++;
}
// add RGB shades combinations for dithering
if (_dither)
{
x=xs*ys; // max possible count to make as start colors in palette
for (r=0;r<32;r+=10)
for (g=0;g<32;g+=10)
for (b=0;b<32;b+=10,x++)
his[(r<<10)|(g<<5)|( b)]=x;
}
// set recolor as unused
for (r0=0;r0<32;r0++)
for (g0=0;g0<32;g0++)
for (b0=0;b0<32;b0++)
recolor[r0][g0][b0]=255;
// remove zeroes
for (x=0,y=0;y<32768;y++)
{
his[x]=his[y];
idx[x]=idx[y];
if (his[x]) x++;
} hists=x;
// sort by hist
for (i=1,e=hists;i;e--)
for (i=0,x=0,y=1;y<e;x++,y++)
if (his[x]<his[y])
{
i=his[x]; his[x]=his[y]; his[y]=i;
i=idx[x]; idx[x]=idx[y]; idx[y]=i; i=1;
}
// set lcolor color palete
for (i0=0,x=0;x<hists;x++) // main colors
{
cc=idx[x];
b= cc &31;
g=(cc>> 5)&31;
r=(cc>>10)&31;
c0.db[0]=b;
c0.db[1]=g;
c0.db[2]=r;
c0.dd=(c0.dd<<3)&0x00F8F8F8;
// skip if similar color already in lcolor[]
for (a=0,i=0;i<i0;i++)
{
c1.dd=lcolor[i];
aa=int(BYTE(c1.db[0]))-int(BYTE(c0.db[0])); if (aa<=0) aa=-aa; a =aa;
aa=int(BYTE(c1.db[1]))-int(BYTE(c0.db[1])); if (aa<=0) aa=-aa; a+=aa;
aa=int(BYTE(c1.db[2]))-int(BYTE(c0.db[2])); if (aa<=0) aa=-aa; a+=aa;
if (a<=16) { a=1; break; } a=0; // *** treshold ***
}
if (a) recolor[r][g][b]=i;
else{
recolor[r][g][b]=i0;
lcolor[i0]=c0.dd; i0++;
if (i0>=DWORD(lcolors)) { x++; break; }
}
} // i0 = new color table size
for (;x<hists;x++) // minor colors
{
cc=idx[x];
b= cc &31;
g=(cc>> 5)&31;
r=(cc>>10)&31;
c0.db[0]=b;
c0.db[1]=g;
c0.db[2]=r;
c0.dd=(c0.dd<<3)&0x00F8F8F8;
// find closest color
int dc=-1; DWORD ii=0;
for (a=0,i=0;i<i0;i++)
{
c1.dd=lcolor[i];
aa=int(BYTE(c1.db[0]))-int(BYTE(c0.db[0])); if (aa<=0) aa=-aa; a =aa;
aa=int(BYTE(c1.db[1]))-int(BYTE(c0.db[1])); if (aa<=0) aa=-aa; a+=aa;
aa=int(BYTE(c1.db[2]))-int(BYTE(c0.db[2])); if (aa<=0) aa=-aa; a+=aa;
if ((dc<0)||(dc>a)) { dc=a; ii=i; }
}
recolor[r][g][b]=ii;
}
encode_palette_compute(true);
if ((frame)&&(hists<lcolors))
for (lcolor_bits=1,lcolors=1<<lcolor_bits;lcolors<hists;lcolors<<=1,lcolor_bits++);
// compute recolor for 16 base colors for all yet unused colors
for (r0=0;r0<32;r0++)
for (g0=0;g0<32;g0++)
for (b0=0;b0<32;b0++)
if (recolor[r0][g0][b0]==255)
{
// find closest color
for (i=0,c=-1;i<16;i++)
{
c0.dd=lcolor[i];
b=WORD(c0.db[0])>>3;
g=WORD(c0.db[1])>>3;
r=WORD(c0.db[2])>>3;
a=(r-r0); aa =a*a;
a=(g-g0); aa+=a*a;
a=(b-b0); aa+=a*a;
if ((c<0)||(e>aa)) { e=aa; c=i; }
}
recolor[r0][g0][b0]=c;
}
}
Where my gif
class looks like this (so you can extract config and used variables...):
class gif
{
public:
// IO interface
file_cache<4<<20> fi,fo; // file cache
BYTE dat[256]; // internal buffer 256 Bytes needed
// Performance counter
double Tms,tms,tdec,tenc; // perioda citaca [ms], zmerany cas [ms],cas encodovania [ms]
void tbeg(); // zaciatok merania
void tend(); // koniec merania
// image data
gif_frame32 pic,pic0; // actual and restore to 32bit frames
gif_frame8 pic1; // 8bit input conversion frame
int xs,ys; // resolution
int *py; // interlace table
// colors (colors are computed from color_bits)
DWORD gcolor[256]; //hdr
DWORD lcolor[256]; //img
BYTE recolor[32][32][32]; //encode reduce color table
int scolors,scolor_bits; //hdr screen color depth
int gcolors,gcolor_bits; //hdr global pallete
int lcolors,lcolor_bits; //img/hdr local palette
// info
bool _89a; //hdr extensions present?
bool _interlaced; //img interlaced frame?
bool _gcolor_table; //hdr
bool _gcolor_sorted; //hdr
bool _lcolor_table; //img local palette present?
bool _lcolor_sorted; //img local palette colors sorted?
int cpf,cpf_error; //clears per frame counter,clear_errors total
// debug
bool _draw_palette; //draw pallete?
// animation
int frame,disposal; // frame ix,disposal of frame
double t,tn; // animation time,next frame time
// encode config
int _force_disposal; // -1 or forced disposal
bool _precomputed_palette; // if true recolor and global palete is already set before encoding
bool _dither; // dither colors?
// inter thread comm
volatile bool _image_copied; // flag that source image is not needed anymore while encoding
// temp dictionary for dec/enc
gif_str dict[_gif_maxdecode];
DWORD dicts,code_clr,code_end,code_min;
// temp dictionary speed up tables (encoding)
WORD dtab[256][_gif_maxencode],dnum[256],dmask[256]; // dtab[i][dnum[i]] all dictionary codes (sorted by code) starting with i for encode speed up, 1<<dmask[i]<=dnum[i]
#pragma pack(1)
struct __hdr
{
// Header
BYTE Signature[3]; /* Header Signature (always "GIF") */
BYTE Version[3]; /* GIF format version("87a" or "89a") */
// Logical Screen Descriptor
WORD xs;
WORD ys;
BYTE Packed; /* Screen and Color Map Information */
BYTE BackgroundColor; /* Background Color Index */
BYTE AspectRatio; /* Pixel Aspect Ratio */
__hdr(){}; __hdr(__hdr& a){ *this=a; }; ~__hdr(){}; __hdr* operator = (const __hdr *a) { *this=*a; return this; }; /*__hdr* operator = (const __hdr &a) { ...copy... return this; };*/
};
struct _hdr:__hdr
{
DWORD adr,siz;
_hdr(){}; _hdr(_hdr& a){ *this=a; }; ~_hdr(){}; _hdr* operator = (const _hdr *a) { *this=*a; return this; }; /*_hdr* operator = (const _hdr &a) { ...copy... return this; };*/
} hdr;
struct __img
{
// Logical Image Descriptor
BYTE Separator; /* Image Descriptor identifier 0x2C */
WORD x0; /* X position of image on the display */
WORD y0; /* Y position of image on the display */
WORD xs; /* Width of the image in pixels */
WORD ys; /* Height of the image in pixels */
BYTE Packed; /* Image and Color Table Data Information */
__img(){}; __img(__img& a){ *this=a; }; ~__img(){}; __img* operator = (const __img *a) { *this=*a; return this; }; /*__img* operator = (const __img &a) { ...copy... return this; };*/
};
struct _img:__img
{
DWORD adr,siz;
_img(){}; _img(_img& a){ *this=a; }; ~_img(){}; _img* operator = (const _img *a) { *this=*a; return this; }; /*_img* operator = (const _img &a) { ...copy... return this; };*/
} img;
struct __gfxext
{
BYTE Introducer; /* Extension Introducer (always 21h) */
BYTE Label; /* Graphic Control Label (always F9h) */
BYTE BlockSize; /* Size of remaining fields (always 04h) */
BYTE Packed; /* Method of graphics disposal to use */
WORD DelayTime; /* Hundredths of seconds to wait */
BYTE ColorIndex; /* Transparent Color Index */
BYTE Terminator; /* Block Terminator (always 0) */
__gfxext(){}; __gfxext(__gfxext& a){ *this=a; }; ~__gfxext(){}; __gfxext* operator = (const __gfxext *a) { *this=*a; return this; }; /*__gfxext* operator = (const __gfxext &a) { ...copy... return this; };*/
};
struct _gfxext:__gfxext
{
DWORD adr,siz;
_gfxext(){}; _gfxext(_gfxext& a){ *this=a; }; ~_gfxext(){}; _gfxext* operator = (const _gfxext *a) { *this=*a; return this; }; /*_gfxext* operator = (const _gfxext &a) { ...copy... return this; };*/
} gfxext;
struct __txtext
{
BYTE Introducer; /* Extension Introducer (always 21h) */
BYTE Label; /* Extension Label (always 01h) */
BYTE BlockSize; /* Size of Extension Block (always 0Ch) */
WORD TextGridLeft; /* X position of text grid in pixels */
WORD TextGridTop; /* Y position of text grid in pixels */
WORD TextGridWidth; /* Width of the text grid in pixels */
WORD TextGridHeight; /* Height of the text grid in pixels */
BYTE CellWidth; /* Width of a grid cell in pixels */
BYTE CellHeight; /* Height of a grid cell in pixels */
BYTE TextFgColorIndex; /* Text foreground color index value */
BYTE TextBgColorIndex; /* Text background color index value */
// BYTE *PlainTextData; /* The Plain Text data */
// BYTE Terminator; /* Block Terminator (always 0) */
__txtext(){}; __txtext(__txtext& a){ *this=a; }; ~__txtext(){}; __txtext* operator = (const __txtext *a) { *this=*a; return this; }; /*__txtext* operator = (const __txtext &a) { ...copy... return this; };*/
};
struct _txtext:__txtext
{
DWORD adr,siz;
AnsiString dat;
_txtext(){}; _txtext(_txtext& a){ *this=a; }; ~_txtext(){}; _txtext* operator = (const _txtext *a) { *this=*a; return this; }; /*_txtext* operator = (const _txtext &a) { ...copy... return this; };*/
};
List<_txtext> txtext;
struct __remext
{
BYTE Introducer; /* Extension Introducer (always 21h) */
BYTE Label; /* Comment Label (always FEh) */
// BYTE *CommentData; /* Pointer to Comment Data sub-blocks */
// BYTE Terminator; /* Block Terminator (always 0) */
__remext(){}; __remext(__remext& a){ *this=a; }; ~__remext(){}; __remext* operator = (const __remext *a) { *this=*a; return this; }; /*__remext* operator = (const __remext &a) { ...copy... return this; };*/
};
struct _remext:__remext
{
DWORD adr,siz;
AnsiString dat;
_remext(){}; _remext(_remext& a){ *this=a; }; ~_remext(){}; _remext* operator = (const _remext *a) { *this=*a; return this; }; /*_remext* operator = (const _remext &a) { ...copy... return this; };*/
};
List<_remext> remext;
struct __appext
{
BYTE Introducer; /* Extension Introducer (always 21h) */
BYTE Label; /* Extension Label (always FFh) */
BYTE BlockSize; /* Size of Extension Block (always 0Bh) */
CHAR Identifier[8]; /* Application Identifier */
BYTE AuthentCode[3]; /* Application Authentication Code */
// BYTE *ApplicationData; /* Point to Application Data sub-blocks */
// BYTE Terminator; /* Block Terminator (always 0) */
__appext(){}; __appext(__appext& a){ *this=a; }; ~__appext(){}; __appext* operator = (const __appext *a) { *this=*a; return this; }; /*__appext* operator = (const __appext &a) { ...copy... return this; };*/
};
struct _appext:__appext
{
DWORD adr,siz;
AnsiString dat;
_appext(){}; _appext(_appext& a){ *this=a; }; ~_appext(){}; _appext* operator = (const _appext *a) { *this=*a; return this; }; /*_appext* operator = (const _appext &a) { ...copy... return this; };*/
};
List<_appext> appext;
#pragma pack()
gif();
gif(gif& a);
~gif();
gif* operator = (const gif *a);
//gif* operator = (const gif &a);
void _resize(int _xs,int _ys); // resize buffers
void load_beg(AnsiString filename); // open GIF file for decoding
void decode(int _ignore_delay=0); // decode frame from GIF, if _ignore_delay then ignore realtime
void load_end(); // close GIF file
void save_beg(AnsiString filename); // create new GIF file for encoding
void compute_palette0(); // compute palette from frame method 0
void compute_palette1(); // compute palette from frame method 1
void encode_palette_RGB256(); // set RGB combinations as 256 color palette as predefined global only palette
void encode_palette_VGA256(); // set default 256 color VGA palette as predefined global only palette
void encode_palette_compute(bool _local); // compute recolor[][][] from palette
void encode(const gif_frame32 &src,int dt=0); // encode frame to GIF , dt is delay in [ms] instead of realtime in range <10 .. 655350> [ms]
// void encode(int dst_xs,int dst_ys,TCanvas *src,int src_x0,int src_y0,int src_x1,int src_y1,int dt=0); // encode frame to GIF , dt is delay in [ms] instead of realtime in range <10 .. 655350> [ms]
void save_end(); // finalize and close GIF file
void draw_info(int x,int y,TCanvas *can);
void draw_info(int x,int y,Graphics::TBitmap *bmp);
void configure(gif &src); // set all encoding variables from src (for multithreaded encoding)
};