So lets test the intersection with grid lines computed on floats
(as I suggested in comments). I would start with dividing the circle into 4 sections ("quadrants") like this:

In each section only one axis is major (has always bigger or equal difference to the other one) so we can iterate it directly by incrementing (without creating holes in curve). The minor axis is then computed using circles equation:
y = sqrt( r*r - (x-x0)*(x-x0) )
x = sqrt( r*r - (y-y0)*(y-y0) )
so simply loop the major axis in order by 4x (not nested) for
loops and add the points. However whenever the minor axis computed is not directly on the grid we need to add 2 points where the second point has decremented major axis. This can cause duplicate points so to avoid duplicates we should also check if added point is not already added to the end of the list. At the end of last section we should also check against the start of the list (or remove last few duplicate points) I think there would be up to 4 points like this max.
When put all this together in C++/VCL it look like this:
//---------------------------------------------------------------------------
// global grid
float gx0=0.0; // cell origin
float gy0=0.0;
float gxs=30.0; // cell size
float gys=20.0;
//---------------------------------------------------------------------------
// grid -> screen | screen -> grid
// sx = gx0 + gx*gxs | gx = (sx-gx0)/gxs
// sy = gy0 + gy*gys | gy = (sy-gy0)/gys
//---------------------------------------------------------------------------
void draw_cell(TCanvas *scr,int x,int y,DWORD col) // screen, cell, color
{
x=floor(gx0+(float(x)*gxs));
y=floor(gy0+(float(y)*gys));
scr->Pen->Color=clDkGray;
scr->Brush->Color=TColor(col);
scr->Rectangle(x,y,x+gxs,y+gys);
}
//---------------------------------------------------------------------------
void draw_grid(TCanvas *scr,int xs,int ys,DWORD col) // screen, its resolution, color
{
int x0,y0,x1,y1,x,y;
// visible grid
x0=floor((float( 0)-gx0)/gxs);
y0=floor((float( 0)-gy0)/gys);
x1= ceil((float(xs)-gx0)/gxs);
y1= ceil((float(ys)-gy0)/gys);
for (y=y0;y<=y1;y++)
for (x=x0;x<=x1;x++)
draw_cell(scr,x,y,col);
}
//---------------------------------------------------------------------------
void draw_circle(TCanvas *scr,float cx,float cy,float r,DWORD col) // screen, circle, color
{
int i,ix,iy;
int *pnt,pnts=0; // output list of cells { x0,y0, x1,y1, ... }
float x,y,x0,y0,x1,y1,xx,yy,rr=r*r;
// ranges where x or y is major axis
x0=floor(cx-(r*sqrt(0.5)));
y0=floor(cy-(r*sqrt(0.5)));
x1=ceil (cx+(r*sqrt(0.5)));
y1=ceil (cy+(r*sqrt(0.5)));
// prepare list
pnt=new int[4*(x1-x0+1)];
// this adds point (px,py) if the pnt[pi] ... pnt[pi-6] in list is not the same as (px,py)
#define pnt_add(pi,px,py) \
{ \
int e=1; \
if (pi>=0) \
{ \
if ((pi+1<pnts)&&((pnt[pi+0]==px)&&(pnt[pi+1]==py))) e=0; \
if ((pi-1<pnts)&&((pnt[pi-2]==px)&&(pnt[pi-1]==py))) e=0; \
if ((pi-3<pnts)&&((pnt[pi-4]==px)&&(pnt[pi-3]==py))) e=0; \
if ((pi-5<pnts)&&((pnt[pi-6]==px)&&(pnt[pi-5]==py))) e=0; \
} \
if (e){ pnt[pnts]=px; pnts++; pnt[pnts]=py; pnts++; } \
}
// rasterize "quadrants" in order so no sorting is needed later
for (x=x0;x<x1;x++)
{
xx=x-cx; xx*=xx; y=rr-xx; if (y<0.0) continue;
y=sqrt(y); iy=float(cy-y); ix=x;
if (y-floor(y)>1e-10) pnt_add(pnts-2,ix-1,iy);
pnt_add(pnts-4,ix ,iy);
}
for (y=y0;y<y1;y++)
{
yy=y-cy; yy*=yy; x=rr-yy; if (x<0.0) continue;
x=sqrt(x); ix=float(cx+x); iy=y;
if (x-floor(x)>1e-10) pnt_add(pnts-2,ix,iy-1);
pnt_add(pnts-4,ix,iy );
}
for (x=x1;x>x0;x--)
{
xx=x-cx; xx*=xx; y=rr-xx; if (y<0.0) continue;
y=sqrt(y); iy=float(cy+y); ix=x;
pnt_add(pnts-2,ix ,iy);
if (y-floor(y)>1e-10) pnt_add(pnts-4,ix-1,iy);
}
for (y=y1;y>y0;y--)
{
yy=y-cy; yy*=yy; x=rr-yy; if (x<0.0) continue;
x=sqrt(x); ix=float(cx-x); iy=y;
pnt_add(pnts-2,ix,iy );
if (x-floor(x)>1e-10) pnt_add(pnts-4,ix,iy-1);
}
// check duplicity between pnt start and end
for (i=pnts-8;i+1<pnts;i+=2)
if (pnt[0]==pnt[i+0])
if (pnt[1]==pnt[i+1])
{
pnts=i;
break;
}
// render the list (continuously change color to show points order)
for (i=0;i+1<pnts;i+=2) draw_cell(scr,pnt[i+0],pnt[i+1],0x010000*(25+((230*i)/pnts)));
// ---- you can ignore all the rest of the code its my debug rendering -----
// debug numbers
scr->Font->Color=TColor(0x0000FF00);
scr->Font->Height=gys;
scr->Brush->Style=bsClear;
for (i=0;i+1<pnts;i+=2)
{
x=gx0+float(pnt[i+0])*gxs;
y=gy0+float(pnt[i+1])*gys;
scr->TextOutA(x,y,i>>1);
}
scr->Brush->Style=bsSolid;
// debug circle overlay (ignore this)
if (1)
{
int x,y,rx,ry,w=8;
x =floor(gx0+(cx*gxs));
y =floor(gy0+(cy*gys));
rx=floor(r*gxs);
ry=floor(r*gys);
scr->Pen->Color=clYellow;
scr->Brush->Style=bsClear;
scr->Ellipse(x-rx,y-ry,x+rx,y+ry);
scr->MoveTo(x-w,y);
scr->LineTo(x+w,y);
scr->MoveTo(x,y-w);
scr->LineTo(x,y+w);
scr->Brush->Style=bsSolid;
}
// free the list you should store/copy/export it somwhere
delete[] pnt;
#undef pnt_add
}
//---------------------------------------------------------------------------
This is the output (including my debug rendering):

The result is in pnt[pnts]
array holding the x,y
grid coordinates in order ...
This is far from optimized for example: