/************** creating a grid ******************/ %macro grid(minx=,maxx=,miny=,maxy=,dim=,anno=,printN=YES); %let byx=%sysfunc(int(%sysevalf(100*(&maxx-&minx)/&dim)));%put &byx; %let byy=%sysfunc(int(%sysevalf(100*(&maxy-&miny)/&dim)));%put &byy; %let minx=%sysevalf(100*&minx); %let maxx=%sysevalf(100*&maxx); %let miny=%sysevalf(100*&miny); %let maxy=%sysevalf(100*&maxy); proc sql; create table trab&dim (x num, y num); %do i=&minx %to &maxx %by &byx; %do j=&miny %to &maxy %by &byy; %let l=%eval(&i+&byx); %let m=%eval(&j+&byy); %if &l<=&maxx and &m<=&maxy and &i<=&maxx and &j<=&maxy %then %do; insert into trab&dim values (&i,&j) values (&i,&m) values (&l,&m) values (&l,&j); %end; %end; %end; quit; data id&dim; do id=1 to &dim*&dim; do i=1 to 4; output; end; end; run; data trab&dim; merge trab&dim id&dim; x=x/100; y=y/100; drop i; run; proc sql; create table coor as select distinct id, max(x) as maxx,max(y) as maxy, min(x) as minx, min(y) as miny from trab&dim group by id order by id; quit; data coor; set coor; x=(maxx+minx)/2; y=(maxy+miny)/2; position='5'; function='label'; style='simplex'; text=trim(left(put(id,$4.))); size=0.8; color='black'; xsys='2'; ysys='2'; when='a'; run; %let r=&dim; %let n=%eval(&r*&r); data a&dim; %do i=1 %to &n; %if &i<=&r or &i>=%eval(&r*(&r-1)+1) or %qsysfunc(mod(&i,&r))=0 or %qsysfunc(mod(&i,&r))=1 %then %do; id=&i; v=0; output; %end; %do l=1 %to (&r-1)/2; %do j=1 %to &r; %if &i=%eval((&l+1)+(&j*&r)) or &i=%eval((&r-&l)+(&j*&r))or &i=%eval((&l*&r)+(&j+&l)) or &i=%eval((((&r-1)-&l)*&r)+(&j+&l)) %then %do; id=&i; v=&l*10; output; %end; %end; %end; %end; run; %if %upcase(&printN)=YES %then %do; data &anno._; set &anno coor; run; %end; %else %do; data &anno._; set &anno; run; %end; proc sort data=a&dim nodupkey; by id; run; goptions reset=all reset=global ftitle='Verdana'; data a;id=9999;v=2;run; title "Grid of Dimension &dim"; proc gmap data=a map=trab&dim all; id id; choro v / nolegend anno=&anno._; run; quit; %mend grid; /************** defining a neighborhood ******************/ %macro neighborhood(id=,pt=,map=,anno=,out=,type=ROOK); proc iml; use ↦ read all; ptid=&pt; read all var{x y} into point where(&id=ptid); n=nrow(x); do i=1 to 4; do j=1 to n; if point[i,1]=x[j] & point[i,2]=y[j] & ptid ^= &id[j] then do; neighbor=neighbor//&id[j]; end; end; end; %if %upcase(&type)=ROOK or %upcase(&type)= %then %do; call sort(neighbor,{1}); do i=2 to nrow(neighbor); if neighbor[i]=neighbor[i-1] then neighbor2=neighbor2//neighbor[i]; end; %end; %else %do; neighbor2=unique(neighbor)`; %end; neighbor2=neighbor2||j(nrow(neighbor2),1,ptid); create &out from neighbor2[colname={"&id" "v"}]; append from neighbor2; quit; proc sql; insert into &out values(&pt,9999); quit; goptions reset=all reset=global ftitle='Verdana'; title "Adjacent Neighboring Units of &id=&pt"; proc gmap data=&out map=&map all; id &id; choro v / nolegend %if &anno ne %then %do; anno=&anno;%end;; run; quit; %mend neighborhood; /***** point inside grid *******/ %macro ginside(map=,id=,where=,data=,out=); proc iml; use ↦ read all var{x} into _p1; read all var{y} into _p2; read all var{&id} into _id; p=_p1||_p2||_id; free _p1 _p2 _id; start verifyisright(p1,p2,point); isleft=0; interceptasegmento=0; numberofinterceptededges=0; ymin=min(p1[2],p2[2]); ymax=max(p1[2],p2[2]); y=point[2]; if (p2[2]-p1[2])^= 0 then x=p1[1]+(y-p1[2])*(p2[1]-p1[1])/(p2[2]-p1[2]); else x=p1[1]; if point[1]ymin & y<=ymax then interceptasegmento=1; if isleft=1 & interceptasegmento=1 then numberofinterceptededges=1; return(numberofinterceptededges); finish verifyisright; start edge(p,point); p1=p[1,]; p2=p[2,]; point=point; cond=verifyisright(p1,p2,point); return(cond); finish edge; start isinsidepolygon(point) global(p); %if &where = %then %do; id=unique(p[,3]); %end; %else %do; id=&where; %end; do j=1 to ncol(id); _type_=0; point=point; numberofinterceptededges=0; pp=p[loc(p[,3]=id[j]),]; do i=1 to nrow(pp)-1; z=edge(pp[i:i+1,],point); if z=1 then numberofinterceptededges=numberofinterceptededges+1; *print numberofinterceptededges; end; if mod(numberofinterceptededges,2)=1 then _type_=1; if _type_=1 then do; result=point||_type_||id[j]; append from result; end; end; finish isinsidepolygon; use &data; read all into point; result=j(1,4,0); create &out from result[colname={"x" "y" "_type_" "id"}]; do j=1 to nrow(point); run isinsidepolygon(point[j,]); end; close &out; quit; %mend ginside; /***************** adaptive sampling *****************/ %macro as(data=,n=,sample=,out=,strata=,seed=,map=,id=,anno=,typen=ROOK,printN=YES); %if &sample= %then %do; proc surveyselect data=&data sampsize=&n out=&out seed=&seed noprint; run; data &out;set &out; v=2; run; %end; %else %do; data &out;set &sample; v=2; run; %end; title "Selected Sampling"; proc gmap data=&out map=&map all; id &id; choro v / nolegend anno=&anno; run; quit; data &anno.1; set &anno(keep=x y); run; proc sql noprint; select count(*) into:n from &out; quit; %put &n; data _null_; set &out; call symput('id'||trim(left(_n_)),&id); run; %put &id1; proc sql; drop table &out.s1,&out.s2,&out.s3,_freq_as_,&out.neighbor2; create table _freq_as_ (id num, freq num); quit; %do i=1 %to &n; %ginside(map=&map,id=&id,where=&&id&i,data=&anno.1,out=&out.out); %if &i=1 %then %do; %let cond=0; %end; %else %do; proc sql noprint; select count(*) into:cond from &out.s1 where id in (select distinct id from &out.out); quit; %end; %put "cond=" &cond; %if &cond=0 %then %do; proc append base=&out.s1 data=&out.out force; run; %end; %end; proc sql; insert into _freq_as_ select id,count(*) as freq from &out.s1 group by id; quit; proc sort data=&out.s1 nodupkey; by &id; run; proc sql noprint; select count(*) into:m from &out.s1; quit; %put &m; data _null_;set &out.s1; call symput('ids'||trim(left(_n_)),&id); run; %put &ids1; %do j=1 %to &m; %neighborhood(id=&id,pt=&&ids&j,map=&map,anno=&anno,out=&out.neighbor,type=&typen); proc append base=&out.s2 data=&out.neighbor force; run; proc append base=&out.neighbor2 data=&out.neighbor force; run; %end; data &out.s2; set &out.s2; v=2; run; proc append base=&out.s3 data=&out.s2 force; run; proc sql noprint; create table &out.s2 as select * from &out.s2 where &id not in (select &id from &out); select count(*) into:k from &out.s2; quit; %put &k; data _null_; set &out.s2; call symput('id'||trim(left(_n_)),&id); run; %put &id1; %do %while (&k>0); proc sql; drop table &out.s1,&out.s2; quit; %do i=1 %to &k; %ginside(map=&map,id=&id,where=&&id&i,data=&anno.1,out=&out.out); %if &i=1 %then %do; %let cond=0; %end; %else %do; proc sql noprint; select count(*) into:cond from &out.s1 where id in (select distinct id from &out.out); quit; %end; %put "cond=" &cond; %if &cond=0 %then %do; proc append base=&out.s1 data=&out.out force; run; %end; %end; proc sql; insert into _freq_as_ select id,count(*) as freq from &out.s1 group by id; quit; proc sort data=&out.s1 nodupkey; by &id; run; proc sql noprint; select count(*) into:m from &out.s1; quit; %put &m; %if &m>0 %then %do; data _null_; set &out.s1; call symput('ids'||trim(left(_n_)),&id); run; %put &ids1; %do j=1 %to &m; %neighborhood(id=&id,pt=&&ids&j,map=&map,anno=&anno,out=&out.neighbor,type=&typen); proc append base=&out.s2 data=&out.neighbor force; run; proc append base=&out.neighbor2 data=&out.neighbor force; run; %end; %end; data &out.s2;set &out.s2; v=2; run; proc append base=&out.s3 data=&out.s2 force; run; proc sql noprint; %if &strata= %then %do; insert into &out select &id,2 from &out.s1; %end; %else %do; insert into &out select &id,2,0 from &out.s1; %end; create table &out.s2 as select * from &out.s2 where &id not in (select &id from &out); select count(*) into:k from &out.s2; quit; %put k=&k; data _null_;set &out.s2; call symput('id'||trim(left(_n_)),&id); run; %put &id1; %end; proc sort data=&out.s3 nodupkey; by &id; run; proc sort data=_freq_as_; by id; run; data &out.s3; set &out.s3 &out; run; proc sort data=&out.s3 nodupkey; by &id; run; %if %upcase(&printN)=YES %then %do; data &anno._; set &anno coor; run; %end; %else %do; data &anno._; set &anno; run; %end; %if &strata= %then %do; goptions reset=all reset=global ftitle='Verdana'; title "Adaptive Cluster Sampling"; proc gmap data=&out.s3 map=&map all; id &id; choro v / nolegend anno=&anno._; run; quit; %end; %else %do; data &out.s3;set &out.s3(in=a) &data; if a then strata=99999; run; proc sql noprint;select max(strata) into:mstrata from &data;quit;%put &mstrata; title "Stratified Adaptive Cluster Sampling"; proc gmap data=&out.s3 map=&map all; id &id; choro strata / anno=&anno._ legend=legend1; legend1 order=(1 to &mstrata); run; quit; %end; proc freq data=&out.neighbor2 noprint; tables v /out=_sample_(drop=count percent); run; data &out.neighbor2;set &out.neighbor2; source=0; if v ne 9999; run; data &out;set &out; seq=_n_; run; proc sql noprint; create table _sample_ as select v as id from _sample_ where v in (select id from &out where seq<=&n); update &out.neighbor2 set source=v where v in (select id from _sample_); create table _refresh_ as select id as v,source from &out.neighbor2 where v in (select id from &out.neighbor2) and source ne 0; create table &out.neighbor3 as select a.*,b.source as s2 from &out.neighbor2 a left join _refresh_ b on a.v=b.v; update &out.neighbor3 set source=s2 where s2 ne .; alter table &out.neighbor3 drop s2; select count(*) into:zero from &out.neighbor3 where source=0; %put &zero; %do %while (&zero>0); create table _refresh_ as select distinct id as v,source from &out.neighbor3 where v in (select id from &out.neighbor3) and source ne 0; create table &out.neighbor3 as select a.*,b.source as s2 from &out.neighbor3 a left join _refresh_ b on a.v=b.v; update &out.neighbor3 set source=s2 where s2 ne .; alter table &out.neighbor3 drop s2; select count(*) into:zero from &out.neighbor3 where source=0; %put &zero; %end; quit; proc sort data=&out.neighbor3(drop=v) nodupkey; by id source; run; data _freq_as_; merge _freq_as_ &out.neighbor3; by id; run; proc sort data=&out; by id; run; data &out;retain id freq; merge &out(in=a) _freq_as_; by id; if freq=. then freq=0; if source=. then source=0; run; proc sort data=&out nodupkey; by id; run; data &out;retain &id freq v seq; merge &out(in=a) &data(keep=id &strata); by id; if a; run; %if &strata= %then %do; proc sort data=&out; by source; run; %end; %else %do; proc sort data=&out; by &strata source; run; %end; proc iml; use &out; read all into tab; use &data; read all into pop; NN=nrow(pop); n=&n; tab1=tab[loc(tab[,4]^=.),]; *print tab1; %if &strata= %then %do; mu=j(1,3,1); mu[1]=tab[1,ncol(tab)]; mu[2]=tab[1,2]; do j=2 to nrow(tab); if tab[j,ncol(tab)]=tab[j-1,ncol(tab)] then do; mu[nrow(mu),2]=mu[nrow(mu),2]+tab[j,2]; mu[nrow(mu),ncol(mu)]=mu[nrow(mu),ncol(mu)]+1; end; else do; mu=mu//(tab[j,ncol(tab)]||tab[j,2]||j(1,1,1)); end; end; *print mu; mu=mu||choose(mu[,2]=0, mu[,3], 1); u1_1=(1/n)*(mu[,2]/mu[,3])`*j(nrow(mu),1,1); varu1_1=(NN-n)/(NN*n*(n-1))*((mu[,4]#((mu[,2]/mu[,3])-u1_1))`*((mu[,2]/mu[,3])-u1_1)); Totu1_1=NN*u1_1; TotVaru1_1=NN**2*varu1_1; *print u1_1 varu1_1 Totu1_1 TotVaru1_1; mu=j(1,3,1); mu[1]=tab1[1,ncol(tab1)]; mu[2]=tab1[1,2]; do j=2 to nrow(tab1); if tab1[j,ncol(tab1)]=tab1[j-1,ncol(tab1)] then do; mu[nrow(mu),2]=mu[nrow(mu),2]+tab1[j,2]; mu[nrow(mu),ncol(mu)]=mu[nrow(mu),ncol(mu)]+1; end; else do; mu=mu//(tab1[j,ncol(tab1)]||tab1[j,2]||j(1,1,1)); end; end; *print mu; mu=mu||choose(mu[,2]=0, mu[,3], 1); u1=(1/n)*(mu[,2]/mu[,3])`*j(nrow(mu),1,1); varu1=(NN-n)/(NN*n*(n-1))*((mu[,4]#((mu[,2]/mu[,3])-u1))`*((mu[,2]/mu[,3])-u1)); Totu1=NN*u1; TotVaru1=NN**2*varu1; print "Number of Observations: " n[label=' '],, "Population Size: " NN[label=' ']; Print "Adaptive Cluster Sampling",,"Statistics",, u1[label='Mean'] varu1[label='Var of Mean'] Totu1[label='Sum'] TotVaru1[label='Var of Sum']; *SRS; fsample=tab1[loc(tab1[,4]<=n),1:2]; SRS=fsample[+,2]/n; VARSRS=((1-n/NN)/n)*((fsample[,2]-SRS)`*(fsample[,2]-SRS)/(n-1)); TotSRS=NN*SRS; TotVarSRS=NN**2*varSRS; Print "Simple Random Sampling",,"Statistics",, SRS[label='Mean'] VARSRS[label='Var of Mean'] TotSRS[label='Sum'] TotVarSRS[label='Var of Sum']; *adapSRS; adSRS=tab[+,2]/nrow(tab); VARadSRS=((1-nrow(tab)/NN)/nrow(tab))*((tab[,2]-adSRS)`*(tab[,2]-adSRS)/(nrow(tab)-1)); TotadSRS=NN*adSRS; TotVaradSRS=NN**2*VARadSRS; Print "Adaptive Simple Random Sampling (biased)",,"Statistics",, adSRS[label='Mean'] VARadSRS[label='Var of Mean'] TotadSRS[label='Sum'] TotVaradSRS[label='Var of Sum']; create _estimates_&map._ var{u1_1 varu1_1 Totu1_1 TotVaru1_1 u1 varu1 Totu1 TotVaru1 SRS VARSRS TotSRS TotVarSRS adSRS VARadSRS TotadSRS TotVaradSRS}; append; %end; %else %do; /*** no crossing stratum boundaries ****/ NNh=j(1,2,1);NNh[1]=pop[1,ncol(pop)]; do j=2 to nrow(pop); if pop[j,ncol(pop)]=pop[j-1,ncol(pop)] then NNh[nrow(NNh),2]=NNh[nrow(NNh),2]+1; else NNh=NNh//(pop[j,ncol(pop)]||j(1,1,1)); end; *print NNh; nstrata=NNh[<>,1]; print "Number of Observations: " n[label=' '],, "Population Size: " NN[label=' '],, "Number of Strata: " nstrata[label=' ']; fsample=tab1[loc(tab1[,4]<=n),5]; nh=j(1,2,1); nh[1]=fsample[1]; do j=2 to nrow(fsample); if fsample[j]=fsample[j-1] then nh[nrow(nh),2]=nh[nrow(nh),2]+1; else nh=nh//(fsample[j]||j(1,1,1)); end; *print nh; mu=j(1,4,1); mu[1]=tab1[1,ncol(tab1)-1]; mu[2]=tab1[1,ncol(tab1)]; mu[3]=tab1[1,2]; do j=2 to nrow(tab1); if tab1[j,ncol(tab1)-1]=tab1[j-1,ncol(tab1)-1] & tab1[j,ncol(tab1)]=tab1[j-1,ncol(tab1)] then do; mu[nrow(mu),3]=mu[nrow(mu),3]+tab1[j,2]; mu[nrow(mu),ncol(mu)]=mu[nrow(mu),ncol(mu)]+1; end; else do; mu=mu//(tab1[j,ncol(tab1)-1]||tab1[j,ncol(tab1)]||tab1[j,2]||j(1,1,1)); end; end; *print mu; mu=mu||mu[,3]/mu[,4]||choose(mu[,3]=0, mu[,4], 1)||nh[mu[,1],2]||NNh[mu[,1],2]||j(nrow(mu),1,0); do k=1 to nrow(mu); if mu[k,2]^=0 then do; if tab1[loc(tab1[,1]=mu[k,2]),5]^=mu[k,1] then mu[k,5]=0; end; end; kk=1; var=0; do j=2 to nrow(mu); if mu[j,1]=mu[j-1,1] then var=var+mu[j,5]; else do; mu[kk:j-1,ncol(mu)]=repeat(var/mu[j-1,ncol(mu)-2],j-kk); kk=j; var=mu[j,5]; end; if j=nrow(mu) then mu[kk:j,ncol(mu)]=repeat(var/mu[j,ncol(mu)-2],j-kk); end; u1=(1/NN)*((mu[,ncol(mu)-1]/mu[,ncol(mu)-2])#mu[,5])`*j(nrow(mu),1,1); s2h=(1/(mu[,ncol(mu)-2]-1))#mu[,6]#(mu[,5]-mu[,ncol(mu)])##2; varu1=(1/NN**2)*((mu[,ncol(mu)-1]/mu[,ncol(mu)-2])#(mu[,ncol(mu)-1]-mu[,ncol(mu)-2])#s2h)`*j(nrow(mu),1,1); print "No Crossing Stratum Boundaries",,"Statistics",, u1[label='Mean'] varu1[label='Var of Mean']; /*** crossing stratum boundaries ****/ call sort(tab1,{6,5}); *print tab1; tab2=tab1[loc(tab1[,6]=0),]; *print tab2; mu=j(1,3,1); mu[1]=tab1[1,ncol(tab1)]; mu[2]=tab1[1,2]; do j=2 to nrow(tab1); if tab1[j,ncol(tab1)]=tab1[j-1,ncol(tab1)] then do; mu[nrow(mu),2]=mu[nrow(mu),2]+tab1[j,2]; mu[nrow(mu),ncol(mu)]=mu[nrow(mu),ncol(mu)]+1; end; else do; mu=mu//(tab1[j,ncol(tab1)]||tab1[j,2]||j(1,1,1)); end; end; *print mu; mu=mu[loc(mu[,1]=0)+1:nrow(mu),]; mu2=j(1,4,1); mu2[1]=tab1[1,ncol(tab1)-1]; mu2[2]=tab1[1,ncol(tab1)]; mu2[3]=tab1[1,2]; do j=2 to nrow(tab2); if tab2[j,ncol(tab2)-1]=tab2[j-1,ncol(tab2)-1] & tab2[j,ncol(tab2)]=tab2[j-1,ncol(tab2)] then do; mu2[nrow(mu2),3]=mu2[nrow(mu2),3]+tab1[j,2]; mu2[nrow(mu2),ncol(mu2)]=mu2[nrow(mu2),ncol(mu2)]+1; end; else do; mu2=mu2//(tab2[j,ncol(tab2)-1]||tab2[j,ncol(tab2)]||tab2[j,2]||j(1,1,1)); end; end; *print mu2; mu=j(nrow(mu),1,1)||mu||mu[,2]/mu[,3]||choose(mu[,2]=0, mu[,3], 1); mu2=mu2||j(nrow(mu2),1,0)||choose(mu2[,3]=0, mu2[,4], 1); do k=1 to nrow(mu); if mu[k,2]^=0 then mu[k,1]=tab1[loc(tab1[,1]=mu[k,2]),5]; end; mu=mu//mu2; call sort(mu,{1,2}); mu=mu||nh[mu[,1],2]||NNh[mu[,1],2]||j(nrow(mu),1,0); kk=1; var=0; do j=2 to nrow(mu); if mu[j,1]=mu[j-1,1] then var=var+mu[j,5]; else do; mu[kk:j-1,ncol(mu)]=repeat(var/mu[j-1,ncol(mu)-2],j-kk); kk=j; var=mu[j,5]; end; if j=nrow(mu) then do; if kk