00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 #include <stdio.h>
00009 
00010 #include "./dataloop.h"
00011 
00012 static DLOOP_Count DLOOP_Type_blockindexed_count_contig(DLOOP_Count count,
00013                             DLOOP_Count blklen,
00014                             void *disp_array,
00015                             int dispinbytes,
00016                             DLOOP_Offset old_extent);
00017 
00018 static void DLOOP_Type_blockindexed_array_copy(DLOOP_Count count,
00019                            void *disp_array,
00020                            DLOOP_Offset *out_disp_array,
00021                            int dispinbytes,
00022                            DLOOP_Offset old_extent);
00023 
00024 
00025 
00026 
00027 
00028 
00029 
00030 
00031 
00032 
00033 
00034 
00035 
00036 
00037 
00038 
00039 
00040 int PREPEND_PREFIX(Dataloop_create_blockindexed)(int icount,
00041                          int iblklen,
00042                          void *disp_array,
00043                          int dispinbytes,
00044                          DLOOP_Type oldtype,
00045                          DLOOP_Dataloop **dlp_p,
00046                          int *dlsz_p,
00047                          int *dldepth_p,
00048                          int flag)
00049 {
00050     int err, is_builtin, is_vectorizable = 1;
00051     int i, new_loop_sz, old_loop_depth;
00052 
00053     DLOOP_Count contig_count, count, blklen;
00054     DLOOP_Offset old_extent, eff_disp0, eff_disp1, last_stride;
00055     DLOOP_Dataloop *new_dlp;
00056 
00057     count  = (DLOOP_Count) icount; 
00058     blklen = (DLOOP_Count) iblklen;
00059 
00060     
00061     if (count == 0 || blklen == 0)
00062     {
00063     err = PREPEND_PREFIX(Dataloop_create_contiguous)(0,
00064                              MPI_INT,
00065                              dlp_p,
00066                              dlsz_p,
00067                              dldepth_p,
00068                              flag);
00069     return err;
00070     }
00071 
00072     is_builtin = (DLOOP_Handle_hasloop_macro(oldtype)) ? 0 : 1;
00073 
00074     if (is_builtin)
00075     {
00076     DLOOP_Handle_get_size_macro(oldtype, old_extent);
00077     old_loop_depth = 0;
00078     }
00079     else
00080     {
00081     DLOOP_Handle_get_extent_macro(oldtype, old_extent);
00082     DLOOP_Handle_get_loopdepth_macro(oldtype, old_loop_depth, flag);
00083     }
00084 
00085     contig_count = DLOOP_Type_blockindexed_count_contig(count,
00086                             blklen,
00087                             disp_array,
00088                             dispinbytes,
00089                             old_extent);
00090 
00091     
00092 
00093 
00094 
00095 
00096     if ((contig_count == 1) &&
00097     ((!dispinbytes && ((int *) disp_array)[0] == 0) ||
00098      (dispinbytes && ((MPI_Aint *) disp_array)[0] == 0)))
00099     {
00100     err = PREPEND_PREFIX(Dataloop_create_contiguous)(icount * iblklen,
00101                              oldtype,
00102                              dlp_p,
00103                              dlsz_p,
00104                              dldepth_p,
00105                              flag);
00106     return err;
00107     }
00108 
00109     
00110 
00111 
00112 
00113 
00114     if (contig_count == 1)
00115     {
00116     
00117     blklen *= count;
00118     count = 1;
00119     iblklen *= icount;
00120     icount = 1;
00121     }
00122 
00123     
00124 
00125 
00126 
00127 
00128     eff_disp0 = (dispinbytes) ? ((DLOOP_Offset) ((MPI_Aint *) disp_array)[0]) :
00129     (((DLOOP_Offset) ((int *) disp_array)[0]) * old_extent);
00130 
00131     if (count > 1 && eff_disp0 == (DLOOP_Offset) 0)
00132     {
00133     eff_disp1 = (dispinbytes) ?
00134         ((DLOOP_Offset) ((MPI_Aint *) disp_array)[1]) :
00135         (((DLOOP_Offset) ((int *) disp_array)[1]) * old_extent);
00136     last_stride = eff_disp1 - eff_disp0;
00137 
00138     for (i=2; i < count; i++) {
00139         eff_disp0 = eff_disp1;
00140         eff_disp1 = (dispinbytes) ?
00141         ((DLOOP_Offset) ((MPI_Aint *) disp_array)[i]) :
00142         (((DLOOP_Offset) ((int *) disp_array)[i]) * old_extent);
00143         if (eff_disp1 - eff_disp0 != last_stride) {
00144         is_vectorizable = 0;
00145         break;
00146         }
00147     }
00148     if (is_vectorizable)
00149     {
00150         err = PREPEND_PREFIX(Dataloop_create_vector)(count,
00151                              blklen,
00152                              last_stride,
00153                              1, 
00154                              oldtype,
00155                              dlp_p,
00156                              dlsz_p,
00157                              dldepth_p,
00158                              flag);
00159         return err;
00160     }
00161     }
00162 
00163     
00164 
00165 
00166 
00167 
00168 
00169     
00170 
00171 
00172 
00173 
00174 
00175     
00176 
00177     
00178 
00179 
00180 
00181     if (is_builtin)
00182     {
00183     PREPEND_PREFIX(Dataloop_alloc)(DLOOP_KIND_BLOCKINDEXED,
00184                        count,
00185                        &new_dlp,
00186                        &new_loop_sz);
00187     
00188     if (!new_dlp) return -1;
00189     
00190 
00191     new_dlp->kind = DLOOP_KIND_BLOCKINDEXED | DLOOP_FINAL_MASK;
00192 
00193     if (flag == DLOOP_DATALOOP_ALL_BYTES)
00194     {
00195         blklen            *= old_extent;
00196         new_dlp->el_size   = 1;
00197         new_dlp->el_extent = 1;
00198         new_dlp->el_type   = MPI_BYTE;
00199     }
00200     else
00201     {
00202         new_dlp->el_size   = old_extent;
00203         new_dlp->el_extent = old_extent;
00204         new_dlp->el_type   = oldtype;
00205     }
00206     }
00207     else
00208     {
00209     DLOOP_Dataloop *old_loop_ptr = NULL;
00210     int old_loop_sz = 0;
00211 
00212     DLOOP_Handle_get_loopptr_macro(oldtype, old_loop_ptr, flag);
00213     DLOOP_Handle_get_loopsize_macro(oldtype, old_loop_sz, flag);
00214 
00215     PREPEND_PREFIX(Dataloop_alloc_and_copy)(DLOOP_KIND_BLOCKINDEXED,
00216                         count,
00217                         old_loop_ptr,
00218                         old_loop_sz,
00219                         &new_dlp,
00220                         &new_loop_sz);
00221     
00222     if (!new_dlp) return -1;
00223     
00224 
00225     new_dlp->kind = DLOOP_KIND_BLOCKINDEXED;
00226 
00227     DLOOP_Handle_get_size_macro(oldtype, new_dlp->el_size);
00228     DLOOP_Handle_get_extent_macro(oldtype, new_dlp->el_extent);
00229     DLOOP_Handle_get_basic_type_macro(oldtype, new_dlp->el_type);
00230     }
00231 
00232     new_dlp->loop_params.bi_t.count     = count;
00233     new_dlp->loop_params.bi_t.blocksize = blklen;
00234 
00235     
00236 
00237 
00238 
00239     DLOOP_Type_blockindexed_array_copy(count,
00240                        disp_array,
00241                        new_dlp->loop_params.bi_t.offset_array,
00242                        dispinbytes,
00243                        old_extent);
00244 
00245     *dlp_p     = new_dlp;
00246     *dlsz_p    = new_loop_sz;
00247     *dldepth_p = old_loop_depth + 1;
00248 
00249     return 0;
00250 }
00251 
00252 
00253 
00254 
00255 
00256 
00257 static void DLOOP_Type_blockindexed_array_copy(DLOOP_Count count,
00258                            void *in_disp_array,
00259                            DLOOP_Offset *out_disp_array,
00260                            int dispinbytes,
00261                            DLOOP_Offset old_extent)
00262 {
00263     int i;
00264     if (!dispinbytes)
00265     {
00266     for (i=0; i < count; i++)
00267     {
00268         out_disp_array[i] =
00269         ((DLOOP_Offset) ((int *) in_disp_array)[i]) * old_extent;
00270     }
00271     }
00272     else
00273     {
00274     for (i=0; i < count; i++)
00275     {
00276         out_disp_array[i] =
00277         ((DLOOP_Offset) ((MPI_Aint *) in_disp_array)[i]);
00278     }
00279     }
00280     return;
00281 }
00282 
00283 static DLOOP_Count DLOOP_Type_blockindexed_count_contig(DLOOP_Count count,
00284                             DLOOP_Count blklen,
00285                             void *disp_array,
00286                             int dispinbytes,
00287                             DLOOP_Offset old_extent)
00288 {
00289     int i, contig_count = 1;
00290 
00291     if (!dispinbytes)
00292     {
00293     
00294     DLOOP_Offset cur_tdisp = (DLOOP_Offset) ((int *) disp_array)[0];
00295 
00296     for (i=1; i < count; i++)
00297     {
00298         DLOOP_Offset next_tdisp = (DLOOP_Offset) ((int *) disp_array)[i];
00299 
00300         if (cur_tdisp + blklen != next_tdisp)
00301         {
00302         contig_count++;
00303         }
00304         cur_tdisp = next_tdisp;
00305     }
00306     }
00307     else
00308     {
00309     
00310     DLOOP_Offset cur_bdisp = (DLOOP_Offset) ((MPI_Aint *) disp_array)[0];
00311 
00312     for (i=1; i < count; i++)
00313     {
00314         DLOOP_Offset next_bdisp =
00315         (DLOOP_Offset) ((MPI_Aint *) disp_array)[i];
00316 
00317         if (cur_bdisp + (DLOOP_Offset) blklen * old_extent != next_bdisp)
00318         {
00319         contig_count++;
00320         }
00321         cur_bdisp = next_bdisp;
00322     }
00323     }
00324     return contig_count;
00325 }