tarting(3) MPI tarting(3)
NAME
tarting - ring and recursive-doubling stages of the CUDA-enabled allgather
path in MVAPICH2 (MV2), staged through the host buffer
mv2_cuda_allgather_store_buf
SYNOPSIS
        /* Starting the RING stages */
        left  = (comm_size + rank - 1) % comm_size;
        right = (rank + 1) % comm_size;
        j     = rank;
        jnext = left;

        mpi_errno = MPIC_Irecv(((char *)mv2_cuda_allgather_store_buf +
                        jnext * recvcount * recvtype_extent),
                        recvcount * recvtype_extent, MPI_BYTE,
                        left, MPIR_ALLGATHER_TAG, comm, &recv_req);
        mpi_errno = MPIC_Isend(((char *)recvbuf +
                        j * recvcount * recvtype_extent),
                        recvcount * recvtype_extent, MPI_BYTE,
                        right, MPIR_ALLGATHER_TAG, comm, &send_req);
        mpi_errno = MPIC_Waitall_ft(1, &recv_req, &status, errflag);
        if (mpi_errno) {
            /* for communication errors, just record the error but continue */
            *errflag = TRUE;
            MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
            MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
        }
        MPIU_Memcpy_CUDA_Async((void *)((char *)recvbuf +
                        jnext * recvcount * recvtype_extent),
                        (void *)((char *)mv2_cuda_allgather_store_buf +
                        jnext * recvcount * recvtype_extent),
                        recvcount * recvtype_extent,
                        cudaMemcpyHostToDevice, 0);

        mpi_errno = MPIC_Waitall_ft(1, &send_req, &status, errflag);
        if (mpi_errno) {
            /* for communication errors, just record the error but continue */
            *errflag = TRUE;
            MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
            MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
        }

        j = jnext;
        jnext = (comm_size + jnext - 1) % comm_size;
        /* Intermediate steps of communication */
        for (i = 2; i < comm_size - 1; i++) {
            mpi_errno = MPIC_Irecv(((char *)mv2_cuda_allgather_store_buf +
                            jnext * recvcount * recvtype_extent),
                            recvcount, recvtype, left,
                            MPIR_ALLGATHER_TAG, comm, &recv_req);
            mpi_errno = MPIC_Isend(((char *)mv2_cuda_allgather_store_buf +
                            j * recvcount * recvtype_extent),
                            recvcount, recvtype, right,
                            MPIR_ALLGATHER_TAG, comm, &send_req);
            mpi_errno = MPIC_Waitall_ft(1, &recv_req, &status, errflag);
            if (mpi_errno) {
                /* for communication errors, just record the error but continue */
                *errflag = TRUE;
                MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
            }
            MPIU_Memcpy_CUDA_Async((void *)((char *)recvbuf +
                            jnext * recvcount * recvtype_extent),
                            (void *)((char *)mv2_cuda_allgather_store_buf +
                            jnext * recvcount * recvtype_extent),
                            recvcount * recvtype_extent,
                            cudaMemcpyHostToDevice, 0);
            mpi_errno = MPIC_Waitall_ft(1, &send_req, &status, errflag);
            if (mpi_errno) {
                /* for communication errors, just record the error but continue */
                *errflag = TRUE;
                MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
            }
            j = jnext;
            jnext = (comm_size + jnext - 1) % comm_size;
        }
        /* Last stage of communication - copy directly to device */
        if (i < comm_size) {
            mpi_errno = MPIC_Irecv(((char *)recvbuf +
                            jnext * recvcount * recvtype_extent),
                            recvcount, recvtype, left,
                            MPIR_ALLGATHER_TAG, comm, &recv_req);
            mpi_errno = MPIC_Isend(((char *)mv2_cuda_allgather_store_buf +
                            j * recvcount * recvtype_extent),
                            recvcount, recvtype, right,
                            MPIR_ALLGATHER_TAG, comm, &send_req);
            mpi_errno = MPIC_Waitall_ft(1, &recv_req, &status, errflag);
            if (mpi_errno) {
                /* for communication errors, just record the error but continue */
                *errflag = TRUE;
                MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
            }
            mpi_errno = MPIC_Waitall_ft(1, &send_req, &status, errflag);
            if (mpi_errno) {
                /* for communication errors, just record the error but continue */
                *errflag = TRUE;
                MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
            }
        }
    } else {
        /* Recursive Doubling */
        MPI_Request recv_req;
        MPI_Request send_req;

        if (sendbuf != MPI_IN_PLACE) {
            mpi_errno = MPIR_Localcopy(sendbuf, sendcount, sendtype,
                            ((char *)recvbuf +
                             rank * recvcount * recvtype_extent),
                            recvcount, recvtype);
            if (mpi_errno) {
                MPIU_ERR_POP(mpi_errno);
            }
        }

        curr_cnt = recvcount;
        mask = 0x1;
        i = 0;

        dst = rank ^ mask;
        dst_tree_root = dst >> i;  dst_tree_root <<= i;
        my_tree_root  = rank >> i; my_tree_root  <<= i;

        /* FIXME: saving an MPI_Aint into an int */
        send_offset = my_tree_root * recvcount * recvtype_extent;
        recv_offset = dst_tree_root * recvcount * recvtype_extent;

        if (dst < comm_size) {
            MPIU_Memcpy_CUDA((void *)((char *)mv2_cuda_allgather_store_buf +
                            rank * recvcount * recvtype_extent),
                            (void *)((char *)recvbuf +
                            rank * recvcount * recvtype_extent),
                            recvcount * recvtype_extent,
                            cudaMemcpyDeviceToHost);

            mpi_errno = MPIC_Irecv(((char *)mv2_cuda_allgather_store_buf +
                            recv_offset), (mask) * recvcount, recvtype, dst,
                            MPIR_ALLGATHER_TAG, comm, &recv_req);
            mpi_errno = MPIC_Isend(((char *)mv2_cuda_allgather_store_buf +
                            send_offset), curr_cnt, recvtype, dst,
                            MPIR_ALLGATHER_TAG, comm, &send_req);

            mpi_errno = MPIC_Waitall_ft(1, &recv_req, &status, errflag);
            if (mpi_errno) {
                /* for communication errors, just record the error but continue */
                *errflag = TRUE;
                MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
            }
            MPIU_Memcpy_CUDA_Async((void *)((char *)recvbuf + recv_offset),
                            (void *)((char *)mv2_cuda_allgather_store_buf +
                            recv_offset),
                            (mask) * recvcount * recvtype_extent,
                            cudaMemcpyHostToDevice, 0);
            mpi_errno = MPIC_Waitall_ft(1, &send_req, &status, errflag);
            if (mpi_errno) {
                /* for communication errors, just record the error but continue */
                *errflag = TRUE;
                MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
            }
            curr_cnt += mask * recvcount;
        }

        mask <<= 1;
        i++;
        while (mask < comm_size) {
            dst = rank ^ mask;

            /* find offset into send and recv buffers. zero out the least
               significant "i" bits of rank and dst to find root of src and
               dst subtrees. Use ranks of roots as index to send from and
               recv into buffer */
            dst_tree_root = dst >> i;  dst_tree_root <<= i;
            my_tree_root  = rank >> i; my_tree_root  <<= i;

            /* FIXME: saving an MPI_Aint into an int */
            send_offset = my_tree_root * recvcount * recvtype_extent;
            recv_offset = dst_tree_root * recvcount * recvtype_extent;

            if (dst < comm_size) {
                if (mask == comm_size / 2) {
                    mpi_errno = MPIC_Irecv(((char *)recvbuf + recv_offset),
                                    (mask) * recvcount, recvtype, dst,
                                    MPIR_ALLGATHER_TAG, comm, &recv_req);
                } else {
                    mpi_errno = MPIC_Irecv(((char *)mv2_cuda_allgather_store_buf +
                                    recv_offset), (mask) * recvcount, recvtype,
                                    dst, MPIR_ALLGATHER_TAG, comm, &recv_req);
                }
                mpi_errno = MPIC_Isend(((char *)mv2_cuda_allgather_store_buf +
                                send_offset), curr_cnt, recvtype, dst,
                                MPIR_ALLGATHER_TAG, comm, &send_req);
                mpi_errno = MPIC_Waitall_ft(1, &recv_req, &status, errflag);
                if (mpi_errno) {
                    /* for communication errors, just record the error but continue */
                    *errflag = TRUE;
                    MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                    MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                }
                if (mask < comm_size / 2) {
                    MPIU_Memcpy_CUDA_Async((void *)((char *)recvbuf + recv_offset),
                                    (void *)((char *)mv2_cuda_allgather_store_buf +
                                    recv_offset),
                                    (mask) * recvcount * recvtype_extent,
                                    cudaMemcpyHostToDevice, 0);
                }
                mpi_errno = MPIC_Waitall_ft(1, &send_req, &status, errflag);
                if (mpi_errno) {
                    /* for communication errors, just record the error but continue */
                    *errflag = TRUE;
                    MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                    MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                }
                curr_cnt += mask * recvcount;
            }
            mask <<= 1;
            i++;
        }
    }
    /* wait for the receive copies into the device to complete */
    cudaerr = cudaEventRecord(*mv2_cuda_sync_event, 0);
    if (cudaerr != cudaSuccess) {
        mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE,
                        FCNAME, __LINE__, MPI_ERR_OTHER,
                        "**cudaEventRecord", 0);
        return mpi_errno;
    }
    cudaEventSynchronize(*mv2_cuda_sync_event);

    /* check if multiple threads are calling this collective function */
    MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT(comm_ptr);
  fn_fail:
    return (mpi_errno);
}
/* end:nested */
#endif /* #if defined(_OSU_MVAPICH_) || defined(_OSU_PSM_) */
#endif /* #ifdef(_ENABLE_CUDA_) */
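NOTES
The ring branch above circulates the blocks around the communicator: in every
step but the last, a rank receives one block from its left neighbour into the
host staging buffer mv2_cuda_allgather_store_buf, sends the block it obtained
in the previous step (its own block in the first step) to its right neighbour,
and pushes the newly received block to the device with MPIU_Memcpy_CUDA_Async;
the last step receives straight into the device buffer. The sketch below is
not part of allgather_cuda_osu.c: it shows the same left/right/j/jnext index
arithmetic with plain MPI calls and host memory only, and the function name
ring_allgather_sketch as well as the use of MPI_Sendrecv are illustrative
assumptions.

    #include <mpi.h>
    #include <string.h>

    /* Each rank contributes `count` ints; on return recvbuf holds
     * comm_size * count ints, ordered by rank. */
    static void ring_allgather_sketch(const int *sendbuf, int count,
                                      int *recvbuf, MPI_Comm comm)
    {
        int rank, comm_size, step;
        MPI_Comm_rank(comm, &rank);
        MPI_Comm_size(comm, &comm_size);

        /* own block goes into its slot first (the MPIR_Localcopy step) */
        memcpy(recvbuf + rank * count, sendbuf, count * sizeof(int));

        int left  = (comm_size + rank - 1) % comm_size;  /* receive from */
        int right = (rank + 1) % comm_size;              /* send to      */
        int j     = rank;   /* block forwarded in this step */
        int jnext = left;   /* block received in this step  */

        /* comm_size - 1 steps: forward the block received in the previous
         * step to the right while receiving the next block from the left */
        for (step = 1; step < comm_size; step++) {
            MPI_Sendrecv(recvbuf + j * count, count, MPI_INT, right, 0,
                         recvbuf + jnext * count, count, MPI_INT, left, 0,
                         comm, MPI_STATUS_IGNORE);
            j = jnext;
            jnext = (comm_size + jnext - 1) % comm_size;
        }
    }

Called with one int per rank, recvbuf[k] ends up holding rank k's contribution
on every rank. What the excerpt adds to this pattern is the staging path:
receives land in the host buffer, MPIU_Memcpy_CUDA_Async moves each block to
the device on stream 0, and the final cudaEventRecord/cudaEventSynchronize
pair on *mv2_cuda_sync_event fences all of those copies before the function
returns.

The recursive-doubling branch pairs each rank with dst = rank ^ mask and
doubles mask every round, so the number of blocks a rank holds doubles per
round; the subtree roots obtained by clearing the low i bits of rank and dst
index the send and receive regions of the buffer. The second sketch, also not
from the source file, only prints the per-round partners and offsets: a
power-of-two comm_size is assumed, block_bytes stands in for
recvcount * recvtype_extent, and the excerpt's first round (mask == 0x1, which
also performs the device-to-host copy of the local block) is folded into the
loop here for brevity.

    #include <stdio.h>

    static void rd_offsets_sketch(int rank, int comm_size, long block_bytes)
    {
        int mask = 0x1, i = 0;
        int curr_cnt = 1;                  /* blocks held so far */

        while (mask < comm_size) {
            int dst = rank ^ mask;         /* exchange partner this round */

            /* clear the least significant i bits of rank and dst to find
             * the roots of the two subtrees being merged; the roots' ranks
             * index the send and receive regions of the buffer */
            int dst_tree_root = (dst >> i) << i;
            int my_tree_root  = (rank >> i) << i;

            long send_offset = my_tree_root  * block_bytes;
            long recv_offset = dst_tree_root * block_bytes;

            printf("rank %d, round %d: partner %d, send %d blocks at %ld, "
                   "recv %d blocks at %ld\n",
                   rank, i, dst, curr_cnt, send_offset, mask, recv_offset);

            curr_cnt += mask;              /* holdings double each round */
            mask <<= 1;
            i++;
        }
    }

In the excerpt (with a power-of-two comm_size), every round except the last
receives into the host staging buffer and copies the data up asynchronously,
while the last round (mask == comm_size/2) receives directly into the device
buffer.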
LOCATION
src/mpi/coll/allgather_cuda_osu.c
8/22/2013 tarting(3)