ack man page on RedHat
[printable version]
ack(3) MPI ack(3)
NAME
ack - ata for non-contiguous buffer*/ if ((!sendtype_iscontig) &&
(sendbuf != MPI_IN_PLACE)) { int sendtype_size=0; int position = 0;
MPIR_Pack_size_impl(1, sendtype, &sendtype_size); send_nbytes= sendcnt
* sendtype_size; MPIU_CHKLMEM_MALLOC(local_sendbuf, void *,
send_nbytes, mpi_errno, "local_sendbuf"); MPIR_Pack_impl(sendbuf, send‐
cnt, sendtype, local_sendbuf, send_nbytes, &position); } else {
local_sendbuf = (void *)sendbuf; send_nbytes=nbytes; }
SYNOPSIS
#include ntra socket gather*/
#include nter socket gather*/
if (intra_sock_rank == 0) { if (intra_node_leader_rank == 0) { /* Node
leaders, allocate large buffers which is used to gather * data for the
entire node. The same buffer is used for inter-node * gather as well.
This saves us a memcpy operation*/ if (rank == root) { intra_tmp_buf =
MPIU_Malloc(recvcnt * MPIR_MAX(recvtype_extent, recvtype_true_extent) *
local_size); } else { intra_tmp_buf = MPIU_Malloc(send_nbytes *
local_size); }
} else {
/* Socket leader, allocate tmp_buffer */ if (rank == root) {
intra_tmp_buf = MPIU_Malloc(recvcnt * MPIR_MAX(recvtype_extent, recv‐
type_true_extent) * intra_sock_comm_size); } else { intra_tmp_buf =
MPIU_Malloc(send_nbytes * intra_sock_comm_size); }
}
if (intra_tmp_buf == NULL) { mpi_errno = MPIR_Err_create_code(MPI_SUC‐
CESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem",
0); return mpi_errno; }
/*Local copy of buffer*/ if(sendbuf != MPI_IN_PLACE) { MPIU_Mem‐
cpy(intra_tmp_buf, local_sendbuf, send_nbytes); } else { MPIR_Local‐
copy(((char *) recvbuf +rank * recvcnt * recvtype_extent), recvcnt,
recvtype, intra_tmp_buf, send_nbytes, MPI_BYTE); } }
if(local_rank !=0 && sendbuf == MPI_IN_PLACE) { mpi_errno =
MPIR_Limic_Gather_OSU(intra_tmp_buf, (intra_sock_comm_size *
send_nbytes), (recvbuf + (rank*nbytes)), nbytes, intra_sock_commptr );
} else { mpi_errno = MPIR_Limic_Gather_OSU(intra_tmp_buf,
(intra_sock_comm_size * send_nbytes), local_sendbuf, send_nbytes,
intra_sock_commptr ); } if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
/*Inter socket gather*/ if(intra_sock_rank == 0) { /*When data in each
socket is different*/ if (shmem_commptr->ch.is_socket_uniform != 1) {
int *displs = NULL; int *recvcnts = NULL; int *socket_sizes; int i = 0;
socket_sizes = shmem_commptr->ch.socket_size;
if (intra_node_leader_rank == 0) { tmp_buf = intra_tmp_buf;
displs = MPIU_Malloc(sizeof (int) * intra_node_leader_comm_size);
recvcnts = MPIU_Malloc(sizeof (int) * intra_node_leader_comm_size); if
(!displs || !recvcnts) { mpi_errno = MPIR_Err_create_code(MPI_SUCCESS,
MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem", 0);
return mpi_errno; }
recvcnts[0] = socket_sizes[0] * nbytes; displs[0] = 0;
for (i = 1; i < intra_node_leader_comm_size; i++) { displs[i] = dis‐
pls[i - 1] + socket_sizes[i - 1] * nbytes; recvcnts[i] =
socket_sizes[i] * nbytes; }
mpi_errno = MPIR_Gatherv(MPI_IN_PLACE, intra_sock_comm_size * nbytes,
MPI_BYTE, tmp_buf, recvcnts, displs, MPI_BYTE, 0,
intra_node_leader_commptr, errflag);
/*Free the displacement and recvcnts buffer*/ MPIU_Free(displs);
MPIU_Free(recvcnts);
} else { mpi_errno = MPIR_Gatherv(intra_tmp_buf, intra_sock_comm_size *
nbytes, MPI_BYTE, tmp_buf, recvcnts, displs, MPI_BYTE, 0,
intra_node_leader_commptr, errflag);
} } else {
if (intra_node_leader_rank == 0) { tmp_buf = intra_tmp_buf;
/*We have now completed the intra_sock gather and all the * socket
level leaders have data in their tmp_buf. So we * set sendbuf =
MPI_IN_PLACE and also explicity set the * is_data_avail=
TEMP_BUF_HAS_DATA*/ mpi_errno = MPIR_pt_pt_intra_gather(MPI_IN_PLACE,
(send_nbytes*intra_sock_comm_size), MPI_BYTE, recvbuf, recvcnt, recv‐
type, root, rank, tmp_buf, (send_nbytes*intra_sock_comm_size),
TEMP_BUF_HAS_DATA, intra_node_leader_commptr, intra_node_fn_ptr,
errflag); } else {
/*After the intra_sock gather, all the node level leaders * have the
data in intra_tmp_buf(sendbuf) and this is gathered into * tmp_buf.
Since the tmp_buf(in non-root processes) does not have * the data in
tmp_buf is_data_avail = TEMP_BUF_HAS_NO_DATA*/ mpi_errno =
MPIR_pt_pt_intra_gather(intra_tmp_buf,
(send_nbytes*intra_sock_comm_size), MPI_BYTE, recvbuf, recvcnt, recv‐
type, root, rank, tmp_buf, (send_nbytes*intra_sock_comm_size),
TEMP_BUF_HAS_NO_DATA, intra_node_leader_commptr, intra_node_fn_ptr,
errflag); } }
if (mpi_errno) { MPIU_ERR_POP(mpi_errno); } }
FN_FAIL
/*Free the intra socket leader buffers*/ if (intra_sock_rank == 0) { if
((intra_node_leader_rank != 0) && (intra_tmp_buf != NULL)) {
MPIU_Free(intra_tmp_buf); } } MPIU_CHKLMEM_FREEALL(); return
(mpi_errno); }
#undef FUNCNAME
#define FUNCNAME MPIR_Limic_Gather_Scheme_LINEAR_PT
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
/*
 * Two-stage intra-node gather:
 *   1. every process takes part in MPIR_pt_pt_intra_gather() over the
 *      intra-socket communicator, using intra_node_fn_ptr as the gather
 *      function and intra_tmp_buf as the temporary buffer;
 *   2. processes with intra-socket rank 0 then call MPIR_Limic_Gather_OSU()
 *      over the intra-socket-leader communicator.
 *
 * On the process whose rank in the leader communicator is 0, intra_tmp_buf
 * is sized for local_size contributions and is published through tmp_buf
 * (declared outside this function); this function does not free it.
 * The other socket leaders free their intra_tmp_buf before returning.
 *
 * Returns MPI_SUCCESS when this process has nothing to contribute
 * (recvcnt == 0 on root, sendcnt == 0 elsewhere), otherwise the MPI error
 * code produced by the allocation or by the gather calls.
 */
static int MPIR_Limic_Gather_Scheme_LINEAR_PT(const void *sendbuf, int sendcnt,
                                              MPI_Datatype sendtype,
                                              void *recvbuf, int recvcnt,
                                              MPI_Datatype recvtype, int root,
                                              MPID_Comm * comm_ptr,
                                              MV2_Gather_function_ptr intra_node_fn_ptr,
                                              int *errflag)
{
    void *intra_tmp_buf = NULL;
    int rank;
    int local_size;
    int mpi_errno = MPI_SUCCESS;
    int recvtype_size = 0, sendtype_size = 0, nbytes = 0;
    /* Only written when sendtype is non-NULL; give it a defined value. */
    int sendtype_iscontig = 1;
    int intra_sock_rank = 0, intra_sock_comm_size = 0;
    int intra_node_leader_rank = 0;
    MPI_Aint sendtype_extent = 0, recvtype_extent = 0;  /* Datatype extent */
    /* The true extents are only filled in for non-NULL datatypes. */
    MPI_Aint true_lb = 0, sendtype_true_extent = 0, recvtype_true_extent = 0;
    MPI_Comm shmem_comm;
    MPID_Comm *shmem_commptr;
    MPID_Comm *intra_sock_commptr = NULL, *intra_node_leader_commptr = NULL;

    rank = comm_ptr->rank;

    if (((rank == root) && (recvcnt == 0)) ||
        ((rank != root) && (sendcnt == 0))) {
        return MPI_SUCCESS;
    }

    if (sendtype != MPI_DATATYPE_NULL) {
        MPIR_Datatype_iscontig(sendtype, &sendtype_iscontig);
        MPID_Datatype_get_extent_macro(sendtype, sendtype_extent);
        MPID_Datatype_get_size_macro(sendtype, sendtype_size);
        MPIR_Type_get_true_extent_impl(sendtype, &true_lb,
                                       &sendtype_true_extent);
    }
    if (recvtype != MPI_DATATYPE_NULL) {
        MPID_Datatype_get_extent_macro(recvtype, recvtype_extent);
        MPID_Datatype_get_size_macro(recvtype, recvtype_size);
        MPIR_Type_get_true_extent_impl(recvtype, &true_lb,
                                       &recvtype_true_extent);
    }

    /* extract the rank,size information for the intra-node
     * communicator */
    shmem_comm = comm_ptr->ch.shmem_comm;
    MPID_Comm_get_ptr(shmem_comm, shmem_commptr);
    local_size = shmem_commptr->local_size;

    if (rank == root) {
        nbytes = recvcnt * recvtype_size;
    } else {
        nbytes = sendcnt * sendtype_size;
    }

    if (shmem_commptr->ch.use_intra_sock_comm == 1) {
        MPID_Comm_get_ptr(shmem_commptr->ch.intra_sock_comm,
                          intra_sock_commptr);
        MPID_Comm_get_ptr(shmem_commptr->ch.intra_sock_leader_comm,
                          intra_node_leader_commptr);

        intra_sock_rank = intra_sock_commptr->rank;
        intra_sock_comm_size = intra_sock_commptr->local_size;
        if (intra_sock_rank == 0) {
            intra_node_leader_rank = intra_node_leader_commptr->rank;
        }
    }

    if (intra_sock_rank == 0) {
        if (intra_node_leader_rank == 0) {
            /* Node leaders, allocate large buffers which is used to gather
             * data for the entire node. The same buffer is used for
             * inter-node gather as well. This saves us a memcpy operation */
            if (rank == root) {
                intra_tmp_buf =
                    MPIU_Malloc(recvcnt *
                                MPIR_MAX(recvtype_extent, recvtype_true_extent) *
                                local_size);
            } else {
                intra_tmp_buf =
                    MPIU_Malloc(sendcnt *
                                MPIR_MAX(sendtype_extent, sendtype_true_extent) *
                                local_size);
            }
        } else {
            /* Socket leader, allocate tmp_buffer */
            if (rank == root) {
                intra_tmp_buf =
                    MPIU_Malloc(recvcnt *
                                MPIR_MAX(recvtype_extent, recvtype_true_extent) *
                                intra_sock_comm_size);
            } else {
                intra_tmp_buf =
                    MPIU_Malloc(sendcnt *
                                MPIR_MAX(sendtype_extent, sendtype_true_extent) *
                                intra_sock_comm_size);
            }
        }
        if (intra_tmp_buf == NULL) {
            /* Nothing has been allocated yet, so a direct return is safe. */
            mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                             FCNAME, __LINE__, MPI_ERR_OTHER,
                                             "**nomem", 0);
            return mpi_errno;
        }
    }

    /*Intra socket gather*/
    /*We are gathering the data into intra_tmp_buf and the output
     * will be of MPI_BYTE datatype. Since the tmp_buf has no
     * local data, we pass is_data_avail = TEMP_BUF_HAS_NO_DATA*/
    mpi_errno = MPIR_pt_pt_intra_gather(sendbuf, sendcnt, sendtype,
                                        recvbuf, recvcnt, recvtype,
                                        root, rank,
                                        intra_tmp_buf, nbytes,
                                        TEMP_BUF_HAS_NO_DATA,
                                        intra_sock_commptr,
                                        intra_node_fn_ptr,
                                        errflag);
    if (mpi_errno) {
        MPIU_ERR_POP(mpi_errno);
    }

    /*Inter socket gather*/
    if (intra_sock_rank == 0) {
        if (intra_node_leader_rank == 0) {
            tmp_buf = intra_tmp_buf;
        }
        mpi_errno = MPIR_Limic_Gather_OSU(tmp_buf, (local_size * nbytes),
                                          intra_tmp_buf,
                                          (intra_sock_comm_size * nbytes),
                                          intra_node_leader_commptr);
    }
    if (mpi_errno) {
        MPIU_ERR_POP(mpi_errno);
    }

  fn_fail:
    /*Free the intra socket leader buffers*/
    /* NOTE(review): if the intra-socket gather fails on the node leader,
     * intra_tmp_buf has not yet been handed to tmp_buf and is not freed
     * here - confirm who owns it on that path. */
    if (intra_sock_rank == 0) {
        if ((intra_node_leader_rank != 0) && (intra_tmp_buf != NULL)) {
            MPIU_Free(intra_tmp_buf);
        }
    }

    return (mpi_errno);
}
#undef FUNCNAME #define FUNCNAME MPIR_Limic_Gather_Scheme_LINEAR_LINEAR
#undef FCNAME #define FCNAME MPIU_QUOTE(FUNCNAME) static int
MPIR_Limic_Gather_Scheme_LINEAR_LINEAR( const void *sendbuf,int send‐
cnt, MPI_Datatype sendtype, void *recvbuf, int recvcnt,MPI_Datatype
recvtype, int root, MPID_Comm * comm_ptr, int *errflag) { void
*intra_tmp_buf = NULL; void *local_sendbuf=NULL; int rank; int
local_rank, local_size; int mpi_errno = MPI_SUCCESS; int recvtype_size
= 0, sendtype_size = 0, nbytes=0; int sendtype_iscontig; int
intra_sock_rank=0, intra_sock_comm_size=0; int
intra_node_leader_rank=0; int send_nbytes=0; MPI_Aint recvtype_extent =
0; /* Datatype extent */ MPI_Aint true_lb, sendtype_true_extent, recv‐
type_true_extent; MPI_Comm shmem_comm; MPID_Comm *shmem_commptr;
MPID_Comm *intra_sock_commptr = NULL, *intra_node_leader_commptr=NULL;
MPIU_CHKLMEM_DECL(1);
rank = comm_ptr->rank;
if (((rank == root) && (recvcnt == 0)) || ((rank != root) && (sendcnt
== 0))) { return MPI_SUCCESS; }
if (sendtype != MPI_DATATYPE_NULL) { MPIR_Datatype_iscontig(sendtype,
&sendtype_iscontig); MPID_Datatype_get_size_macro(sendtype, send‐
type_size); MPIR_Type_get_true_extent_impl(sendtype, &true_lb, &send‐
type_true_extent); } if (recvtype != MPI_DATATYPE_NULL) {
MPID_Datatype_get_extent_macro(recvtype, recvtype_extent);
MPID_Datatype_get_size_macro(recvtype, recvtype_size);
MPIR_Type_get_true_extent_impl(recvtype, &true_lb, &recv‐
type_true_extent); }
/* extract the rank,size information for the intra-node * communicator
*/ shmem_comm = comm_ptr->ch.shmem_comm; MPID_Comm_get_ptr(shmem_comm,
shmem_commptr); local_rank = shmem_commptr->rank; local_size =
shmem_commptr->local_size;
if (rank == root) { nbytes = recvcnt * recvtype_size;
} else { nbytes = sendcnt * sendtype_size; }
if(shmem_commptr->ch.use_intra_sock_comm == 1) {
MPID_Comm_get_ptr(shmem_commptr->ch.intra_sock_comm,
intra_sock_commptr);
MPID_Comm_get_ptr(shmem_commptr->ch.intra_sock_leader_comm,
intra_node_leader_commptr);
intra_sock_rank = intra_sock_commptr->rank; intra_sock_comm_size =
intra_sock_commptr->local_size; if(intra_sock_rank == 0) {
intra_node_leader_rank = intra_node_leader_commptr->rank; } }
/*Pack data for non-contiguous buffer*/ if ((!sendtype_iscontig) &&
(sendbuf != MPI_IN_PLACE)) {
int sendtype_size=0; int position = 0; MPIR_Pack_size_impl(1, sendtype,
&sendtype_size); send_nbytes= sendcnt * sendtype_size;
MPIU_CHKLMEM_MALLOC(local_sendbuf, void *, send_nbytes, mpi_errno,
"local_sendbuf"); MPIR_Pack_impl(sendbuf, sendcnt, sendtype,
local_sendbuf, send_nbytes, &position); } else { local_sendbuf = (void
*)sendbuf; send_nbytes = nbytes; }
if (intra_sock_rank == 0) { if (intra_node_leader_rank == 0) { /* Node
leaders, allocate large buffers which is used to gather * data for the
entire node. The same buffer is used for inter-node * gather as well.
This saves us a memcpy operation*/ if (rank == root) { intra_tmp_buf =
MPIU_Malloc(recvcnt * MPIR_MAX(recvtype_extent, recvtype_true_extent) *
local_size); } else { intra_tmp_buf = MPIU_Malloc(send_nbytes *
local_size); }
} else {
/* Socket leader, allocate tmp_buffer */ if (rank == root) {
intra_tmp_buf = MPIU_Malloc(recvcnt * MPIR_MAX(recvtype_extent, recv‐
type_true_extent) * intra_sock_comm_size); } else { intra_tmp_buf =
MPIU_Malloc(send_nbytes * intra_sock_comm_size); }
} if (intra_tmp_buf == NULL) { mpi_errno = MPIR_Err_cre‐
ate_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__,
MPI_ERR_OTHER, "**nomem", 0); return mpi_errno; }
/*Local copy of buffer*/ if(sendbuf != MPI_IN_PLACE) { MPIU_Mem‐
cpy(intra_tmp_buf, local_sendbuf, send_nbytes); } else { MPIR_Local‐
copy(((char *) recvbuf +rank * recvcnt * recvtype_extent), recvcnt,
recvtype, intra_tmp_buf, send_nbytes, MPI_BYTE); } }
if(local_rank !=0 && sendbuf == MPI_IN_PLACE) { mpi_errno =
MPIR_Limic_Gather_OSU(intra_tmp_buf, (intra_sock_comm_size *
send_nbytes), (recvbuf + (rank*nbytes)), nbytes, intra_sock_commptr); }
else { mpi_errno = MPIR_Limic_Gather_OSU(intra_tmp_buf,
(intra_sock_comm_size * send_nbytes), local_sendbuf, send_nbytes,
intra_sock_commptr ); } if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
/*Inter socket gather*/ if(intra_sock_rank == 0) { if
(intra_node_leader_rank == 0) { tmp_buf = intra_tmp_buf; } mpi_errno =
MPIR_Limic_Gather_OSU(tmp_buf, (local_size * send_nbytes),
intra_tmp_buf, (intra_sock_comm_size * send_nbytes),
intra_node_leader_commptr ); }
if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
FN_FAIL
/*Free the intra socket leader buffers*/ if (intra_sock_rank == 0) { if
((intra_node_leader_rank != 0) && (intra_tmp_buf != NULL)) {
MPIU_Free(intra_tmp_buf); } }
MPIU_CHKLMEM_FREEALL(); return (mpi_errno); }
#undef FUNCNAME #define FUNCNAME MPIR_Limic_Gather_Scheme_SINGLE_LEADER
#undef FCNAME #define FCNAME MPIU_QUOTE(FUNCNAME) static int
MPIR_Limic_Gather_Scheme_SINGLE_LEADER( const void *sendbuf,int send‐
cnt, MPI_Datatype sendtype, void *recvbuf, int recvcnt,MPI_Datatype
recvtype, int root, MPID_Comm * comm_ptr, int *errflag) { void
*local_sendbuf=NULL; int rank; int local_rank, local_size; int
mpi_errno = MPI_SUCCESS; int recvtype_size = 0, sendtype_size = 0,
nbytes=0; int sendtype_iscontig; int send_nbytes=0; MPI_Aint recv‐
type_extent = 0; /* Datatype extent */ MPI_Aint true_lb, send‐
type_true_extent, recvtype_true_extent; MPI_Comm shmem_comm; MPID_Comm
*shmem_commptr; MPIU_CHKLMEM_DECL(1);
rank = comm_ptr->rank;
if (((rank == root) && (recvcnt == 0)) || ((rank != root) && (sendcnt
== 0))) { return MPI_SUCCESS; }
if (sendtype != MPI_DATATYPE_NULL) { MPIR_Datatype_iscontig(sendtype,
&sendtype_iscontig); MPID_Datatype_get_size_macro(sendtype, send‐
type_size); MPIR_Type_get_true_extent_impl(sendtype, &true_lb, &send‐
type_true_extent); } if (recvtype != MPI_DATATYPE_NULL) {
MPID_Datatype_get_extent_macro(recvtype, recvtype_extent);
MPID_Datatype_get_size_macro(recvtype, recvtype_size);
MPIR_Type_get_true_extent_impl(recvtype, &true_lb, &recv‐
type_true_extent); }
/* extract the rank,size information for the intra-node * communicator
*/ shmem_comm = comm_ptr->ch.shmem_comm; MPID_Comm_get_ptr(shmem_comm,
shmem_commptr); local_rank = shmem_commptr->rank; local_size =
shmem_commptr->local_size;
if (rank == root) { nbytes = recvcnt * recvtype_size;
} else { nbytes = sendcnt * sendtype_size; }
/*Pack data for non-contiguous buffer*/ if ((!sendtype_iscontig) &&
(sendbuf != MPI_IN_PLACE)) {
int sendtype_size=0; int position = 0; MPIR_Pack_size_impl(1, sendtype,
&sendtype_size); send_nbytes= sendcnt * sendtype_size;
MPIU_CHKLMEM_MALLOC(local_sendbuf, void *, send_nbytes, mpi_errno,
"local_sendbuf"); MPIR_Pack_impl(sendbuf, sendcnt, sendtype,
local_sendbuf, send_nbytes, &position); } else { local_sendbuf = (void
*)sendbuf; send_nbytes = nbytes; }
if (local_rank == 0) { /* Node leader, allocate tmp_buffer */ if (rank
== root) { tmp_buf = MPIU_Malloc(recvcnt * MPIR_MAX(recvtype_extent,
recvtype_true_extent) * local_size); } else { tmp_buf = MPIU_Malloc(
send_nbytes * local_size); } if (tmp_buf == NULL) { mpi_errno =
MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME,
__LINE__, MPI_ERR_OTHER, "**nomem", 0); return mpi_errno; }
/*Local copy of buffer*/ if(sendbuf != MPI_IN_PLACE) { MPIU_Mem‐
cpy(tmp_buf, local_sendbuf, send_nbytes); } else { MPIR_Local‐
copy(((char *) recvbuf +rank * recvcnt * recvtype_extent), recvcnt,
recvtype, tmp_buf, send_nbytes, MPI_BYTE); } }
if(local_rank !=0 && sendbuf == MPI_IN_PLACE) { mpi_errno =
MPIR_Limic_Gather_OSU(tmp_buf, (local_size * send_nbytes), (recvbuf +
(rank*nbytes)), nbytes, shmem_commptr ); } else { mpi_errno =
MPIR_Limic_Gather_OSU(tmp_buf, (local_size * send_nbytes), local_send‐
buf, nbytes, shmem_commptr ); }
if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
FN_FAIL
MPIU_CHKLMEM_FREEALL(); return (mpi_errno); }
#undef FUNCNAME
#define FUNCNAME MPIR_Intra_node_LIMIC_Gather_MV2
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
/*
 * Dispatches an intra-node LiMIC gather to one of the
 * MPIR_Limic_Gather_Scheme_* routines according to num_scheme (declared
 * outside this function).
 *
 * Every scheme except USE_GATHER_SINGLE_LEADER requires
 * shmem_commptr->ch.use_intra_sock_comm == 1; the single-leader scheme is
 * also taken when comm_ptr->ch.shmem_coll_ok == 1.  For the PT_PT,
 * PT_LINEAR and LINEAR_PT schemes the *_BINOMIAL variants pass
 * MPIR_Gather_intra and the *_DIRECT variants pass MPIR_Gather_MV2_Direct
 * as the gather function pointer.
 *
 * Returns the error code of the selected scheme, or a "**badcase" error
 * code when no scheme matches.
 */
int MPIR_Intra_node_LIMIC_Gather_MV2(const void *sendbuf, int sendcnt,
                                     MPI_Datatype sendtype,
                                     void *recvbuf, int recvcnt,
                                     MPI_Datatype recvtype, int root,
                                     MPID_Comm * comm_ptr, int *errflag)
{
    int mpi_errno = MPI_SUCCESS;
    int use_sock_comm;
    MPI_Comm shmem_comm;
    MPID_Comm *shmem_commptr;

    /* extract the rank,size information for the intra-node
     * communicator */
    shmem_comm = comm_ptr->ch.shmem_comm;
    MPID_Comm_get_ptr(shmem_comm, shmem_commptr);

    /* Shared precondition of every branch below. */
    use_sock_comm = (shmem_commptr->ch.use_intra_sock_comm == 1);

    if (use_sock_comm && (num_scheme == USE_GATHER_PT_PT_BINOMIAL)) {
        /*This case uses the PT-PT scheme with binomial
         * algorithm */
        mpi_errno = MPIR_Limic_Gather_Scheme_PT_PT(sendbuf, sendcnt, sendtype,
                                                   recvbuf, recvcnt, recvtype,
                                                   root, comm_ptr,
                                                   MPIR_Gather_intra,
                                                   errflag);
        if (mpi_errno) {
            MPIU_ERR_POP(mpi_errno);
        }
    } else if (use_sock_comm && (num_scheme == USE_GATHER_PT_PT_DIRECT)) {
        /*This case uses the PT-PT scheme with DIRECT
         * algorithm */
        mpi_errno = MPIR_Limic_Gather_Scheme_PT_PT(sendbuf, sendcnt, sendtype,
                                                   recvbuf, recvcnt, recvtype,
                                                   root, comm_ptr,
                                                   MPIR_Gather_MV2_Direct,
                                                   errflag);
        if (mpi_errno) {
            MPIU_ERR_POP(mpi_errno);
        }
    } else if (use_sock_comm && (num_scheme == USE_GATHER_PT_LINEAR_BINOMIAL)) {
        /*This case uses the PT-LINEAR scheme with binomial
         * algorithm */
        mpi_errno = MPIR_Limic_Gather_Scheme_PT_LINEAR(sendbuf, sendcnt, sendtype,
                                                       recvbuf, recvcnt, recvtype,
                                                       root, comm_ptr,
                                                       MPIR_Gather_intra,
                                                       errflag);
        if (mpi_errno) {
            MPIU_ERR_POP(mpi_errno);
        }
    } else if (use_sock_comm && (num_scheme == USE_GATHER_PT_LINEAR_DIRECT)) {
        /*This case uses the PT-LINEAR scheme with DIRECT
         * algorithm */
        mpi_errno = MPIR_Limic_Gather_Scheme_PT_LINEAR(sendbuf, sendcnt, sendtype,
                                                       recvbuf, recvcnt, recvtype,
                                                       root, comm_ptr,
                                                       MPIR_Gather_MV2_Direct,
                                                       errflag);
        if (mpi_errno) {
            MPIU_ERR_POP(mpi_errno);
        }
    } else if (use_sock_comm && (num_scheme == USE_GATHER_LINEAR_PT_BINOMIAL)) {
        /*This case uses the LINEAR-PT scheme with binomial
         * algorithm */
        mpi_errno = MPIR_Limic_Gather_Scheme_LINEAR_PT(sendbuf, sendcnt, sendtype,
                                                       recvbuf, recvcnt, recvtype,
                                                       root, comm_ptr,
                                                       MPIR_Gather_intra,
                                                       errflag);
        if (mpi_errno) {
            MPIU_ERR_POP(mpi_errno);
        }
    } else if (use_sock_comm && (num_scheme == USE_GATHER_LINEAR_PT_DIRECT)) {
        /*This case uses the LINEAR-PT scheme with DIRECT
         * algorithm */
        mpi_errno = MPIR_Limic_Gather_Scheme_LINEAR_PT(sendbuf, sendcnt, sendtype,
                                                       recvbuf, recvcnt, recvtype,
                                                       root, comm_ptr,
                                                       MPIR_Gather_MV2_Direct,
                                                       errflag);
        if (mpi_errno) {
            MPIU_ERR_POP(mpi_errno);
        }
    } else if (use_sock_comm && (num_scheme == USE_GATHER_LINEAR_LINEAR)) {
        mpi_errno = MPIR_Limic_Gather_Scheme_LINEAR_LINEAR(sendbuf, sendcnt, sendtype,
                                                           recvbuf, recvcnt, recvtype,
                                                           root, comm_ptr,
                                                           errflag);
        if (mpi_errno) {
            MPIU_ERR_POP(mpi_errno);
        }
    } else if (((comm_ptr->ch.shmem_coll_ok == 1) || use_sock_comm) &&
               (num_scheme == USE_GATHER_SINGLE_LEADER)) {
        mpi_errno = MPIR_Limic_Gather_Scheme_SINGLE_LEADER(sendbuf, sendcnt, sendtype,
                                                           recvbuf, recvcnt, recvtype,
                                                           root, comm_ptr,
                                                           errflag);
        if (mpi_errno) {
            MPIU_ERR_POP(mpi_errno);
        }
    } else {
        /*This is a invalid case, if we are in LIMIC Gather
         * the code flow should be in one of the if-else case*/
        mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                         FCNAME, __LINE__, MPI_ERR_OTHER,
                                         "**badcase", 0);
    }

  fn_fail:
    return (mpi_errno);
}
#endif /*#if defined(_SMP_LIMIC_) */
#endif /* #if defined(_OSU_MVAPICH_) || defined(_OSU_PSM_) */
#undef FUNCNAME
#define FUNCNAME MPIR_Gather_MV2
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
/*
 * Top-level gather entry point.
 *
 * Without _OSU_MVAPICH_ / _OSU_PSM_ this reduces to a call to
 * MPIR_Gather_intra().  With either defined it:
 *   - computes nbytes from recvcnt/recvtype on root and sendcnt/sendtype
 *     elsewhere;
 *   - walks mv2_gather_thresholds_table to pick a row by communicator size
 *     and, within it, the inter-leader and intra-node entries by nbytes
 *     (an entry whose max is -1 ends the search);
 *   - with _SMP_LIMIC_ and use_limic_gather set, also picks the LiMIC
 *     scheme and stores it in num_scheme;
 *   - with _ENABLE_CUDA_, optionally stages device buffers through
 *     cuda_stage_alloc()/cuda_stage_free() or uses MPIR_Gather_MV2_Direct();
 *   - otherwise calls the table-selected inter-leader function when the
 *     two-level conditions hold, and MPIR_Gather_intra() when they do not.
 *
 * Returns the MPI error code of the selected path.
 */
int MPIR_Gather_MV2(const void *sendbuf, int sendcnt, MPI_Datatype sendtype,
                    void *recvbuf, int recvcnt, MPI_Datatype recvtype,
                    int root, MPID_Comm * comm_ptr, int *errflag)
{
    int mpi_errno = MPI_SUCCESS;
#if defined(_OSU_MVAPICH_) || defined(_OSU_PSM_)
    int range = 0;
    int range_threshold = 0;
    int range_intra_threshold = 0;
    int nbytes = 0;
    int comm_size = 0;
    int recvtype_size, sendtype_size;
    int rank = -1;
#endif /* #if defined(_OSU_MVAPICH_) */
    MPIU_THREADPRIV_DECL;

    MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER(comm_ptr);
#if defined(_OSU_MVAPICH_) || defined(_OSU_PSM_)
    comm_size = comm_ptr->local_size;
    rank = comm_ptr->rank;
#endif /* #if defined(_OSU_MVAPICH_) */

    MPIU_THREADPRIV_GET;
#if defined(_OSU_MVAPICH_) || defined(_OSU_PSM_)
    if (rank == root) {
        MPID_Datatype_get_size_macro(recvtype, recvtype_size);
        nbytes = recvcnt * recvtype_size;
    } else {
        MPID_Datatype_get_size_macro(sendtype, sendtype_size);
        nbytes = sendcnt * sendtype_size;
    }

    /* Search for the corresponding system size inside the tuning table */
    while ((range < (mv2_size_gather_tuning_table - 1)) &&
           (comm_size > mv2_gather_thresholds_table[range].numproc)) {
        range++;
    }

    /* Search for corresponding inter-leader function */
    while ((range_threshold <
            (mv2_gather_thresholds_table[range].size_inter_table - 1)) &&
           (nbytes >
            mv2_gather_thresholds_table[range].inter_leader[range_threshold].max) &&
           (mv2_gather_thresholds_table[range].inter_leader[range_threshold].max != -1)) {
        range_threshold++;
    }

    /* Search for corresponding intra node function */
    while ((range_intra_threshold <
            (mv2_gather_thresholds_table[range].size_intra_table - 1)) &&
           (nbytes >
            mv2_gather_thresholds_table[range].intra_node[range_intra_threshold].max) &&
           (mv2_gather_thresholds_table[range].intra_node[range_intra_threshold].max != -1)) {
        range_intra_threshold++;
    }
#if defined(_SMP_LIMIC_)
    int range_limic_scheme = 0;
    if (use_limic_gather) {
        /* Search for corresponding limic-scheme function */
        while ((range_limic_scheme <
                (mv2_gather_thresholds_table[range].nb_limic_scheme - 1)) &&
               (nbytes >
                mv2_gather_thresholds_table[range].limic_gather_scheme[range_limic_scheme].max) &&
               (mv2_gather_thresholds_table[range].limic_gather_scheme[range_limic_scheme].max != -1)) {
            range_limic_scheme++;
        }
        num_scheme =
            mv2_gather_thresholds_table[range].limic_gather_scheme[range_limic_scheme].scheme;
    }
#endif /*#if defined(_SMP_LIMIC_)*/
#ifdef _ENABLE_CUDA_
    MPI_Aint sendtype_extent;
    MPID_Datatype_get_extent_macro(sendtype, sendtype_extent);
    int recvtype_extent = 0;
    MPID_Datatype_get_extent_macro(recvtype, recvtype_extent);
    int send_mem_type = 0;
    int recv_mem_type = 0;
    if (rdma_enable_cuda) {
        send_mem_type = is_device_buffer(sendbuf);
        recv_mem_type = is_device_buffer(recvbuf);
    }
    if (rdma_enable_cuda && (send_mem_type || recv_mem_type) &&
        rdma_cuda_use_naive &&
        (nbytes <= rdma_cuda_gather_naive_limit/comm_size)) {
        if (sendbuf != MPI_IN_PLACE) {
            if (rank == root) {
                mpi_errno = cuda_stage_alloc (NULL, 0,
                                &recvbuf, recvcnt*recvtype_extent*comm_size,
                                0, recv_mem_type,
                                0);
            } else {
                mpi_errno = cuda_stage_alloc ((void **)&sendbuf, sendcnt*sendtype_extent,
                                NULL, 0,
                                send_mem_type, 0,
                                0);
            }
        } else {
            mpi_errno = cuda_stage_alloc ((void **)&sendbuf, recvcnt*recvtype_extent,
                            &recvbuf, recvcnt*recvtype_extent*comm_size,
                            0, recv_mem_type,
                            rank*recvcnt*recvtype_extent);
        }
        if (mpi_errno) {
            MPIU_ERR_POP(mpi_errno);
        }
    }

    /* Use Direct algorithm in cuda configuration */
    if (rdma_enable_cuda &&
        (((nbytes > rdma_cuda_gather_naive_limit/comm_size) &&
          rdma_cuda_use_naive) || !rdma_cuda_use_naive)) {
        mpi_errno = MPIR_Gather_MV2_Direct(sendbuf, sendcnt,
                                           sendtype, recvbuf, recvcnt, recvtype,
                                           root, comm_ptr, errflag);
    } else
#endif /*_ENABLE_CUDA_*/

    /* When _ENABLE_CUDA_ is defined, this "if" is the body of the dangling
     * "else" above. */
    if (comm_ptr->ch.is_global_block == 1 && mv2_use_direct_gather == 1 &&
        mv2_use_two_level_gather == 1 && comm_ptr->ch.shmem_coll_ok == 1) {
        /* Set intra-node function pt for gather_two_level */
        MV2_Gather_intra_node_function =
            mv2_gather_thresholds_table[range].intra_node[range_intra_threshold].
            MV2_pt_Gather_function;
        /* Set inter-leader pt */
        MV2_Gather_inter_leader_function =
            mv2_gather_thresholds_table[range].inter_leader[range_threshold].
            MV2_pt_Gather_function;
        /* We call Gather function */
        mpi_errno =
            MV2_Gather_inter_leader_function(sendbuf, sendcnt, sendtype, recvbuf, recvcnt,
                                             recvtype, root, comm_ptr, errflag);

    } else {
#endif /* #if defined(_OSU_MVAPICH_) */
        mpi_errno = MPIR_Gather_intra(sendbuf, sendcnt, sendtype,
                                      recvbuf, recvcnt, recvtype,
                                      root, comm_ptr, errflag);
#if defined(_OSU_MVAPICH_) || defined(_OSU_PSM_)
    }

#ifdef _ENABLE_CUDA_
    if (rdma_enable_cuda && (send_mem_type || recv_mem_type) &&
        rdma_cuda_use_naive &&
        (nbytes <= rdma_cuda_gather_naive_limit/comm_size)) {
        if (rank == root) {
            cuda_stage_free (NULL,
                        &recvbuf, recvcnt*recvtype_extent*comm_size,
                        0, recv_mem_type);
        } else {
            cuda_stage_free ((void **)&sendbuf,
                        NULL, 0,
                        send_mem_type, 0);
        }
    }
#endif /*#ifdef _ENABLE_CUDA_*/

#endif /* #if defined(_OSU_MVAPICH_) */
    if (mpi_errno) {
        MPIU_ERR_POP(mpi_errno);
    }

    /* check if multiple threads are calling this collective function */
    MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT(comm_ptr);

  fn_exit:
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
/* end:nested */
LOCATION
src/mpi/coll/gather_osu.c
8/22/2013 ack(3)
[top]
List of man pages available for RedHat
Copyright (c) for man pages and the logo by the respective OS vendor.
For those who want to learn more, the polarhome community provides shell access and support.
[legal]
[privacy]
[GNU]
[policy]
[cookies]
[netiquette]
[sponsors]
[FAQ]
Polarhome, production since 1999.
Member of Polarhome portal.
Based on Fawad Halim's script.
....................................................................
|
Vote for polarhome
|