/* @cond INNERDOC */
/*!
 @file
 @brief
 Performance kernel dispatching code, for each type, submatrix size and operation.
 This file covers the block compressed sparse stripes format.
 Kernels are unrolled, with no loops, and provided only for user-specified blockings.
 */

/*

Copyright (C) 2008-2018 Michele Martone

This file is part of librsb.

librsb is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.

librsb is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public
License along with librsb; see the file COPYING.
If not, see <http://www.gnu.org/licenses/>.

*/
/*
 The code in this file was generated automatically by an M4 script. 
 It is not meant to be used as an API (Application Programming Interface).
 p.s.: right now, only row major matrix access is considered.

 */
#include "rsb_internals.h"
#include "rsb.h"


rsb_err_t rsb__BCSR_infty_norm_double_C__tN_r1_c1_uu_sU_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation kernel for \f$ \|A\|_{\infty} \f$, where A \f$ \neq A^T \f$:
	 * for every row i in [br,bc), each stored entry adds \f$ |A_{ij}| \f$ to row_sums[roff+i].
	 * No maximum is taken here; only the per-row sums of absolute values are updated.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * \param VA       stored values, read sequentially starting at VA[0] (one per visited entry).
	 * \param row_sums accumulator array, updated in place.
	 * \param bpntr    row pointers; row i contributes bpntr[i+1]-bpntr[i] entries.
	 * \param br,bc    first visited row and one past the last visited row.
	 * \param roff     offset added to the row index when addressing row_sums.
	 *
	 * Mdim, mdim, bindx, indptr, rpntr, cpntr, coff and flags are not used by this kernel.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const double *val = VA;	/* next stored value; advanced once per visited entry */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		while(nz<nz_end)
		{
			double * const row_acc = row_sums+row;

			row_acc[roff] += fabs(*val);
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_C__tN_r1_c1_uu_sU_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation kernel for \f$ \|A\|_{\infty} \f$, where A \f$ \neq A^T \f$:
	 * for every row i in [br,bc), each stored entry adds \f$ |A_{ij}| \f$ to row_sums[roff+i].
	 * No maximum is taken here; only the per-row sums of absolute values are updated.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 * NOTE(review): only stored entries are accumulated; no implicit-diagonal term is added
	 * in this kernel -- presumably the caller accounts for it; confirm.
	 *
	 * \param VA       stored values, read sequentially starting at VA[0] (one per visited entry).
	 * \param row_sums accumulator array, updated in place.
	 * \param bpntr    row pointers; row i contributes bpntr[i+1]-bpntr[i] entries.
	 * \param br,bc    first visited row and one past the last visited row.
	 * \param roff     offset added to the row index when addressing row_sums.
	 *
	 * Mdim, mdim, bindx, indptr, rpntr, cpntr, coff and flags are not used by this kernel.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const double *val = VA;	/* next stored value; advanced once per visited entry */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		while(nz<nz_end)
		{
			double * const row_acc = row_sums+row;

			row_acc[roff] += fabs(*val);
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_H__tN_r1_c1_uu_sU_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation kernel for \f$ \|A\|_{\infty} \f$, where A \f$ \neq A^T \f$:
	 * for every row i in [br,bc), each stored entry adds \f$ |A_{ij}| \f$ to row_sums[roff+i].
	 * No maximum is taken here; only the per-row sums of absolute values are updated.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * \param VA       stored values, read sequentially starting at VA[0] (one per visited entry).
	 * \param row_sums accumulator array, updated in place.
	 * \param bpntr    row pointers; row i contributes bpntr[i+1]-bpntr[i] entries.
	 * \param br,bc    first visited row and one past the last visited row.
	 * \param roff     offset added to the row index when addressing row_sums.
	 *
	 * Mdim, mdim, bindx, indptr, rpntr, cpntr, coff and flags are not used by this kernel.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const double *val = VA;	/* next stored value; advanced once per visited entry */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		while(nz<nz_end)
		{
			double * const row_acc = row_sums+row;

			row_acc[roff] += fabs(*val);
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_H__tN_r1_c1_uu_sU_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation kernel for \f$ \|A\|_{\infty} \f$, where A \f$ \neq A^T \f$:
	 * for every row i in [br,bc), each stored entry adds \f$ |A_{ij}| \f$ to row_sums[roff+i].
	 * No maximum is taken here; only the per-row sums of absolute values are updated.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 * NOTE(review): only stored entries are accumulated; no implicit-diagonal term is added
	 * in this kernel -- presumably the caller accounts for it; confirm.
	 *
	 * \param VA       stored values, read sequentially starting at VA[0] (one per visited entry).
	 * \param row_sums accumulator array, updated in place.
	 * \param bpntr    row pointers; row i contributes bpntr[i+1]-bpntr[i] entries.
	 * \param br,bc    first visited row and one past the last visited row.
	 * \param roff     offset added to the row index when addressing row_sums.
	 *
	 * Mdim, mdim, bindx, indptr, rpntr, cpntr, coff and flags are not used by this kernel.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const double *val = VA;	/* next stored value; advanced once per visited entry */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		while(nz<nz_end)
		{
			double * const row_acc = row_sums+row;

			row_acc[roff] += fabs(*val);
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_C__tT_r1_c1_uu_sU_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed accumulation kernel for \f$ \|A\|_{\infty} \f$, where A \f$ \neq A^T \f$:
	 * for every stored entry of rows [br,bc), \f$ |A_{ij}| \f$ is added to row_sums[coff+j],
	 * with j read from bindx. No maximum is taken here.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * \param VA       stored values, read sequentially starting at VA[0] (one per visited entry).
	 * \param row_sums accumulator array, updated in place (indexed by column here).
	 * \param bindx    column index of each stored entry.
	 * \param bpntr    row pointers; row i contributes entries bpntr[i]..bpntr[i+1]-1 of bindx.
	 * \param br,bc    first visited row and one past the last visited row.
	 * \param coff     offset added to the column index when addressing row_sums.
	 *
	 * Mdim, mdim, indptr, rpntr, cpntr, roff and flags are not used by this kernel.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const double *val = VA;	/* next stored value; advanced once per visited entry */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		while(nz<nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			double * const col_acc = row_sums+col;

			col_acc[coff] += fabs(*val);
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_C__tT_r1_c1_uu_sU_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed accumulation kernel for \f$ \|A\|_{\infty} \f$, where A \f$ \neq A^T \f$:
	 * for every stored entry of rows [br,bc), \f$ |A_{ij}| \f$ is added to row_sums[coff+j],
	 * with j read from bindx. No maximum is taken here.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 * NOTE(review): only stored entries are accumulated; no implicit-diagonal term is added
	 * in this kernel -- presumably the caller accounts for it; confirm.
	 *
	 * \param VA       stored values, read sequentially starting at VA[0] (one per visited entry).
	 * \param row_sums accumulator array, updated in place (indexed by column here).
	 * \param bindx    column index of each stored entry.
	 * \param bpntr    row pointers; row i contributes entries bpntr[i]..bpntr[i+1]-1 of bindx.
	 * \param br,bc    first visited row and one past the last visited row.
	 * \param coff     offset added to the column index when addressing row_sums.
	 *
	 * Mdim, mdim, indptr, rpntr, cpntr, roff and flags are not used by this kernel.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const double *val = VA;	/* next stored value; advanced once per visited entry */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		while(nz<nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			double * const col_acc = row_sums+col;

			col_acc[coff] += fabs(*val);
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_H__tT_r1_c1_uu_sU_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed accumulation kernel for \f$ \|A\|_{\infty} \f$, where A \f$ \neq A^T \f$:
	 * for every stored entry of rows [br,bc), \f$ |A_{ij}| \f$ is added to row_sums[coff+j],
	 * with j read from bindx. No maximum is taken here.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * \param VA       stored values, read sequentially starting at VA[0] (one per visited entry).
	 * \param row_sums accumulator array, updated in place (indexed by column here).
	 * \param bindx    column index of each stored entry.
	 * \param bpntr    row pointers; row i contributes entries bpntr[i]..bpntr[i+1]-1 of bindx.
	 * \param br,bc    first visited row and one past the last visited row.
	 * \param coff     offset added to the column index when addressing row_sums.
	 *
	 * Mdim, mdim, indptr, rpntr, cpntr, roff and flags are not used by this kernel.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const double *val = VA;	/* next stored value; advanced once per visited entry */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		while(nz<nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			double * const col_acc = row_sums+col;

			col_acc[coff] += fabs(*val);
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_H__tT_r1_c1_uu_sU_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed accumulation kernel for \f$ \|A\|_{\infty} \f$, where A \f$ \neq A^T \f$:
	 * for every stored entry of rows [br,bc), \f$ |A_{ij}| \f$ is added to row_sums[coff+j],
	 * with j read from bindx. No maximum is taken here.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 * NOTE(review): only stored entries are accumulated; no implicit-diagonal term is added
	 * in this kernel -- presumably the caller accounts for it; confirm.
	 *
	 * \param VA       stored values, read sequentially starting at VA[0] (one per visited entry).
	 * \param row_sums accumulator array, updated in place (indexed by column here).
	 * \param bindx    column index of each stored entry.
	 * \param bpntr    row pointers; row i contributes entries bpntr[i]..bpntr[i+1]-1 of bindx.
	 * \param br,bc    first visited row and one past the last visited row.
	 * \param coff     offset added to the column index when addressing row_sums.
	 *
	 * Mdim, mdim, indptr, rpntr, cpntr, roff and flags are not used by this kernel.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const double *val = VA;	/* next stored value; advanced once per visited entry */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		while(nz<nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			double * const col_acc = row_sums+col;

			col_acc[coff] += fabs(*val);
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_C__tC_r1_c1_uu_sU_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * tC accumulation kernel for \f$ \|A\|_{\infty} \f$, where A \f$ \neq A^T \f$:
	 * for every stored entry of rows [br,bc), \f$ |A_{ij}| \f$ is added to row_sums[coff+j],
	 * with j read from bindx. No maximum is taken here.
	 * (tC presumably stands for conjugate transposition; the values here are real doubles.)
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * \param VA       stored values, read sequentially starting at VA[0] (one per visited entry).
	 * \param row_sums accumulator array, updated in place (indexed by column here).
	 * \param bindx    column index of each stored entry.
	 * \param bpntr    row pointers; row i contributes entries bpntr[i]..bpntr[i+1]-1 of bindx.
	 * \param br,bc    first visited row and one past the last visited row.
	 * \param coff     offset added to the column index when addressing row_sums.
	 *
	 * Mdim, mdim, indptr, rpntr, cpntr, roff and flags are not used by this kernel.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const double *val = VA;	/* next stored value; advanced once per visited entry */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		while(nz<nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			double * const col_acc = row_sums+col;

			col_acc[coff] += fabs(*val);
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_C__tC_r1_c1_uu_sU_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * tC accumulation kernel for \f$ \|A\|_{\infty} \f$, where A \f$ \neq A^T \f$:
	 * for every stored entry of rows [br,bc), \f$ |A_{ij}| \f$ is added to row_sums[coff+j],
	 * with j read from bindx. No maximum is taken here.
	 * (tC presumably stands for conjugate transposition; the values here are real doubles.)
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 * NOTE(review): only stored entries are accumulated; no implicit-diagonal term is added
	 * in this kernel -- presumably the caller accounts for it; confirm.
	 *
	 * \param VA       stored values, read sequentially starting at VA[0] (one per visited entry).
	 * \param row_sums accumulator array, updated in place (indexed by column here).
	 * \param bindx    column index of each stored entry.
	 * \param bpntr    row pointers; row i contributes entries bpntr[i]..bpntr[i+1]-1 of bindx.
	 * \param br,bc    first visited row and one past the last visited row.
	 * \param coff     offset added to the column index when addressing row_sums.
	 *
	 * Mdim, mdim, indptr, rpntr, cpntr, roff and flags are not used by this kernel.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const double *val = VA;	/* next stored value; advanced once per visited entry */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		while(nz<nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			double * const col_acc = row_sums+col;

			col_acc[coff] += fabs(*val);
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_H__tC_r1_c1_uu_sU_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * tC accumulation kernel for \f$ \|A\|_{\infty} \f$, where A \f$ \neq A^T \f$:
	 * for every stored entry of rows [br,bc), \f$ |A_{ij}| \f$ is added to row_sums[coff+j],
	 * with j read from bindx. No maximum is taken here.
	 * (tC presumably stands for conjugate transposition; the values here are real doubles.)
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * \param VA       stored values, read sequentially starting at VA[0] (one per visited entry).
	 * \param row_sums accumulator array, updated in place (indexed by column here).
	 * \param bindx    column index of each stored entry.
	 * \param bpntr    row pointers; row i contributes entries bpntr[i]..bpntr[i+1]-1 of bindx.
	 * \param br,bc    first visited row and one past the last visited row.
	 * \param coff     offset added to the column index when addressing row_sums.
	 *
	 * Mdim, mdim, indptr, rpntr, cpntr, roff and flags are not used by this kernel.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const double *val = VA;	/* next stored value; advanced once per visited entry */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		while(nz<nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			double * const col_acc = row_sums+col;

			col_acc[coff] += fabs(*val);
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_H__tC_r1_c1_uu_sU_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * tC accumulation kernel for \f$ \|A\|_{\infty} \f$, where A \f$ \neq A^T \f$:
	 * for every stored entry of rows [br,bc), \f$ |A_{ij}| \f$ is added to row_sums[coff+j],
	 * with j read from bindx. No maximum is taken here.
	 * (tC presumably stands for conjugate transposition; the values here are real doubles.)
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 * NOTE(review): only stored entries are accumulated; no implicit-diagonal term is added
	 * in this kernel -- presumably the caller accounts for it; confirm.
	 *
	 * \param VA       stored values, read sequentially starting at VA[0] (one per visited entry).
	 * \param row_sums accumulator array, updated in place (indexed by column here).
	 * \param bindx    column index of each stored entry.
	 * \param bpntr    row pointers; row i contributes entries bpntr[i]..bpntr[i+1]-1 of bindx.
	 * \param br,bc    first visited row and one past the last visited row.
	 * \param coff     offset added to the column index when addressing row_sums.
	 *
	 * Mdim, mdim, indptr, rpntr, cpntr, roff and flags are not used by this kernel.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const double *val = VA;	/* next stored value; advanced once per visited entry */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		while(nz<nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			double * const col_acc = row_sums+col;

			col_acc[coff] += fabs(*val);
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_C__tN_r1_c1_uu_sS_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation kernel for \f$ \|A\|_{\infty} \f$, where A == A^T:
	 * for every stored entry (i,j) of rows [br,bc), \f$ |A_{ij}| \f$ is added to row_sums[roff+i]
	 * and, unless roff==coff and i==j, also to the mirrored target row_sums[coff+j].
	 * No maximum is taken here.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * \param VA        stored values, read sequentially starting at VA[0] (one per visited entry).
	 * \param row_sums  accumulator array, updated in place.
	 * \param bindx     column index of each stored entry.
	 * \param bpntr     row pointers; row i contributes entries bpntr[i]..bpntr[i+1]-1 of bindx.
	 * \param br,bc     first visited row and one past the last visited row.
	 * \param roff,coff offsets added to the row / column index when addressing row_sums.
	 *
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used by this kernel.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const double *val = VA;	/* next stored value; advanced once per visited entry */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		while(nz<nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			const double magnitude = fabs(*val);
			double * const row_acc = row_sums+row;

			row_acc[roff] += magnitude;
			/* mirrored contribution; skipped only when offsets and indices both match */
			if(!(roff==coff && row==col))
				row_sums[coff+col] += magnitude;
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_C__tN_r1_c1_uu_sS_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation kernel for \f$ \|A\|_{\infty} \f$, where A == A^T:
	 * for every stored entry (i,j) of rows [br,bc), \f$ |A_{ij}| \f$ is added to row_sums[roff+i]
	 * and, unless roff==coff and i==j, also to the mirrored target row_sums[coff+j].
	 * No maximum is taken here.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 * NOTE(review): only stored entries are accumulated; no implicit-diagonal term is added
	 * in this kernel -- presumably the caller accounts for it; confirm.
	 *
	 * \param VA        stored values, read sequentially starting at VA[0] (one per visited entry).
	 * \param row_sums  accumulator array, updated in place.
	 * \param bindx     column index of each stored entry.
	 * \param bpntr     row pointers; row i contributes entries bpntr[i]..bpntr[i+1]-1 of bindx.
	 * \param br,bc     first visited row and one past the last visited row.
	 * \param roff,coff offsets added to the row / column index when addressing row_sums.
	 *
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used by this kernel.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const double *val = VA;	/* next stored value; advanced once per visited entry */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		while(nz<nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			const double magnitude = fabs(*val);
			double * const row_acc = row_sums+row;

			row_acc[roff] += magnitude;
			/* mirrored contribution; skipped only when offsets and indices both match */
			if(!(roff==coff && row==col))
				row_sums[coff+col] += magnitude;
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_H__tN_r1_c1_uu_sS_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation kernel for \f$ \|A\|_{\infty} \f$, where A == A^T:
	 * for every stored entry (i,j) of rows [br,bc), \f$ |A_{ij}| \f$ is added to row_sums[roff+i]
	 * and, unless roff==coff and i==j, also to the mirrored target row_sums[coff+j].
	 * No maximum is taken here.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * \param VA        stored values, read sequentially starting at VA[0] (one per visited entry).
	 * \param row_sums  accumulator array, updated in place.
	 * \param bindx     column index of each stored entry.
	 * \param bpntr     row pointers; row i contributes entries bpntr[i]..bpntr[i+1]-1 of bindx.
	 * \param br,bc     first visited row and one past the last visited row.
	 * \param roff,coff offsets added to the row / column index when addressing row_sums.
	 *
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used by this kernel.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const double *val = VA;	/* next stored value; advanced once per visited entry */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		while(nz<nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			const double magnitude = fabs(*val);
			double * const row_acc = row_sums+row;

			row_acc[roff] += magnitude;
			/* mirrored contribution; skipped only when offsets and indices both match */
			if(!(roff==coff && row==col))
				row_sums[coff+col] += magnitude;
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_H__tN_r1_c1_uu_sS_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation kernel for \f$ \|A\|_{\infty} \f$, where A == A^T:
	 * for every stored entry (i,j) of rows [br,bc), \f$ |A_{ij}| \f$ is added to row_sums[roff+i]
	 * and, unless roff==coff and i==j, also to the mirrored target row_sums[coff+j].
	 * No maximum is taken here.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 * NOTE(review): only stored entries are accumulated; no implicit-diagonal term is added
	 * in this kernel -- presumably the caller accounts for it; confirm.
	 *
	 * \param VA        stored values, read sequentially starting at VA[0] (one per visited entry).
	 * \param row_sums  accumulator array, updated in place.
	 * \param bindx     column index of each stored entry.
	 * \param bpntr     row pointers; row i contributes entries bpntr[i]..bpntr[i+1]-1 of bindx.
	 * \param br,bc     first visited row and one past the last visited row.
	 * \param roff,coff offsets added to the row / column index when addressing row_sums.
	 *
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used by this kernel.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const double *val = VA;	/* next stored value; advanced once per visited entry */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		while(nz<nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			const double magnitude = fabs(*val);
			double * const row_acc = row_sums+row;

			row_acc[roff] += magnitude;
			/* mirrored contribution; skipped only when offsets and indices both match */
			if(!(roff==coff && row==col))
				row_sums[coff+col] += magnitude;
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_C__tT_r1_c1_uu_sS_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed entry point of the \f$ \|A\|_{\infty} \f$ accumulation kernel, where A == A^T.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 * Pure forwarder: every argument is handed unchanged to the tN symmetric kernel.
	 * \return whatever the tN symmetric kernel returns.
	 */

	/* A symmetric matrix equals its transpose, so the non-transposed kernel does the work. */
	const rsb_err_t errval = rsb__BCSR_infty_norm_double_C__tN_r1_c1_uu_sS_dE_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_infty_norm_double_C__tT_r1_c1_uu_sS_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed entry point of the \f$ \|A\|_{\infty} \f$ accumulation kernel, where A == A^T.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 * Pure forwarder: every argument is handed unchanged to the tN symmetric kernel.
	 * \return whatever the tN symmetric kernel returns.
	 */

	/* A symmetric matrix equals its transpose, so the non-transposed kernel does the work. */
	const rsb_err_t errval = rsb__BCSR_infty_norm_double_C__tN_r1_c1_uu_sS_dI_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_infty_norm_double_H__tT_r1_c1_uu_sS_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed entry point of the \f$ \|A\|_{\infty} \f$ accumulation kernel, where A == A^T.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 * Pure forwarder: every argument is handed unchanged to the tN symmetric kernel.
	 * \return whatever the tN symmetric kernel returns.
	 */

	/* A symmetric matrix equals its transpose, so the non-transposed kernel does the work. */
	const rsb_err_t errval = rsb__BCSR_infty_norm_double_H__tN_r1_c1_uu_sS_dE_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_infty_norm_double_H__tT_r1_c1_uu_sS_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed entry point of the \f$ \|A\|_{\infty} \f$ accumulation kernel, where A == A^T.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 * Pure forwarder: every argument is handed unchanged to the tN symmetric kernel.
	 * \return whatever the tN symmetric kernel returns.
	 */

	/* A symmetric matrix equals its transpose, so the non-transposed kernel does the work. */
	const rsb_err_t errval = rsb__BCSR_infty_norm_double_H__tN_r1_c1_uu_sS_dI_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_infty_norm_double_C__tC_r1_c1_uu_sS_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * tC entry point of the \f$ \|A\|_{\infty} \f$ accumulation kernel, where A == A^T.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 * Pure forwarder: every argument is handed unchanged to the tN symmetric kernel.
	 * \return whatever the tN symmetric kernel returns.
	 */

	/* Symmetric transposed reverts to symmetric not transposed (tC is treated the same way here). */
	const rsb_err_t errval = rsb__BCSR_infty_norm_double_C__tN_r1_c1_uu_sS_dE_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_infty_norm_double_C__tC_r1_c1_uu_sS_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * tC entry point of the \f$ \|A\|_{\infty} \f$ accumulation kernel, where A == A^T.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 * Pure forwarder: every argument is handed unchanged to the tN symmetric kernel.
	 * \return whatever the tN symmetric kernel returns.
	 */

	/* Symmetric transposed reverts to symmetric not transposed (tC is treated the same way here). */
	const rsb_err_t errval = rsb__BCSR_infty_norm_double_C__tN_r1_c1_uu_sS_dI_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_infty_norm_double_H__tC_r1_c1_uu_sS_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * tC entry point of the \f$ \|A\|_{\infty} \f$ accumulation kernel, where A == A^T.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 * Pure forwarder: every argument is handed unchanged to the tN symmetric kernel.
	 * \return whatever the tN symmetric kernel returns.
	 */

	/* Symmetric transposed reverts to symmetric not transposed (tC is treated the same way here). */
	const rsb_err_t errval = rsb__BCSR_infty_norm_double_H__tN_r1_c1_uu_sS_dE_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_infty_norm_double_H__tC_r1_c1_uu_sS_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Conjugate-transposed entry point of the infinity norm row sums kernel, symmetric case (\f$ A = A^T \f$).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 * All the arguments are handed over, unchanged, to the non transposed kernel.
	 * \return the error value given back by the non transposed kernel.
	 */
	const rsb_err_t errval = rsb__BCSR_infty_norm_double_H__tN_r1_c1_uu_sS_dI_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	/* the transposed symmetric case is served by the non transposed kernel */
	return errval;
}



rsb_err_t rsb__BCSR_infty_norm_double_C__tN_r1_c1_uu_sH_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums accumulation step for \f$ \|A\|_{\infty} \f$, hermitian case (\f$ A = A^H \f$).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * For each stored entry a at local position (i,j), fabs(a) is added to row_sums[i+roff];
	 * unless roff==coff and i==j, it is added to row_sums[j+coff] as well
	 * (presumably on behalf of the mirrored entry, which is not stored).
	 * Rows from br (included) to bc (excluded) are visited; values are read sequentially starting at VA.
	 * Arguments Mdim, mdim, indptr, rpntr, cpntr, flags are not used here.
	 * \return RSB_ERR_NO_ERROR, always.
	 */
	const double *val = VA;	/* running cursor: advanced by one per visited entry */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t pos = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		for (; pos < row_end; ++pos, ++val)
		{
			const rsb_coo_idx_t col = bindx[pos];
			const double magnitude = fabs(*val);
			double *const row_slot = row_sums + row;

			row_slot[roff] += magnitude;

			/* skip the second update only when both offsets and both indices coincide */
			if (!(roff == coff && row == col))
				row_sums[coff + col] += magnitude;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_C__tN_r1_c1_uu_sH_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums accumulation step for \f$ \|A\|_{\infty} \f$, hermitian case (\f$ A = A^H \f$).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * For each stored entry a at local position (i,j), fabs(a) is added to row_sums[i+roff];
	 * unless roff==coff and i==j, it is added to row_sums[j+coff] as well
	 * (presumably on behalf of the mirrored entry, which is not stored).
	 * Only stored entries are processed: no contribution for an implicit diagonal is added here.
	 * Rows from br (included) to bc (excluded) are visited; values are read sequentially starting at VA.
	 * Arguments Mdim, mdim, indptr, rpntr, cpntr, flags are not used here.
	 * \return RSB_ERR_NO_ERROR, always.
	 */
	const double *val = VA;	/* running cursor: advanced by one per visited entry */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t pos = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		for (; pos < row_end; ++pos, ++val)
		{
			const rsb_coo_idx_t col = bindx[pos];
			const double magnitude = fabs(*val);
			double *const row_slot = row_sums + row;

			row_slot[roff] += magnitude;

			/* skip the second update only when both offsets and both indices coincide */
			if (!(roff == coff && row == col))
				row_sums[coff + col] += magnitude;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_H__tN_r1_c1_uu_sH_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums accumulation step for \f$ \|A\|_{\infty} \f$, hermitian case (\f$ A = A^H \f$).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * For each stored entry a at local position (i,j), fabs(a) is added to row_sums[i+roff];
	 * unless roff==coff and i==j, it is added to row_sums[j+coff] as well
	 * (presumably on behalf of the mirrored entry, which is not stored).
	 * Rows from br (included) to bc (excluded) are visited; values are read sequentially starting at VA.
	 * Arguments Mdim, mdim, indptr, rpntr, cpntr, flags are not used here.
	 * \return RSB_ERR_NO_ERROR, always.
	 */
	const double *val = VA;	/* running cursor: advanced by one per visited entry */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t pos = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		for (; pos < row_end; ++pos, ++val)
		{
			/* half-width index widened to the full index type */
			const rsb_coo_idx_t col = bindx[pos];
			const double magnitude = fabs(*val);
			double *const row_slot = row_sums + row;

			row_slot[roff] += magnitude;

			/* skip the second update only when both offsets and both indices coincide */
			if (!(roff == coff && row == col))
				row_sums[coff + col] += magnitude;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_H__tN_r1_c1_uu_sH_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums accumulation step for \f$ \|A\|_{\infty} \f$, hermitian case (\f$ A = A^H \f$).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * For each stored entry a at local position (i,j), fabs(a) is added to row_sums[i+roff];
	 * unless roff==coff and i==j, it is added to row_sums[j+coff] as well
	 * (presumably on behalf of the mirrored entry, which is not stored).
	 * Only stored entries are processed: no contribution for an implicit diagonal is added here.
	 * Rows from br (included) to bc (excluded) are visited; values are read sequentially starting at VA.
	 * Arguments Mdim, mdim, indptr, rpntr, cpntr, flags are not used here.
	 * \return RSB_ERR_NO_ERROR, always.
	 */
	const double *val = VA;	/* running cursor: advanced by one per visited entry */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t pos = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		for (; pos < row_end; ++pos, ++val)
		{
			/* half-width index widened to the full index type */
			const rsb_coo_idx_t col = bindx[pos];
			const double magnitude = fabs(*val);
			double *const row_slot = row_sums + row;

			row_slot[roff] += magnitude;

			/* skip the second update only when both offsets and both indices coincide */
			if (!(roff == coff && row == col))
				row_sums[coff + col] += magnitude;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_C__tT_r1_c1_uu_sH_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed entry point of the infinity norm row sums kernel, hermitian case (\f$ A = A^H \f$).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 * All the arguments are handed over, unchanged, to the non transposed kernel.
	 * \return the error value given back by the non transposed kernel.
	 */
	const rsb_err_t errval = rsb__BCSR_infty_norm_double_C__tN_r1_c1_uu_sH_dE_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	/* the transposed case is served by the non transposed kernel */
	return errval;
}



rsb_err_t rsb__BCSR_infty_norm_double_C__tT_r1_c1_uu_sH_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed entry point of the infinity norm row sums kernel, hermitian case (\f$ A = A^H \f$).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 * All the arguments are handed over, unchanged, to the non transposed kernel.
	 * \return the error value given back by the non transposed kernel.
	 */
	const rsb_err_t errval = rsb__BCSR_infty_norm_double_C__tN_r1_c1_uu_sH_dI_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	/* the transposed case is served by the non transposed kernel */
	return errval;
}



rsb_err_t rsb__BCSR_infty_norm_double_H__tT_r1_c1_uu_sH_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed entry point of the infinity norm row sums kernel, hermitian case (\f$ A = A^H \f$).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 * All the arguments are handed over, unchanged, to the non transposed kernel.
	 * \return the error value given back by the non transposed kernel.
	 */
	const rsb_err_t errval = rsb__BCSR_infty_norm_double_H__tN_r1_c1_uu_sH_dE_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	/* the transposed case is served by the non transposed kernel */
	return errval;
}



rsb_err_t rsb__BCSR_infty_norm_double_H__tT_r1_c1_uu_sH_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed entry point of the infinity norm row sums kernel, hermitian case (\f$ A = A^H \f$).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 * All the arguments are handed over, unchanged, to the non transposed kernel.
	 * \return the error value given back by the non transposed kernel.
	 */
	const rsb_err_t errval = rsb__BCSR_infty_norm_double_H__tN_r1_c1_uu_sH_dI_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	/* the transposed case is served by the non transposed kernel */
	return errval;
}



rsb_err_t rsb__BCSR_infty_norm_double_C__tC_r1_c1_uu_sH_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Conjugate-transposed entry point of the infinity norm row sums kernel, hermitian case (\f$ A = A^H \f$).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 * All the arguments are handed over, unchanged, to the non transposed kernel.
	 * \return the error value given back by the non transposed kernel.
	 */
	const rsb_err_t errval = rsb__BCSR_infty_norm_double_C__tN_r1_c1_uu_sH_dE_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	/* the conjugate-transposed case is served by the non transposed kernel */
	return errval;
}



rsb_err_t rsb__BCSR_infty_norm_double_C__tC_r1_c1_uu_sH_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Conjugate-transposed entry point of the infinity norm row sums kernel, hermitian case (\f$ A = A^H \f$).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 * All the arguments are handed over, unchanged, to the non transposed kernel.
	 * \return the error value given back by the non transposed kernel.
	 */
	const rsb_err_t errval = rsb__BCSR_infty_norm_double_C__tN_r1_c1_uu_sH_dI_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	/* the conjugate-transposed case is served by the non transposed kernel */
	return errval;
}



rsb_err_t rsb__BCSR_infty_norm_double_H__tC_r1_c1_uu_sH_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Conjugate-transposed entry point of the infinity norm row sums kernel, hermitian case (\f$ A = A^H \f$).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 * All the arguments are handed over, unchanged, to the non transposed kernel.
	 * \return the error value given back by the non transposed kernel.
	 */
	const rsb_err_t errval = rsb__BCSR_infty_norm_double_H__tN_r1_c1_uu_sH_dE_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	/* the conjugate-transposed case is served by the non transposed kernel */
	return errval;
}



rsb_err_t rsb__BCSR_infty_norm_double_H__tC_r1_c1_uu_sH_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Conjugate-transposed entry point of the infinity norm row sums kernel, hermitian case (\f$ A = A^H \f$).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 * All the arguments are handed over, unchanged, to the non transposed kernel.
	 * \return the error value given back by the non transposed kernel.
	 */
	const rsb_err_t errval = rsb__BCSR_infty_norm_double_H__tN_r1_c1_uu_sH_dI_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	/* the conjugate-transposed case is served by the non transposed kernel */
	return errval;
}



rsb_err_t rsb__BCSR_rowssums_double_C__tN_r1_c1_uu_sU_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Rows sums kernel, non transposed, for a general matrix (\f$ A \neq A^T \f$).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * Each stored value of local row i is added, with its sign (no absolute value is taken), to row_sums[i+roff].
	 * Rows from br (included) to bc (excluded) are visited; values are read sequentially starting at VA.
	 * Arguments Mdim, mdim, bindx, indptr, rpntr, cpntr, coff, flags are not used here.
	 * \return RSB_ERR_NO_ERROR, always.
	 */
	const double *val = VA;	/* running cursor: advanced by one per visited entry */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t pos = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		for (; pos < row_end; ++pos, ++val)
		{
			double *const row_slot = row_sums + row;
			double entry = 0;	/* 1 x 1 block sum, built on a zero start as in the unrolled form */

			entry += *val;
			row_slot[roff] += entry;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_C__tN_r1_c1_uu_sU_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Rows sums kernel, non transposed, for a general matrix (\f$ A \neq A^T \f$).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * Each stored value of local row i is added, with its sign (no absolute value is taken), to row_sums[i+roff].
	 * Only stored entries are processed: no contribution for an implicit diagonal is added here.
	 * Rows from br (included) to bc (excluded) are visited; values are read sequentially starting at VA.
	 * Arguments Mdim, mdim, bindx, indptr, rpntr, cpntr, coff, flags are not used here.
	 * \return RSB_ERR_NO_ERROR, always.
	 */
	const double *val = VA;	/* running cursor: advanced by one per visited entry */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t pos = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		for (; pos < row_end; ++pos, ++val)
		{
			double *const row_slot = row_sums + row;
			double entry = 0;	/* 1 x 1 block sum, built on a zero start as in the unrolled form */

			entry += *val;
			row_slot[roff] += entry;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_H__tN_r1_c1_uu_sU_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Rows sums kernel, non transposed, for a general matrix (\f$ A \neq A^T \f$).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * Each stored value of local row i is added, with its sign (no absolute value is taken), to row_sums[i+roff].
	 * Rows from br (included) to bc (excluded) are visited; values are read sequentially starting at VA.
	 * Arguments Mdim, mdim, bindx, indptr, rpntr, cpntr, coff, flags are not used here.
	 * \return RSB_ERR_NO_ERROR, always.
	 */
	const double *val = VA;	/* running cursor: advanced by one per visited entry */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t pos = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		for (; pos < row_end; ++pos, ++val)
		{
			double *const row_slot = row_sums + row;
			double entry = 0;	/* 1 x 1 block sum, built on a zero start as in the unrolled form */

			entry += *val;
			row_slot[roff] += entry;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_H__tN_r1_c1_uu_sU_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Rows sums kernel, non transposed, for a general matrix (\f$ A \neq A^T \f$).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * Each stored value of local row i is added, with its sign (no absolute value is taken), to row_sums[i+roff].
	 * Only stored entries are processed: no contribution for an implicit diagonal is added here.
	 * Rows from br (included) to bc (excluded) are visited; values are read sequentially starting at VA.
	 * Arguments Mdim, mdim, bindx, indptr, rpntr, cpntr, coff, flags are not used here.
	 * \return RSB_ERR_NO_ERROR, always.
	 */
	const double *val = VA;	/* running cursor: advanced by one per visited entry */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t pos = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		for (; pos < row_end; ++pos, ++val)
		{
			double *const row_slot = row_sums + row;
			double entry = 0;	/* 1 x 1 block sum, built on a zero start as in the unrolled form */

			entry += *val;
			row_slot[roff] += entry;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_C__tT_r1_c1_uu_sU_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Rows sums kernel, transposed, for a general matrix (\f$ A \neq A^T \f$).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * Each stored value with local column index j is added, with its sign (no absolute value is taken), to row_sums[j+coff].
	 * Rows from br (included) to bc (excluded) are visited; values are read sequentially starting at VA.
	 * Arguments Mdim, mdim, indptr, rpntr, cpntr, roff, flags are not used here.
	 * \return RSB_ERR_NO_ERROR, always.
	 */
	const double *val = VA;	/* running cursor: advanced by one per visited entry */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t pos = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		for (; pos < row_end; ++pos, ++val)
		{
			const rsb_coo_idx_t col = bindx[pos];
			double *const col_slot = row_sums + col;	/* transposed: the destination follows the column */
			double entry = 0;	/* 1 x 1 block sum, built on a zero start as in the unrolled form */

			entry += *val;
			col_slot[coff] += entry;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_C__tT_r1_c1_uu_sU_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Rows sums kernel, transposed, for a general matrix (\f$ A \neq A^T \f$).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * Each stored value with local column index j is added, with its sign (no absolute value is taken), to row_sums[j+coff].
	 * Only stored entries are processed: no contribution for an implicit diagonal is added here.
	 * Rows from br (included) to bc (excluded) are visited; values are read sequentially starting at VA.
	 * Arguments Mdim, mdim, indptr, rpntr, cpntr, roff, flags are not used here.
	 * \return RSB_ERR_NO_ERROR, always.
	 */
	const double *val = VA;	/* running cursor: advanced by one per visited entry */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t pos = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		for (; pos < row_end; ++pos, ++val)
		{
			const rsb_coo_idx_t col = bindx[pos];
			double *const col_slot = row_sums + col;	/* transposed: the destination follows the column */
			double entry = 0;	/* 1 x 1 block sum, built on a zero start as in the unrolled form */

			entry += *val;
			col_slot[coff] += entry;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_H__tT_r1_c1_uu_sU_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Rows sums kernel, transposed, for a general matrix (\f$ A \neq A^T \f$).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * Each stored value with local column index j is added, with its sign (no absolute value is taken), to row_sums[j+coff].
	 * Rows from br (included) to bc (excluded) are visited; values are read sequentially starting at VA.
	 * Arguments Mdim, mdim, indptr, rpntr, cpntr, roff, flags are not used here.
	 * \return RSB_ERR_NO_ERROR, always.
	 */
	const double *val = VA;	/* running cursor: advanced by one per visited entry */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t pos = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		for (; pos < row_end; ++pos, ++val)
		{
			/* half-width index widened to the full index type */
			const rsb_coo_idx_t col = bindx[pos];
			double *const col_slot = row_sums + col;	/* transposed: the destination follows the column */
			double entry = 0;	/* 1 x 1 block sum, built on a zero start as in the unrolled form */

			entry += *val;
			col_slot[coff] += entry;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_H__tT_r1_c1_uu_sU_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Rows sums kernel, transposed, for a general matrix (\f$ A \neq A^T \f$).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * Each stored value with local column index j is added, with its sign (no absolute value is taken), to row_sums[j+coff].
	 * Only stored entries are processed: no contribution for an implicit diagonal is added here.
	 * Rows from br (included) to bc (excluded) are visited; values are read sequentially starting at VA.
	 * Arguments Mdim, mdim, indptr, rpntr, cpntr, roff, flags are not used here.
	 * \return RSB_ERR_NO_ERROR, always.
	 */
	const double *val = VA;	/* running cursor: advanced by one per visited entry */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t pos = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		for (; pos < row_end; ++pos, ++val)
		{
			/* half-width index widened to the full index type */
			const rsb_coo_idx_t col = bindx[pos];
			double *const col_slot = row_sums + col;	/* transposed: the destination follows the column */
			double entry = 0;	/* 1 x 1 block sum, built on a zero start as in the unrolled form */

			entry += *val;
			col_slot[coff] += entry;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_C__tC_r1_c1_uu_sU_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Rows sums kernel, conjugate-transposed, for a general matrix (\f$ A \neq A^T \f$).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * Each stored value with local column index j is added, with its sign (no absolute value is taken), to row_sums[j+coff].
	 * Values being real, no conjugation takes place: the values are summed as they are stored.
	 * Rows from br (included) to bc (excluded) are visited; values are read sequentially starting at VA.
	 * Arguments Mdim, mdim, indptr, rpntr, cpntr, roff, flags are not used here.
	 * \return RSB_ERR_NO_ERROR, always.
	 */
	const double *val = VA;	/* running cursor: advanced by one per visited entry */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t pos = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		for (; pos < row_end; ++pos, ++val)
		{
			const rsb_coo_idx_t col = bindx[pos];
			double *const col_slot = row_sums + col;	/* transposed: the destination follows the column */
			double entry = 0;	/* 1 x 1 block sum, built on a zero start as in the unrolled form */

			entry += *val;
			col_slot[coff] += entry;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_C__tC_r1_c1_uu_sU_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Rows sums kernel, conjugate-transposed, for a general matrix (\f$ A \neq A^T \f$).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * Each stored value with local column index j is added, with its sign (no absolute value is taken), to row_sums[j+coff].
	 * Values being real, no conjugation takes place: the values are summed as they are stored.
	 * Only stored entries are processed: no contribution for an implicit diagonal is added here.
	 * Rows from br (included) to bc (excluded) are visited; values are read sequentially starting at VA.
	 * Arguments Mdim, mdim, indptr, rpntr, cpntr, roff, flags are not used here.
	 * \return RSB_ERR_NO_ERROR, always.
	 */
	const double *val = VA;	/* running cursor: advanced by one per visited entry */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t pos = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		for (; pos < row_end; ++pos, ++val)
		{
			const rsb_coo_idx_t col = bindx[pos];
			double *const col_slot = row_sums + col;	/* transposed: the destination follows the column */
			double entry = 0;	/* 1 x 1 block sum, built on a zero start as in the unrolled form */

			entry += *val;
			col_slot[coff] += entry;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_H__tC_r1_c1_uu_sU_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Rows sums kernel, conjugate-transposed, for a general matrix (\f$ A \neq A^T \f$).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * Each stored value with local column index j is added, with its sign (no absolute value is taken), to row_sums[j+coff].
	 * Values being real, no conjugation takes place: the values are summed as they are stored.
	 * Rows from br (included) to bc (excluded) are visited; values are read sequentially starting at VA.
	 * Arguments Mdim, mdim, indptr, rpntr, cpntr, roff, flags are not used here.
	 * \return RSB_ERR_NO_ERROR, always.
	 */
	const double *val = VA;	/* running cursor: advanced by one per visited entry */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t pos = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		for (; pos < row_end; ++pos, ++val)
		{
			/* half-width index widened to the full index type */
			const rsb_coo_idx_t col = bindx[pos];
			double *const col_slot = row_sums + col;	/* transposed: the destination follows the column */
			double entry = 0;	/* 1 x 1 block sum, built on a zero start as in the unrolled form */

			entry += *val;
			col_slot[coff] += entry;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_H__tC_r1_c1_uu_sU_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed row sums kernel (\f$ A \neq A^T \f$ case): every stored value is accumulated at its column index.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 * For each i in [br,bc) and each k in [bpntr[i],bpntr[i+1]), the next value is added to row_sums[bindx[k]+coff].
	 * Values are taken consecutively starting from VA[0]; indptr is not consulted.
	 * No diagonal-specific handling happens in this function.
	 * \return \rsb_errval_inp_param_msg (this code path only ever returns RSB_ERR_NO_ERROR)
	 */

	const double *val = VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row = br;

	/* bounded box patch (experimental): only indices in [br,bc) are scanned */
	while(RSB_LIKELY(row<bc))
	{
		const rsb_nnz_idx_t row_end = bpntr[row+1];
		rsb_nnz_idx_t nz = bpntr[row];

		for(; nz<row_end; ++nz, ++val)
		{
			/* half-width index widened to the full coordinate type */
			const rsb_coo_idx_t col = bindx[nz];
			/* accumulate from zero, exactly as the unrolled generated body did */
			const double term = 0 + val[0];

			*(row_sums + col + coff) += term;
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_C__tN_r1_c1_uu_sS_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel for the \f$ A = A^T \f$ case: every stored value is counted for its row and mirrored to its column.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 * For each i in [br,bc) and each k in [bpntr[i],bpntr[i+1]), the next value is added to row_sums[i+roff] and,
	 * unless roff==coff and i==bindx[k], also to row_sums[bindx[k]+coff].
	 * Values are taken consecutively starting from VA[0]; indptr is not consulted.
	 * \return \rsb_errval_inp_param_msg (this code path only ever returns RSB_ERR_NO_ERROR)
	 */

	const double *val = VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row = br;

	/* bounded box patch (experimental): only indices in [br,bc) are scanned */
	while(RSB_LIKELY(row<bc))
	{
		const rsb_nnz_idx_t row_end = bpntr[row+1];
		rsb_nnz_idx_t nz = bpntr[row];

		for(; nz<row_end; ++nz, ++val)
		{
			const rsb_coo_idx_t col = bindx[nz];
			/* accumulate from zero, exactly as the unrolled generated body did */
			const double term = 0 + val[0];

			*(row_sums + row + roff) += term;

			/* skip the mirrored update only when offsets and local indices both coincide */
			if(!(roff==coff && row==col))
				row_sums[coff+col] += term;
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_C__tN_r1_c1_uu_sS_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel for the \f$ A = A^T \f$ case: every stored value is counted for its row and mirrored to its column.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 * For each i in [br,bc) and each k in [bpntr[i],bpntr[i+1]), the next value is added to row_sums[i+roff] and,
	 * unless roff==coff and i==bindx[k], also to row_sums[bindx[k]+coff].
	 * Values are taken consecutively starting from VA[0]; indptr is not consulted.
	 * \return \rsb_errval_inp_param_msg (this code path only ever returns RSB_ERR_NO_ERROR)
	 */

	const double *val = VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row = br;

	/* bounded box patch (experimental): only indices in [br,bc) are scanned */
	while(RSB_LIKELY(row<bc))
	{
		const rsb_nnz_idx_t row_end = bpntr[row+1];
		rsb_nnz_idx_t nz = bpntr[row];

		for(; nz<row_end; ++nz, ++val)
		{
			const rsb_coo_idx_t col = bindx[nz];
			/* accumulate from zero, exactly as the unrolled generated body did */
			const double term = 0 + val[0];

			*(row_sums + row + roff) += term;

			/* skip the mirrored update only when offsets and local indices both coincide */
			if(!(roff==coff && row==col))
				row_sums[coff+col] += term;
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_H__tN_r1_c1_uu_sS_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel for the \f$ A = A^T \f$ case: every stored value is counted for its row and mirrored to its column.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 * For each i in [br,bc) and each k in [bpntr[i],bpntr[i+1]), the next value is added to row_sums[i+roff] and,
	 * unless roff==coff and i==bindx[k], also to row_sums[bindx[k]+coff].
	 * Values are taken consecutively starting from VA[0]; indptr is not consulted.
	 * \return \rsb_errval_inp_param_msg (this code path only ever returns RSB_ERR_NO_ERROR)
	 */

	const double *val = VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row = br;

	/* bounded box patch (experimental): only indices in [br,bc) are scanned */
	while(RSB_LIKELY(row<bc))
	{
		const rsb_nnz_idx_t row_end = bpntr[row+1];
		rsb_nnz_idx_t nz = bpntr[row];

		for(; nz<row_end; ++nz, ++val)
		{
			/* half-width index widened to the full coordinate type */
			const rsb_coo_idx_t col = bindx[nz];
			/* accumulate from zero, exactly as the unrolled generated body did */
			const double term = 0 + val[0];

			*(row_sums + row + roff) += term;

			/* skip the mirrored update only when offsets and local indices both coincide */
			if(!(roff==coff && row==col))
				row_sums[coff+col] += term;
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_H__tN_r1_c1_uu_sS_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel for the \f$ A = A^T \f$ case: every stored value is counted for its row and mirrored to its column.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 * For each i in [br,bc) and each k in [bpntr[i],bpntr[i+1]), the next value is added to row_sums[i+roff] and,
	 * unless roff==coff and i==bindx[k], also to row_sums[bindx[k]+coff].
	 * Values are taken consecutively starting from VA[0]; indptr is not consulted.
	 * \return \rsb_errval_inp_param_msg (this code path only ever returns RSB_ERR_NO_ERROR)
	 */

	const double *val = VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row = br;

	/* bounded box patch (experimental): only indices in [br,bc) are scanned */
	while(RSB_LIKELY(row<bc))
	{
		const rsb_nnz_idx_t row_end = bpntr[row+1];
		rsb_nnz_idx_t nz = bpntr[row];

		for(; nz<row_end; ++nz, ++val)
		{
			/* half-width index widened to the full coordinate type */
			const rsb_coo_idx_t col = bindx[nz];
			/* accumulate from zero, exactly as the unrolled generated body did */
			const double term = 0 + val[0];

			*(row_sums + row + roff) += term;

			/* skip the mirrored update only when offsets and local indices both coincide */
			if(!(roff==coff && row==col))
				row_sums[coff+col] += term;
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_C__tT_r1_c1_uu_sS_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed (tT) row sums entry point for the \f$ A = A^T \f$ case.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 * Forwards every argument unchanged to the untransposed (tN) kernel and hands back its result.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* With A == A^T the transposed operation is the untransposed one: delegate. */
	const rsb_err_t errval = rsb__BCSR_rowssums_double_C__tN_r1_c1_uu_sS_dE_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_rowssums_double_C__tT_r1_c1_uu_sS_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed (tT) row sums entry point for the \f$ A = A^T \f$ case.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 * Forwards every argument unchanged to the untransposed (tN) kernel and hands back its result.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* With A == A^T the transposed operation is the untransposed one: delegate. */
	const rsb_err_t errval = rsb__BCSR_rowssums_double_C__tN_r1_c1_uu_sS_dI_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_rowssums_double_H__tT_r1_c1_uu_sS_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed (tT) row sums entry point for the \f$ A = A^T \f$ case.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 * Forwards every argument unchanged to the untransposed (tN) kernel and hands back its result.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* With A == A^T the transposed operation is the untransposed one: delegate. */
	const rsb_err_t errval = rsb__BCSR_rowssums_double_H__tN_r1_c1_uu_sS_dE_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_rowssums_double_H__tT_r1_c1_uu_sS_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed (tT) row sums entry point for the \f$ A = A^T \f$ case.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 * Forwards every argument unchanged to the untransposed (tN) kernel and hands back its result.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* With A == A^T the transposed operation is the untransposed one: delegate. */
	const rsb_err_t errval = rsb__BCSR_rowssums_double_H__tN_r1_c1_uu_sS_dI_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_rowssums_double_C__tC_r1_c1_uu_sS_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * tC-variant row sums entry point for the \f$ A = A^T \f$ case.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 * Forwards every argument unchanged to the untransposed (tN) kernel and hands back its result.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* Symmetric transposed falls back to symmetric untransposed: delegate. */
	const rsb_err_t errval = rsb__BCSR_rowssums_double_C__tN_r1_c1_uu_sS_dE_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_rowssums_double_C__tC_r1_c1_uu_sS_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * tC-variant row sums entry point for the \f$ A = A^T \f$ case.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 * Forwards every argument unchanged to the untransposed (tN) kernel and hands back its result.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* Symmetric transposed falls back to symmetric untransposed: delegate. */
	const rsb_err_t errval = rsb__BCSR_rowssums_double_C__tN_r1_c1_uu_sS_dI_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_rowssums_double_H__tC_r1_c1_uu_sS_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * tC-variant row sums entry point for the \f$ A = A^T \f$ case.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 * Forwards every argument unchanged to the untransposed (tN) kernel and hands back its result.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* Symmetric transposed falls back to symmetric untransposed: delegate. */
	const rsb_err_t errval = rsb__BCSR_rowssums_double_H__tN_r1_c1_uu_sS_dE_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_rowssums_double_H__tC_r1_c1_uu_sS_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * tC-variant row sums entry point for the \f$ A = A^T \f$ case.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 * Forwards every argument unchanged to the untransposed (tN) kernel and hands back its result.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* Symmetric transposed falls back to symmetric untransposed: delegate. */
	const rsb_err_t errval = rsb__BCSR_rowssums_double_H__tN_r1_c1_uu_sS_dI_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_rowssums_double_C__tN_r1_c1_uu_sH_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel for the \f$ A = A^H \f$ case: every stored value is counted for its row and mirrored to its column.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 * For each i in [br,bc) and each k in [bpntr[i],bpntr[i+1]), the next value is added to row_sums[i+roff] and,
	 * unless roff==coff and i==bindx[k], also to row_sums[bindx[k]+coff].
	 * Values are taken consecutively starting from VA[0]; indptr is not consulted.
	 * \return \rsb_errval_inp_param_msg (this code path only ever returns RSB_ERR_NO_ERROR)
	 */

	const double *val = VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row = br;

	/* bounded box patch (experimental): only indices in [br,bc) are scanned */
	while(RSB_LIKELY(row<bc))
	{
		const rsb_nnz_idx_t row_end = bpntr[row+1];
		rsb_nnz_idx_t nz = bpntr[row];

		for(; nz<row_end; ++nz, ++val)
		{
			const rsb_coo_idx_t col = bindx[nz];
			/* accumulate from zero, exactly as the unrolled generated body did */
			const double term = 0 + val[0];

			*(row_sums + row + roff) += term;

			/* skip the mirrored update only when offsets and local indices both coincide */
			if(!(roff==coff && row==col))
				row_sums[coff+col] += term;
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_C__tN_r1_c1_uu_sH_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel for the \f$ A = A^H \f$ case: every stored value is counted for its row and mirrored to its column.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 * For each i in [br,bc) and each k in [bpntr[i],bpntr[i+1]), the next value is added to row_sums[i+roff] and,
	 * unless roff==coff and i==bindx[k], also to row_sums[bindx[k]+coff].
	 * Values are taken consecutively starting from VA[0]; indptr is not consulted.
	 * \return \rsb_errval_inp_param_msg (this code path only ever returns RSB_ERR_NO_ERROR)
	 */

	const double *val = VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row = br;

	/* bounded box patch (experimental): only indices in [br,bc) are scanned */
	while(RSB_LIKELY(row<bc))
	{
		const rsb_nnz_idx_t row_end = bpntr[row+1];
		rsb_nnz_idx_t nz = bpntr[row];

		for(; nz<row_end; ++nz, ++val)
		{
			const rsb_coo_idx_t col = bindx[nz];
			/* accumulate from zero, exactly as the unrolled generated body did */
			const double term = 0 + val[0];

			*(row_sums + row + roff) += term;

			/* skip the mirrored update only when offsets and local indices both coincide */
			if(!(roff==coff && row==col))
				row_sums[coff+col] += term;
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_H__tN_r1_c1_uu_sH_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel for the \f$ A = A^H \f$ case: every stored value is counted for its row and mirrored to its column.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 * For each i in [br,bc) and each k in [bpntr[i],bpntr[i+1]), the next value is added to row_sums[i+roff] and,
	 * unless roff==coff and i==bindx[k], also to row_sums[bindx[k]+coff].
	 * Values are taken consecutively starting from VA[0]; indptr is not consulted.
	 * \return \rsb_errval_inp_param_msg (this code path only ever returns RSB_ERR_NO_ERROR)
	 */

	const double *val = VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row = br;

	/* bounded box patch (experimental): only indices in [br,bc) are scanned */
	while(RSB_LIKELY(row<bc))
	{
		const rsb_nnz_idx_t row_end = bpntr[row+1];
		rsb_nnz_idx_t nz = bpntr[row];

		for(; nz<row_end; ++nz, ++val)
		{
			/* half-width index widened to the full coordinate type */
			const rsb_coo_idx_t col = bindx[nz];
			/* accumulate from zero, exactly as the unrolled generated body did */
			const double term = 0 + val[0];

			*(row_sums + row + roff) += term;

			/* skip the mirrored update only when offsets and local indices both coincide */
			if(!(roff==coff && row==col))
				row_sums[coff+col] += term;
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_H__tN_r1_c1_uu_sH_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel for the \f$ A = A^H \f$ case: every stored value is counted for its row and mirrored to its column.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 * For each i in [br,bc) and each k in [bpntr[i],bpntr[i+1]), the next value is added to row_sums[i+roff] and,
	 * unless roff==coff and i==bindx[k], also to row_sums[bindx[k]+coff].
	 * Values are taken consecutively starting from VA[0]; indptr is not consulted.
	 * \return \rsb_errval_inp_param_msg (this code path only ever returns RSB_ERR_NO_ERROR)
	 */

	const double *val = VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row = br;

	/* bounded box patch (experimental): only indices in [br,bc) are scanned */
	while(RSB_LIKELY(row<bc))
	{
		const rsb_nnz_idx_t row_end = bpntr[row+1];
		rsb_nnz_idx_t nz = bpntr[row];

		for(; nz<row_end; ++nz, ++val)
		{
			/* half-width index widened to the full coordinate type */
			const rsb_coo_idx_t col = bindx[nz];
			/* accumulate from zero, exactly as the unrolled generated body did */
			const double term = 0 + val[0];

			*(row_sums + row + roff) += term;

			/* skip the mirrored update only when offsets and local indices both coincide */
			if(!(roff==coff && row==col))
				row_sums[coff+col] += term;
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_C__tT_r1_c1_uu_sH_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed (tT) row sums entry point for the \f$ A = A^H \f$ case.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 * Forwards every argument unchanged to the untransposed (tN) kernel and hands back its result.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* Symmetric transposed falls back to symmetric untransposed: delegate. */
	const rsb_err_t errval = rsb__BCSR_rowssums_double_C__tN_r1_c1_uu_sH_dE_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_rowssums_double_C__tT_r1_c1_uu_sH_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed (tT) row sums entry point for the \f$ A = A^H \f$ case.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 * Forwards every argument unchanged to the untransposed (tN) kernel and hands back its result.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* Symmetric transposed falls back to symmetric untransposed: delegate. */
	const rsb_err_t errval = rsb__BCSR_rowssums_double_C__tN_r1_c1_uu_sH_dI_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_rowssums_double_H__tT_r1_c1_uu_sH_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed (tT) row sums entry point for the \f$ A = A^H \f$ case.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 * Forwards every argument unchanged to the untransposed (tN) kernel and hands back its result.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* Symmetric transposed falls back to symmetric untransposed: delegate. */
	const rsb_err_t errval = rsb__BCSR_rowssums_double_H__tN_r1_c1_uu_sH_dE_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_rowssums_double_H__tT_r1_c1_uu_sH_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed (tT) row sums entry point for the \f$ A = A^H \f$ case.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 * Forwards every argument unchanged to the untransposed (tN) kernel and hands back its result.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* Symmetric transposed falls back to symmetric untransposed: delegate. */
	const rsb_err_t errval = rsb__BCSR_rowssums_double_H__tN_r1_c1_uu_sH_dI_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_rowssums_double_C__tC_r1_c1_uu_sH_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * tC-variant row sums entry point for the \f$ A = A^H \f$ case.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 * Forwards every argument unchanged to the untransposed (tN) kernel and hands back its result.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* Symmetric transposed falls back to symmetric untransposed: delegate. */
	const rsb_err_t errval = rsb__BCSR_rowssums_double_C__tN_r1_c1_uu_sH_dE_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_rowssums_double_C__tC_r1_c1_uu_sH_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * tC-variant row sums entry point for the \f$ A = A^H \f$ case.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 * Forwards every argument unchanged to the untransposed (tN) kernel and hands back its result.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* Symmetric transposed falls back to symmetric untransposed: delegate. */
	const rsb_err_t errval = rsb__BCSR_rowssums_double_C__tN_r1_c1_uu_sH_dI_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_rowssums_double_H__tC_r1_c1_uu_sH_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * tC-variant row sums entry point for the \f$ A = A^H \f$ case.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 * Forwards every argument unchanged to the untransposed (tN) kernel and hands back its result.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* Symmetric transposed falls back to symmetric untransposed: delegate. */
	const rsb_err_t errval = rsb__BCSR_rowssums_double_H__tN_r1_c1_uu_sH_dE_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_rowssums_double_H__tC_r1_c1_uu_sH_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * tC-variant row sums entry point for the \f$ A = A^H \f$ case.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 * Forwards every argument unchanged to the untransposed (tN) kernel and hands back its result.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* Symmetric transposed falls back to symmetric untransposed: delegate. */
	const rsb_err_t errval = rsb__BCSR_rowssums_double_H__tN_r1_c1_uu_sH_dI_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_scale_double_C__tN_r1_c1_uu_sU_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Scaling kernel for the \f$ A \neq A^T \f$ case: computes \f$ A \leftarrow P \cdot A \f$, with \f$ P_{ii}=s_{i} \f$
	 * (taking the scanned index i as the row index, per the row major access this file assumes).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 * For each i in [br,bc) and each k in [bpntr[i],bpntr[i+1]), the next value is multiplied in place by scale_factors[i].
	 * Values are taken consecutively starting from VA[0]; bindx, indptr, roff and coff are not consulted.
	 * \return \rsb_errval_inp_param_msg (this code path only ever returns RSB_ERR_NO_ERROR)
	 */

	double *val = VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row = br;

	/* bounded box patch (experimental): only indices in [br,bc) are scanned */
	while(RSB_LIKELY(row<bc))
	{
		const rsb_nnz_idx_t row_end = bpntr[row+1];
		rsb_nnz_idx_t nz = bpntr[row];

		/* the factor is re-read at every step: scale_factors is not restrict-qualified */
		for(; nz<row_end; ++nz, ++val)
			*val *= scale_factors[row];
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_C__tN_r1_c1_uu_sU_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row scaling: \f$ a_{ij} \leftarrow a_{ij} \cdot s_{i} \f$ on each stored nonzero, s being scale_factors.
	 * Variant: tN (not transposed), 1 x 1 blocks, BCSR storage, type double, A \neq A^T, diagonal implicit, rsb_coo_idx_t column indices.
	 * Visits rows br (included) up to bc (excluded); bpntr[r] and bpntr[r+1] bound the nonzeroes of row r.
	 * NOTE(review): values are taken consecutively from VA[0] on, so bpntr[br] is presumably 0 -- confirm with the callers.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not used here.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	double *val = VA;	/* next stored nonzero to be scaled */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* row range of the bounded box */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		while (nz < row_end)
		{
			/* Column index: loaded as in the generated template, although row scaling does not need it. */
			const rsb_coo_idx_t col = bindx[nz];

			(void)col;
			*val *= scale_factors[row];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_H__tN_r1_c1_uu_sU_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row scaling: \f$ a_{ij} \leftarrow a_{ij} \cdot s_{i} \f$ on each stored nonzero, s being scale_factors.
	 * Variant: tN (not transposed), 1 x 1 blocks, BCSR storage, type double, A \neq A^T, diagonal explicit, rsb_half_idx_t column indices.
	 * Visits rows br (included) up to bc (excluded); bpntr[r] and bpntr[r+1] bound the nonzeroes of row r.
	 * NOTE(review): values are taken consecutively from VA[0] on, so bpntr[br] is presumably 0 -- confirm with the callers.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not used here.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	double *val = VA;	/* next stored nonzero to be scaled */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* row range of the bounded box */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		while (nz < row_end)
		{
			/* Column index: loaded as in the generated template, although row scaling does not need it. */
			const rsb_coo_idx_t col = bindx[nz];

			(void)col;
			*val *= scale_factors[row];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_H__tN_r1_c1_uu_sU_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row scaling: \f$ a_{ij} \leftarrow a_{ij} \cdot s_{i} \f$ on each stored nonzero, s being scale_factors.
	 * Variant: tN (not transposed), 1 x 1 blocks, BCSR storage, type double, A \neq A^T, diagonal implicit, rsb_half_idx_t column indices.
	 * Visits rows br (included) up to bc (excluded); bpntr[r] and bpntr[r+1] bound the nonzeroes of row r.
	 * NOTE(review): values are taken consecutively from VA[0] on, so bpntr[br] is presumably 0 -- confirm with the callers.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not used here.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	double *val = VA;	/* next stored nonzero to be scaled */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* row range of the bounded box */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		while (nz < row_end)
		{
			/* Column index: loaded as in the generated template, although row scaling does not need it. */
			const rsb_coo_idx_t col = bindx[nz];

			(void)col;
			*val *= scale_factors[row];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_C__tT_r1_c1_uu_sU_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Column scaling: \f$ a_{ij} \leftarrow a_{ij} \cdot s_{j} \f$ on each stored nonzero, s being scale_factors and j the column index read from bindx.
	 * Variant: tT (transposed), 1 x 1 blocks, BCSR storage, type double, A \neq A^T, diagonal explicit, rsb_coo_idx_t column indices.
	 * Visits rows br (included) up to bc (excluded); bpntr[r] and bpntr[r+1] bound the nonzeroes of row r.
	 * NOTE(review): values are taken consecutively from VA[0] on, so bpntr[br] is presumably 0 -- confirm with the callers.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not used here.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	double *val = VA;	/* next stored nonzero to be scaled */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* row range of the bounded box */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		while (nz < row_end)
		{
			/* The column of this nonzero selects the scaling factor. */
			const rsb_coo_idx_t col = bindx[nz];

			*val *= scale_factors[col];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_C__tT_r1_c1_uu_sU_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Column scaling: \f$ a_{ij} \leftarrow a_{ij} \cdot s_{j} \f$ on each stored nonzero, s being scale_factors and j the column index read from bindx.
	 * Variant: tT (transposed), 1 x 1 blocks, BCSR storage, type double, A \neq A^T, diagonal implicit, rsb_coo_idx_t column indices.
	 * Visits rows br (included) up to bc (excluded); bpntr[r] and bpntr[r+1] bound the nonzeroes of row r.
	 * NOTE(review): values are taken consecutively from VA[0] on, so bpntr[br] is presumably 0 -- confirm with the callers.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not used here.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	double *val = VA;	/* next stored nonzero to be scaled */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* row range of the bounded box */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		while (nz < row_end)
		{
			/* The column of this nonzero selects the scaling factor. */
			const rsb_coo_idx_t col = bindx[nz];

			*val *= scale_factors[col];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_H__tT_r1_c1_uu_sU_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Column scaling: \f$ a_{ij} \leftarrow a_{ij} \cdot s_{j} \f$ on each stored nonzero, s being scale_factors and j the column index read from bindx.
	 * Variant: tT (transposed), 1 x 1 blocks, BCSR storage, type double, A \neq A^T, diagonal explicit, rsb_half_idx_t column indices.
	 * Visits rows br (included) up to bc (excluded); bpntr[r] and bpntr[r+1] bound the nonzeroes of row r.
	 * NOTE(review): values are taken consecutively from VA[0] on, so bpntr[br] is presumably 0 -- confirm with the callers.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not used here.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	double *val = VA;	/* next stored nonzero to be scaled */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* row range of the bounded box */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		while (nz < row_end)
		{
			/* The column of this nonzero selects the scaling factor. */
			const rsb_coo_idx_t col = bindx[nz];

			*val *= scale_factors[col];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_H__tT_r1_c1_uu_sU_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Column scaling: \f$ a_{ij} \leftarrow a_{ij} \cdot s_{j} \f$ on each stored nonzero, s being scale_factors and j the column index read from bindx.
	 * Variant: tT (transposed), 1 x 1 blocks, BCSR storage, type double, A \neq A^T, diagonal implicit, rsb_half_idx_t column indices.
	 * Visits rows br (included) up to bc (excluded); bpntr[r] and bpntr[r+1] bound the nonzeroes of row r.
	 * NOTE(review): values are taken consecutively from VA[0] on, so bpntr[br] is presumably 0 -- confirm with the callers.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not used here.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	double *val = VA;	/* next stored nonzero to be scaled */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* row range of the bounded box */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		while (nz < row_end)
		{
			/* The column of this nonzero selects the scaling factor. */
			const rsb_coo_idx_t col = bindx[nz];

			*val *= scale_factors[col];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_C__tC_r1_c1_uu_sU_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Column scaling: \f$ a_{ij} \leftarrow a_{ij} \cdot s_{j} \f$ on each stored nonzero, s being scale_factors and j the column index read from bindx.
	 * Variant: tC operation, 1 x 1 blocks, BCSR storage, type double, A \neq A^T, diagonal explicit, rsb_coo_idx_t column indices.
	 * Visits rows br (included) up to bc (excluded); bpntr[r] and bpntr[r+1] bound the nonzeroes of row r.
	 * NOTE(review): values are taken consecutively from VA[0] on, so bpntr[br] is presumably 0 -- confirm with the callers.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not used here.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	double *val = VA;	/* next stored nonzero to be scaled */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* row range of the bounded box */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		while (nz < row_end)
		{
			/* The column of this nonzero selects the scaling factor. */
			const rsb_coo_idx_t col = bindx[nz];

			*val *= scale_factors[col];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_C__tC_r1_c1_uu_sU_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Column scaling: \f$ a_{ij} \leftarrow a_{ij} \cdot s_{j} \f$ on each stored nonzero, s being scale_factors and j the column index read from bindx.
	 * Variant: tC operation, 1 x 1 blocks, BCSR storage, type double, A \neq A^T, diagonal implicit, rsb_coo_idx_t column indices.
	 * Visits rows br (included) up to bc (excluded); bpntr[r] and bpntr[r+1] bound the nonzeroes of row r.
	 * NOTE(review): values are taken consecutively from VA[0] on, so bpntr[br] is presumably 0 -- confirm with the callers.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not used here.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	double *val = VA;	/* next stored nonzero to be scaled */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* row range of the bounded box */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		while (nz < row_end)
		{
			/* The column of this nonzero selects the scaling factor. */
			const rsb_coo_idx_t col = bindx[nz];

			*val *= scale_factors[col];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_H__tC_r1_c1_uu_sU_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Column scaling: \f$ a_{ij} \leftarrow a_{ij} \cdot s_{j} \f$ on each stored nonzero, s being scale_factors and j the column index read from bindx.
	 * Variant: tC operation, 1 x 1 blocks, BCSR storage, type double, A \neq A^T, diagonal explicit, rsb_half_idx_t column indices.
	 * Visits rows br (included) up to bc (excluded); bpntr[r] and bpntr[r+1] bound the nonzeroes of row r.
	 * NOTE(review): values are taken consecutively from VA[0] on, so bpntr[br] is presumably 0 -- confirm with the callers.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not used here.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	double *val = VA;	/* next stored nonzero to be scaled */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* row range of the bounded box */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		while (nz < row_end)
		{
			/* The column of this nonzero selects the scaling factor. */
			const rsb_coo_idx_t col = bindx[nz];

			*val *= scale_factors[col];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_H__tC_r1_c1_uu_sU_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Column scaling: \f$ a_{ij} \leftarrow a_{ij} \cdot s_{j} \f$ on each stored nonzero, s being scale_factors and j the column index read from bindx.
	 * Variant: tC operation, 1 x 1 blocks, BCSR storage, type double, A \neq A^T, diagonal implicit, rsb_half_idx_t column indices.
	 * Visits rows br (included) up to bc (excluded); bpntr[r] and bpntr[r+1] bound the nonzeroes of row r.
	 * NOTE(review): values are taken consecutively from VA[0] on, so bpntr[br] is presumably 0 -- confirm with the callers.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not used here.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	double *val = VA;	/* next stored nonzero to be scaled */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* row range of the bounded box */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		while (nz < row_end)
		{
			/* The column of this nonzero selects the scaling factor. */
			const rsb_coo_idx_t col = bindx[nz];

			*val *= scale_factors[col];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_C__tN_r1_c1_uu_sS_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row scaling: \f$ a_{ij} \leftarrow a_{ij} \cdot s_{i} \f$ on each stored nonzero, s being scale_factors.
	 * Variant: tN (not transposed), 1 x 1 blocks, BCSR storage, type double, A == A^T, diagonal explicit, rsb_coo_idx_t column indices.
	 * Visits rows br (included) up to bc (excluded); bpntr[r] and bpntr[r+1] bound the nonzeroes of row r.
	 * NOTE(review): values are taken consecutively from VA[0] on, so bpntr[br] is presumably 0 -- confirm with the callers.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not used here.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	double *val = VA;	/* next stored nonzero to be scaled */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* row range of the bounded box */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		while (nz < row_end)
		{
			/* Column index: loaded as in the generated template, although row scaling does not need it. */
			const rsb_coo_idx_t col = bindx[nz];

			(void)col;
			*val *= scale_factors[row];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_C__tN_r1_c1_uu_sS_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row scaling: \f$ a_{ij} \leftarrow a_{ij} \cdot s_{i} \f$ on each stored nonzero, s being scale_factors.
	 * Variant: tN (not transposed), 1 x 1 blocks, BCSR storage, type double, A == A^T, diagonal implicit, rsb_coo_idx_t column indices.
	 * Visits rows br (included) up to bc (excluded); bpntr[r] and bpntr[r+1] bound the nonzeroes of row r.
	 * NOTE(review): values are taken consecutively from VA[0] on, so bpntr[br] is presumably 0 -- confirm with the callers.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not used here.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	double *val = VA;	/* next stored nonzero to be scaled */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* row range of the bounded box */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		while (nz < row_end)
		{
			/* Column index: loaded as in the generated template, although row scaling does not need it. */
			const rsb_coo_idx_t col = bindx[nz];

			(void)col;
			*val *= scale_factors[row];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_H__tN_r1_c1_uu_sS_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row scaling: \f$ a_{ij} \leftarrow a_{ij} \cdot s_{i} \f$ on each stored nonzero, s being scale_factors.
	 * Variant: tN (not transposed), 1 x 1 blocks, BCSR storage, type double, A == A^T, diagonal explicit, rsb_half_idx_t column indices.
	 * Visits rows br (included) up to bc (excluded); bpntr[r] and bpntr[r+1] bound the nonzeroes of row r.
	 * NOTE(review): values are taken consecutively from VA[0] on, so bpntr[br] is presumably 0 -- confirm with the callers.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not used here.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	double *val = VA;	/* next stored nonzero to be scaled */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* row range of the bounded box */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		while (nz < row_end)
		{
			/* Column index: loaded as in the generated template, although row scaling does not need it. */
			const rsb_coo_idx_t col = bindx[nz];

			(void)col;
			*val *= scale_factors[row];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_H__tN_r1_c1_uu_sS_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row scaling: \f$ a_{ij} \leftarrow a_{ij} \cdot s_{i} \f$ on each stored nonzero, s being scale_factors.
	 * Variant: tN (not transposed), 1 x 1 blocks, BCSR storage, type double, A == A^T, diagonal implicit, rsb_half_idx_t column indices.
	 * Visits rows br (included) up to bc (excluded); bpntr[r] and bpntr[r+1] bound the nonzeroes of row r.
	 * NOTE(review): values are taken consecutively from VA[0] on, so bpntr[br] is presumably 0 -- confirm with the callers.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not used here.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	double *val = VA;	/* next stored nonzero to be scaled */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* row range of the bounded box */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		while (nz < row_end)
		{
			/* Column index: loaded as in the generated template, although row scaling does not need it. */
			const rsb_coo_idx_t col = bindx[nz];

			(void)col;
			*val *= scale_factors[row];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_C__tT_r1_c1_uu_sS_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Scaling kernel entry point for the tT (transposed) operation on a matrix with A == A^T.
	 * Variant: 1 x 1 blocks, BCSR storage, type double, diagonal explicit, rsb_coo_idx_t column indices.
	 * No work is done here: symmetric transposed reverts to symmetric not transposed,
	 * so every argument is handed over unchanged to the tN kernel of the same variant.
	 * \return the value returned by rsb__BCSR_scale_double_C__tN_r1_c1_uu_sS_dE_uG.
	 */

	const rsb_err_t errval = rsb__BCSR_scale_double_C__tN_r1_c1_uu_sS_dE_uG(
		VA, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, scale_factors);

	return errval;
}



rsb_err_t rsb__BCSR_scale_double_C__tT_r1_c1_uu_sS_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Scaling kernel entry point for the tT (transposed) operation on a matrix with A == A^T.
	 * Variant: 1 x 1 blocks, BCSR storage, type double, diagonal implicit, rsb_coo_idx_t column indices.
	 * No work is done here: symmetric transposed reverts to symmetric not transposed,
	 * so every argument is handed over unchanged to the tN kernel of the same variant.
	 * \return the value returned by rsb__BCSR_scale_double_C__tN_r1_c1_uu_sS_dI_uG.
	 */

	const rsb_err_t errval = rsb__BCSR_scale_double_C__tN_r1_c1_uu_sS_dI_uG(
		VA, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, scale_factors);

	return errval;
}



rsb_err_t rsb__BCSR_scale_double_H__tT_r1_c1_uu_sS_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Scaling kernel entry point for the tT (transposed) operation on a matrix with A == A^T.
	 * Variant: 1 x 1 blocks, BCSR storage, type double, diagonal explicit, rsb_half_idx_t column indices.
	 * No work is done here: symmetric transposed reverts to symmetric not transposed,
	 * so every argument is handed over unchanged to the tN kernel of the same variant.
	 * \return the value returned by rsb__BCSR_scale_double_H__tN_r1_c1_uu_sS_dE_uG.
	 */

	const rsb_err_t errval = rsb__BCSR_scale_double_H__tN_r1_c1_uu_sS_dE_uG(
		VA, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, scale_factors);

	return errval;
}



rsb_err_t rsb__BCSR_scale_double_H__tT_r1_c1_uu_sS_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Scaling kernel entry point for the tT (transposed) operation on a matrix with A == A^T.
	 * Variant: 1 x 1 blocks, BCSR storage, type double, diagonal implicit, rsb_half_idx_t column indices.
	 * No work is done here: symmetric transposed reverts to symmetric not transposed,
	 * so every argument is handed over unchanged to the tN kernel of the same variant.
	 * \return the value returned by rsb__BCSR_scale_double_H__tN_r1_c1_uu_sS_dI_uG.
	 */

	const rsb_err_t errval = rsb__BCSR_scale_double_H__tN_r1_c1_uu_sS_dI_uG(
		VA, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, scale_factors);

	return errval;
}



rsb_err_t rsb__BCSR_scale_double_C__tC_r1_c1_uu_sS_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Scaling kernel entry point for the tC operation on a matrix with A == A^T.
	 * Variant: 1 x 1 blocks, BCSR storage, type double, diagonal explicit, rsb_coo_idx_t column indices.
	 * No work is done here: symmetric transposed reverts to symmetric not transposed,
	 * so every argument is handed over unchanged to the tN kernel of the same variant.
	 * \return the value returned by rsb__BCSR_scale_double_C__tN_r1_c1_uu_sS_dE_uG.
	 */

	const rsb_err_t errval = rsb__BCSR_scale_double_C__tN_r1_c1_uu_sS_dE_uG(
		VA, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, scale_factors);

	return errval;
}



rsb_err_t rsb__BCSR_scale_double_C__tC_r1_c1_uu_sS_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Scaling kernel entry point for the tC operation on a matrix with A == A^T.
	 * Variant: 1 x 1 blocks, BCSR storage, type double, diagonal implicit, rsb_coo_idx_t column indices.
	 * No work is done here: symmetric transposed reverts to symmetric not transposed,
	 * so every argument is handed over unchanged to the tN kernel of the same variant.
	 * \return the value returned by rsb__BCSR_scale_double_C__tN_r1_c1_uu_sS_dI_uG.
	 */

	const rsb_err_t errval = rsb__BCSR_scale_double_C__tN_r1_c1_uu_sS_dI_uG(
		VA, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, scale_factors);

	return errval;
}



rsb_err_t rsb__BCSR_scale_double_H__tC_r1_c1_uu_sS_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Scaling kernel entry point for the tC operation on a matrix with A == A^T.
	 * Variant: 1 x 1 blocks, BCSR storage, type double, diagonal explicit, rsb_half_idx_t column indices.
	 * No work is done here: symmetric transposed reverts to symmetric not transposed,
	 * so every argument is handed over unchanged to the tN kernel of the same variant.
	 * \return the value returned by rsb__BCSR_scale_double_H__tN_r1_c1_uu_sS_dE_uG.
	 */

	const rsb_err_t errval = rsb__BCSR_scale_double_H__tN_r1_c1_uu_sS_dE_uG(
		VA, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, scale_factors);

	return errval;
}



rsb_err_t rsb__BCSR_scale_double_H__tC_r1_c1_uu_sS_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Scaling kernel entry point for the tC operation on a matrix with A == A^T.
	 * Variant: 1 x 1 blocks, BCSR storage, type double, diagonal implicit, rsb_half_idx_t column indices.
	 * No work is done here: symmetric transposed reverts to symmetric not transposed,
	 * so every argument is handed over unchanged to the tN kernel of the same variant.
	 * \return the value returned by rsb__BCSR_scale_double_H__tN_r1_c1_uu_sS_dI_uG.
	 */

	const rsb_err_t errval = rsb__BCSR_scale_double_H__tN_r1_c1_uu_sS_dI_uG(
		VA, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, scale_factors);

	return errval;
}



rsb_err_t rsb__BCSR_scale_double_C__tN_r1_c1_uu_sH_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row scaling: \f$ a_{ij} \leftarrow a_{ij} \cdot s_{i} \f$ on each stored nonzero, s being scale_factors.
	 * Variant: tN (not transposed), 1 x 1 blocks, BCSR storage, type double, A == A^H, diagonal explicit, rsb_coo_idx_t column indices.
	 * Visits rows br (included) up to bc (excluded); bpntr[r] and bpntr[r+1] bound the nonzeroes of row r.
	 * NOTE(review): values are taken consecutively from VA[0] on, so bpntr[br] is presumably 0 -- confirm with the callers.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not used here.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	double *val = VA;	/* next stored nonzero to be scaled */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* row range of the bounded box */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		while (nz < row_end)
		{
			/* Column index: loaded as in the generated template, although row scaling does not need it. */
			const rsb_coo_idx_t col = bindx[nz];

			(void)col;
			*val *= scale_factors[row];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_C__tN_r1_c1_uu_sH_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row scaling: \f$ a_{ij} \leftarrow a_{ij} \cdot s_{i} \f$ on each stored nonzero, s being scale_factors.
	 * Variant: tN (not transposed), 1 x 1 blocks, BCSR storage, type double, A == A^H, diagonal implicit, rsb_coo_idx_t column indices.
	 * Visits rows br (included) up to bc (excluded); bpntr[r] and bpntr[r+1] bound the nonzeroes of row r.
	 * NOTE(review): values are taken consecutively from VA[0] on, so bpntr[br] is presumably 0 -- confirm with the callers.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not used here.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	double *val = VA;	/* next stored nonzero to be scaled */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* row range of the bounded box */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		while (nz < row_end)
		{
			/* Column index: loaded as in the generated template, although row scaling does not need it. */
			const rsb_coo_idx_t col = bindx[nz];

			(void)col;
			*val *= scale_factors[row];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_H__tN_r1_c1_uu_sH_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row scaling: \f$ a_{ij} \leftarrow a_{ij} \cdot s_{i} \f$ on each stored nonzero, s being scale_factors.
	 * Variant: tN (not transposed), 1 x 1 blocks, BCSR storage, type double, A == A^H, diagonal explicit, rsb_half_idx_t column indices.
	 * Visits rows br (included) up to bc (excluded); bpntr[r] and bpntr[r+1] bound the nonzeroes of row r.
	 * NOTE(review): values are taken consecutively from VA[0] on, so bpntr[br] is presumably 0 -- confirm with the callers.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not used here.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	double *val = VA;	/* next stored nonzero to be scaled */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* row range of the bounded box */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		while (nz < row_end)
		{
			/* Column index: loaded as in the generated template, although row scaling does not need it. */
			const rsb_coo_idx_t col = bindx[nz];

			(void)col;
			*val *= scale_factors[row];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_H__tN_r1_c1_uu_sH_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row scaling: \f$ a_{ij} \leftarrow a_{ij} \cdot s_{i} \f$ on each stored nonzero, s being scale_factors.
	 * Variant: tN (not transposed), 1 x 1 blocks, BCSR storage, type double, A == A^H, diagonal implicit, rsb_half_idx_t column indices.
	 * Visits rows br (included) up to bc (excluded); bpntr[r] and bpntr[r+1] bound the nonzeroes of row r.
	 * NOTE(review): values are taken consecutively from VA[0] on, so bpntr[br] is presumably 0 -- confirm with the callers.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not used here.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	double *val = VA;	/* next stored nonzero to be scaled */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* row range of the bounded box */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		while (nz < row_end)
		{
			/* Column index: loaded as in the generated template, although row scaling does not need it. */
			const rsb_coo_idx_t col = bindx[nz];

			(void)col;
			*val *= scale_factors[row];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_C__tT_r1_c1_uu_sH_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * tT entry point of the scale kernel for A == A^H.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 * Does no work of its own: every argument is handed unchanged to the tN kernel of the same variant.
	 * \return the value returned by rsb__BCSR_scale_double_C__tN_r1_c1_uu_sH_dE_uG
	 */

	/* the transposed request is served by the non transposed kernel */
	const rsb_err_t errval = rsb__BCSR_scale_double_C__tN_r1_c1_uu_sH_dE_uG(VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors);

	return errval;
}



rsb_err_t rsb__BCSR_scale_double_C__tT_r1_c1_uu_sH_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * tT entry point of the scale kernel for A == A^H.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 * Does no work of its own: every argument is handed unchanged to the tN kernel of the same variant.
	 * \return the value returned by rsb__BCSR_scale_double_C__tN_r1_c1_uu_sH_dI_uG
	 */

	/* the transposed request is served by the non transposed kernel */
	const rsb_err_t errval = rsb__BCSR_scale_double_C__tN_r1_c1_uu_sH_dI_uG(VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors);

	return errval;
}



rsb_err_t rsb__BCSR_scale_double_H__tT_r1_c1_uu_sH_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * tT entry point of the scale kernel for A == A^H.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 * Does no work of its own: every argument is handed unchanged to the tN kernel of the same variant.
	 * \return the value returned by rsb__BCSR_scale_double_H__tN_r1_c1_uu_sH_dE_uG
	 */

	/* the transposed request is served by the non transposed kernel */
	const rsb_err_t errval = rsb__BCSR_scale_double_H__tN_r1_c1_uu_sH_dE_uG(VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors);

	return errval;
}



rsb_err_t rsb__BCSR_scale_double_H__tT_r1_c1_uu_sH_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * tT entry point of the scale kernel for A == A^H.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 * Does no work of its own: every argument is handed unchanged to the tN kernel of the same variant.
	 * \return the value returned by rsb__BCSR_scale_double_H__tN_r1_c1_uu_sH_dI_uG
	 */

	/* the transposed request is served by the non transposed kernel */
	const rsb_err_t errval = rsb__BCSR_scale_double_H__tN_r1_c1_uu_sH_dI_uG(VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors);

	return errval;
}



rsb_err_t rsb__BCSR_scale_double_C__tC_r1_c1_uu_sH_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * tC entry point of the scale kernel for A == A^H.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 * Does no work of its own: every argument is handed unchanged to the tN kernel of the same variant.
	 * \return the value returned by rsb__BCSR_scale_double_C__tN_r1_c1_uu_sH_dE_uG
	 */

	/* the tC request is served by the non transposed kernel */
	const rsb_err_t errval = rsb__BCSR_scale_double_C__tN_r1_c1_uu_sH_dE_uG(VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors);

	return errval;
}



rsb_err_t rsb__BCSR_scale_double_C__tC_r1_c1_uu_sH_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * tC entry point of the scale kernel for A == A^H.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 * Does no work of its own: every argument is handed unchanged to the tN kernel of the same variant.
	 * \return the value returned by rsb__BCSR_scale_double_C__tN_r1_c1_uu_sH_dI_uG
	 */

	/* the tC request is served by the non transposed kernel */
	const rsb_err_t errval = rsb__BCSR_scale_double_C__tN_r1_c1_uu_sH_dI_uG(VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors);

	return errval;
}



rsb_err_t rsb__BCSR_scale_double_H__tC_r1_c1_uu_sH_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * tC entry point of the scale kernel for A == A^H.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 * Does no work of its own: every argument is handed unchanged to the tN kernel of the same variant.
	 * \return the value returned by rsb__BCSR_scale_double_H__tN_r1_c1_uu_sH_dE_uG
	 */

	/* the tC request is served by the non transposed kernel */
	const rsb_err_t errval = rsb__BCSR_scale_double_H__tN_r1_c1_uu_sH_dE_uG(VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors);

	return errval;
}



rsb_err_t rsb__BCSR_scale_double_H__tC_r1_c1_uu_sH_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * tC entry point of the scale kernel for A == A^H.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 * Does no work of its own: every argument is handed unchanged to the tN kernel of the same variant.
	 * \return the value returned by rsb__BCSR_scale_double_H__tN_r1_c1_uu_sH_dI_uG
	 */

	/* the tC request is served by the non transposed kernel */
	const rsb_err_t errval = rsb__BCSR_scale_double_H__tN_r1_c1_uu_sH_dI_uG(VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors);

	return errval;
}



rsb_err_t rsb__BCSR_infty_norm_float_C__tN_r1_c1_uu_sU_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation kernel for \f$ \|A\|_{\infty} \f$, where A \f$\neq\f$ A^T: the absolute value of every entry stored for row i is added to row_sums[roff+i].
	 * row_sums is updated, not reset, and no maximum is taken here.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 * Rows i with br <= i < bc are visited; bpntr[i] and bpntr[i+1] delimit the entries of row i.
	 * The values in VA are consumed sequentially starting from VA[0], one per visited entry.
	 * Mdim, mdim, bindx, indptr, rpntr, cpntr, coff and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (this body always returns RSB_ERR_NO_ERROR)
	 */

	const float *a=VA;	/* cursor over the stored values */
	rsb_coo_idx_t i;

	for(i=br;RSB_LIKELY(i<bc);++i)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t fk=bpntr[i],lk=bpntr[i+1];
		rsb_nnz_idx_t k;

		/* 1 x 1 blocks: one value per entry, so the cursor advances by one */
		for(k=fk;k<lk;++k,++a)
			row_sums[roff+i]+=fabsf(*a);
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_C__tN_r1_c1_uu_sU_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation kernel for \f$ \|A\|_{\infty} \f$, where A \f$\neq\f$ A^T: the absolute value of every entry stored for row i is added to row_sums[roff+i].
	 * row_sums is updated, not reset, and no maximum is taken here.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 * Rows i with br <= i < bc are visited; bpntr[i] and bpntr[i+1] delimit the entries of row i.
	 * The values in VA are consumed sequentially starting from VA[0], one per visited entry.
	 * Mdim, mdim, bindx, indptr, rpntr, cpntr, coff and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (this body always returns RSB_ERR_NO_ERROR)
	 */

	const float *a=VA;	/* cursor over the stored values */
	rsb_coo_idx_t i;

	for(i=br;RSB_LIKELY(i<bc);++i)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t fk=bpntr[i],lk=bpntr[i+1];
		rsb_nnz_idx_t k;

		/* 1 x 1 blocks: one value per entry, so the cursor advances by one */
		for(k=fk;k<lk;++k,++a)
			row_sums[roff+i]+=fabsf(*a);
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_H__tN_r1_c1_uu_sU_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation kernel for \f$ \|A\|_{\infty} \f$, where A \f$\neq\f$ A^T: the absolute value of every entry stored for row i is added to row_sums[roff+i].
	 * row_sums is updated, not reset, and no maximum is taken here.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 * Rows i with br <= i < bc are visited; bpntr[i] and bpntr[i+1] delimit the entries of row i.
	 * The values in VA are consumed sequentially starting from VA[0], one per visited entry.
	 * Mdim, mdim, bindx, indptr, rpntr, cpntr, coff and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (this body always returns RSB_ERR_NO_ERROR)
	 */

	const float *a=VA;	/* cursor over the stored values */
	rsb_coo_idx_t i;

	for(i=br;RSB_LIKELY(i<bc);++i)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t fk=bpntr[i],lk=bpntr[i+1];
		rsb_nnz_idx_t k;

		/* 1 x 1 blocks: one value per entry, so the cursor advances by one */
		for(k=fk;k<lk;++k,++a)
			row_sums[roff+i]+=fabsf(*a);
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_H__tN_r1_c1_uu_sU_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation kernel for \f$ \|A\|_{\infty} \f$, where A \f$\neq\f$ A^T: the absolute value of every entry stored for row i is added to row_sums[roff+i].
	 * row_sums is updated, not reset, and no maximum is taken here.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 * Rows i with br <= i < bc are visited; bpntr[i] and bpntr[i+1] delimit the entries of row i.
	 * The values in VA are consumed sequentially starting from VA[0], one per visited entry.
	 * Mdim, mdim, bindx, indptr, rpntr, cpntr, coff and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (this body always returns RSB_ERR_NO_ERROR)
	 */

	const float *a=VA;	/* cursor over the stored values */
	rsb_coo_idx_t i;

	for(i=br;RSB_LIKELY(i<bc);++i)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t fk=bpntr[i],lk=bpntr[i+1];
		rsb_nnz_idx_t k;

		/* 1 x 1 blocks: one value per entry, so the cursor advances by one */
		for(k=fk;k<lk;++k,++a)
			row_sums[roff+i]+=fabsf(*a);
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_C__tT_r1_c1_uu_sU_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * tT accumulation kernel for \f$ \|A\|_{\infty} \f$, where A \f$\neq\f$ A^T: the absolute value of every stored entry is added to row_sums[coff+j], j=bindx[k] being the column index of that entry.
	 * row_sums is updated, not reset, and no maximum is taken here.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 * Rows i with br <= i < bc are visited; bpntr[i] and bpntr[i+1] delimit the entries of row i.
	 * The values in VA are consumed sequentially starting from VA[0], one per visited entry.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (this body always returns RSB_ERR_NO_ERROR)
	 */

	const float *a=VA;	/* cursor over the stored values */
	rsb_coo_idx_t i;

	for(i=br;RSB_LIKELY(i<bc);++i)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t fk=bpntr[i],lk=bpntr[i+1];
		rsb_nnz_idx_t k;

		/* 1 x 1 blocks: one value per entry, so the cursor advances by one */
		for(k=fk;k<lk;++k,++a)
		{
			const rsb_coo_idx_t j=bindx[k];

			row_sums[coff+j]+=fabsf(*a);
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_C__tT_r1_c1_uu_sU_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * tT accumulation kernel for \f$ \|A\|_{\infty} \f$, where A \f$\neq\f$ A^T: the absolute value of every stored entry is added to row_sums[coff+j], j=bindx[k] being the column index of that entry.
	 * row_sums is updated, not reset, and no maximum is taken here.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 * Rows i with br <= i < bc are visited; bpntr[i] and bpntr[i+1] delimit the entries of row i.
	 * The values in VA are consumed sequentially starting from VA[0], one per visited entry.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (this body always returns RSB_ERR_NO_ERROR)
	 */

	const float *a=VA;	/* cursor over the stored values */
	rsb_coo_idx_t i;

	for(i=br;RSB_LIKELY(i<bc);++i)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t fk=bpntr[i],lk=bpntr[i+1];
		rsb_nnz_idx_t k;

		/* 1 x 1 blocks: one value per entry, so the cursor advances by one */
		for(k=fk;k<lk;++k,++a)
		{
			const rsb_coo_idx_t j=bindx[k];

			row_sums[coff+j]+=fabsf(*a);
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_H__tT_r1_c1_uu_sU_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * tT accumulation kernel for \f$ \|A\|_{\infty} \f$, where A \f$\neq\f$ A^T: the absolute value of every stored entry is added to row_sums[coff+j], j=bindx[k] being the column index of that entry.
	 * row_sums is updated, not reset, and no maximum is taken here.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 * Rows i with br <= i < bc are visited; bpntr[i] and bpntr[i+1] delimit the entries of row i.
	 * The values in VA are consumed sequentially starting from VA[0], one per visited entry.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (this body always returns RSB_ERR_NO_ERROR)
	 */

	const float *a=VA;	/* cursor over the stored values */
	rsb_coo_idx_t i;

	for(i=br;RSB_LIKELY(i<bc);++i)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t fk=bpntr[i],lk=bpntr[i+1];
		rsb_nnz_idx_t k;

		/* 1 x 1 blocks: one value per entry, so the cursor advances by one */
		for(k=fk;k<lk;++k,++a)
		{
			const rsb_coo_idx_t j=bindx[k];

			row_sums[coff+j]+=fabsf(*a);
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_H__tT_r1_c1_uu_sU_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * tT accumulation kernel for \f$ \|A\|_{\infty} \f$, where A \f$\neq\f$ A^T: the absolute value of every stored entry is added to row_sums[coff+j], j=bindx[k] being the column index of that entry.
	 * row_sums is updated, not reset, and no maximum is taken here.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 * Rows i with br <= i < bc are visited; bpntr[i] and bpntr[i+1] delimit the entries of row i.
	 * The values in VA are consumed sequentially starting from VA[0], one per visited entry.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (this body always returns RSB_ERR_NO_ERROR)
	 */

	const float *a=VA;	/* cursor over the stored values */
	rsb_coo_idx_t i;

	for(i=br;RSB_LIKELY(i<bc);++i)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t fk=bpntr[i],lk=bpntr[i+1];
		rsb_nnz_idx_t k;

		/* 1 x 1 blocks: one value per entry, so the cursor advances by one */
		for(k=fk;k<lk;++k,++a)
		{
			const rsb_coo_idx_t j=bindx[k];

			row_sums[coff+j]+=fabsf(*a);
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_C__tC_r1_c1_uu_sU_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * tC accumulation kernel for \f$ \|A\|_{\infty} \f$, where A \f$\neq\f$ A^T: the absolute value of every stored entry is added to row_sums[coff+j], j=bindx[k] being the column index of that entry.
	 * row_sums is updated, not reset, and no maximum is taken here.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 * Rows i with br <= i < bc are visited; bpntr[i] and bpntr[i+1] delimit the entries of row i.
	 * The values in VA are consumed sequentially starting from VA[0], one per visited entry.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (this body always returns RSB_ERR_NO_ERROR)
	 */

	const float *a=VA;	/* cursor over the stored values */
	rsb_coo_idx_t i;

	for(i=br;RSB_LIKELY(i<bc);++i)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t fk=bpntr[i],lk=bpntr[i+1];
		rsb_nnz_idx_t k;

		/* 1 x 1 blocks: one value per entry, so the cursor advances by one */
		for(k=fk;k<lk;++k,++a)
		{
			const rsb_coo_idx_t j=bindx[k];

			row_sums[coff+j]+=fabsf(*a);
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_C__tC_r1_c1_uu_sU_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * tC accumulation kernel for \f$ \|A\|_{\infty} \f$, where A \f$\neq\f$ A^T: the absolute value of every stored entry is added to row_sums[coff+j], j=bindx[k] being the column index of that entry.
	 * row_sums is updated, not reset, and no maximum is taken here.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 * Rows i with br <= i < bc are visited; bpntr[i] and bpntr[i+1] delimit the entries of row i.
	 * The values in VA are consumed sequentially starting from VA[0], one per visited entry.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (this body always returns RSB_ERR_NO_ERROR)
	 */

	const float *a=VA;	/* cursor over the stored values */
	rsb_coo_idx_t i;

	for(i=br;RSB_LIKELY(i<bc);++i)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t fk=bpntr[i],lk=bpntr[i+1];
		rsb_nnz_idx_t k;

		/* 1 x 1 blocks: one value per entry, so the cursor advances by one */
		for(k=fk;k<lk;++k,++a)
		{
			const rsb_coo_idx_t j=bindx[k];

			row_sums[coff+j]+=fabsf(*a);
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_H__tC_r1_c1_uu_sU_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * tC accumulation kernel for \f$ \|A\|_{\infty} \f$, where A \f$\neq\f$ A^T: the absolute value of every stored entry is added to row_sums[coff+j], j=bindx[k] being the column index of that entry.
	 * row_sums is updated, not reset, and no maximum is taken here.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 * Rows i with br <= i < bc are visited; bpntr[i] and bpntr[i+1] delimit the entries of row i.
	 * The values in VA are consumed sequentially starting from VA[0], one per visited entry.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (this body always returns RSB_ERR_NO_ERROR)
	 */

	const float *a=VA;	/* cursor over the stored values */
	rsb_coo_idx_t i;

	for(i=br;RSB_LIKELY(i<bc);++i)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t fk=bpntr[i],lk=bpntr[i+1];
		rsb_nnz_idx_t k;

		/* 1 x 1 blocks: one value per entry, so the cursor advances by one */
		for(k=fk;k<lk;++k,++a)
		{
			const rsb_coo_idx_t j=bindx[k];

			row_sums[coff+j]+=fabsf(*a);
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_H__tC_r1_c1_uu_sU_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * tC accumulation kernel for \f$ \|A\|_{\infty} \f$, where A \f$\neq\f$ A^T: the absolute value of every stored entry is added to row_sums[coff+j], j=bindx[k] being the column index of that entry.
	 * row_sums is updated, not reset, and no maximum is taken here.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 * Rows i with br <= i < bc are visited; bpntr[i] and bpntr[i+1] delimit the entries of row i.
	 * The values in VA are consumed sequentially starting from VA[0], one per visited entry.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (this body always returns RSB_ERR_NO_ERROR)
	 */

	const float *a=VA;	/* cursor over the stored values */
	rsb_coo_idx_t i;

	for(i=br;RSB_LIKELY(i<bc);++i)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t fk=bpntr[i],lk=bpntr[i+1];
		rsb_nnz_idx_t k;

		/* 1 x 1 blocks: one value per entry, so the cursor advances by one */
		for(k=fk;k<lk;++k,++a)
		{
			const rsb_coo_idx_t j=bindx[k];

			row_sums[coff+j]+=fabsf(*a);
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_C__tN_r1_c1_uu_sS_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation kernel for \f$ \|A\|_{\infty} \f$, where A == A^T: the absolute value of every entry stored at (i,j), j=bindx[k], is added to row_sums[roff+i] and, unless roff==coff and i==j, also to row_sums[coff+j].
	 * row_sums is updated, not reset, and no maximum is taken here.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 * Rows i with br <= i < bc are visited; bpntr[i] and bpntr[i+1] delimit the entries of row i.
	 * The values in VA are consumed sequentially starting from VA[0], one per visited entry.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (this body always returns RSB_ERR_NO_ERROR)
	 */

	const float *a=VA;	/* cursor over the stored values */
	rsb_coo_idx_t i;

	for(i=br;RSB_LIKELY(i<bc);++i)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t fk=bpntr[i],lk=bpntr[i+1];
		rsb_nnz_idx_t k;

		/* 1 x 1 blocks: one value per entry, so the cursor advances by one */
		for(k=fk;k<lk;++k,++a)
		{
			const rsb_coo_idx_t j=bindx[k];
			const float abs_val=fabsf(*a);

			row_sums[roff+i]+=abs_val;

			/* second update at the swapped position; presumably stands in for the mirrored entry that is not stored -- confirm */
			if(roff!=coff || i!=j)
				row_sums[coff+j]+=abs_val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_C__tN_r1_c1_uu_sS_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation kernel for \f$ \|A\|_{\infty} \f$, where A == A^T: the absolute value of every entry stored at (i,j), j=bindx[k], is added to row_sums[roff+i] and, unless roff==coff and i==j, also to row_sums[coff+j].
	 * row_sums is updated, not reset, and no maximum is taken here.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 * Rows i with br <= i < bc are visited; bpntr[i] and bpntr[i+1] delimit the entries of row i.
	 * The values in VA are consumed sequentially starting from VA[0], one per visited entry.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (this body always returns RSB_ERR_NO_ERROR)
	 */

	const float *a=VA;	/* cursor over the stored values */
	rsb_coo_idx_t i;

	for(i=br;RSB_LIKELY(i<bc);++i)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t fk=bpntr[i],lk=bpntr[i+1];
		rsb_nnz_idx_t k;

		/* 1 x 1 blocks: one value per entry, so the cursor advances by one */
		for(k=fk;k<lk;++k,++a)
		{
			const rsb_coo_idx_t j=bindx[k];
			const float abs_val=fabsf(*a);

			row_sums[roff+i]+=abs_val;

			/* second update at the swapped position; presumably stands in for the mirrored entry that is not stored -- confirm */
			if(roff!=coff || i!=j)
				row_sums[coff+j]+=abs_val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_H__tN_r1_c1_uu_sS_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation kernel for \f$ \|A\|_{\infty} \f$, where A == A^T: the absolute value of every entry stored at (i,j), j=bindx[k], is added to row_sums[roff+i] and, unless roff==coff and i==j, also to row_sums[coff+j].
	 * row_sums is updated, not reset, and no maximum is taken here.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 * Rows i with br <= i < bc are visited; bpntr[i] and bpntr[i+1] delimit the entries of row i.
	 * The values in VA are consumed sequentially starting from VA[0], one per visited entry.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (this body always returns RSB_ERR_NO_ERROR)
	 */

	const float *a=VA;	/* cursor over the stored values */
	rsb_coo_idx_t i;

	for(i=br;RSB_LIKELY(i<bc);++i)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t fk=bpntr[i],lk=bpntr[i+1];
		rsb_nnz_idx_t k;

		/* 1 x 1 blocks: one value per entry, so the cursor advances by one */
		for(k=fk;k<lk;++k,++a)
		{
			const rsb_coo_idx_t j=bindx[k];
			const float abs_val=fabsf(*a);

			row_sums[roff+i]+=abs_val;

			/* second update at the swapped position; presumably stands in for the mirrored entry that is not stored -- confirm */
			if(roff!=coff || i!=j)
				row_sums[coff+j]+=abs_val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_H__tN_r1_c1_uu_sS_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation kernel for \f$ \|A\|_{\infty} \f$, where A == A^T: the absolute value of every entry stored at (i,j), j=bindx[k], is added to row_sums[roff+i] and, unless roff==coff and i==j, also to row_sums[coff+j].
	 * row_sums is updated, not reset, and no maximum is taken here.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 * Rows i with br <= i < bc are visited; bpntr[i] and bpntr[i+1] delimit the entries of row i.
	 * The values in VA are consumed sequentially starting from VA[0], one per visited entry.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (this body always returns RSB_ERR_NO_ERROR)
	 */

	const float *a=VA;	/* cursor over the stored values */
	rsb_coo_idx_t i;

	for(i=br;RSB_LIKELY(i<bc);++i)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t fk=bpntr[i],lk=bpntr[i+1];
		rsb_nnz_idx_t k;

		/* 1 x 1 blocks: one value per entry, so the cursor advances by one */
		for(k=fk;k<lk;++k,++a)
		{
			const rsb_coo_idx_t j=bindx[k];
			const float abs_val=fabsf(*a);

			row_sums[roff+i]+=abs_val;

			/* second update at the swapped position; presumably stands in for the mirrored entry that is not stored -- confirm */
			if(roff!=coff || i!=j)
				row_sums[coff+j]+=abs_val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_C__tT_r1_c1_uu_sS_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed (tT) flavour of the \f$ \|A\|_{\infty} \f$ row-sums kernel, where A == A^T.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * No computation happens here: every argument is forwarded untouched to the
	 * non-transposed (tN) kernel with the same type, index width and diagonal kind.
	 * \return the status code produced by the tN kernel
	 */

	/* for a symmetric matrix the transposed case is served by the non-transposed kernel */
	const rsb_err_t errval = rsb__BCSR_infty_norm_float_C__tN_r1_c1_uu_sS_dE_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_infty_norm_float_C__tT_r1_c1_uu_sS_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed (tT) flavour of the \f$ \|A\|_{\infty} \f$ row-sums kernel, where A == A^T.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * No computation happens here: every argument is forwarded untouched to the
	 * non-transposed (tN) kernel with the same type, index width and diagonal kind.
	 * \return the status code produced by the tN kernel
	 */

	/* for a symmetric matrix the transposed case is served by the non-transposed kernel */
	const rsb_err_t errval = rsb__BCSR_infty_norm_float_C__tN_r1_c1_uu_sS_dI_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_infty_norm_float_H__tT_r1_c1_uu_sS_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed (tT) flavour of the \f$ \|A\|_{\infty} \f$ row-sums kernel, where A == A^T.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * No computation happens here: every argument is forwarded untouched to the
	 * non-transposed (tN) kernel with the same type, index width and diagonal kind.
	 * \return the status code produced by the tN kernel
	 */

	/* for a symmetric matrix the transposed case is served by the non-transposed kernel */
	const rsb_err_t errval = rsb__BCSR_infty_norm_float_H__tN_r1_c1_uu_sS_dE_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_infty_norm_float_H__tT_r1_c1_uu_sS_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed (tT) flavour of the \f$ \|A\|_{\infty} \f$ row-sums kernel, where A == A^T.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * No computation happens here: every argument is forwarded untouched to the
	 * non-transposed (tN) kernel with the same type, index width and diagonal kind.
	 * \return the status code produced by the tN kernel
	 */

	/* for a symmetric matrix the transposed case is served by the non-transposed kernel */
	const rsb_err_t errval = rsb__BCSR_infty_norm_float_H__tN_r1_c1_uu_sS_dI_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_infty_norm_float_C__tC_r1_c1_uu_sS_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * The tC flavour (presumably conjugate-transposed, going by the naming scheme) of the
	 * \f$ \|A\|_{\infty} \f$ row-sums kernel, where A == A^T.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * No computation happens here: every argument is forwarded untouched to the
	 * non-transposed (tN) kernel with the same type, index width and diagonal kind.
	 * \return the status code produced by the tN kernel
	 */

	/* for a symmetric matrix this case is served by the non-transposed kernel */
	const rsb_err_t errval = rsb__BCSR_infty_norm_float_C__tN_r1_c1_uu_sS_dE_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_infty_norm_float_C__tC_r1_c1_uu_sS_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * The tC flavour (presumably conjugate-transposed, going by the naming scheme) of the
	 * \f$ \|A\|_{\infty} \f$ row-sums kernel, where A == A^T.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * No computation happens here: every argument is forwarded untouched to the
	 * non-transposed (tN) kernel with the same type, index width and diagonal kind.
	 * \return the status code produced by the tN kernel
	 */

	/* for a symmetric matrix this case is served by the non-transposed kernel */
	const rsb_err_t errval = rsb__BCSR_infty_norm_float_C__tN_r1_c1_uu_sS_dI_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_infty_norm_float_H__tC_r1_c1_uu_sS_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * The tC flavour (presumably conjugate-transposed, going by the naming scheme) of the
	 * \f$ \|A\|_{\infty} \f$ row-sums kernel, where A == A^T.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * No computation happens here: every argument is forwarded untouched to the
	 * non-transposed (tN) kernel with the same type, index width and diagonal kind.
	 * \return the status code produced by the tN kernel
	 */

	/* for a symmetric matrix this case is served by the non-transposed kernel */
	const rsb_err_t errval = rsb__BCSR_infty_norm_float_H__tN_r1_c1_uu_sS_dE_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_infty_norm_float_H__tC_r1_c1_uu_sS_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * The tC flavour (presumably conjugate-transposed, going by the naming scheme) of the
	 * \f$ \|A\|_{\infty} \f$ row-sums kernel, where A == A^T.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * No computation happens here: every argument is forwarded untouched to the
	 * non-transposed (tN) kernel with the same type, index width and diagonal kind.
	 * \return the status code produced by the tN kernel
	 */

	/* for a symmetric matrix this case is served by the non-transposed kernel */
	const rsb_err_t errval = rsb__BCSR_infty_norm_float_H__tN_r1_c1_uu_sS_dI_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_infty_norm_float_C__tN_r1_c1_uu_sH_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates the absolute-value row sums used for \f$ \|A\|_{\infty} \f$, where A == A^H.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * For every stored entry (i,j), fabsf() of its value is added to row_sums[roff+i];
	 * unless the entry lies on the global diagonal (roff==coff and i==j), the same
	 * amount is also added to row_sums[coff+j], covering the mirrored position (j,i).
	 *
	 * \param VA        entry values, consumed sequentially from VA[0], one per visited entry
	 * \param row_sums  accumulator array, updated in place (not cleared here)
	 * \param bindx     column index of each stored entry
	 * \param bpntr     row pointers: row i owns entries bpntr[i] .. bpntr[i+1]-1
	 * \param br,bc     first row and one-past-last row visited by the loop
	 * \param roff,coff offsets added to row and column indices when addressing row_sums
	 * \note Mdim, mdim, indptr, rpntr, cpntr and flags are not used by this kernel.
	 * \return RSB_ERR_NO_ERROR, unconditionally
	 */

	const float *val = VA;	/* advances by one for each entry visited */
	rsb_coo_idx_t row = 0;

	for(row = br; RSB_LIKELY(row < bc); ++row)	/* br and bc bound the rows here (bounded box patch) */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row+1];

		while(nz < row_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			float * const row_acc = row_sums + row;
			const float magnitude = fabsf(*val);

			row_acc[roff] += magnitude;

			/* only an entry on the global diagonal is counted once */
			if(!(roff == coff && row == col))
				row_sums[coff + col] += magnitude;

			++nz;
			++val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_C__tN_r1_c1_uu_sH_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates the absolute-value row sums used for \f$ \|A\|_{\infty} \f$, where A == A^H.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * For every stored entry (i,j), fabsf() of its value is added to row_sums[roff+i];
	 * unless the entry lies on the global diagonal (roff==coff and i==j), the same
	 * amount is also added to row_sums[coff+j], covering the mirrored position (j,i).
	 * Only stored entries are visited: nothing is added here for the implicit diagonal.
	 *
	 * \param VA        entry values, consumed sequentially from VA[0], one per visited entry
	 * \param row_sums  accumulator array, updated in place (not cleared here)
	 * \param bindx     column index of each stored entry
	 * \param bpntr     row pointers: row i owns entries bpntr[i] .. bpntr[i+1]-1
	 * \param br,bc     first row and one-past-last row visited by the loop
	 * \param roff,coff offsets added to row and column indices when addressing row_sums
	 * \note Mdim, mdim, indptr, rpntr, cpntr and flags are not used by this kernel.
	 * \return RSB_ERR_NO_ERROR, unconditionally
	 */

	const float *val = VA;	/* advances by one for each entry visited */
	rsb_coo_idx_t row = 0;

	for(row = br; RSB_LIKELY(row < bc); ++row)	/* br and bc bound the rows here (bounded box patch) */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row+1];

		while(nz < row_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			float * const row_acc = row_sums + row;
			const float magnitude = fabsf(*val);

			row_acc[roff] += magnitude;

			/* only an entry on the global diagonal is counted once */
			if(!(roff == coff && row == col))
				row_sums[coff + col] += magnitude;

			++nz;
			++val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_H__tN_r1_c1_uu_sH_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates the absolute-value row sums used for \f$ \|A\|_{\infty} \f$, where A == A^H.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * For every stored entry (i,j), fabsf() of its value is added to row_sums[roff+i];
	 * unless the entry lies on the global diagonal (roff==coff and i==j), the same
	 * amount is also added to row_sums[coff+j], covering the mirrored position (j,i).
	 *
	 * \param VA        entry values, consumed sequentially from VA[0], one per visited entry
	 * \param row_sums  accumulator array, updated in place (not cleared here)
	 * \param bindx     column index of each stored entry
	 * \param bpntr     row pointers: row i owns entries bpntr[i] .. bpntr[i+1]-1
	 * \param br,bc     first row and one-past-last row visited by the loop
	 * \param roff,coff offsets added to row and column indices when addressing row_sums
	 * \note Mdim, mdim, indptr, rpntr, cpntr and flags are not used by this kernel.
	 * \return RSB_ERR_NO_ERROR, unconditionally
	 */

	const float *val = VA;	/* advances by one for each entry visited */
	rsb_coo_idx_t row = 0;

	for(row = br; RSB_LIKELY(row < bc); ++row)	/* br and bc bound the rows here (bounded box patch) */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row+1];

		while(nz < row_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			float * const row_acc = row_sums + row;
			const float magnitude = fabsf(*val);

			row_acc[roff] += magnitude;

			/* only an entry on the global diagonal is counted once */
			if(!(roff == coff && row == col))
				row_sums[coff + col] += magnitude;

			++nz;
			++val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_H__tN_r1_c1_uu_sH_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates the absolute-value row sums used for \f$ \|A\|_{\infty} \f$, where A == A^H.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * For every stored entry (i,j), fabsf() of its value is added to row_sums[roff+i];
	 * unless the entry lies on the global diagonal (roff==coff and i==j), the same
	 * amount is also added to row_sums[coff+j], covering the mirrored position (j,i).
	 * Only stored entries are visited: nothing is added here for the implicit diagonal.
	 *
	 * \param VA        entry values, consumed sequentially from VA[0], one per visited entry
	 * \param row_sums  accumulator array, updated in place (not cleared here)
	 * \param bindx     column index of each stored entry
	 * \param bpntr     row pointers: row i owns entries bpntr[i] .. bpntr[i+1]-1
	 * \param br,bc     first row and one-past-last row visited by the loop
	 * \param roff,coff offsets added to row and column indices when addressing row_sums
	 * \note Mdim, mdim, indptr, rpntr, cpntr and flags are not used by this kernel.
	 * \return RSB_ERR_NO_ERROR, unconditionally
	 */

	const float *val = VA;	/* advances by one for each entry visited */
	rsb_coo_idx_t row = 0;

	for(row = br; RSB_LIKELY(row < bc); ++row)	/* br and bc bound the rows here (bounded box patch) */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row+1];

		while(nz < row_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			float * const row_acc = row_sums + row;
			const float magnitude = fabsf(*val);

			row_acc[roff] += magnitude;

			/* only an entry on the global diagonal is counted once */
			if(!(roff == coff && row == col))
				row_sums[coff + col] += magnitude;

			++nz;
			++val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_C__tT_r1_c1_uu_sH_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed (tT) flavour of the \f$ \|A\|_{\infty} \f$ row-sums kernel, where A == A^H.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * No computation happens here: every argument is forwarded untouched to the
	 * non-transposed (tN) kernel with the same type, index width and diagonal kind.
	 * \return the status code produced by the tN kernel
	 */

	/* the transposed case is served by the non-transposed kernel */
	const rsb_err_t errval = rsb__BCSR_infty_norm_float_C__tN_r1_c1_uu_sH_dE_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_infty_norm_float_C__tT_r1_c1_uu_sH_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed (tT) flavour of the \f$ \|A\|_{\infty} \f$ row-sums kernel, where A == A^H.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * No computation happens here: every argument is forwarded untouched to the
	 * non-transposed (tN) kernel with the same type, index width and diagonal kind.
	 * \return the status code produced by the tN kernel
	 */

	/* the transposed case is served by the non-transposed kernel */
	const rsb_err_t errval = rsb__BCSR_infty_norm_float_C__tN_r1_c1_uu_sH_dI_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_infty_norm_float_H__tT_r1_c1_uu_sH_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed (tT) flavour of the \f$ \|A\|_{\infty} \f$ row-sums kernel, where A == A^H.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * No computation happens here: every argument is forwarded untouched to the
	 * non-transposed (tN) kernel with the same type, index width and diagonal kind.
	 * \return the status code produced by the tN kernel
	 */

	/* the transposed case is served by the non-transposed kernel */
	const rsb_err_t errval = rsb__BCSR_infty_norm_float_H__tN_r1_c1_uu_sH_dE_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_infty_norm_float_H__tT_r1_c1_uu_sH_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed (tT) flavour of the \f$ \|A\|_{\infty} \f$ row-sums kernel, where A == A^H.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * No computation happens here: every argument is forwarded untouched to the
	 * non-transposed (tN) kernel with the same type, index width and diagonal kind.
	 * \return the status code produced by the tN kernel
	 */

	/* the transposed case is served by the non-transposed kernel */
	const rsb_err_t errval = rsb__BCSR_infty_norm_float_H__tN_r1_c1_uu_sH_dI_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_infty_norm_float_C__tC_r1_c1_uu_sH_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * The tC flavour (presumably conjugate-transposed, going by the naming scheme) of the
	 * \f$ \|A\|_{\infty} \f$ row-sums kernel, where A == A^H.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * No computation happens here: every argument is forwarded untouched to the
	 * non-transposed (tN) kernel with the same type, index width and diagonal kind.
	 * \return the status code produced by the tN kernel
	 */

	/* this case is served by the non-transposed kernel */
	const rsb_err_t errval = rsb__BCSR_infty_norm_float_C__tN_r1_c1_uu_sH_dE_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_infty_norm_float_C__tC_r1_c1_uu_sH_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * The tC flavour (presumably conjugate-transposed, going by the naming scheme) of the
	 * \f$ \|A\|_{\infty} \f$ row-sums kernel, where A == A^H.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * No computation happens here: every argument is forwarded untouched to the
	 * non-transposed (tN) kernel with the same type, index width and diagonal kind.
	 * \return the status code produced by the tN kernel
	 */

	/* this case is served by the non-transposed kernel */
	const rsb_err_t errval = rsb__BCSR_infty_norm_float_C__tN_r1_c1_uu_sH_dI_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_infty_norm_float_H__tC_r1_c1_uu_sH_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * The tC flavour (presumably conjugate-transposed, going by the naming scheme) of the
	 * \f$ \|A\|_{\infty} \f$ row-sums kernel, where A == A^H.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * No computation happens here: every argument is forwarded untouched to the
	 * non-transposed (tN) kernel with the same type, index width and diagonal kind.
	 * \return the status code produced by the tN kernel
	 */

	/* this case is served by the non-transposed kernel */
	const rsb_err_t errval = rsb__BCSR_infty_norm_float_H__tN_r1_c1_uu_sH_dE_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_infty_norm_float_H__tC_r1_c1_uu_sH_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * The tC flavour (presumably conjugate-transposed, going by the naming scheme) of the
	 * \f$ \|A\|_{\infty} \f$ row-sums kernel, where A == A^H.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * No computation happens here: every argument is forwarded untouched to the
	 * non-transposed (tN) kernel with the same type, index width and diagonal kind.
	 * \return the status code produced by the tN kernel
	 */

	/* this case is served by the non-transposed kernel */
	const rsb_err_t errval = rsb__BCSR_infty_norm_float_H__tN_r1_c1_uu_sH_dI_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_rowssums_float_C__tN_r1_c1_uu_sU_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates plain (signed) row sums of the stored entries, where A \f$\neq\f$ A^T.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * The value of every stored entry of row i is added, as is (no absolute value),
	 * to row_sums[roff+i].
	 *
	 * \param VA        entry values, consumed sequentially from VA[0], one per visited entry
	 * \param row_sums  accumulator array, updated in place (not cleared here)
	 * \param bpntr     row pointers: row i owns entries bpntr[i] .. bpntr[i+1]-1
	 * \param br,bc     first row and one-past-last row visited by the loop
	 * \param roff      offset added to the row index when addressing row_sums
	 * \note Mdim, mdim, bindx, indptr, rpntr, cpntr, coff and flags are not used by this kernel.
	 * \return RSB_ERR_NO_ERROR, unconditionally
	 */

	const float *val = VA;	/* advances by one for each entry visited */
	rsb_coo_idx_t row = 0;

	for(row = br; RSB_LIKELY(row < bc); ++row)	/* br and bc bound the rows here (bounded box patch) */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row+1];

		while(nz < row_end)
		{
			float * const row_acc = row_sums + row;
			float term = 0;

			/* the value is summed onto zero first, then added to the row total */
			term += *val;
			row_acc[roff] += term;

			++nz;
			++val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_C__tN_r1_c1_uu_sU_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates plain (signed) row sums of the stored entries, where A \f$\neq\f$ A^T.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * The value of every stored entry of row i is added, as is (no absolute value),
	 * to row_sums[roff+i].
	 * Only stored entries are visited: nothing is added here for the implicit diagonal.
	 *
	 * \param VA        entry values, consumed sequentially from VA[0], one per visited entry
	 * \param row_sums  accumulator array, updated in place (not cleared here)
	 * \param bpntr     row pointers: row i owns entries bpntr[i] .. bpntr[i+1]-1
	 * \param br,bc     first row and one-past-last row visited by the loop
	 * \param roff      offset added to the row index when addressing row_sums
	 * \note Mdim, mdim, bindx, indptr, rpntr, cpntr, coff and flags are not used by this kernel.
	 * \return RSB_ERR_NO_ERROR, unconditionally
	 */

	const float *val = VA;	/* advances by one for each entry visited */
	rsb_coo_idx_t row = 0;

	for(row = br; RSB_LIKELY(row < bc); ++row)	/* br and bc bound the rows here (bounded box patch) */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row+1];

		while(nz < row_end)
		{
			float * const row_acc = row_sums + row;
			float term = 0;

			/* the value is summed onto zero first, then added to the row total */
			term += *val;
			row_acc[roff] += term;

			++nz;
			++val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_H__tN_r1_c1_uu_sU_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates plain (signed) row sums of the stored entries, where A \f$\neq\f$ A^T.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * The value of every stored entry of row i is added, as is (no absolute value),
	 * to row_sums[roff+i].
	 *
	 * \param VA        entry values, consumed sequentially from VA[0], one per visited entry
	 * \param row_sums  accumulator array, updated in place (not cleared here)
	 * \param bpntr     row pointers: row i owns entries bpntr[i] .. bpntr[i+1]-1
	 * \param br,bc     first row and one-past-last row visited by the loop
	 * \param roff      offset added to the row index when addressing row_sums
	 * \note Mdim, mdim, bindx, indptr, rpntr, cpntr, coff and flags are not used by this kernel.
	 * \return RSB_ERR_NO_ERROR, unconditionally
	 */

	const float *val = VA;	/* advances by one for each entry visited */
	rsb_coo_idx_t row = 0;

	for(row = br; RSB_LIKELY(row < bc); ++row)	/* br and bc bound the rows here (bounded box patch) */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row+1];

		while(nz < row_end)
		{
			float * const row_acc = row_sums + row;
			float term = 0;

			/* the value is summed onto zero first, then added to the row total */
			term += *val;
			row_acc[roff] += term;

			++nz;
			++val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_H__tN_r1_c1_uu_sU_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates plain (signed) row sums of the stored entries, where A \f$\neq\f$ A^T.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * The value of every stored entry of row i is added, as is (no absolute value),
	 * to row_sums[roff+i].
	 * Only stored entries are visited: nothing is added here for the implicit diagonal.
	 *
	 * \param VA        entry values, consumed sequentially from VA[0], one per visited entry
	 * \param row_sums  accumulator array, updated in place (not cleared here)
	 * \param bpntr     row pointers: row i owns entries bpntr[i] .. bpntr[i+1]-1
	 * \param br,bc     first row and one-past-last row visited by the loop
	 * \param roff      offset added to the row index when addressing row_sums
	 * \note Mdim, mdim, bindx, indptr, rpntr, cpntr, coff and flags are not used by this kernel.
	 * \return RSB_ERR_NO_ERROR, unconditionally
	 */

	const float *val = VA;	/* advances by one for each entry visited */
	rsb_coo_idx_t row = 0;

	for(row = br; RSB_LIKELY(row < bc); ++row)	/* br and bc bound the rows here (bounded box patch) */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row+1];

		while(nz < row_end)
		{
			float * const row_acc = row_sums + row;
			float term = 0;

			/* the value is summed onto zero first, then added to the row total */
			term += *val;
			row_acc[roff] += term;

			++nz;
			++val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_C__tT_r1_c1_uu_sU_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed (tT) flavour of the signed row-sums kernel, where A \f$\neq\f$ A^T.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * The value of every stored entry (i,j) is added, as is (no absolute value),
	 * to row_sums[coff+j], i.e. the totals are gathered per column of the stored matrix.
	 *
	 * \param VA        entry values, consumed sequentially from VA[0], one per visited entry
	 * \param row_sums  accumulator array, updated in place (not cleared here)
	 * \param bindx     column index of each stored entry
	 * \param bpntr     row pointers: row i owns entries bpntr[i] .. bpntr[i+1]-1
	 * \param br,bc     first row and one-past-last row visited by the loop
	 * \param coff      offset added to the column index when addressing row_sums
	 * \note Mdim, mdim, indptr, rpntr, cpntr, roff and flags are not used by this kernel.
	 * \return RSB_ERR_NO_ERROR, unconditionally
	 */

	const float *val = VA;	/* advances by one for each entry visited */
	rsb_coo_idx_t row = 0;

	for(row = br; RSB_LIKELY(row < bc); ++row)	/* br and bc bound the rows here (bounded box patch) */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row+1];

		while(nz < row_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			float * const col_acc = row_sums + col;
			float term = 0;

			/* the value is summed onto zero first, then added to the column total */
			term += *val;
			col_acc[coff] += term;

			++nz;
			++val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_C__tT_r1_c1_uu_sU_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed (tT) flavour of the signed row-sums kernel, where A \f$\neq\f$ A^T.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * The value of every stored entry (i,j) is added, as is (no absolute value),
	 * to row_sums[coff+j], i.e. the totals are gathered per column of the stored matrix.
	 * Only stored entries are visited: nothing is added here for the implicit diagonal.
	 *
	 * \param VA        entry values, consumed sequentially from VA[0], one per visited entry
	 * \param row_sums  accumulator array, updated in place (not cleared here)
	 * \param bindx     column index of each stored entry
	 * \param bpntr     row pointers: row i owns entries bpntr[i] .. bpntr[i+1]-1
	 * \param br,bc     first row and one-past-last row visited by the loop
	 * \param coff      offset added to the column index when addressing row_sums
	 * \note Mdim, mdim, indptr, rpntr, cpntr, roff and flags are not used by this kernel.
	 * \return RSB_ERR_NO_ERROR, unconditionally
	 */

	const float *val = VA;	/* advances by one for each entry visited */
	rsb_coo_idx_t row = 0;

	for(row = br; RSB_LIKELY(row < bc); ++row)	/* br and bc bound the rows here (bounded box patch) */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row+1];

		while(nz < row_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			float * const col_acc = row_sums + col;
			float term = 0;

			/* the value is summed onto zero first, then added to the column total */
			term += *val;
			col_acc[coff] += term;

			++nz;
			++val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_H__tT_r1_c1_uu_sU_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed (tT) row sums kernel, for A \neq A^T: each stored value is
	 * added to the \c row_sums entry selected by its column index plus \c coff.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 * Only the \c bpntr ranges of indices in [br,bc) are scanned; values are
	 * consumed sequentially starting from \c VA.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff and flags are not used here.
	 * \return RSB_ERR_NO_ERROR, always.
	 */

	const float *val = VA;	/* next stored value to consume */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* br/bc: bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row + 1];

		while (nz < nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			float *const slot = row_sums + col + coff;
			/* accumulator starts from zero, as in the unrolled template */
			float acc = 0;

			acc += val[0];
			*slot += acc;
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_H__tT_r1_c1_uu_sU_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed (tT) row sums kernel, for A \neq A^T: each stored value is
	 * added to the \c row_sums entry selected by its column index plus \c coff.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 * Only the \c bpntr ranges of indices in [br,bc) are scanned; values are
	 * consumed sequentially starting from \c VA.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff and flags are not used here.
	 * NOTE(review): no unit diagonal contribution is added here although the
	 * diagonal is implicit -- presumably handled by the caller; confirm.
	 * \return RSB_ERR_NO_ERROR, always.
	 */

	const float *val = VA;	/* next stored value to consume */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* br/bc: bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row + 1];

		while (nz < nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			float *const slot = row_sums + col + coff;
			/* accumulator starts from zero, as in the unrolled template */
			float acc = 0;

			acc += val[0];
			*slot += acc;
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_C__tC_r1_c1_uu_sU_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel, tC variant, for A \neq A^T: each stored value is
	 * added to the \c row_sums entry selected by its column index plus \c coff.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 * Only the \c bpntr ranges of indices in [br,bc) are scanned; values are
	 * consumed sequentially starting from \c VA.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff and flags are not used here.
	 * \return RSB_ERR_NO_ERROR, always.
	 */

	const float *val = VA;	/* next stored value to consume */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* br/bc: bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row + 1];

		while (nz < nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			float *const slot = row_sums + col + coff;
			/* accumulator starts from zero, as in the unrolled template */
			float acc = 0;

			acc += val[0];
			*slot += acc;
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_C__tC_r1_c1_uu_sU_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel, tC variant, for A \neq A^T: each stored value is
	 * added to the \c row_sums entry selected by its column index plus \c coff.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 * Only the \c bpntr ranges of indices in [br,bc) are scanned; values are
	 * consumed sequentially starting from \c VA.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff and flags are not used here.
	 * NOTE(review): no unit diagonal contribution is added here although the
	 * diagonal is implicit -- presumably handled by the caller; confirm.
	 * \return RSB_ERR_NO_ERROR, always.
	 */

	const float *val = VA;	/* next stored value to consume */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* br/bc: bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row + 1];

		while (nz < nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			float *const slot = row_sums + col + coff;
			/* accumulator starts from zero, as in the unrolled template */
			float acc = 0;

			acc += val[0];
			*slot += acc;
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_H__tC_r1_c1_uu_sU_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel, tC variant, for A \neq A^T: each stored value is
	 * added to the \c row_sums entry selected by its column index plus \c coff.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 * Only the \c bpntr ranges of indices in [br,bc) are scanned; values are
	 * consumed sequentially starting from \c VA.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff and flags are not used here.
	 * \return RSB_ERR_NO_ERROR, always.
	 */

	const float *val = VA;	/* next stored value to consume */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* br/bc: bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row + 1];

		while (nz < nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			float *const slot = row_sums + col + coff;
			/* accumulator starts from zero, as in the unrolled template */
			float acc = 0;

			acc += val[0];
			*slot += acc;
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_H__tC_r1_c1_uu_sU_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel, tC variant, for A \neq A^T: each stored value is
	 * added to the \c row_sums entry selected by its column index plus \c coff.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 * Only the \c bpntr ranges of indices in [br,bc) are scanned; values are
	 * consumed sequentially starting from \c VA.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff and flags are not used here.
	 * NOTE(review): no unit diagonal contribution is added here although the
	 * diagonal is implicit -- presumably handled by the caller; confirm.
	 * \return RSB_ERR_NO_ERROR, always.
	 */

	const float *val = VA;	/* next stored value to consume */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* br/bc: bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row + 1];

		while (nz < nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			float *const slot = row_sums + col + coff;
			/* accumulator starts from zero, as in the unrolled template */
			float acc = 0;

			acc += val[0];
			*slot += acc;
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_C__tN_r1_c1_uu_sS_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel for a matrix with A == A^T: each stored value is added
	 * to \c row_sums at its row index plus \c roff and, unless roff == coff and
	 * row == column, also at its column index plus \c coff.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 * Only rows in [br,bc) are scanned; values are consumed sequentially
	 * starting from \c VA.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used here.
	 * \return RSB_ERR_NO_ERROR, always.
	 */

	const float *val = VA;	/* next stored value to consume */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* br/bc: bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row + 1];

		while (nz < nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			float *const own_slot = row_sums + row + roff;
			/* accumulator starts from zero, as in the unrolled template */
			float acc = 0;

			acc += val[0];
			*own_slot += acc;

			/* mirrored update, skipped only when roff == coff and row == col */
			if (!(roff == coff && row == col))
				row_sums[coff + col] += acc;

			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_C__tN_r1_c1_uu_sS_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel for a matrix with A == A^T: each stored value is added
	 * to \c row_sums at its row index plus \c roff and, unless roff == coff and
	 * row == column, also at its column index plus \c coff.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 * Only rows in [br,bc) are scanned; values are consumed sequentially
	 * starting from \c VA.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used here.
	 * NOTE(review): no unit diagonal contribution is added here although the
	 * diagonal is implicit -- presumably handled by the caller; confirm.
	 * \return RSB_ERR_NO_ERROR, always.
	 */

	const float *val = VA;	/* next stored value to consume */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* br/bc: bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row + 1];

		while (nz < nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			float *const own_slot = row_sums + row + roff;
			/* accumulator starts from zero, as in the unrolled template */
			float acc = 0;

			acc += val[0];
			*own_slot += acc;

			/* mirrored update, skipped only when roff == coff and row == col */
			if (!(roff == coff && row == col))
				row_sums[coff + col] += acc;

			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_H__tN_r1_c1_uu_sS_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel for a matrix with A == A^T: each stored value is added
	 * to \c row_sums at its row index plus \c roff and, unless roff == coff and
	 * row == column, also at its column index plus \c coff.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 * Only rows in [br,bc) are scanned; values are consumed sequentially
	 * starting from \c VA.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used here.
	 * \return RSB_ERR_NO_ERROR, always.
	 */

	const float *val = VA;	/* next stored value to consume */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* br/bc: bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row + 1];

		while (nz < nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			float *const own_slot = row_sums + row + roff;
			/* accumulator starts from zero, as in the unrolled template */
			float acc = 0;

			acc += val[0];
			*own_slot += acc;

			/* mirrored update, skipped only when roff == coff and row == col */
			if (!(roff == coff && row == col))
				row_sums[coff + col] += acc;

			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_H__tN_r1_c1_uu_sS_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel for a matrix with A == A^T: each stored value is added
	 * to \c row_sums at its row index plus \c roff and, unless roff == coff and
	 * row == column, also at its column index plus \c coff.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 * Only rows in [br,bc) are scanned; values are consumed sequentially
	 * starting from \c VA.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used here.
	 * NOTE(review): no unit diagonal contribution is added here although the
	 * diagonal is implicit -- presumably handled by the caller; confirm.
	 * \return RSB_ERR_NO_ERROR, always.
	 */

	const float *val = VA;	/* next stored value to consume */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* br/bc: bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row + 1];

		while (nz < nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			float *const own_slot = row_sums + row + roff;
			/* accumulator starts from zero, as in the unrolled template */
			float acc = 0;

			acc += val[0];
			*own_slot += acc;

			/* mirrored update, skipped only when roff == coff and row == col */
			if (!(roff == coff && row == col))
				row_sums[coff + col] += acc;

			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_C__tT_r1_c1_uu_sS_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed (tT) row sums entry point for a matrix with A == A^T.
	 * Forwards all arguments, unchanged, to the non-transposed (tN) kernel.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 * \return the value returned by the non-transposed kernel.
	 */

	/* Symmetric transposed reverts to symmetric not transposed */
	const rsb_err_t errval = rsb__BCSR_rowssums_float_C__tN_r1_c1_uu_sS_dE_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_rowssums_float_C__tT_r1_c1_uu_sS_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed (tT) row sums entry point for a matrix with A == A^T.
	 * Forwards all arguments, unchanged, to the non-transposed (tN) kernel.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 * \return the value returned by the non-transposed kernel.
	 */

	/* Symmetric transposed reverts to symmetric not transposed */
	const rsb_err_t errval = rsb__BCSR_rowssums_float_C__tN_r1_c1_uu_sS_dI_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_rowssums_float_H__tT_r1_c1_uu_sS_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed (tT) row sums entry point for a matrix with A == A^T.
	 * Forwards all arguments, unchanged, to the non-transposed (tN) kernel.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 * \return the value returned by the non-transposed kernel.
	 */

	/* Symmetric transposed reverts to symmetric not transposed */
	const rsb_err_t errval = rsb__BCSR_rowssums_float_H__tN_r1_c1_uu_sS_dE_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_rowssums_float_H__tT_r1_c1_uu_sS_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed (tT) row sums entry point for a matrix with A == A^T.
	 * Forwards all arguments, unchanged, to the non-transposed (tN) kernel.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 * \return the value returned by the non-transposed kernel.
	 */

	/* Symmetric transposed reverts to symmetric not transposed */
	const rsb_err_t errval = rsb__BCSR_rowssums_float_H__tN_r1_c1_uu_sS_dI_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_rowssums_float_C__tC_r1_c1_uu_sS_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums entry point, tC variant, for a matrix with A == A^T.
	 * Forwards all arguments, unchanged, to the non-transposed (tN) kernel.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 * \return the value returned by the non-transposed kernel.
	 */

	/* Symmetric transposed reverts to symmetric not transposed */
	const rsb_err_t errval = rsb__BCSR_rowssums_float_C__tN_r1_c1_uu_sS_dE_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_rowssums_float_C__tC_r1_c1_uu_sS_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums entry point, tC variant, for a matrix with A == A^T.
	 * Forwards all arguments, unchanged, to the non-transposed (tN) kernel.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 * \return the value returned by the non-transposed kernel.
	 */

	/* Symmetric transposed reverts to symmetric not transposed */
	const rsb_err_t errval = rsb__BCSR_rowssums_float_C__tN_r1_c1_uu_sS_dI_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_rowssums_float_H__tC_r1_c1_uu_sS_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums entry point, tC variant, for a matrix with A == A^T.
	 * Forwards all arguments, unchanged, to the non-transposed (tN) kernel.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 * \return the value returned by the non-transposed kernel.
	 */

	/* Symmetric transposed reverts to symmetric not transposed */
	const rsb_err_t errval = rsb__BCSR_rowssums_float_H__tN_r1_c1_uu_sS_dE_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_rowssums_float_H__tC_r1_c1_uu_sS_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums entry point, tC variant, for a matrix with A == A^T.
	 * Forwards all arguments, unchanged, to the non-transposed (tN) kernel.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 * \return the value returned by the non-transposed kernel.
	 */

	/* Symmetric transposed reverts to symmetric not transposed */
	const rsb_err_t errval = rsb__BCSR_rowssums_float_H__tN_r1_c1_uu_sS_dI_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_rowssums_float_C__tN_r1_c1_uu_sH_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel for a matrix with A == A^H: each stored value is added
	 * to \c row_sums at its row index plus \c roff and, unless roff == coff and
	 * row == column, also at its column index plus \c coff.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 * Only rows in [br,bc) are scanned; values are consumed sequentially
	 * starting from \c VA.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used here.
	 * \return RSB_ERR_NO_ERROR, always.
	 */

	const float *val = VA;	/* next stored value to consume */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* br/bc: bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row + 1];

		while (nz < nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			float *const own_slot = row_sums + row + roff;
			/* accumulator starts from zero, as in the unrolled template */
			float acc = 0;

			acc += val[0];
			*own_slot += acc;

			/* mirrored update, skipped only when roff == coff and row == col */
			if (!(roff == coff && row == col))
				row_sums[coff + col] += acc;

			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_C__tN_r1_c1_uu_sH_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel for a matrix with A == A^H: each stored value is added
	 * to \c row_sums at its row index plus \c roff and, unless roff == coff and
	 * row == column, also at its column index plus \c coff.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 * Only rows in [br,bc) are scanned; values are consumed sequentially
	 * starting from \c VA.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used here.
	 * NOTE(review): no unit diagonal contribution is added here although the
	 * diagonal is implicit -- presumably handled by the caller; confirm.
	 * \return RSB_ERR_NO_ERROR, always.
	 */

	const float *val = VA;	/* next stored value to consume */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* br/bc: bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row + 1];

		while (nz < nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			float *const own_slot = row_sums + row + roff;
			/* accumulator starts from zero, as in the unrolled template */
			float acc = 0;

			acc += val[0];
			*own_slot += acc;

			/* mirrored update, skipped only when roff == coff and row == col */
			if (!(roff == coff && row == col))
				row_sums[coff + col] += acc;

			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_H__tN_r1_c1_uu_sH_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel for a matrix with A == A^H: each stored value is added
	 * to \c row_sums at its row index plus \c roff and, unless roff == coff and
	 * row == column, also at its column index plus \c coff.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 * Only rows in [br,bc) are scanned; values are consumed sequentially
	 * starting from \c VA.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used here.
	 * \return RSB_ERR_NO_ERROR, always.
	 */

	const float *val = VA;	/* next stored value to consume */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* br/bc: bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row + 1];

		while (nz < nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			float *const own_slot = row_sums + row + roff;
			/* accumulator starts from zero, as in the unrolled template */
			float acc = 0;

			acc += val[0];
			*own_slot += acc;

			/* mirrored update, skipped only when roff == coff and row == col */
			if (!(roff == coff && row == col))
				row_sums[coff + col] += acc;

			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_H__tN_r1_c1_uu_sH_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel for a matrix with A == A^H: each stored value is added
	 * to \c row_sums at its row index plus \c roff and, unless roff == coff and
	 * row == column, also at its column index plus \c coff.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 * Only rows in [br,bc) are scanned; values are consumed sequentially
	 * starting from \c VA.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used here.
	 * NOTE(review): no unit diagonal contribution is added here although the
	 * diagonal is implicit -- presumably handled by the caller; confirm.
	 * \return RSB_ERR_NO_ERROR, always.
	 */

	const float *val = VA;	/* next stored value to consume */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* br/bc: bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row + 1];

		while (nz < nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			float *const own_slot = row_sums + row + roff;
			/* accumulator starts from zero, as in the unrolled template */
			float acc = 0;

			acc += val[0];
			*own_slot += acc;

			/* mirrored update, skipped only when roff == coff and row == col */
			if (!(roff == coff && row == col))
				row_sums[coff + col] += acc;

			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_C__tT_r1_c1_uu_sH_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed (tT) row sums entry point for a matrix with A == A^H.
	 * Forwards all arguments, unchanged, to the non-transposed (tN) kernel.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 * \return the value returned by the non-transposed kernel.
	 */

	/* Symmetric transposed reverts to symmetric not transposed */
	const rsb_err_t errval = rsb__BCSR_rowssums_float_C__tN_r1_c1_uu_sH_dE_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_rowssums_float_C__tT_r1_c1_uu_sH_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed (tT) row sums entry point for a matrix with A == A^H.
	 * Forwards all arguments, unchanged, to the non-transposed (tN) kernel.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 * \return the value returned by the non-transposed kernel.
	 */

	/* Symmetric transposed reverts to symmetric not transposed */
	const rsb_err_t errval = rsb__BCSR_rowssums_float_C__tN_r1_c1_uu_sH_dI_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_rowssums_float_H__tT_r1_c1_uu_sH_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed (tT) row sums entry point for a matrix with A == A^H.
	 * Forwards all arguments, unchanged, to the non-transposed (tN) kernel.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 * \return the value returned by the non-transposed kernel.
	 */

	/* Symmetric transposed reverts to symmetric not transposed */
	const rsb_err_t errval = rsb__BCSR_rowssums_float_H__tN_r1_c1_uu_sH_dE_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_rowssums_float_H__tT_r1_c1_uu_sH_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row-sums kernel, tT transposition variant, for a Hermitian matrix (\f$A = A^H\f$).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 * Every argument is forwarded unchanged to the untransposed (tN) kernel
	 * of the same type, index type and diagonal kind.
	 * \return the error value returned by the untransposed kernel.
	 */

	/* On a symmetric operand the transposed case reduces to the untransposed one. */
	const rsb_err_t errval = rsb__BCSR_rowssums_float_H__tN_r1_c1_uu_sH_dI_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_rowssums_float_C__tC_r1_c1_uu_sH_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row-sums kernel, tC transposition variant, for a Hermitian matrix (\f$A = A^H\f$).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 * Every argument is forwarded unchanged to the untransposed (tN) kernel
	 * of the same type, index type and diagonal kind.
	 * \return the error value returned by the untransposed kernel.
	 */

	/* On a symmetric operand the transposed case reduces to the untransposed one. */
	const rsb_err_t errval = rsb__BCSR_rowssums_float_C__tN_r1_c1_uu_sH_dE_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_rowssums_float_C__tC_r1_c1_uu_sH_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row-sums kernel, tC transposition variant, for a Hermitian matrix (\f$A = A^H\f$).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 * Every argument is forwarded unchanged to the untransposed (tN) kernel
	 * of the same type, index type and diagonal kind.
	 * \return the error value returned by the untransposed kernel.
	 */

	/* On a symmetric operand the transposed case reduces to the untransposed one. */
	const rsb_err_t errval = rsb__BCSR_rowssums_float_C__tN_r1_c1_uu_sH_dI_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_rowssums_float_H__tC_r1_c1_uu_sH_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row-sums kernel, tC transposition variant, for a Hermitian matrix (\f$A = A^H\f$).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 * Every argument is forwarded unchanged to the untransposed (tN) kernel
	 * of the same type, index type and diagonal kind.
	 * \return the error value returned by the untransposed kernel.
	 */

	/* On a symmetric operand the transposed case reduces to the untransposed one. */
	const rsb_err_t errval = rsb__BCSR_rowssums_float_H__tN_r1_c1_uu_sH_dE_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_rowssums_float_H__tC_r1_c1_uu_sH_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row-sums kernel, tC transposition variant, for a Hermitian matrix (\f$A = A^H\f$).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 * Every argument is forwarded unchanged to the untransposed (tN) kernel
	 * of the same type, index type and diagonal kind.
	 * \return the error value returned by the untransposed kernel.
	 */

	/* On a symmetric operand the transposed case reduces to the untransposed one. */
	const rsb_err_t errval = rsb__BCSR_rowssums_float_H__tN_r1_c1_uu_sH_dI_uG(VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags);

	return errval;
}



rsb_err_t rsb__BCSR_scale_float_C__tN_r1_c1_uu_sU_dE_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the stored values of A row-wise, in place: every value stored in
	 * row i is multiplied by scale_factors[i], i.e.
	 * \f$A \leftarrow P \cdot A,\ P_{ii}=s_{i}\f$, where \f$A \neq A^T\f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * Only rows in [br,bc) are visited. VA is consumed sequentially starting
	 * at its first element, one value per index in [bpntr[i],bpntr[i+1]).
	 * The column indices (bindx) are not needed for row scaling and are not read.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */
	float *val = VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t first = bpntr[row], last = bpntr[row + 1];
		rsb_nnz_idx_t nz;

		/* The factor is re-read for each value: VA and scale_factors are not restrict-qualified. */
		for (nz = first; nz < last; ++nz, ++val)
		{
			*val *= scale_factors[row];
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_C__tN_r1_c1_uu_sU_dI_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the stored values of A row-wise, in place: every value stored in
	 * row i is multiplied by scale_factors[i], i.e.
	 * \f$A \leftarrow P \cdot A,\ P_{ii}=s_{i}\f$, where \f$A \neq A^T\f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * Only rows in [br,bc) are visited. VA is consumed sequentially starting
	 * at its first element, one value per index in [bpntr[i],bpntr[i+1]).
	 * The column indices (bindx) are not needed for row scaling and are not read.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */
	float *val = VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t first = bpntr[row], last = bpntr[row + 1];
		rsb_nnz_idx_t nz;

		/* The factor is re-read for each value: VA and scale_factors are not restrict-qualified. */
		for (nz = first; nz < last; ++nz, ++val)
		{
			*val *= scale_factors[row];
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_H__tN_r1_c1_uu_sU_dE_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the stored values of A row-wise, in place: every value stored in
	 * row i is multiplied by scale_factors[i], i.e.
	 * \f$A \leftarrow P \cdot A,\ P_{ii}=s_{i}\f$, where \f$A \neq A^T\f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * Only rows in [br,bc) are visited. VA is consumed sequentially starting
	 * at its first element, one value per index in [bpntr[i],bpntr[i+1]).
	 * The column indices (bindx) are not needed for row scaling and are not read.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */
	float *val = VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t first = bpntr[row], last = bpntr[row + 1];
		rsb_nnz_idx_t nz;

		/* The factor is re-read for each value: VA and scale_factors are not restrict-qualified. */
		for (nz = first; nz < last; ++nz, ++val)
		{
			*val *= scale_factors[row];
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_H__tN_r1_c1_uu_sU_dI_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the stored values of A row-wise, in place: every value stored in
	 * row i is multiplied by scale_factors[i], i.e.
	 * \f$A \leftarrow P \cdot A,\ P_{ii}=s_{i}\f$, where \f$A \neq A^T\f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * Only rows in [br,bc) are visited. VA is consumed sequentially starting
	 * at its first element, one value per index in [bpntr[i],bpntr[i+1]).
	 * The column indices (bindx) are not needed for row scaling and are not read.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */
	float *val = VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t first = bpntr[row], last = bpntr[row + 1];
		rsb_nnz_idx_t nz;

		/* The factor is re-read for each value: VA and scale_factors are not restrict-qualified. */
		for (nz = first; nz < last; ++nz, ++val)
		{
			*val *= scale_factors[row];
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_C__tT_r1_c1_uu_sU_dE_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the stored values of A column-wise, in place: every stored value
	 * is multiplied by scale_factors[j], j being its column index from bindx,
	 * i.e. \f$A \leftarrow A\cdot P,\ P_{ii}=s_{i}\f$, where \f$A \neq A^T\f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * Only rows in [br,bc) are visited. VA is consumed sequentially starting
	 * at its first element, one value per index in [bpntr[i],bpntr[i+1]).
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */
	float *val = VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row + 1];

		while (nz < nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];

			*val *= scale_factors[col];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_C__tT_r1_c1_uu_sU_dI_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the stored values of A column-wise, in place: every stored value
	 * is multiplied by scale_factors[j], j being its column index from bindx,
	 * i.e. \f$A \leftarrow A\cdot P,\ P_{ii}=s_{i}\f$, where \f$A \neq A^T\f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * Only rows in [br,bc) are visited. VA is consumed sequentially starting
	 * at its first element, one value per index in [bpntr[i],bpntr[i+1]).
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */
	float *val = VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row + 1];

		while (nz < nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];

			*val *= scale_factors[col];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_H__tT_r1_c1_uu_sU_dE_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the stored values of A column-wise, in place: every stored value
	 * is multiplied by scale_factors[j], j being its column index from bindx,
	 * i.e. \f$A \leftarrow A\cdot P,\ P_{ii}=s_{i}\f$, where \f$A \neq A^T\f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * Only rows in [br,bc) are visited. VA is consumed sequentially starting
	 * at its first element, one value per index in [bpntr[i],bpntr[i+1]).
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */
	float *val = VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row + 1];

		while (nz < nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];

			*val *= scale_factors[col];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_H__tT_r1_c1_uu_sU_dI_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the stored values of A column-wise, in place: every stored value
	 * is multiplied by scale_factors[j], j being its column index from bindx,
	 * i.e. \f$A \leftarrow A\cdot P,\ P_{ii}=s_{i}\f$, where \f$A \neq A^T\f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * Only rows in [br,bc) are visited. VA is consumed sequentially starting
	 * at its first element, one value per index in [bpntr[i],bpntr[i+1]).
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */
	float *val = VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row + 1];

		while (nz < nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];

			*val *= scale_factors[col];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_C__tC_r1_c1_uu_sU_dE_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the stored values of A column-wise, in place: every stored value
	 * is multiplied by scale_factors[j], j being its column index from bindx,
	 * i.e. \f$A \leftarrow A\cdot P,\ P_{ii}=s_{i}\f$, where \f$A \neq A^T\f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * Only rows in [br,bc) are visited. VA is consumed sequentially starting
	 * at its first element, one value per index in [bpntr[i],bpntr[i+1]).
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */
	float *val = VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row + 1];

		while (nz < nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];

			*val *= scale_factors[col];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_C__tC_r1_c1_uu_sU_dI_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the stored values of A column-wise, in place: every stored value
	 * is multiplied by scale_factors[j], j being its column index from bindx,
	 * i.e. \f$A \leftarrow A\cdot P,\ P_{ii}=s_{i}\f$, where \f$A \neq A^T\f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * Only rows in [br,bc) are visited. VA is consumed sequentially starting
	 * at its first element, one value per index in [bpntr[i],bpntr[i+1]).
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */
	float *val = VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row + 1];

		while (nz < nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];

			*val *= scale_factors[col];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_H__tC_r1_c1_uu_sU_dE_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the stored values of A column-wise, in place: every stored value
	 * is multiplied by scale_factors[j], j being its column index from bindx,
	 * i.e. \f$A \leftarrow A\cdot P,\ P_{ii}=s_{i}\f$, where \f$A \neq A^T\f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * Only rows in [br,bc) are visited. VA is consumed sequentially starting
	 * at its first element, one value per index in [bpntr[i],bpntr[i+1]).
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */
	float *val = VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row + 1];

		while (nz < nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];

			*val *= scale_factors[col];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_H__tC_r1_c1_uu_sU_dI_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the stored values of A column-wise, in place: every stored value
	 * is multiplied by scale_factors[j], j being its column index from bindx,
	 * i.e. \f$A \leftarrow A\cdot P,\ P_{ii}=s_{i}\f$, where \f$A \neq A^T\f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * Only rows in [br,bc) are visited. VA is consumed sequentially starting
	 * at its first element, one value per index in [bpntr[i],bpntr[i+1]).
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */
	float *val = VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row + 1];

		while (nz < nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];

			*val *= scale_factors[col];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_C__tN_r1_c1_uu_sS_dE_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the stored values of A row-wise, in place: every value stored in
	 * row i is multiplied by scale_factors[i], i.e.
	 * \f$A \leftarrow P \cdot A,\ P_{ii}=s_{i}\f$, where \f$A = A^T\f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * Only rows in [br,bc) are visited. VA is consumed sequentially starting
	 * at its first element, one value per index in [bpntr[i],bpntr[i+1]).
	 * The column indices (bindx) are not needed for row scaling and are not read.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */
	float *val = VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t first = bpntr[row], last = bpntr[row + 1];
		rsb_nnz_idx_t nz;

		/* The factor is re-read for each value: VA and scale_factors are not restrict-qualified. */
		for (nz = first; nz < last; ++nz, ++val)
		{
			*val *= scale_factors[row];
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_C__tN_r1_c1_uu_sS_dI_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the stored values of A row-wise, in place: every value stored in
	 * row i is multiplied by scale_factors[i], i.e.
	 * \f$A \leftarrow P \cdot A,\ P_{ii}=s_{i}\f$, where \f$A = A^T\f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * Only rows in [br,bc) are visited. VA is consumed sequentially starting
	 * at its first element, one value per index in [bpntr[i],bpntr[i+1]).
	 * The column indices (bindx) are not needed for row scaling and are not read.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */
	float *val = VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t first = bpntr[row], last = bpntr[row + 1];
		rsb_nnz_idx_t nz;

		/* The factor is re-read for each value: VA and scale_factors are not restrict-qualified. */
		for (nz = first; nz < last; ++nz, ++val)
		{
			*val *= scale_factors[row];
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_H__tN_r1_c1_uu_sS_dE_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the stored values of A row-wise, in place: every value stored in
	 * row i is multiplied by scale_factors[i], i.e.
	 * \f$A \leftarrow P \cdot A,\ P_{ii}=s_{i}\f$, where \f$A = A^T\f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * Only rows in [br,bc) are visited. VA is consumed sequentially starting
	 * at its first element, one value per index in [bpntr[i],bpntr[i+1]).
	 * The column indices (bindx) are not needed for row scaling and are not read.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */
	float *val = VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t first = bpntr[row], last = bpntr[row + 1];
		rsb_nnz_idx_t nz;

		/* The factor is re-read for each value: VA and scale_factors are not restrict-qualified. */
		for (nz = first; nz < last; ++nz, ++val)
		{
			*val *= scale_factors[row];
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_H__tN_r1_c1_uu_sS_dI_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the stored values of A row-wise, in place: every value stored in
	 * row i is multiplied by scale_factors[i], i.e.
	 * \f$A \leftarrow P \cdot A,\ P_{ii}=s_{i}\f$, where \f$A = A^T\f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * Only rows in [br,bc) are visited. VA is consumed sequentially starting
	 * at its first element, one value per index in [bpntr[i],bpntr[i+1]).
	 * The column indices (bindx) are not needed for row scaling and are not read.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */
	float *val = VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t first = bpntr[row], last = bpntr[row + 1];
		rsb_nnz_idx_t nz;

		/* The factor is re-read for each value: VA and scale_factors are not restrict-qualified. */
		for (nz = first; nz < last; ++nz, ++val)
		{
			*val *= scale_factors[row];
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_C__tT_r1_c1_uu_sS_dE_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scaling kernel, tT transposition variant, for a symmetric matrix (\f$A = A^T\f$).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 * Every argument is forwarded unchanged to the untransposed (tN) kernel
	 * of the same type, index type and diagonal kind.
	 * \return the error value returned by the untransposed kernel.
	 */

	/* On a symmetric operand the transposed case reduces to the untransposed one. */
	const rsb_err_t errval = rsb__BCSR_scale_float_C__tN_r1_c1_uu_sS_dE_uG(VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors);

	return errval;
}



rsb_err_t rsb__BCSR_scale_float_C__tT_r1_c1_uu_sS_dI_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scaling kernel, tT transposition variant, for a symmetric matrix (\f$A = A^T\f$).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 * Every argument is forwarded unchanged to the untransposed (tN) kernel
	 * of the same type, index type and diagonal kind.
	 * \return the error value returned by the untransposed kernel.
	 */

	/* On a symmetric operand the transposed case reduces to the untransposed one. */
	const rsb_err_t errval = rsb__BCSR_scale_float_C__tN_r1_c1_uu_sS_dI_uG(VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors);

	return errval;
}



rsb_err_t rsb__BCSR_scale_float_H__tT_r1_c1_uu_sS_dE_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scaling kernel, tT transposition variant, for a symmetric matrix (\f$A = A^T\f$).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 * Every argument is forwarded unchanged to the untransposed (tN) kernel
	 * of the same type, index type and diagonal kind.
	 * \return the error value returned by the untransposed kernel.
	 */

	/* On a symmetric operand the transposed case reduces to the untransposed one. */
	const rsb_err_t errval = rsb__BCSR_scale_float_H__tN_r1_c1_uu_sS_dE_uG(VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors);

	return errval;
}



rsb_err_t rsb__BCSR_scale_float_H__tT_r1_c1_uu_sS_dI_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scaling kernel, tT transposition variant, for a symmetric matrix (\f$A = A^T\f$).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 * Every argument is forwarded unchanged to the untransposed (tN) kernel
	 * of the same type, index type and diagonal kind.
	 * \return the error value returned by the untransposed kernel.
	 */

	/* On a symmetric operand the transposed case reduces to the untransposed one. */
	const rsb_err_t errval = rsb__BCSR_scale_float_H__tN_r1_c1_uu_sS_dI_uG(VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors);

	return errval;
}



rsb_err_t rsb__BCSR_scale_float_C__tC_r1_c1_uu_sS_dE_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scaling kernel, tC transposition variant, for a symmetric matrix (\f$A = A^T\f$).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 * Every argument is forwarded unchanged to the untransposed (tN) kernel
	 * of the same type, index type and diagonal kind.
	 * \return the error value returned by the untransposed kernel.
	 */

	/* On a symmetric operand the transposed case reduces to the untransposed one. */
	const rsb_err_t errval = rsb__BCSR_scale_float_C__tN_r1_c1_uu_sS_dE_uG(VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors);

	return errval;
}



rsb_err_t rsb__BCSR_scale_float_C__tC_r1_c1_uu_sS_dI_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scaling kernel, tC transposition variant, for a symmetric matrix (\f$A = A^T\f$).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 * Every argument is forwarded unchanged to the untransposed (tN) kernel
	 * of the same type, index type and diagonal kind.
	 * \return the error value returned by the untransposed kernel.
	 */

	/* On a symmetric operand the transposed case reduces to the untransposed one. */
	const rsb_err_t errval = rsb__BCSR_scale_float_C__tN_r1_c1_uu_sS_dI_uG(VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors);

	return errval;
}



rsb_err_t rsb__BCSR_scale_float_H__tC_r1_c1_uu_sS_dE_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scaling kernel, tC transposition variant, for a symmetric matrix (\f$A = A^T\f$).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 * Every argument is forwarded unchanged to the untransposed (tN) kernel
	 * of the same type, index type and diagonal kind.
	 * \return the error value returned by the untransposed kernel.
	 */

	/* On a symmetric operand the transposed case reduces to the untransposed one. */
	const rsb_err_t errval = rsb__BCSR_scale_float_H__tN_r1_c1_uu_sS_dE_uG(VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors);

	return errval;
}



rsb_err_t rsb__BCSR_scale_float_H__tC_r1_c1_uu_sS_dI_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scaling kernel, tC variant, for matrices with \f$A == A^T\f$.
	 * Does no work of its own: every argument is handed over, unchanged,
	 * to the matching tN kernel (same type, index type, symmetry, diagonal).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 * \return the error value returned by the tN kernel.
	 */

	/* A symmetric matrix needs no distinct transposed code path: reuse the untransposed kernel. */
	const rsb_err_t errval = rsb__BCSR_scale_float_H__tN_r1_c1_uu_sS_dI_uG(VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors);

	return errval;
}



rsb_err_t rsb__BCSR_scale_float_C__tN_r1_c1_uu_sH_dE_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row scaling kernel for matrices with \f$A == A^H\f$: for each row i with
	 * br <= i < bc, every stored value of that row is multiplied in place by scale_factors[i].
	 * Values are visited sequentially starting at VA, one for each index in
	 * [bpntr[i],bpntr[i+1]); indptr is not consulted.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 * \return RSB_ERR_NO_ERROR, always (no argument checking is performed here).
	 */

	float *val = VA;	/* next stored value to scale */
	rsb_coo_idx_t row = br;

	while(RSB_LIKELY(row < bc))	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		/* SCALE KERNEL HERE */
		for(; nz < nz_end; ++nz, ++val)
		{
			/* the column index is fetched as in the generated template, though row scaling does not need it */
			const rsb_coo_idx_t col = bindx[nz];
			const float *factor = scale_factors + row;

			(void)col;
			*val *= *factor;
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_C__tN_r1_c1_uu_sH_dI_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row scaling kernel for matrices with \f$A == A^H\f$: for each row i with
	 * br <= i < bc, every stored value of that row is multiplied in place by scale_factors[i].
	 * Values are visited sequentially starting at VA, one for each index in
	 * [bpntr[i],bpntr[i+1]); indptr is not consulted.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 * \return RSB_ERR_NO_ERROR, always (no argument checking is performed here).
	 */

	float *val = VA;	/* next stored value to scale */
	rsb_coo_idx_t row = br;

	while(RSB_LIKELY(row < bc))	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		/* SCALE KERNEL HERE */
		for(; nz < nz_end; ++nz, ++val)
		{
			/* the column index is fetched as in the generated template, though row scaling does not need it */
			const rsb_coo_idx_t col = bindx[nz];
			const float *factor = scale_factors + row;

			(void)col;
			*val *= *factor;
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_H__tN_r1_c1_uu_sH_dE_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row scaling kernel for matrices with \f$A == A^H\f$: for each row i with
	 * br <= i < bc, every stored value of that row is multiplied in place by scale_factors[i].
	 * Values are visited sequentially starting at VA, one for each index in
	 * [bpntr[i],bpntr[i+1]); indptr is not consulted.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 * \return RSB_ERR_NO_ERROR, always (no argument checking is performed here).
	 */

	float *val = VA;	/* next stored value to scale */
	rsb_coo_idx_t row = br;

	while(RSB_LIKELY(row < bc))	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		/* SCALE KERNEL HERE */
		for(; nz < nz_end; ++nz, ++val)
		{
			/* the column index is fetched as in the generated template, though row scaling does not need it */
			const rsb_coo_idx_t col = bindx[nz];
			const float *factor = scale_factors + row;

			(void)col;
			*val *= *factor;
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_H__tN_r1_c1_uu_sH_dI_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row scaling kernel for matrices with \f$A == A^H\f$: for each row i with
	 * br <= i < bc, every stored value of that row is multiplied in place by scale_factors[i].
	 * Values are visited sequentially starting at VA, one for each index in
	 * [bpntr[i],bpntr[i+1]); indptr is not consulted.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 * \return RSB_ERR_NO_ERROR, always (no argument checking is performed here).
	 */

	float *val = VA;	/* next stored value to scale */
	rsb_coo_idx_t row = br;

	while(RSB_LIKELY(row < bc))	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		/* SCALE KERNEL HERE */
		for(; nz < nz_end; ++nz, ++val)
		{
			/* the column index is fetched as in the generated template, though row scaling does not need it */
			const rsb_coo_idx_t col = bindx[nz];
			const float *factor = scale_factors + row;

			(void)col;
			*val *= *factor;
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_C__tT_r1_c1_uu_sH_dE_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scaling kernel, tT variant, for matrices with \f$A == A^H\f$.
	 * Does no work of its own: every argument is handed over, unchanged,
	 * to the matching tN kernel (same type, index type, symmetry, diagonal).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 * \return the error value returned by the tN kernel.
	 */

	/* The transposed variant of this symmetric-type kernel reuses the untransposed one. */
	const rsb_err_t errval = rsb__BCSR_scale_float_C__tN_r1_c1_uu_sH_dE_uG(VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors);

	return errval;
}



rsb_err_t rsb__BCSR_scale_float_C__tT_r1_c1_uu_sH_dI_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scaling kernel, tT variant, for matrices with \f$A == A^H\f$.
	 * Does no work of its own: every argument is handed over, unchanged,
	 * to the matching tN kernel (same type, index type, symmetry, diagonal).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 * \return the error value returned by the tN kernel.
	 */

	/* The transposed variant of this symmetric-type kernel reuses the untransposed one. */
	const rsb_err_t errval = rsb__BCSR_scale_float_C__tN_r1_c1_uu_sH_dI_uG(VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors);

	return errval;
}



rsb_err_t rsb__BCSR_scale_float_H__tT_r1_c1_uu_sH_dE_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scaling kernel, tT variant, for matrices with \f$A == A^H\f$.
	 * Does no work of its own: every argument is handed over, unchanged,
	 * to the matching tN kernel (same type, index type, symmetry, diagonal).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 * \return the error value returned by the tN kernel.
	 */

	/* The transposed variant of this symmetric-type kernel reuses the untransposed one. */
	const rsb_err_t errval = rsb__BCSR_scale_float_H__tN_r1_c1_uu_sH_dE_uG(VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors);

	return errval;
}



rsb_err_t rsb__BCSR_scale_float_H__tT_r1_c1_uu_sH_dI_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scaling kernel, tT variant, for matrices with \f$A == A^H\f$.
	 * Does no work of its own: every argument is handed over, unchanged,
	 * to the matching tN kernel (same type, index type, symmetry, diagonal).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 * \return the error value returned by the tN kernel.
	 */

	/* The transposed variant of this symmetric-type kernel reuses the untransposed one. */
	const rsb_err_t errval = rsb__BCSR_scale_float_H__tN_r1_c1_uu_sH_dI_uG(VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors);

	return errval;
}



rsb_err_t rsb__BCSR_scale_float_C__tC_r1_c1_uu_sH_dE_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scaling kernel, tC variant, for matrices with \f$A == A^H\f$.
	 * Does no work of its own: every argument is handed over, unchanged,
	 * to the matching tN kernel (same type, index type, symmetry, diagonal).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 * \return the error value returned by the tN kernel.
	 */

	/* The transposed variant of this symmetric-type kernel reuses the untransposed one. */
	const rsb_err_t errval = rsb__BCSR_scale_float_C__tN_r1_c1_uu_sH_dE_uG(VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors);

	return errval;
}



rsb_err_t rsb__BCSR_scale_float_C__tC_r1_c1_uu_sH_dI_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scaling kernel, tC variant, for matrices with \f$A == A^H\f$.
	 * Does no work of its own: every argument is handed over, unchanged,
	 * to the matching tN kernel (same type, index type, symmetry, diagonal).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 * \return the error value returned by the tN kernel.
	 */

	/* The transposed variant of this symmetric-type kernel reuses the untransposed one. */
	const rsb_err_t errval = rsb__BCSR_scale_float_C__tN_r1_c1_uu_sH_dI_uG(VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors);

	return errval;
}



rsb_err_t rsb__BCSR_scale_float_H__tC_r1_c1_uu_sH_dE_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scaling kernel, tC variant, for matrices with \f$A == A^H\f$.
	 * Does no work of its own: every argument is handed over, unchanged,
	 * to the matching tN kernel (same type, index type, symmetry, diagonal).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 * \return the error value returned by the tN kernel.
	 */

	/* The transposed variant of this symmetric-type kernel reuses the untransposed one. */
	const rsb_err_t errval = rsb__BCSR_scale_float_H__tN_r1_c1_uu_sH_dE_uG(VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors);

	return errval;
}



rsb_err_t rsb__BCSR_scale_float_H__tC_r1_c1_uu_sH_dI_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scaling kernel, tC variant, for matrices with \f$A == A^H\f$.
	 * Does no work of its own: every argument is handed over, unchanged,
	 * to the matching tN kernel (same type, index type, symmetry, diagonal).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 * \return the error value returned by the tN kernel.
	 */

	/* The transposed variant of this symmetric-type kernel reuses the untransposed one. */
	const rsb_err_t errval = rsb__BCSR_scale_float_H__tN_r1_c1_uu_sH_dI_uG(VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors);

	return errval;
}



rsb_err_t rsb__BCSR_infty_norm_float_complex_C__tN_r1_c1_uu_sU_dE_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation step towards \f$ \|A\|_{\infty} \f$, for \f$A \neq A^T\f$: for each row i
	 * with br <= i < bc, the modulus (cabsf) of every stored value of that row is
	 * added to row_sums[i+roff]. row_sums is only accumulated into, never reset;
	 * values are visited sequentially from VA and indptr is not consulted.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float complex, with rsb_coo_idx_t column indices.
	 * \return RSB_ERR_NO_ERROR, always (no argument checking is performed here).
	 */

	const float complex *val = VA;	/* next stored value to read */
	rsb_coo_idx_t row = br;

	while(RSB_LIKELY(row < bc))	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		/* INFTY_NORM KERNEL HERE */
		for(; nz < nz_end; ++nz, ++val)
		{
			/* the column index is fetched as in the generated template, though this variant does not need it */
			const rsb_coo_idx_t col = bindx[nz];
			float complex *const sums_at_row = row_sums + row;
			/* FIXME (inherited from the M4 template): this kernel body is flagged as not correct; the reason is not stated. */
			/* NOTE : should better use some intrinsic here. */
			const float complex modulus = cabsf(*val);

			(void)col;
			sums_at_row[roff] += modulus;
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_complex_C__tN_r1_c1_uu_sU_dI_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation step towards \f$ \|A\|_{\infty} \f$, for \f$A \neq A^T\f$: for each row i
	 * with br <= i < bc, the modulus (cabsf) of every stored value of that row is
	 * added to row_sums[i+roff]. row_sums is only accumulated into, never reset;
	 * values are visited sequentially from VA and indptr is not consulted.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float complex, with rsb_coo_idx_t column indices.
	 * \return RSB_ERR_NO_ERROR, always (no argument checking is performed here).
	 */

	const float complex *val = VA;	/* next stored value to read */
	rsb_coo_idx_t row = br;

	while(RSB_LIKELY(row < bc))	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		/* INFTY_NORM KERNEL HERE */
		for(; nz < nz_end; ++nz, ++val)
		{
			/* the column index is fetched as in the generated template, though this variant does not need it */
			const rsb_coo_idx_t col = bindx[nz];
			float complex *const sums_at_row = row_sums + row;
			/* FIXME (inherited from the M4 template): this kernel body is flagged as not correct; the reason is not stated. */
			/* NOTE : should better use some intrinsic here. */
			const float complex modulus = cabsf(*val);

			(void)col;
			sums_at_row[roff] += modulus;
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_complex_H__tN_r1_c1_uu_sU_dE_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation step towards \f$ \|A\|_{\infty} \f$, for \f$A \neq A^T\f$: for each row i
	 * with br <= i < bc, the modulus (cabsf) of every stored value of that row is
	 * added to row_sums[i+roff]. row_sums is only accumulated into, never reset;
	 * values are visited sequentially from VA and indptr is not consulted.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float complex, with rsb_half_idx_t column indices.
	 * \return RSB_ERR_NO_ERROR, always (no argument checking is performed here).
	 */

	const float complex *val = VA;	/* next stored value to read */
	rsb_coo_idx_t row = br;

	while(RSB_LIKELY(row < bc))	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		/* INFTY_NORM KERNEL HERE */
		for(; nz < nz_end; ++nz, ++val)
		{
			/* the column index is fetched as in the generated template, though this variant does not need it */
			const rsb_coo_idx_t col = bindx[nz];
			float complex *const sums_at_row = row_sums + row;
			/* FIXME (inherited from the M4 template): this kernel body is flagged as not correct; the reason is not stated. */
			/* NOTE : should better use some intrinsic here. */
			const float complex modulus = cabsf(*val);

			(void)col;
			sums_at_row[roff] += modulus;
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_complex_H__tN_r1_c1_uu_sU_dI_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation step towards \f$ \|A\|_{\infty} \f$, for \f$A \neq A^T\f$: for each row i
	 * with br <= i < bc, the modulus (cabsf) of every stored value of that row is
	 * added to row_sums[i+roff]. row_sums is only accumulated into, never reset;
	 * values are visited sequentially from VA and indptr is not consulted.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float complex, with rsb_half_idx_t column indices.
	 * \return RSB_ERR_NO_ERROR, always (no argument checking is performed here).
	 */

	const float complex *val = VA;	/* next stored value to read */
	rsb_coo_idx_t row = br;

	while(RSB_LIKELY(row < bc))	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		/* INFTY_NORM KERNEL HERE */
		for(; nz < nz_end; ++nz, ++val)
		{
			/* the column index is fetched as in the generated template, though this variant does not need it */
			const rsb_coo_idx_t col = bindx[nz];
			float complex *const sums_at_row = row_sums + row;
			/* FIXME (inherited from the M4 template): this kernel body is flagged as not correct; the reason is not stated. */
			/* NOTE : should better use some intrinsic here. */
			const float complex modulus = cabsf(*val);

			(void)col;
			sums_at_row[roff] += modulus;
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_complex_C__tT_r1_c1_uu_sU_dE_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation step towards \f$ \|A\|_{\infty} \f$, tT variant, for \f$A \neq A^T\f$:
	 * for each row i with br <= i < bc and each stored value of that row with
	 * column index j=bindx[k], the modulus (cabsf) of the value is added to
	 * row_sums[j+coff]. row_sums is only accumulated into, never reset;
	 * values are visited sequentially from VA and indptr is not consulted.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float complex, with rsb_coo_idx_t column indices.
	 * \return RSB_ERR_NO_ERROR, always (no argument checking is performed here).
	 */

	const float complex *val = VA;	/* next stored value to read */
	rsb_coo_idx_t row = br;

	while(RSB_LIKELY(row < bc))	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		/* INFTY_NORM KERNEL HERE */
		for(; nz < nz_end; ++nz, ++val)
		{
			const rsb_coo_idx_t col = bindx[nz];
			/* transposed case: the sum is indexed by the column of the stored value */
			float complex *const sums_at_col = row_sums + col;
			/* FIXME (inherited from the M4 template): this kernel body is flagged as not correct; the reason is not stated. */
			/* NOTE : should better use some intrinsic here. */
			const float complex modulus = cabsf(*val);

			sums_at_col[coff] += modulus;
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_complex_C__tT_r1_c1_uu_sU_dI_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation step towards \f$ \|A\|_{\infty} \f$, tT variant, for \f$A \neq A^T\f$:
	 * for each row i with br <= i < bc and each stored value of that row with
	 * column index j=bindx[k], the modulus (cabsf) of the value is added to
	 * row_sums[j+coff]. row_sums is only accumulated into, never reset;
	 * values are visited sequentially from VA and indptr is not consulted.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float complex, with rsb_coo_idx_t column indices.
	 * \return RSB_ERR_NO_ERROR, always (no argument checking is performed here).
	 */

	const float complex *val = VA;	/* next stored value to read */
	rsb_coo_idx_t row = br;

	while(RSB_LIKELY(row < bc))	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		/* INFTY_NORM KERNEL HERE */
		for(; nz < nz_end; ++nz, ++val)
		{
			const rsb_coo_idx_t col = bindx[nz];
			/* transposed case: the sum is indexed by the column of the stored value */
			float complex *const sums_at_col = row_sums + col;
			/* FIXME (inherited from the M4 template): this kernel body is flagged as not correct; the reason is not stated. */
			/* NOTE : should better use some intrinsic here. */
			const float complex modulus = cabsf(*val);

			sums_at_col[coff] += modulus;
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_complex_H__tT_r1_c1_uu_sU_dE_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation step towards \f$ \|A\|_{\infty} \f$, tT variant, for \f$A \neq A^T\f$:
	 * for each row i with br <= i < bc and each stored value of that row with
	 * column index j=bindx[k], the modulus (cabsf) of the value is added to
	 * row_sums[j+coff]. row_sums is only accumulated into, never reset;
	 * values are visited sequentially from VA and indptr is not consulted.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float complex, with rsb_half_idx_t column indices.
	 * \return RSB_ERR_NO_ERROR, always (no argument checking is performed here).
	 */

	const float complex *val = VA;	/* next stored value to read */
	rsb_coo_idx_t row = br;

	while(RSB_LIKELY(row < bc))	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		/* INFTY_NORM KERNEL HERE */
		for(; nz < nz_end; ++nz, ++val)
		{
			const rsb_coo_idx_t col = bindx[nz];
			/* transposed case: the sum is indexed by the column of the stored value */
			float complex *const sums_at_col = row_sums + col;
			/* FIXME (inherited from the M4 template): this kernel body is flagged as not correct; the reason is not stated. */
			/* NOTE : should better use some intrinsic here. */
			const float complex modulus = cabsf(*val);

			sums_at_col[coff] += modulus;
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_complex_H__tT_r1_c1_uu_sU_dI_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation step towards \f$ \|A\|_{\infty} \f$, tT variant, for \f$A \neq A^T\f$:
	 * for each row i with br <= i < bc and each stored value of that row with
	 * column index j=bindx[k], the modulus (cabsf) of the value is added to
	 * row_sums[j+coff]. row_sums is only accumulated into, never reset;
	 * values are visited sequentially from VA and indptr is not consulted.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float complex, with rsb_half_idx_t column indices.
	 * \return RSB_ERR_NO_ERROR, always (no argument checking is performed here).
	 */

	const float complex *val = VA;	/* next stored value to read */
	rsb_coo_idx_t row = br;

	while(RSB_LIKELY(row < bc))	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		/* INFTY_NORM KERNEL HERE */
		for(; nz < nz_end; ++nz, ++val)
		{
			const rsb_coo_idx_t col = bindx[nz];
			/* transposed case: the sum is indexed by the column of the stored value */
			float complex *const sums_at_col = row_sums + col;
			/* FIXME (inherited from the M4 template): this kernel body is flagged as not correct; the reason is not stated. */
			/* NOTE : should better use some intrinsic here. */
			const float complex modulus = cabsf(*val);

			sums_at_col[coff] += modulus;
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_complex_C__tC_r1_c1_uu_sU_dE_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation step towards \f$ \|A\|_{\infty} \f$, tC variant, for \f$A \neq A^T\f$:
	 * for each row i with br <= i < bc and each stored value of that row with
	 * column index j=bindx[k], the modulus (cabsf) of the conjugated value is
	 * added to row_sums[j+coff]. row_sums is only accumulated into, never reset;
	 * values are visited sequentially from VA and indptr is not consulted.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float complex, with rsb_coo_idx_t column indices.
	 * \return RSB_ERR_NO_ERROR, always (no argument checking is performed here).
	 */

	const float complex *val = VA;	/* next stored value to read */
	rsb_coo_idx_t row = br;

	while(RSB_LIKELY(row < bc))	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		/* INFTY_NORM KERNEL HERE */
		for(; nz < nz_end; ++nz, ++val)
		{
			const rsb_coo_idx_t col = bindx[nz];
			/* transposed case: the sum is indexed by the column of the stored value */
			float complex *const sums_at_col = row_sums + col;
			/* FIXME (inherited from the M4 template): this kernel body is flagged as not correct; the reason is not stated. */
			/* NOTE : should better use some intrinsic here. */
			const float complex modulus = cabsf(conjf(*val));

			sums_at_col[coff] += modulus;
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_complex_C__tC_r1_c1_uu_sU_dI_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation step towards \f$ \|A\|_{\infty} \f$, tC variant, for \f$A \neq A^T\f$:
	 * for each row i with br <= i < bc and each stored value of that row with
	 * column index j=bindx[k], the modulus (cabsf) of the conjugated value is
	 * added to row_sums[j+coff]. row_sums is only accumulated into, never reset;
	 * values are visited sequentially from VA and indptr is not consulted.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float complex, with rsb_coo_idx_t column indices.
	 * \return RSB_ERR_NO_ERROR, always (no argument checking is performed here).
	 */

	const float complex *val = VA;	/* next stored value to read */
	rsb_coo_idx_t row = br;

	while(RSB_LIKELY(row < bc))	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		/* INFTY_NORM KERNEL HERE */
		for(; nz < nz_end; ++nz, ++val)
		{
			const rsb_coo_idx_t col = bindx[nz];
			/* transposed case: the sum is indexed by the column of the stored value */
			float complex *const sums_at_col = row_sums + col;
			/* FIXME (inherited from the M4 template): this kernel body is flagged as not correct; the reason is not stated. */
			/* NOTE : should better use some intrinsic here. */
			const float complex modulus = cabsf(conjf(*val));

			sums_at_col[coff] += modulus;
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_complex_H__tC_r1_c1_uu_sU_dE_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation step towards \f$ \|A\|_{\infty} \f$, tC variant, for \f$A \neq A^T\f$:
	 * for each row i with br <= i < bc and each stored value of that row with
	 * column index j=bindx[k], the modulus (cabsf) of the conjugated value is
	 * added to row_sums[j+coff]. row_sums is only accumulated into, never reset;
	 * values are visited sequentially from VA and indptr is not consulted.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float complex, with rsb_half_idx_t column indices.
	 * \return RSB_ERR_NO_ERROR, always (no argument checking is performed here).
	 */

	const float complex *val = VA;	/* next stored value to read */
	rsb_coo_idx_t row = br;

	while(RSB_LIKELY(row < bc))	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		/* INFTY_NORM KERNEL HERE */
		for(; nz < nz_end; ++nz, ++val)
		{
			const rsb_coo_idx_t col = bindx[nz];
			/* transposed case: the sum is indexed by the column of the stored value */
			float complex *const sums_at_col = row_sums + col;
			/* FIXME (inherited from the M4 template): this kernel body is flagged as not correct; the reason is not stated. */
			/* NOTE : should better use some intrinsic here. */
			const float complex modulus = cabsf(conjf(*val));

			sums_at_col[coff] += modulus;
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_complex_H__tC_r1_c1_uu_sU_dI_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity-norm helper kernel: 1 x 1 blocked BCSR, float complex values,
	 * rsb_half_idx_t column indices, unsymmetric matrix, diagonal implicit
	 * (name suffixes: tC, sU, dI).
	 *
	 * For each row in [br, bc) and each nonzero delimited by bpntr, the modulus of
	 * the conjugated value is added to row_sums[col + coff], where col is the
	 * column index read from bindx. VA is read sequentially from its first element.
	 * No extra contribution for an implicit diagonal is added in this body.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff and flags are not read by this body.
	 *
	 * NOTE: the generator tags this kernel body with a FIXME ("not correct");
	 * the reason is not stated.
	 * \return RSB_ERR_NO_ERROR (this body has no failure path)
	 */
	const float complex *val = VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* br/bc act as row bounds (bounded box) */
	{
		const rsb_nnz_idx_t row_end = bpntr[row + 1];
		rsb_nnz_idx_t nz = bpntr[row];

		while (nz < row_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			float complex *const target = row_sums + col;
			/* modulus kept in a complex temporary, so the update below is complex += complex */
			const float complex modulus = cabsf(conjf(val[0]));

			target[coff] += modulus;

			++nz;
			++val;	/* 1 x 1 blocks: exactly one value per nonzero */
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_complex_C__tN_r1_c1_uu_sS_dE_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity-norm helper kernel: 1 x 1 blocked BCSR, float complex values,
	 * rsb_coo_idx_t column indices, symmetric matrix (A == A^T), diagonal explicit
	 * (name suffixes: tN, sS, dE).
	 *
	 * For each row in [br, bc) and each nonzero delimited by bpntr, the modulus of
	 * the value is added to row_sums[row + roff]; it is also added to
	 * row_sums[coff + col] unless roff == coff and row == col (col read from bindx).
	 * VA is read sequentially from its first element.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not read by this body.
	 *
	 * NOTE: the generator tags this kernel body with a FIXME ("not correct");
	 * the reason is not stated.
	 * \return RSB_ERR_NO_ERROR (this body has no failure path)
	 */
	const float complex *val = VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* br/bc act as row bounds (bounded box) */
	{
		const rsb_nnz_idx_t row_end = bpntr[row + 1];
		rsb_nnz_idx_t nz = bpntr[row];

		while (nz < row_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			float complex *const target = row_sums + row;
			/* modulus kept in a complex temporary, so the updates below are complex += complex */
			const float complex modulus = cabsf(val[0]);

			target[roff] += modulus;

			/* mirrored contribution; skipped only when offsets match and row == col */
			if (!(roff == coff && row == col))
				row_sums[coff + col] += modulus;

			++nz;
			++val;	/* 1 x 1 blocks: exactly one value per nonzero */
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_complex_C__tN_r1_c1_uu_sS_dI_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity-norm helper kernel: 1 x 1 blocked BCSR, float complex values,
	 * rsb_coo_idx_t column indices, symmetric matrix (A == A^T), diagonal implicit
	 * (name suffixes: tN, sS, dI).
	 *
	 * For each row in [br, bc) and each nonzero delimited by bpntr, the modulus of
	 * the value is added to row_sums[row + roff]; it is also added to
	 * row_sums[coff + col] unless roff == coff and row == col (col read from bindx).
	 * VA is read sequentially from its first element.
	 * No extra contribution for an implicit diagonal is added in this body.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not read by this body.
	 *
	 * NOTE: the generator tags this kernel body with a FIXME ("not correct");
	 * the reason is not stated.
	 * \return RSB_ERR_NO_ERROR (this body has no failure path)
	 */
	const float complex *val = VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* br/bc act as row bounds (bounded box) */
	{
		const rsb_nnz_idx_t row_end = bpntr[row + 1];
		rsb_nnz_idx_t nz = bpntr[row];

		while (nz < row_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			float complex *const target = row_sums + row;
			/* modulus kept in a complex temporary, so the updates below are complex += complex */
			const float complex modulus = cabsf(val[0]);

			target[roff] += modulus;

			/* mirrored contribution; skipped only when offsets match and row == col */
			if (!(roff == coff && row == col))
				row_sums[coff + col] += modulus;

			++nz;
			++val;	/* 1 x 1 blocks: exactly one value per nonzero */
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_complex_H__tN_r1_c1_uu_sS_dE_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity-norm helper kernel: 1 x 1 blocked BCSR, float complex values,
	 * rsb_half_idx_t column indices, symmetric matrix (A == A^T), diagonal explicit
	 * (name suffixes: tN, sS, dE).
	 *
	 * For each row in [br, bc) and each nonzero delimited by bpntr, the modulus of
	 * the value is added to row_sums[row + roff]; it is also added to
	 * row_sums[coff + col] unless roff == coff and row == col (col read from bindx).
	 * VA is read sequentially from its first element.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not read by this body.
	 *
	 * NOTE: the generator tags this kernel body with a FIXME ("not correct");
	 * the reason is not stated.
	 * \return RSB_ERR_NO_ERROR (this body has no failure path)
	 */
	const float complex *val = VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* br/bc act as row bounds (bounded box) */
	{
		const rsb_nnz_idx_t row_end = bpntr[row + 1];
		rsb_nnz_idx_t nz = bpntr[row];

		while (nz < row_end)
		{
			const rsb_coo_idx_t col = bindx[nz];	/* half-width index widened to rsb_coo_idx_t */
			float complex *const target = row_sums + row;
			/* modulus kept in a complex temporary, so the updates below are complex += complex */
			const float complex modulus = cabsf(val[0]);

			target[roff] += modulus;

			/* mirrored contribution; skipped only when offsets match and row == col */
			if (!(roff == coff && row == col))
				row_sums[coff + col] += modulus;

			++nz;
			++val;	/* 1 x 1 blocks: exactly one value per nonzero */
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_complex_H__tN_r1_c1_uu_sS_dI_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity-norm helper kernel: 1 x 1 blocked BCSR, float complex values,
	 * rsb_half_idx_t column indices, symmetric matrix (A == A^T), diagonal implicit
	 * (name suffixes: tN, sS, dI).
	 *
	 * For each row in [br, bc) and each nonzero delimited by bpntr, the modulus of
	 * the value is added to row_sums[row + roff]; it is also added to
	 * row_sums[coff + col] unless roff == coff and row == col (col read from bindx).
	 * VA is read sequentially from its first element.
	 * No extra contribution for an implicit diagonal is added in this body.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not read by this body.
	 *
	 * NOTE: the generator tags this kernel body with a FIXME ("not correct");
	 * the reason is not stated.
	 * \return RSB_ERR_NO_ERROR (this body has no failure path)
	 */
	const float complex *val = VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* br/bc act as row bounds (bounded box) */
	{
		const rsb_nnz_idx_t row_end = bpntr[row + 1];
		rsb_nnz_idx_t nz = bpntr[row];

		while (nz < row_end)
		{
			const rsb_coo_idx_t col = bindx[nz];	/* half-width index widened to rsb_coo_idx_t */
			float complex *const target = row_sums + row;
			/* modulus kept in a complex temporary, so the updates below are complex += complex */
			const float complex modulus = cabsf(val[0]);

			target[roff] += modulus;

			/* mirrored contribution; skipped only when offsets match and row == col */
			if (!(roff == coff && row == col))
				row_sums[coff + col] += modulus;

			++nz;
			++val;	/* 1 x 1 blocks: exactly one value per nonzero */
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_complex_C__tT_r1_c1_uu_sS_dE_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity-norm helper kernel: 1 x 1 blocked BCSR, float complex values,
	 * rsb_coo_idx_t column indices, symmetric matrix (A == A^T), diagonal explicit
	 * (name suffixes: tT, sS, dE).
	 *
	 * For each row in [br, bc) and each nonzero delimited by bpntr, the modulus of
	 * the value is added to row_sums[col + coff] (col read from bindx); it is also
	 * added to row_sums[roff + row] unless roff == coff and row == col.
	 * VA is read sequentially from its first element.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not read by this body.
	 *
	 * NOTE: the generator tags this kernel body with a FIXME ("not correct");
	 * the reason is not stated.
	 * \return RSB_ERR_NO_ERROR (this body has no failure path)
	 */
	const float complex *val = VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* br/bc act as row bounds (bounded box) */
	{
		const rsb_nnz_idx_t row_end = bpntr[row + 1];
		rsb_nnz_idx_t nz = bpntr[row];

		while (nz < row_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			float complex *const target = row_sums + col;
			/* modulus kept in a complex temporary, so the updates below are complex += complex */
			const float complex modulus = cabsf(val[0]);

			target[coff] += modulus;

			/* mirrored contribution; skipped only when offsets match and row == col */
			if (!(roff == coff && row == col))
				row_sums[roff + row] += modulus;

			++nz;
			++val;	/* 1 x 1 blocks: exactly one value per nonzero */
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_complex_C__tT_r1_c1_uu_sS_dI_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity-norm helper kernel: 1 x 1 blocked BCSR, float complex values,
	 * rsb_coo_idx_t column indices, symmetric matrix (A == A^T), diagonal implicit
	 * (name suffixes: tT, sS, dI).
	 *
	 * For each row in [br, bc) and each nonzero delimited by bpntr, the modulus of
	 * the value is added to row_sums[col + coff] (col read from bindx); it is also
	 * added to row_sums[roff + row] unless roff == coff and row == col.
	 * VA is read sequentially from its first element.
	 * No extra contribution for an implicit diagonal is added in this body.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not read by this body.
	 *
	 * NOTE: the generator tags this kernel body with a FIXME ("not correct");
	 * the reason is not stated.
	 * \return RSB_ERR_NO_ERROR (this body has no failure path)
	 */
	const float complex *val = VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* br/bc act as row bounds (bounded box) */
	{
		const rsb_nnz_idx_t row_end = bpntr[row + 1];
		rsb_nnz_idx_t nz = bpntr[row];

		while (nz < row_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			float complex *const target = row_sums + col;
			/* modulus kept in a complex temporary, so the updates below are complex += complex */
			const float complex modulus = cabsf(val[0]);

			target[coff] += modulus;

			/* mirrored contribution; skipped only when offsets match and row == col */
			if (!(roff == coff && row == col))
				row_sums[roff + row] += modulus;

			++nz;
			++val;	/* 1 x 1 blocks: exactly one value per nonzero */
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_complex_H__tT_r1_c1_uu_sS_dE_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity-norm helper kernel: 1 x 1 blocked BCSR, float complex values,
	 * rsb_half_idx_t column indices, symmetric matrix (A == A^T), diagonal explicit
	 * (name suffixes: tT, sS, dE).
	 *
	 * For each row in [br, bc) and each nonzero delimited by bpntr, the modulus of
	 * the value is added to row_sums[col + coff] (col read from bindx); it is also
	 * added to row_sums[roff + row] unless roff == coff and row == col.
	 * VA is read sequentially from its first element.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not read by this body.
	 *
	 * NOTE: the generator tags this kernel body with a FIXME ("not correct");
	 * the reason is not stated.
	 * \return RSB_ERR_NO_ERROR (this body has no failure path)
	 */
	const float complex *val = VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* br/bc act as row bounds (bounded box) */
	{
		const rsb_nnz_idx_t row_end = bpntr[row + 1];
		rsb_nnz_idx_t nz = bpntr[row];

		while (nz < row_end)
		{
			const rsb_coo_idx_t col = bindx[nz];	/* half-width index widened to rsb_coo_idx_t */
			float complex *const target = row_sums + col;
			/* modulus kept in a complex temporary, so the updates below are complex += complex */
			const float complex modulus = cabsf(val[0]);

			target[coff] += modulus;

			/* mirrored contribution; skipped only when offsets match and row == col */
			if (!(roff == coff && row == col))
				row_sums[roff + row] += modulus;

			++nz;
			++val;	/* 1 x 1 blocks: exactly one value per nonzero */
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_complex_H__tT_r1_c1_uu_sS_dI_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity-norm helper kernel: 1 x 1 blocked BCSR, float complex values,
	 * rsb_half_idx_t column indices, symmetric matrix (A == A^T), diagonal implicit
	 * (name suffixes: tT, sS, dI).
	 *
	 * For each row in [br, bc) and each nonzero delimited by bpntr, the modulus of
	 * the value is added to row_sums[col + coff] (col read from bindx); it is also
	 * added to row_sums[roff + row] unless roff == coff and row == col.
	 * VA is read sequentially from its first element.
	 * No extra contribution for an implicit diagonal is added in this body.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not read by this body.
	 *
	 * NOTE: the generator tags this kernel body with a FIXME ("not correct");
	 * the reason is not stated.
	 * \return RSB_ERR_NO_ERROR (this body has no failure path)
	 */
	const float complex *val = VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* br/bc act as row bounds (bounded box) */
	{
		const rsb_nnz_idx_t row_end = bpntr[row + 1];
		rsb_nnz_idx_t nz = bpntr[row];

		while (nz < row_end)
		{
			const rsb_coo_idx_t col = bindx[nz];	/* half-width index widened to rsb_coo_idx_t */
			float complex *const target = row_sums + col;
			/* modulus kept in a complex temporary, so the updates below are complex += complex */
			const float complex modulus = cabsf(val[0]);

			target[coff] += modulus;

			/* mirrored contribution; skipped only when offsets match and row == col */
			if (!(roff == coff && row == col))
				row_sums[roff + row] += modulus;

			++nz;
			++val;	/* 1 x 1 blocks: exactly one value per nonzero */
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_complex_C__tC_r1_c1_uu_sS_dE_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity-norm helper kernel: 1 x 1 blocked BCSR, float complex values,
	 * rsb_coo_idx_t column indices, symmetric matrix (A == A^T), diagonal explicit
	 * (name suffixes: tC, sS, dE).
	 *
	 * For each row in [br, bc) and each nonzero delimited by bpntr, the modulus of
	 * the conjugated value is added to row_sums[col + coff] (col read from bindx);
	 * it is also added to row_sums[roff + row] unless roff == coff and row == col.
	 * VA is read sequentially from its first element.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not read by this body.
	 *
	 * NOTE: the generator tags this kernel body with a FIXME ("not correct");
	 * the reason is not stated.
	 * \return RSB_ERR_NO_ERROR (this body has no failure path)
	 */
	const float complex *val = VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* br/bc act as row bounds (bounded box) */
	{
		const rsb_nnz_idx_t row_end = bpntr[row + 1];
		rsb_nnz_idx_t nz = bpntr[row];

		while (nz < row_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			float complex *const target = row_sums + col;
			/* modulus kept in a complex temporary, so the updates below are complex += complex */
			const float complex modulus = cabsf(conjf(val[0]));

			target[coff] += modulus;

			/* mirrored contribution; skipped only when offsets match and row == col */
			if (!(roff == coff && row == col))
				row_sums[roff + row] += modulus;

			++nz;
			++val;	/* 1 x 1 blocks: exactly one value per nonzero */
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_complex_C__tC_r1_c1_uu_sS_dI_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity-norm helper kernel: 1 x 1 blocked BCSR, float complex values,
	 * rsb_coo_idx_t column indices, symmetric matrix (A == A^T), diagonal implicit
	 * (name suffixes: tC, sS, dI).
	 *
	 * For each row in [br, bc) and each nonzero delimited by bpntr, the modulus of
	 * the conjugated value is added to row_sums[col + coff] (col read from bindx);
	 * it is also added to row_sums[roff + row] unless roff == coff and row == col.
	 * VA is read sequentially from its first element.
	 * No extra contribution for an implicit diagonal is added in this body.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not read by this body.
	 *
	 * NOTE: the generator tags this kernel body with a FIXME ("not correct");
	 * the reason is not stated.
	 * \return RSB_ERR_NO_ERROR (this body has no failure path)
	 */
	const float complex *val = VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* br/bc act as row bounds (bounded box) */
	{
		const rsb_nnz_idx_t row_end = bpntr[row + 1];
		rsb_nnz_idx_t nz = bpntr[row];

		while (nz < row_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			float complex *const target = row_sums + col;
			/* modulus kept in a complex temporary, so the updates below are complex += complex */
			const float complex modulus = cabsf(conjf(val[0]));

			target[coff] += modulus;

			/* mirrored contribution; skipped only when offsets match and row == col */
			if (!(roff == coff && row == col))
				row_sums[roff + row] += modulus;

			++nz;
			++val;	/* 1 x 1 blocks: exactly one value per nonzero */
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_complex_H__tC_r1_c1_uu_sS_dE_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity-norm helper kernel: 1 x 1 blocked BCSR, float complex values,
	 * rsb_half_idx_t column indices, symmetric matrix (A == A^T), diagonal explicit
	 * (name suffixes: tC, sS, dE).
	 *
	 * For each row in [br, bc) and each nonzero delimited by bpntr, the modulus of
	 * the conjugated value is added to row_sums[col + coff] (col read from bindx);
	 * it is also added to row_sums[roff + row] unless roff == coff and row == col.
	 * VA is read sequentially from its first element.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not read by this body.
	 *
	 * NOTE: the generator tags this kernel body with a FIXME ("not correct");
	 * the reason is not stated.
	 * \return RSB_ERR_NO_ERROR (this body has no failure path)
	 */
	const float complex *val = VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* br/bc act as row bounds (bounded box) */
	{
		const rsb_nnz_idx_t row_end = bpntr[row + 1];
		rsb_nnz_idx_t nz = bpntr[row];

		while (nz < row_end)
		{
			const rsb_coo_idx_t col = bindx[nz];	/* half-width index widened to rsb_coo_idx_t */
			float complex *const target = row_sums + col;
			/* modulus kept in a complex temporary, so the updates below are complex += complex */
			const float complex modulus = cabsf(conjf(val[0]));

			target[coff] += modulus;

			/* mirrored contribution; skipped only when offsets match and row == col */
			if (!(roff == coff && row == col))
				row_sums[roff + row] += modulus;

			++nz;
			++val;	/* 1 x 1 blocks: exactly one value per nonzero */
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_complex_H__tC_r1_c1_uu_sS_dI_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity-norm helper kernel: 1 x 1 blocked BCSR, float complex values,
	 * rsb_half_idx_t column indices, symmetric matrix (A == A^T), diagonal implicit
	 * (name suffixes: tC, sS, dI).
	 *
	 * For each row in [br, bc) and each nonzero delimited by bpntr, the modulus of
	 * the conjugated value is added to row_sums[col + coff] (col read from bindx);
	 * it is also added to row_sums[roff + row] unless roff == coff and row == col.
	 * VA is read sequentially from its first element.
	 * No extra contribution for an implicit diagonal is added in this body.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not read by this body.
	 *
	 * NOTE: the generator tags this kernel body with a FIXME ("not correct");
	 * the reason is not stated.
	 * \return RSB_ERR_NO_ERROR (this body has no failure path)
	 */
	const float complex *val = VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* br/bc act as row bounds (bounded box) */
	{
		const rsb_nnz_idx_t row_end = bpntr[row + 1];
		rsb_nnz_idx_t nz = bpntr[row];

		while (nz < row_end)
		{
			const rsb_coo_idx_t col = bindx[nz];	/* half-width index widened to rsb_coo_idx_t */
			float complex *const target = row_sums + col;
			/* modulus kept in a complex temporary, so the updates below are complex += complex */
			const float complex modulus = cabsf(conjf(val[0]));

			target[coff] += modulus;

			/* mirrored contribution; skipped only when offsets match and row == col */
			if (!(roff == coff && row == col))
				row_sums[roff + row] += modulus;

			++nz;
			++val;	/* 1 x 1 blocks: exactly one value per nonzero */
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_complex_C__tN_r1_c1_uu_sH_dE_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity-norm helper kernel: 1 x 1 blocked BCSR, float complex values,
	 * rsb_coo_idx_t column indices, hermitian matrix (A == A^H), diagonal explicit
	 * (name suffixes: tN, sH, dE).
	 *
	 * For each row in [br, bc) and each nonzero delimited by bpntr, the modulus of
	 * the value is added to row_sums[row + roff]; it is also added to
	 * row_sums[coff + col] unless roff == coff and row == col (col read from bindx).
	 * VA is read sequentially from its first element.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not read by this body.
	 *
	 * NOTE: the generator tags this kernel body with a FIXME ("not correct");
	 * the reason is not stated.
	 * \return RSB_ERR_NO_ERROR (this body has no failure path)
	 */
	const float complex *val = VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* br/bc act as row bounds (bounded box) */
	{
		const rsb_nnz_idx_t row_end = bpntr[row + 1];
		rsb_nnz_idx_t nz = bpntr[row];

		while (nz < row_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			float complex *const target = row_sums + row;
			/* modulus kept in a complex temporary, so the updates below are complex += complex */
			const float complex modulus = cabsf(val[0]);

			target[roff] += modulus;

			/* mirrored contribution; skipped only when offsets match and row == col */
			if (!(roff == coff && row == col))
				row_sums[coff + col] += modulus;

			++nz;
			++val;	/* 1 x 1 blocks: exactly one value per nonzero */
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_complex_C__tN_r1_c1_uu_sH_dI_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity-norm helper kernel: 1 x 1 blocked BCSR, float complex values,
	 * rsb_coo_idx_t column indices, hermitian matrix (A == A^H), diagonal implicit
	 * (name suffixes: tN, sH, dI).
	 *
	 * For each row in [br, bc) and each nonzero delimited by bpntr, the modulus of
	 * the value is added to row_sums[row + roff]; it is also added to
	 * row_sums[coff + col] unless roff == coff and row == col (col read from bindx).
	 * VA is read sequentially from its first element.
	 * No extra contribution for an implicit diagonal is added in this body.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not read by this body.
	 *
	 * NOTE: the generator tags this kernel body with a FIXME ("not correct");
	 * the reason is not stated.
	 * \return RSB_ERR_NO_ERROR (this body has no failure path)
	 */
	const float complex *val = VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* br/bc act as row bounds (bounded box) */
	{
		const rsb_nnz_idx_t row_end = bpntr[row + 1];
		rsb_nnz_idx_t nz = bpntr[row];

		while (nz < row_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			float complex *const target = row_sums + row;
			/* modulus kept in a complex temporary, so the updates below are complex += complex */
			const float complex modulus = cabsf(val[0]);

			target[roff] += modulus;

			/* mirrored contribution; skipped only when offsets match and row == col */
			if (!(roff == coff && row == col))
				row_sums[coff + col] += modulus;

			++nz;
			++val;	/* 1 x 1 blocks: exactly one value per nonzero */
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_complex_H__tN_r1_c1_uu_sH_dE_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity-norm helper kernel: 1 x 1 blocked BCSR, float complex values,
	 * rsb_half_idx_t column indices, hermitian matrix (A == A^H), diagonal explicit
	 * (name suffixes: tN, sH, dE).
	 *
	 * For each row in [br, bc) and each nonzero delimited by bpntr, the modulus of
	 * the value is added to row_sums[row + roff]; it is also added to
	 * row_sums[coff + col] unless roff == coff and row == col (col read from bindx).
	 * VA is read sequentially from its first element.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not read by this body.
	 *
	 * NOTE: the generator tags this kernel body with a FIXME ("not correct");
	 * the reason is not stated.
	 * \return RSB_ERR_NO_ERROR (this body has no failure path)
	 */
	const float complex *val = VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* br/bc act as row bounds (bounded box) */
	{
		const rsb_nnz_idx_t row_end = bpntr[row + 1];
		rsb_nnz_idx_t nz = bpntr[row];

		while (nz < row_end)
		{
			const rsb_coo_idx_t col = bindx[nz];	/* half-width index widened to rsb_coo_idx_t */
			float complex *const target = row_sums + row;
			/* modulus kept in a complex temporary, so the updates below are complex += complex */
			const float complex modulus = cabsf(val[0]);

			target[roff] += modulus;

			/* mirrored contribution; skipped only when offsets match and row == col */
			if (!(roff == coff && row == col))
				row_sums[coff + col] += modulus;

			++nz;
			++val;	/* 1 x 1 blocks: exactly one value per nonzero */
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_complex_H__tN_r1_c1_uu_sH_dI_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity-norm helper kernel: 1 x 1 blocked BCSR, float complex values,
	 * rsb_half_idx_t column indices, hermitian matrix (A == A^H), diagonal implicit
	 * (name suffixes: tN, sH, dI).
	 *
	 * For each row in [br, bc) and each nonzero delimited by bpntr, the modulus of
	 * the value is added to row_sums[row + roff]; it is also added to
	 * row_sums[coff + col] unless roff == coff and row == col (col read from bindx).
	 * VA is read sequentially from its first element.
	 * No extra contribution for an implicit diagonal is added in this body.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not read by this body.
	 *
	 * NOTE: the generator tags this kernel body with a FIXME ("not correct");
	 * the reason is not stated.
	 * \return RSB_ERR_NO_ERROR (this body has no failure path)
	 */
	const float complex *val = VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* br/bc act as row bounds (bounded box) */
	{
		const rsb_nnz_idx_t row_end = bpntr[row + 1];
		rsb_nnz_idx_t nz = bpntr[row];

		while (nz < row_end)
		{
			const rsb_coo_idx_t col = bindx[nz];	/* half-width index widened to rsb_coo_idx_t */
			float complex *const target = row_sums + row;
			/* modulus kept in a complex temporary, so the updates below are complex += complex */
			const float complex modulus = cabsf(val[0]);

			target[roff] += modulus;

			/* mirrored contribution; skipped only when offsets match and row == col */
			if (!(roff == coff && row == col))
				row_sums[coff + col] += modulus;

			++nz;
			++val;	/* 1 x 1 blocks: exactly one value per nonzero */
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_complex_C__tT_r1_c1_uu_sH_dE_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity-norm helper kernel: 1 x 1 blocked BCSR, float complex values,
	 * rsb_coo_idx_t column indices, hermitian matrix (A == A^H), diagonal explicit
	 * (name suffixes: tT, sH, dE).
	 *
	 * For each row in [br, bc) and each nonzero delimited by bpntr, the modulus of
	 * the value is added to row_sums[col + coff] (col read from bindx); it is also
	 * added to row_sums[roff + row] unless roff == coff and row == col.
	 * VA is read sequentially from its first element.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not read by this body.
	 *
	 * NOTE: the generator tags this kernel body with a FIXME ("not correct");
	 * the reason is not stated.
	 * \return RSB_ERR_NO_ERROR (this body has no failure path)
	 */
	const float complex *val = VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* br/bc act as row bounds (bounded box) */
	{
		const rsb_nnz_idx_t row_end = bpntr[row + 1];
		rsb_nnz_idx_t nz = bpntr[row];

		while (nz < row_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			float complex *const target = row_sums + col;
			/* modulus kept in a complex temporary, so the updates below are complex += complex */
			const float complex modulus = cabsf(val[0]);

			target[coff] += modulus;

			/* mirrored contribution; skipped only when offsets match and row == col */
			if (!(roff == coff && row == col))
				row_sums[roff + row] += modulus;

			++nz;
			++val;	/* 1 x 1 blocks: exactly one value per nonzero */
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_complex_C__tT_r1_c1_uu_sH_dI_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity-norm helper kernel: 1 x 1 blocked BCSR, float complex values,
	 * rsb_coo_idx_t column indices, hermitian matrix (A == A^H), diagonal implicit
	 * (name suffixes: tT, sH, dI).
	 *
	 * For each row in [br, bc) and each nonzero delimited by bpntr, the modulus of
	 * the value is added to row_sums[col + coff] (col read from bindx); it is also
	 * added to row_sums[roff + row] unless roff == coff and row == col.
	 * VA is read sequentially from its first element.
	 * No extra contribution for an implicit diagonal is added in this body.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not read by this body.
	 *
	 * NOTE: the generator tags this kernel body with a FIXME ("not correct");
	 * the reason is not stated.
	 * \return RSB_ERR_NO_ERROR (this body has no failure path)
	 */
	const float complex *val = VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* br/bc act as row bounds (bounded box) */
	{
		const rsb_nnz_idx_t row_end = bpntr[row + 1];
		rsb_nnz_idx_t nz = bpntr[row];

		while (nz < row_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			float complex *const target = row_sums + col;
			/* modulus kept in a complex temporary, so the updates below are complex += complex */
			const float complex modulus = cabsf(val[0]);

			target[coff] += modulus;

			/* mirrored contribution; skipped only when offsets match and row == col */
			if (!(roff == coff && row == col))
				row_sums[roff + row] += modulus;

			++nz;
			++val;	/* 1 x 1 blocks: exactly one value per nonzero */
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_complex_H__tT_r1_c1_uu_sH_dE_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity-norm accumulation kernel, transposed operation, for a matrix with A == A^H.
	 * Intended layout: 1 x 1 blocked BCSR, diagonal explicit, float complex values, rsb_half_idx_t column indices.
	 * Every stored entry (i,j) of rows br <= i < bc adds cabsf() of its value to row_sums[coff+j];
	 * unless roff==coff and i==j, the same amount is also added to row_sums[roff+i].
	 * Values are consumed sequentially starting at VA, one per visited entry.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used here.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */
	const float complex *val=VA;	/* next stored value to consume */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t nz=bpntr[row];
		const rsb_nnz_idx_t nz_end=bpntr[row+1];

		/* INFTY_NORM KERNEL HERE */
		while(nz<nz_end)
		{
			const rsb_coo_idx_t col=bindx[nz];	/* half-width index widened to rsb_coo_idx_t */
			float complex *col_slot=row_sums+col;
			/* FIXME (carried over from the generated code): this kernel body is flagged as not correct. */
			float complex magnitude=0;

			magnitude+=cabsf(*val);
			col_slot[coff]+=magnitude;

			/* mirrored contribution, skipped only when roff==coff and row==col */
			if(!(roff==coff && row==col))
				row_sums[roff+row]+=magnitude;

			++nz;
			++val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_complex_H__tT_r1_c1_uu_sH_dI_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity-norm accumulation kernel, transposed operation, for a matrix with A == A^H.
	 * Intended layout: 1 x 1 blocked BCSR, diagonal implicit, float complex values, rsb_half_idx_t column indices.
	 * Every stored entry (i,j) of rows br <= i < bc adds cabsf() of its value to row_sums[coff+j];
	 * unless roff==coff and i==j, the same amount is also added to row_sums[roff+i].
	 * Values are consumed sequentially starting at VA, one per visited entry.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used here.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */
	const float complex *val=VA;	/* next stored value to consume */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t nz=bpntr[row];
		const rsb_nnz_idx_t nz_end=bpntr[row+1];

		/* INFTY_NORM KERNEL HERE */
		while(nz<nz_end)
		{
			const rsb_coo_idx_t col=bindx[nz];	/* half-width index widened to rsb_coo_idx_t */
			float complex *col_slot=row_sums+col;
			/* FIXME (carried over from the generated code): this kernel body is flagged as not correct. */
			float complex magnitude=0;

			magnitude+=cabsf(*val);
			col_slot[coff]+=magnitude;

			/* mirrored contribution, skipped only when roff==coff and row==col */
			if(!(roff==coff && row==col))
				row_sums[roff+row]+=magnitude;

			++nz;
			++val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_complex_C__tC_r1_c1_uu_sH_dE_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity-norm accumulation kernel, conjugate-transposed operation, for a matrix with A == A^H.
	 * Intended layout: 1 x 1 blocked BCSR, diagonal explicit, float complex values, rsb_coo_idx_t column indices.
	 * Every stored entry (i,j) of rows br <= i < bc adds cabsf(conjf()) of its value to row_sums[coff+j];
	 * unless roff==coff and i==j, the same amount is also added to row_sums[roff+i].
	 * Values are consumed sequentially starting at VA, one per visited entry.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used here.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */
	const float complex *val=VA;	/* next stored value to consume */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t nz=bpntr[row];
		const rsb_nnz_idx_t nz_end=bpntr[row+1];

		/* INFTY_NORM KERNEL HERE */
		while(nz<nz_end)
		{
			const rsb_coo_idx_t col=bindx[nz];
			float complex *col_slot=row_sums+col;
			/* FIXME (carried over from the generated code): this kernel body is flagged as not correct. */
			float complex magnitude=0;

			magnitude+=cabsf(conjf(*val));
			col_slot[coff]+=magnitude;

			/* mirrored contribution, skipped only when roff==coff and row==col */
			if(!(roff==coff && row==col))
				row_sums[roff+row]+=magnitude;

			++nz;
			++val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_complex_C__tC_r1_c1_uu_sH_dI_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity-norm accumulation kernel, conjugate-transposed operation, for a matrix with A == A^H.
	 * Intended layout: 1 x 1 blocked BCSR, diagonal implicit, float complex values, rsb_coo_idx_t column indices.
	 * Every stored entry (i,j) of rows br <= i < bc adds cabsf(conjf()) of its value to row_sums[coff+j];
	 * unless roff==coff and i==j, the same amount is also added to row_sums[roff+i].
	 * Values are consumed sequentially starting at VA, one per visited entry.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used here.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */
	const float complex *val=VA;	/* next stored value to consume */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t nz=bpntr[row];
		const rsb_nnz_idx_t nz_end=bpntr[row+1];

		/* INFTY_NORM KERNEL HERE */
		while(nz<nz_end)
		{
			const rsb_coo_idx_t col=bindx[nz];
			float complex *col_slot=row_sums+col;
			/* FIXME (carried over from the generated code): this kernel body is flagged as not correct. */
			float complex magnitude=0;

			magnitude+=cabsf(conjf(*val));
			col_slot[coff]+=magnitude;

			/* mirrored contribution, skipped only when roff==coff and row==col */
			if(!(roff==coff && row==col))
				row_sums[roff+row]+=magnitude;

			++nz;
			++val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_complex_H__tC_r1_c1_uu_sH_dE_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity-norm accumulation kernel, conjugate-transposed operation, for a matrix with A == A^H.
	 * Intended layout: 1 x 1 blocked BCSR, diagonal explicit, float complex values, rsb_half_idx_t column indices.
	 * Every stored entry (i,j) of rows br <= i < bc adds cabsf(conjf()) of its value to row_sums[coff+j];
	 * unless roff==coff and i==j, the same amount is also added to row_sums[roff+i].
	 * Values are consumed sequentially starting at VA, one per visited entry.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used here.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */
	const float complex *val=VA;	/* next stored value to consume */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t nz=bpntr[row];
		const rsb_nnz_idx_t nz_end=bpntr[row+1];

		/* INFTY_NORM KERNEL HERE */
		while(nz<nz_end)
		{
			const rsb_coo_idx_t col=bindx[nz];	/* half-width index widened to rsb_coo_idx_t */
			float complex *col_slot=row_sums+col;
			/* FIXME (carried over from the generated code): this kernel body is flagged as not correct. */
			float complex magnitude=0;

			magnitude+=cabsf(conjf(*val));
			col_slot[coff]+=magnitude;

			/* mirrored contribution, skipped only when roff==coff and row==col */
			if(!(roff==coff && row==col))
				row_sums[roff+row]+=magnitude;

			++nz;
			++val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_float_complex_H__tC_r1_c1_uu_sH_dI_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity-norm accumulation kernel, conjugate-transposed operation, for a matrix with A == A^H.
	 * Intended layout: 1 x 1 blocked BCSR, diagonal implicit, float complex values, rsb_half_idx_t column indices.
	 * Every stored entry (i,j) of rows br <= i < bc adds cabsf(conjf()) of its value to row_sums[coff+j];
	 * unless roff==coff and i==j, the same amount is also added to row_sums[roff+i].
	 * Values are consumed sequentially starting at VA, one per visited entry.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used here.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */
	const float complex *val=VA;	/* next stored value to consume */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		rsb_nnz_idx_t nz=bpntr[row];
		const rsb_nnz_idx_t nz_end=bpntr[row+1];

		/* INFTY_NORM KERNEL HERE */
		while(nz<nz_end)
		{
			const rsb_coo_idx_t col=bindx[nz];	/* half-width index widened to rsb_coo_idx_t */
			float complex *col_slot=row_sums+col;
			/* FIXME (carried over from the generated code): this kernel body is flagged as not correct. */
			float complex magnitude=0;

			magnitude+=cabsf(conjf(*val));
			col_slot[coff]+=magnitude;

			/* mirrored contribution, skipped only when roff==coff and row==col */
			if(!(roff==coff && row==col))
				row_sums[roff+row]+=magnitude;

			++nz;
			++val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_complex_C__tN_r1_c1_uu_sU_dE_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row-sums accumulation kernel, non-transposed operation, for a matrix with A \neq A^T.
	 * Intended layout: 1 x 1 blocked BCSR, diagonal explicit, float complex values, rsb_coo_idx_t column indices.
	 * Every stored value of row i (br <= i < bc) is added, as is, to row_sums[roff+i].
	 * Values are consumed sequentially starting at VA, one per visited entry.
	 * The column indices in bindx are not needed for this accumulation and are not read;
	 * Mdim, mdim, indptr, rpntr, cpntr, coff and flags are not used either.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */
	const float complex *val=VA;	/* next stored value to consume */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t nz_begin=bpntr[row],nz_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		/* ROWSSUMS KERNEL HERE */
		for(nz=nz_begin;nz<nz_end;++nz)
		{
			/* FIXME (carried over from the generated code): this kernel body is flagged as not correct. */
			float complex term=0;

			term+=*val++;
			(row_sums+row)[roff]+=term;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_complex_C__tN_r1_c1_uu_sU_dI_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row-sums accumulation kernel, non-transposed operation, for a matrix with A \neq A^T.
	 * Intended layout: 1 x 1 blocked BCSR, diagonal implicit, float complex values, rsb_coo_idx_t column indices.
	 * Every stored value of row i (br <= i < bc) is added, as is, to row_sums[roff+i].
	 * Values are consumed sequentially starting at VA, one per visited entry.
	 * The column indices in bindx are not needed for this accumulation and are not read;
	 * Mdim, mdim, indptr, rpntr, cpntr, coff and flags are not used either.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */
	const float complex *val=VA;	/* next stored value to consume */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t nz_begin=bpntr[row],nz_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		/* ROWSSUMS KERNEL HERE */
		for(nz=nz_begin;nz<nz_end;++nz)
		{
			/* FIXME (carried over from the generated code): this kernel body is flagged as not correct. */
			float complex term=0;

			term+=*val++;
			(row_sums+row)[roff]+=term;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_complex_H__tN_r1_c1_uu_sU_dE_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row-sums accumulation kernel, non-transposed operation, for a matrix with A \neq A^T.
	 * Intended layout: 1 x 1 blocked BCSR, diagonal explicit, float complex values, rsb_half_idx_t column indices.
	 * Every stored value of row i (br <= i < bc) is added, as is, to row_sums[roff+i].
	 * Values are consumed sequentially starting at VA, one per visited entry.
	 * The column indices in bindx are not needed for this accumulation and are not read;
	 * Mdim, mdim, indptr, rpntr, cpntr, coff and flags are not used either.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */
	const float complex *val=VA;	/* next stored value to consume */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t nz_begin=bpntr[row],nz_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		/* ROWSSUMS KERNEL HERE */
		for(nz=nz_begin;nz<nz_end;++nz)
		{
			/* FIXME (carried over from the generated code): this kernel body is flagged as not correct. */
			float complex term=0;

			term+=*val++;
			(row_sums+row)[roff]+=term;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_complex_H__tN_r1_c1_uu_sU_dI_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row-sums accumulation kernel, non-transposed operation, for a matrix with A \neq A^T.
	 * Intended layout: 1 x 1 blocked BCSR, diagonal implicit, float complex values, rsb_half_idx_t column indices.
	 * Every stored value of row i (br <= i < bc) is added, as is, to row_sums[roff+i].
	 * Values are consumed sequentially starting at VA, one per visited entry.
	 * The column indices in bindx are not needed for this accumulation and are not read;
	 * Mdim, mdim, indptr, rpntr, cpntr, coff and flags are not used either.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */
	const float complex *val=VA;	/* next stored value to consume */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t nz_begin=bpntr[row],nz_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		/* ROWSSUMS KERNEL HERE */
		for(nz=nz_begin;nz<nz_end;++nz)
		{
			/* FIXME (carried over from the generated code): this kernel body is flagged as not correct. */
			float complex term=0;

			term+=*val++;
			(row_sums+row)[roff]+=term;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_complex_C__tT_r1_c1_uu_sU_dE_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row-sums accumulation kernel, transposed operation, for a matrix with A \neq A^T.
	 * Intended layout: 1 x 1 blocked BCSR, diagonal explicit, float complex values, rsb_coo_idx_t column indices.
	 * Every stored entry (i,j) of rows br <= i < bc adds its value, as is, to row_sums[coff+j].
	 * Values are consumed sequentially starting at VA, one per visited entry.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff and flags are not used here.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */
	const float complex *val=VA;	/* next stored value to consume */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t nz_begin=bpntr[row],nz_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		/* ROWSSUMS KERNEL HERE */
		for(nz=nz_begin;nz<nz_end;++nz)
		{
			const rsb_coo_idx_t col=bindx[nz];
			/* FIXME (carried over from the generated code): this kernel body is flagged as not correct. */
			float complex term=0;

			term+=*val++;
			(row_sums+col)[coff]+=term;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_complex_C__tT_r1_c1_uu_sU_dI_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row-sums accumulation kernel, transposed operation, for a matrix with A \neq A^T.
	 * Intended layout: 1 x 1 blocked BCSR, diagonal implicit, float complex values, rsb_coo_idx_t column indices.
	 * Every stored entry (i,j) of rows br <= i < bc adds its value, as is, to row_sums[coff+j].
	 * Values are consumed sequentially starting at VA, one per visited entry.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff and flags are not used here.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */
	const float complex *val=VA;	/* next stored value to consume */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t nz_begin=bpntr[row],nz_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		/* ROWSSUMS KERNEL HERE */
		for(nz=nz_begin;nz<nz_end;++nz)
		{
			const rsb_coo_idx_t col=bindx[nz];
			/* FIXME (carried over from the generated code): this kernel body is flagged as not correct. */
			float complex term=0;

			term+=*val++;
			(row_sums+col)[coff]+=term;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_complex_H__tT_r1_c1_uu_sU_dE_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row-sums accumulation kernel, transposed operation, for a matrix with A \neq A^T.
	 * Intended layout: 1 x 1 blocked BCSR, diagonal explicit, float complex values, rsb_half_idx_t column indices.
	 * Every stored entry (i,j) of rows br <= i < bc adds its value, as is, to row_sums[coff+j].
	 * Values are consumed sequentially starting at VA, one per visited entry.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff and flags are not used here.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */
	const float complex *val=VA;	/* next stored value to consume */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t nz_begin=bpntr[row],nz_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		/* ROWSSUMS KERNEL HERE */
		for(nz=nz_begin;nz<nz_end;++nz)
		{
			const rsb_coo_idx_t col=bindx[nz];	/* half-width index widened to rsb_coo_idx_t */
			/* FIXME (carried over from the generated code): this kernel body is flagged as not correct. */
			float complex term=0;

			term+=*val++;
			(row_sums+col)[coff]+=term;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_complex_H__tT_r1_c1_uu_sU_dI_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row-sums accumulation kernel, transposed operation, for a matrix with A \neq A^T.
	 * Intended layout: 1 x 1 blocked BCSR, diagonal implicit, float complex values, rsb_half_idx_t column indices.
	 * Every stored entry (i,j) of rows br <= i < bc adds its value, as is, to row_sums[coff+j].
	 * Values are consumed sequentially starting at VA, one per visited entry.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff and flags are not used here.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */
	const float complex *val=VA;	/* next stored value to consume */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t nz_begin=bpntr[row],nz_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		/* ROWSSUMS KERNEL HERE */
		for(nz=nz_begin;nz<nz_end;++nz)
		{
			const rsb_coo_idx_t col=bindx[nz];	/* half-width index widened to rsb_coo_idx_t */
			/* FIXME (carried over from the generated code): this kernel body is flagged as not correct. */
			float complex term=0;

			term+=*val++;
			(row_sums+col)[coff]+=term;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_complex_C__tC_r1_c1_uu_sU_dE_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row-sums accumulation kernel, conjugate-transposed operation, for a matrix with A \neq A^T.
	 * Intended layout: 1 x 1 blocked BCSR, diagonal explicit, float complex values, rsb_coo_idx_t column indices.
	 * Every stored entry (i,j) of rows br <= i < bc adds conjf() of its value to row_sums[coff+j].
	 * Values are consumed sequentially starting at VA, one per visited entry.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff and flags are not used here.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */
	const float complex *val=VA;	/* next stored value to consume */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t nz_begin=bpntr[row],nz_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		/* ROWSSUMS KERNEL HERE */
		for(nz=nz_begin;nz<nz_end;++nz)
		{
			const rsb_coo_idx_t col=bindx[nz];
			/* FIXME (carried over from the generated code): this kernel body is flagged as not correct. */
			float complex term=0;

			term+=conjf(*val++);
			(row_sums+col)[coff]+=term;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_complex_C__tC_r1_c1_uu_sU_dI_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row-sums accumulation kernel, conjugate-transposed operation, for a matrix with A \neq A^T.
	 * Intended layout: 1 x 1 blocked BCSR, diagonal implicit, float complex values, rsb_coo_idx_t column indices.
	 * Every stored entry (i,j) of rows br <= i < bc adds conjf() of its value to row_sums[coff+j].
	 * Values are consumed sequentially starting at VA, one per visited entry.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff and flags are not used here.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */
	const float complex *val=VA;	/* next stored value to consume */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t nz_begin=bpntr[row],nz_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		/* ROWSSUMS KERNEL HERE */
		for(nz=nz_begin;nz<nz_end;++nz)
		{
			const rsb_coo_idx_t col=bindx[nz];
			/* FIXME (carried over from the generated code): this kernel body is flagged as not correct. */
			float complex term=0;

			term+=conjf(*val++);
			(row_sums+col)[coff]+=term;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_complex_H__tC_r1_c1_uu_sU_dE_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row-sums accumulation kernel, conjugate-transposed operation, for a matrix with A \neq A^T.
	 * Intended layout: 1 x 1 blocked BCSR, diagonal explicit, float complex values, rsb_half_idx_t column indices.
	 * Every stored entry (i,j) of rows br <= i < bc adds conjf() of its value to row_sums[coff+j].
	 * Values are consumed sequentially starting at VA, one per visited entry.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff and flags are not used here.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */
	const float complex *val=VA;	/* next stored value to consume */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t nz_begin=bpntr[row],nz_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		/* ROWSSUMS KERNEL HERE */
		for(nz=nz_begin;nz<nz_end;++nz)
		{
			const rsb_coo_idx_t col=bindx[nz];	/* half-width index widened to rsb_coo_idx_t */
			/* FIXME (carried over from the generated code): this kernel body is flagged as not correct. */
			float complex term=0;

			term+=conjf(*val++);
			(row_sums+col)[coff]+=term;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_complex_H__tC_r1_c1_uu_sU_dI_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row-sums accumulation kernel, conjugate-transposed operation, for a matrix with A \neq A^T.
	 * Intended layout: 1 x 1 blocked BCSR, diagonal implicit, float complex values, rsb_half_idx_t column indices.
	 * Every stored entry (i,j) of rows br <= i < bc adds conjf() of its value to row_sums[coff+j].
	 * Values are consumed sequentially starting at VA, one per visited entry.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff and flags are not used here.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */
	const float complex *val=VA;	/* next stored value to consume */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t nz_begin=bpntr[row],nz_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		/* ROWSSUMS KERNEL HERE */
		for(nz=nz_begin;nz<nz_end;++nz)
		{
			const rsb_coo_idx_t col=bindx[nz];	/* half-width index widened to rsb_coo_idx_t */
			/* FIXME (carried over from the generated code): this kernel body is flagged as not correct. */
			float complex term=0;

			term+=conjf(*val++);
			(row_sums+col)[coff]+=term;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_complex_C__tN_r1_c1_uu_sS_dE_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row-sums accumulation kernel, non-transposed operation, for a matrix with A == A^T.
	 * Intended layout: 1 x 1 blocked BCSR, diagonal explicit, float complex values, rsb_coo_idx_t column indices.
	 * Every stored entry (i,j) of rows br <= i < bc adds its value, as is, to row_sums[roff+i];
	 * unless roff==coff and i==j, the same value is also added to row_sums[coff+j].
	 * Values are consumed sequentially starting at VA, one per visited entry.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used here.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */
	const float complex *val=VA;	/* next stored value to consume */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t nz_begin=bpntr[row],nz_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		/* ROWSSUMS KERNEL HERE */
		for(nz=nz_begin;nz<nz_end;++nz)
		{
			const rsb_coo_idx_t col=bindx[nz];
			/* FIXME (carried over from the generated code): this kernel body is flagged as not correct. */
			float complex term=0;

			term+=*val++;
			(row_sums+row)[roff]+=term;

			/* mirrored contribution, skipped only when roff==coff and row==col */
			if(!(roff==coff && row==col))
				row_sums[coff+col]+=term;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_complex_C__tN_r1_c1_uu_sS_dI_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row-sums accumulation kernel, non-transposed operation, for a matrix with A == A^T.
	 * Intended layout: 1 x 1 blocked BCSR, diagonal implicit, float complex values, rsb_coo_idx_t column indices.
	 * Every stored entry (i,j) of rows br <= i < bc adds its value, as is, to row_sums[roff+i];
	 * unless roff==coff and i==j, the same value is also added to row_sums[coff+j].
	 * Values are consumed sequentially starting at VA, one per visited entry.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used here.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */
	const float complex *val=VA;	/* next stored value to consume */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t nz_begin=bpntr[row],nz_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		/* ROWSSUMS KERNEL HERE */
		for(nz=nz_begin;nz<nz_end;++nz)
		{
			const rsb_coo_idx_t col=bindx[nz];
			/* FIXME (carried over from the generated code): this kernel body is flagged as not correct. */
			float complex term=0;

			term+=*val++;
			(row_sums+row)[roff]+=term;

			/* mirrored contribution, skipped only when roff==coff and row==col */
			if(!(roff==coff && row==col))
				row_sums[coff+col]+=term;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_complex_H__tN_r1_c1_uu_sS_dE_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel (untransposed) for a symmetric matrix, A == A^T.
	 * For each row i in [br,bc) and each stored nonzero at (i,j), the value is accumulated
	 * into row_sums[roff+i] and, unless the entry lies on the main diagonal
	 * (roff==coff and i==j), also into row_sums[coff+j] for the mirrored entry (j,i).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float complex, with rsb_half_idx_t column indices.
	 * Values are consumed sequentially starting at VA; row_sums is updated in place.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used by this kernel.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */
	const float complex *val = VA;	/* next stored value; advanced once per visited nonzero */
	rsb_coo_idx_t row;

	for(row = br; RSB_LIKELY(row < bc); ++row)	/* br and bc bound the visited rows */
	{
		const rsb_nnz_idx_t row_end = bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz = bpntr[row]; nz < row_end; ++nz, ++val)
		{
			const rsb_coo_idx_t col = bindx[nz];	/* half-width index widened to rsb_coo_idx_t */
			float complex term = 0;

			term += val[0];
			row_sums[roff + row] += term;
			/* mirrored contribution; skipped only for entries on the main diagonal */
			if(!(roff == coff && row == col))
				row_sums[coff + col] += term;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_complex_H__tN_r1_c1_uu_sS_dI_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel (untransposed) for a symmetric matrix, A == A^T.
	 * For each row i in [br,bc) and each stored nonzero at (i,j), the value is accumulated
	 * into row_sums[roff+i] and, unless the entry lies on the main diagonal
	 * (roff==coff and i==j), also into row_sums[coff+j] for the mirrored entry (j,i).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float complex, with rsb_half_idx_t column indices.
	 * No implicit diagonal term is added here: only stored values are summed.
	 * Values are consumed sequentially starting at VA; row_sums is updated in place.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used by this kernel.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */
	const float complex *val = VA;	/* next stored value; advanced once per visited nonzero */
	rsb_coo_idx_t row;

	for(row = br; RSB_LIKELY(row < bc); ++row)	/* br and bc bound the visited rows */
	{
		const rsb_nnz_idx_t row_end = bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz = bpntr[row]; nz < row_end; ++nz, ++val)
		{
			const rsb_coo_idx_t col = bindx[nz];	/* half-width index widened to rsb_coo_idx_t */
			float complex term = 0;

			term += val[0];
			row_sums[roff + row] += term;
			/* mirrored contribution; skipped only for entries on the main diagonal */
			if(!(roff == coff && row == col))
				row_sums[coff + col] += term;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_complex_C__tT_r1_c1_uu_sS_dE_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel (transposed) for a symmetric matrix, A == A^T.
	 * For each row i in [br,bc) and each stored nonzero at (i,j), the value is accumulated
	 * into row_sums[coff+j] and, unless the entry lies on the main diagonal
	 * (roff==coff and i==j), also into row_sums[roff+i] for the mirrored entry.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float complex, with rsb_coo_idx_t column indices.
	 * Values are consumed sequentially starting at VA; row_sums is updated in place.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used by this kernel.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */
	const float complex *val = VA;	/* next stored value; advanced once per visited nonzero */
	rsb_coo_idx_t row;

	for(row = br; RSB_LIKELY(row < bc); ++row)	/* br and bc bound the visited rows */
	{
		const rsb_nnz_idx_t row_end = bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz = bpntr[row]; nz < row_end; ++nz, ++val)
		{
			const rsb_coo_idx_t col = bindx[nz];
			float complex term = 0;

			term += val[0];
			row_sums[coff + col] += term;
			/* mirrored contribution; skipped only for entries on the main diagonal */
			if(!(roff == coff && row == col))
				row_sums[roff + row] += term;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_complex_C__tT_r1_c1_uu_sS_dI_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel (transposed) for a symmetric matrix, A == A^T.
	 * For each row i in [br,bc) and each stored nonzero at (i,j), the value is accumulated
	 * into row_sums[coff+j] and, unless the entry lies on the main diagonal
	 * (roff==coff and i==j), also into row_sums[roff+i] for the mirrored entry.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float complex, with rsb_coo_idx_t column indices.
	 * No implicit diagonal term is added here: only stored values are summed.
	 * Values are consumed sequentially starting at VA; row_sums is updated in place.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used by this kernel.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */
	const float complex *val = VA;	/* next stored value; advanced once per visited nonzero */
	rsb_coo_idx_t row;

	for(row = br; RSB_LIKELY(row < bc); ++row)	/* br and bc bound the visited rows */
	{
		const rsb_nnz_idx_t row_end = bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz = bpntr[row]; nz < row_end; ++nz, ++val)
		{
			const rsb_coo_idx_t col = bindx[nz];
			float complex term = 0;

			term += val[0];
			row_sums[coff + col] += term;
			/* mirrored contribution; skipped only for entries on the main diagonal */
			if(!(roff == coff && row == col))
				row_sums[roff + row] += term;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_complex_H__tT_r1_c1_uu_sS_dE_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel (transposed) for a symmetric matrix, A == A^T.
	 * For each row i in [br,bc) and each stored nonzero at (i,j), the value is accumulated
	 * into row_sums[coff+j] and, unless the entry lies on the main diagonal
	 * (roff==coff and i==j), also into row_sums[roff+i] for the mirrored entry.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float complex, with rsb_half_idx_t column indices.
	 * Values are consumed sequentially starting at VA; row_sums is updated in place.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used by this kernel.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */
	const float complex *val = VA;	/* next stored value; advanced once per visited nonzero */
	rsb_coo_idx_t row;

	for(row = br; RSB_LIKELY(row < bc); ++row)	/* br and bc bound the visited rows */
	{
		const rsb_nnz_idx_t row_end = bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz = bpntr[row]; nz < row_end; ++nz, ++val)
		{
			const rsb_coo_idx_t col = bindx[nz];	/* half-width index widened to rsb_coo_idx_t */
			float complex term = 0;

			term += val[0];
			row_sums[coff + col] += term;
			/* mirrored contribution; skipped only for entries on the main diagonal */
			if(!(roff == coff && row == col))
				row_sums[roff + row] += term;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_complex_H__tT_r1_c1_uu_sS_dI_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel (transposed) for a symmetric matrix, A == A^T.
	 * For each row i in [br,bc) and each stored nonzero at (i,j), the value is accumulated
	 * into row_sums[coff+j] and, unless the entry lies on the main diagonal
	 * (roff==coff and i==j), also into row_sums[roff+i] for the mirrored entry.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float complex, with rsb_half_idx_t column indices.
	 * No implicit diagonal term is added here: only stored values are summed.
	 * Values are consumed sequentially starting at VA; row_sums is updated in place.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used by this kernel.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */
	const float complex *val = VA;	/* next stored value; advanced once per visited nonzero */
	rsb_coo_idx_t row;

	for(row = br; RSB_LIKELY(row < bc); ++row)	/* br and bc bound the visited rows */
	{
		const rsb_nnz_idx_t row_end = bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz = bpntr[row]; nz < row_end; ++nz, ++val)
		{
			const rsb_coo_idx_t col = bindx[nz];	/* half-width index widened to rsb_coo_idx_t */
			float complex term = 0;

			term += val[0];
			row_sums[coff + col] += term;
			/* mirrored contribution; skipped only for entries on the main diagonal */
			if(!(roff == coff && row == col))
				row_sums[roff + row] += term;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_complex_C__tC_r1_c1_uu_sS_dE_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel (conjugate transposed) for a symmetric matrix, A == A^T.
	 * For each row i in [br,bc) and each stored nonzero at (i,j), the conjugated value is
	 * accumulated into row_sums[coff+j] and, unless the entry lies on the main diagonal
	 * (roff==coff and i==j), also into row_sums[roff+i] for the mirrored entry.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float complex, with rsb_coo_idx_t column indices.
	 * Values are consumed sequentially starting at VA; row_sums is updated in place.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used by this kernel.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */
	const float complex *val = VA;	/* next stored value; advanced once per visited nonzero */
	rsb_coo_idx_t row;

	for(row = br; RSB_LIKELY(row < bc); ++row)	/* br and bc bound the visited rows */
	{
		const rsb_nnz_idx_t row_end = bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz = bpntr[row]; nz < row_end; ++nz, ++val)
		{
			const rsb_coo_idx_t col = bindx[nz];
			float complex term = 0;

			term += conjf(val[0]);
			row_sums[coff + col] += term;
			/* mirrored contribution; skipped only for entries on the main diagonal */
			if(!(roff == coff && row == col))
				row_sums[roff + row] += term;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_complex_C__tC_r1_c1_uu_sS_dI_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel (conjugate transposed) for a symmetric matrix, A == A^T.
	 * For each row i in [br,bc) and each stored nonzero at (i,j), the conjugated value is
	 * accumulated into row_sums[coff+j] and, unless the entry lies on the main diagonal
	 * (roff==coff and i==j), also into row_sums[roff+i] for the mirrored entry.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float complex, with rsb_coo_idx_t column indices.
	 * No implicit diagonal term is added here: only stored values are summed.
	 * Values are consumed sequentially starting at VA; row_sums is updated in place.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used by this kernel.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */
	const float complex *val = VA;	/* next stored value; advanced once per visited nonzero */
	rsb_coo_idx_t row;

	for(row = br; RSB_LIKELY(row < bc); ++row)	/* br and bc bound the visited rows */
	{
		const rsb_nnz_idx_t row_end = bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz = bpntr[row]; nz < row_end; ++nz, ++val)
		{
			const rsb_coo_idx_t col = bindx[nz];
			float complex term = 0;

			term += conjf(val[0]);
			row_sums[coff + col] += term;
			/* mirrored contribution; skipped only for entries on the main diagonal */
			if(!(roff == coff && row == col))
				row_sums[roff + row] += term;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_complex_H__tC_r1_c1_uu_sS_dE_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel (conjugate transposed) for a symmetric matrix, A == A^T.
	 * For each row i in [br,bc) and each stored nonzero at (i,j), the conjugated value is
	 * accumulated into row_sums[coff+j] and, unless the entry lies on the main diagonal
	 * (roff==coff and i==j), also into row_sums[roff+i] for the mirrored entry.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float complex, with rsb_half_idx_t column indices.
	 * Values are consumed sequentially starting at VA; row_sums is updated in place.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used by this kernel.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */
	const float complex *val = VA;	/* next stored value; advanced once per visited nonzero */
	rsb_coo_idx_t row;

	for(row = br; RSB_LIKELY(row < bc); ++row)	/* br and bc bound the visited rows */
	{
		const rsb_nnz_idx_t row_end = bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz = bpntr[row]; nz < row_end; ++nz, ++val)
		{
			const rsb_coo_idx_t col = bindx[nz];	/* half-width index widened to rsb_coo_idx_t */
			float complex term = 0;

			term += conjf(val[0]);
			row_sums[coff + col] += term;
			/* mirrored contribution; skipped only for entries on the main diagonal */
			if(!(roff == coff && row == col))
				row_sums[roff + row] += term;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_complex_H__tC_r1_c1_uu_sS_dI_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel (conjugate transposed) for a symmetric matrix, A == A^T.
	 * For each row i in [br,bc) and each stored nonzero at (i,j), the conjugated value is
	 * accumulated into row_sums[coff+j] and, unless the entry lies on the main diagonal
	 * (roff==coff and i==j), also into row_sums[roff+i] for the mirrored entry.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float complex, with rsb_half_idx_t column indices.
	 * No implicit diagonal term is added here: only stored values are summed.
	 * Values are consumed sequentially starting at VA; row_sums is updated in place.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used by this kernel.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */
	const float complex *val = VA;	/* next stored value; advanced once per visited nonzero */
	rsb_coo_idx_t row;

	for(row = br; RSB_LIKELY(row < bc); ++row)	/* br and bc bound the visited rows */
	{
		const rsb_nnz_idx_t row_end = bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz = bpntr[row]; nz < row_end; ++nz, ++val)
		{
			const rsb_coo_idx_t col = bindx[nz];	/* half-width index widened to rsb_coo_idx_t */
			float complex term = 0;

			term += conjf(val[0]);
			row_sums[coff + col] += term;
			/* mirrored contribution; skipped only for entries on the main diagonal */
			if(!(roff == coff && row == col))
				row_sums[roff + row] += term;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_complex_C__tN_r1_c1_uu_sH_dE_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel (untransposed) for a Hermitian matrix, A == A^H.
	 * For each row i in [br,bc) and each stored nonzero a at (i,j), a is accumulated into
	 * row_sums[roff+i]; unless the entry lies on the main diagonal (roff==coff and i==j),
	 * conj(a) is accumulated into row_sums[coff+j] for the mirrored entry (j,i),
	 * because A == A^H means A(j,i) == conj(A(i,j)).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float complex, with rsb_coo_idx_t column indices.
	 * Values are consumed sequentially starting at VA; row_sums is updated in place.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used by this kernel.
	 * NOTE: this file is generated from M4; the generating macro needs the same conjugation.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */
	const float complex *val = VA;	/* next stored value; advanced once per visited nonzero */
	rsb_coo_idx_t row;

	for(row = br; RSB_LIKELY(row < bc); ++row)	/* br and bc bound the visited rows */
	{
		const rsb_nnz_idx_t row_end = bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz = bpntr[row]; nz < row_end; ++nz, ++val)
		{
			const rsb_coo_idx_t col = bindx[nz];
			float complex term = 0;

			term += val[0];
			row_sums[roff + row] += term;
			/* the mirrored entry of a Hermitian matrix is the complex conjugate */
			if(!(roff == coff && row == col))
				row_sums[coff + col] += conjf(term);
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_complex_C__tN_r1_c1_uu_sH_dI_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel (untransposed) for a Hermitian matrix, A == A^H.
	 * For each row i in [br,bc) and each stored nonzero a at (i,j), a is accumulated into
	 * row_sums[roff+i]; unless the entry lies on the main diagonal (roff==coff and i==j),
	 * conj(a) is accumulated into row_sums[coff+j] for the mirrored entry (j,i),
	 * because A == A^H means A(j,i) == conj(A(i,j)).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float complex, with rsb_coo_idx_t column indices.
	 * No implicit diagonal term is added here: only stored values are summed.
	 * Values are consumed sequentially starting at VA; row_sums is updated in place.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used by this kernel.
	 * NOTE: this file is generated from M4; the generating macro needs the same conjugation.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */
	const float complex *val = VA;	/* next stored value; advanced once per visited nonzero */
	rsb_coo_idx_t row;

	for(row = br; RSB_LIKELY(row < bc); ++row)	/* br and bc bound the visited rows */
	{
		const rsb_nnz_idx_t row_end = bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz = bpntr[row]; nz < row_end; ++nz, ++val)
		{
			const rsb_coo_idx_t col = bindx[nz];
			float complex term = 0;

			term += val[0];
			row_sums[roff + row] += term;
			/* the mirrored entry of a Hermitian matrix is the complex conjugate */
			if(!(roff == coff && row == col))
				row_sums[coff + col] += conjf(term);
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_complex_H__tN_r1_c1_uu_sH_dE_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel (untransposed) for a Hermitian matrix, A == A^H.
	 * For each row i in [br,bc) and each stored nonzero a at (i,j), a is accumulated into
	 * row_sums[roff+i]; unless the entry lies on the main diagonal (roff==coff and i==j),
	 * conj(a) is accumulated into row_sums[coff+j] for the mirrored entry (j,i),
	 * because A == A^H means A(j,i) == conj(A(i,j)).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float complex, with rsb_half_idx_t column indices.
	 * Values are consumed sequentially starting at VA; row_sums is updated in place.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used by this kernel.
	 * NOTE: this file is generated from M4; the generating macro needs the same conjugation.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */
	const float complex *val = VA;	/* next stored value; advanced once per visited nonzero */
	rsb_coo_idx_t row;

	for(row = br; RSB_LIKELY(row < bc); ++row)	/* br and bc bound the visited rows */
	{
		const rsb_nnz_idx_t row_end = bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz = bpntr[row]; nz < row_end; ++nz, ++val)
		{
			const rsb_coo_idx_t col = bindx[nz];	/* half-width index widened to rsb_coo_idx_t */
			float complex term = 0;

			term += val[0];
			row_sums[roff + row] += term;
			/* the mirrored entry of a Hermitian matrix is the complex conjugate */
			if(!(roff == coff && row == col))
				row_sums[coff + col] += conjf(term);
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_complex_H__tN_r1_c1_uu_sH_dI_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel (untransposed) for a Hermitian matrix, A == A^H.
	 * For each row i in [br,bc) and each stored nonzero a at (i,j), a is accumulated into
	 * row_sums[roff+i]; unless the entry lies on the main diagonal (roff==coff and i==j),
	 * conj(a) is accumulated into row_sums[coff+j] for the mirrored entry (j,i),
	 * because A == A^H means A(j,i) == conj(A(i,j)).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float complex, with rsb_half_idx_t column indices.
	 * No implicit diagonal term is added here: only stored values are summed.
	 * Values are consumed sequentially starting at VA; row_sums is updated in place.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used by this kernel.
	 * NOTE: this file is generated from M4; the generating macro needs the same conjugation.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */
	const float complex *val = VA;	/* next stored value; advanced once per visited nonzero */
	rsb_coo_idx_t row;

	for(row = br; RSB_LIKELY(row < bc); ++row)	/* br and bc bound the visited rows */
	{
		const rsb_nnz_idx_t row_end = bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz = bpntr[row]; nz < row_end; ++nz, ++val)
		{
			const rsb_coo_idx_t col = bindx[nz];	/* half-width index widened to rsb_coo_idx_t */
			float complex term = 0;

			term += val[0];
			row_sums[roff + row] += term;
			/* the mirrored entry of a Hermitian matrix is the complex conjugate */
			if(!(roff == coff && row == col))
				row_sums[coff + col] += conjf(term);
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_complex_C__tT_r1_c1_uu_sH_dE_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel (transposed) for a Hermitian matrix, A == A^H.
	 * For each row i in [br,bc) and each stored nonzero a at (i,j), a is accumulated into
	 * row_sums[coff+j] (entry (j,i) of A^T); unless the entry lies on the main diagonal
	 * (roff==coff and i==j), conj(a) is accumulated into row_sums[roff+i] for the mirrored
	 * entry, because A == A^H means A(j,i) == conj(A(i,j)).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float complex, with rsb_coo_idx_t column indices.
	 * Values are consumed sequentially starting at VA; row_sums is updated in place.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used by this kernel.
	 * NOTE: this file is generated from M4; the generating macro needs the same conjugation.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */
	const float complex *val = VA;	/* next stored value; advanced once per visited nonzero */
	rsb_coo_idx_t row;

	for(row = br; RSB_LIKELY(row < bc); ++row)	/* br and bc bound the visited rows */
	{
		const rsb_nnz_idx_t row_end = bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz = bpntr[row]; nz < row_end; ++nz, ++val)
		{
			const rsb_coo_idx_t col = bindx[nz];
			float complex term = 0;

			term += val[0];
			row_sums[coff + col] += term;
			/* the mirrored entry of a Hermitian matrix is the complex conjugate */
			if(!(roff == coff && row == col))
				row_sums[roff + row] += conjf(term);
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_complex_C__tT_r1_c1_uu_sH_dI_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel (transposed) for a Hermitian matrix, A == A^H.
	 * For each row i in [br,bc) and each stored nonzero a at (i,j), a is accumulated into
	 * row_sums[coff+j] (entry (j,i) of A^T); unless the entry lies on the main diagonal
	 * (roff==coff and i==j), conj(a) is accumulated into row_sums[roff+i] for the mirrored
	 * entry, because A == A^H means A(j,i) == conj(A(i,j)).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float complex, with rsb_coo_idx_t column indices.
	 * No implicit diagonal term is added here: only stored values are summed.
	 * Values are consumed sequentially starting at VA; row_sums is updated in place.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used by this kernel.
	 * NOTE: this file is generated from M4; the generating macro needs the same conjugation.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */
	const float complex *val = VA;	/* next stored value; advanced once per visited nonzero */
	rsb_coo_idx_t row;

	for(row = br; RSB_LIKELY(row < bc); ++row)	/* br and bc bound the visited rows */
	{
		const rsb_nnz_idx_t row_end = bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz = bpntr[row]; nz < row_end; ++nz, ++val)
		{
			const rsb_coo_idx_t col = bindx[nz];
			float complex term = 0;

			term += val[0];
			row_sums[coff + col] += term;
			/* the mirrored entry of a Hermitian matrix is the complex conjugate */
			if(!(roff == coff && row == col))
				row_sums[roff + row] += conjf(term);
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_complex_H__tT_r1_c1_uu_sH_dE_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel (transposed) for a Hermitian matrix, A == A^H.
	 * For each row i in [br,bc) and each stored nonzero a at (i,j), a is accumulated into
	 * row_sums[coff+j] (entry (j,i) of A^T); unless the entry lies on the main diagonal
	 * (roff==coff and i==j), conj(a) is accumulated into row_sums[roff+i] for the mirrored
	 * entry, because A == A^H means A(j,i) == conj(A(i,j)).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float complex, with rsb_half_idx_t column indices.
	 * Values are consumed sequentially starting at VA; row_sums is updated in place.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used by this kernel.
	 * NOTE: this file is generated from M4; the generating macro needs the same conjugation.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */
	const float complex *val = VA;	/* next stored value; advanced once per visited nonzero */
	rsb_coo_idx_t row;

	for(row = br; RSB_LIKELY(row < bc); ++row)	/* br and bc bound the visited rows */
	{
		const rsb_nnz_idx_t row_end = bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz = bpntr[row]; nz < row_end; ++nz, ++val)
		{
			const rsb_coo_idx_t col = bindx[nz];	/* half-width index widened to rsb_coo_idx_t */
			float complex term = 0;

			term += val[0];
			row_sums[coff + col] += term;
			/* the mirrored entry of a Hermitian matrix is the complex conjugate */
			if(!(roff == coff && row == col))
				row_sums[roff + row] += conjf(term);
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_complex_H__tT_r1_c1_uu_sH_dI_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel (transposed) for a Hermitian matrix, A == A^H.
	 * For each row i in [br,bc) and each stored nonzero a at (i,j), a is accumulated into
	 * row_sums[coff+j] (entry (j,i) of A^T); unless the entry lies on the main diagonal
	 * (roff==coff and i==j), conj(a) is accumulated into row_sums[roff+i] for the mirrored
	 * entry, because A == A^H means A(j,i) == conj(A(i,j)).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float complex, with rsb_half_idx_t column indices.
	 * No implicit diagonal term is added here: only stored values are summed.
	 * Values are consumed sequentially starting at VA; row_sums is updated in place.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used by this kernel.
	 * NOTE: this file is generated from M4; the generating macro needs the same conjugation.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */
	const float complex *val = VA;	/* next stored value; advanced once per visited nonzero */
	rsb_coo_idx_t row;

	for(row = br; RSB_LIKELY(row < bc); ++row)	/* br and bc bound the visited rows */
	{
		const rsb_nnz_idx_t row_end = bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz = bpntr[row]; nz < row_end; ++nz, ++val)
		{
			const rsb_coo_idx_t col = bindx[nz];	/* half-width index widened to rsb_coo_idx_t */
			float complex term = 0;

			term += val[0];
			row_sums[coff + col] += term;
			/* the mirrored entry of a Hermitian matrix is the complex conjugate */
			if(!(roff == coff && row == col))
				row_sums[roff + row] += conjf(term);
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_complex_C__tC_r1_c1_uu_sH_dE_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel (conjugate transposed) for a Hermitian matrix, A == A^H.
	 * For each row i in [br,bc) and each stored nonzero a at (i,j), conj(a) is accumulated
	 * into row_sums[coff+j] (entry (j,i) of A^H); unless the entry lies on the main diagonal
	 * (roff==coff and i==j), the conjugate of that term (i.e. a) is accumulated into
	 * row_sums[roff+i] for the mirrored entry, because A == A^H means A(j,i) == conj(A(i,j)).
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float complex, with rsb_coo_idx_t column indices.
	 * Values are consumed sequentially starting at VA; row_sums is updated in place.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used by this kernel.
	 * NOTE: this file is generated from M4; the generating macro needs the same conjugation.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */
	const float complex *val = VA;	/* next stored value; advanced once per visited nonzero */
	rsb_coo_idx_t row;

	for(row = br; RSB_LIKELY(row < bc); ++row)	/* br and bc bound the visited rows */
	{
		const rsb_nnz_idx_t row_end = bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz = bpntr[row]; nz < row_end; ++nz, ++val)
		{
			const rsb_coo_idx_t col = bindx[nz];
			float complex term = 0;

			term += conjf(val[0]);
			row_sums[coff + col] += term;
			/* the mirrored entry of a Hermitian matrix is the complex conjugate */
			if(!(roff == coff && row == col))
				row_sums[roff + row] += conjf(term);
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_complex_C__tC_r1_c1_uu_sH_dI_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row/column sums kernel, documented by the generator as \f$ \|A\|_{1} \f$ (or rather, \f$ row\_sums_i \leftarrow \sum_{i=0}^{Mdim} A^{T}_{ij} \f$), where \f$ A == A^H \f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float complex, with rsb_coo_idx_t column indices.
	 *
	 * For each row i from br (included) to bc (excluded) and each k in
	 * [bpntr[i],bpntr[i+1]), with j=bindx[k], the conjugate of the next entry of
	 * VA is added to row_sums[j+coff] and, unless both roff==coff and i==j,
	 * also to row_sums[roff+i].
	 * Entries of VA are consumed consecutively, starting from VA[0].
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not read by this body.
	 * \return \rsb_errval_inp_param_msg (this body only ever returns RSB_ERR_NO_ERROR)
	 */

	const float complex *val=VA;	/* cursor over the values, one step per visited nonzero */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_end=bpntr[row+1];
		rsb_nnz_idx_t nz=bpntr[row];

		/* ROWSSUMS KERNEL HERE */
		while(nz<row_end)
		{
			const rsb_coo_idx_t col=bindx[nz];
			float complex *col_slot=row_sums+col;
			/* FIXME (carried over from the generator): the code below is flagged as NOT CORRECT. */
			/* Unrolled body from the RSB_M4_INFTY_NORM_FUNCTION_BODY_UNROLLED macro. */
			/* The term is built as zero plus the value, exactly as in the generated code. */
			float complex term=0;

			term+=conjf(val[0]);

			/* column-side contribution first, then the mirrored row-side one */
			col_slot[coff]+=term;
			if(!(roff==coff && row==col))
				row_sums[roff+row]+=term;

			++nz;
			++val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_complex_H__tC_r1_c1_uu_sH_dE_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row/column sums kernel, documented by the generator as \f$ \|A\|_{1} \f$ (or rather, \f$ row\_sums_i \leftarrow \sum_{i=0}^{Mdim} A^{T}_{ij} \f$), where \f$ A == A^H \f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float complex, with rsb_half_idx_t column indices.
	 *
	 * For each row i from br (included) to bc (excluded) and each k in
	 * [bpntr[i],bpntr[i+1]), with j=bindx[k], the conjugate of the next entry of
	 * VA is added to row_sums[j+coff] and, unless both roff==coff and i==j,
	 * also to row_sums[roff+i].
	 * Entries of VA are consumed consecutively, starting from VA[0].
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not read by this body.
	 * \return \rsb_errval_inp_param_msg (this body only ever returns RSB_ERR_NO_ERROR)
	 */

	const float complex *val=VA;	/* cursor over the values, one step per visited nonzero */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_end=bpntr[row+1];
		rsb_nnz_idx_t nz=bpntr[row];

		/* ROWSSUMS KERNEL HERE */
		while(nz<row_end)
		{
			/* the half-width index is widened to rsb_coo_idx_t before use */
			const rsb_coo_idx_t col=bindx[nz];
			float complex *col_slot=row_sums+col;
			/* FIXME (carried over from the generator): the code below is flagged as NOT CORRECT. */
			/* Unrolled body from the RSB_M4_INFTY_NORM_FUNCTION_BODY_UNROLLED macro. */
			/* The term is built as zero plus the value, exactly as in the generated code. */
			float complex term=0;

			term+=conjf(val[0]);

			/* column-side contribution first, then the mirrored row-side one */
			col_slot[coff]+=term;
			if(!(roff==coff && row==col))
				row_sums[roff+row]+=term;

			++nz;
			++val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_float_complex_H__tC_r1_c1_uu_sH_dI_uG(const float complex * VA, float complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row/column sums kernel, documented by the generator as \f$ \|A\|_{1} \f$ (or rather, \f$ row\_sums_i \leftarrow \sum_{i=0}^{Mdim} A^{T}_{ij} \f$), where \f$ A == A^H \f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float complex, with rsb_half_idx_t column indices.
	 *
	 * For each row i from br (included) to bc (excluded) and each k in
	 * [bpntr[i],bpntr[i+1]), with j=bindx[k], the conjugate of the next entry of
	 * VA is added to row_sums[j+coff] and, unless both roff==coff and i==j,
	 * also to row_sums[roff+i].
	 * Entries of VA are consumed consecutively, starting from VA[0].
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not read by this body.
	 * \return \rsb_errval_inp_param_msg (this body only ever returns RSB_ERR_NO_ERROR)
	 */

	const float complex *val=VA;	/* cursor over the values, one step per visited nonzero */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_end=bpntr[row+1];
		rsb_nnz_idx_t nz=bpntr[row];

		/* ROWSSUMS KERNEL HERE */
		while(nz<row_end)
		{
			/* the half-width index is widened to rsb_coo_idx_t before use */
			const rsb_coo_idx_t col=bindx[nz];
			float complex *col_slot=row_sums+col;
			/* FIXME (carried over from the generator): the code below is flagged as NOT CORRECT. */
			/* Unrolled body from the RSB_M4_INFTY_NORM_FUNCTION_BODY_UNROLLED macro. */
			/* The term is built as zero plus the value, exactly as in the generated code. */
			float complex term=0;

			term+=conjf(val[0]);

			/* column-side contribution first, then the mirrored row-side one */
			col_slot[coff]+=term;
			if(!(roff==coff && row==col))
				row_sums[roff+row]+=term;

			++nz;
			++val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_complex_C__tN_r1_c1_uu_sU_dE_uG(float complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float complex *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel, documented by the generator as \f$A \leftarrow A\cdot P, P_{ii}=s_{i}\f$, where \f$A \neq A^T\f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float complex, with rsb_coo_idx_t column indices.
	 *
	 * Rows br (included) to bc (excluded) are visited; for row i, each k in
	 * [bpntr[i],bpntr[i+1]) multiplies the next entry of VA by scale_factors[i].
	 * Entries of VA are consumed consecutively, starting from VA[0].
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not read by this body.
	 * \return \rsb_errval_inp_param_msg (this body only ever returns RSB_ERR_NO_ERROR)
	 */

	float complex *val=VA;	/* cursor over the values, one step per visited nonzero */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		/* SCALE KERNEL HERE */
		for(nz=bpntr[row];nz<row_end;++nz)
		{
			/* column index fetched as in the generated template; this variant does not use it */
			const rsb_coo_idx_t col=bindx[nz];

			(void)col;
			/* unrolled body from the RSB_M4_ROW_SCALE_FUNCTION_BODY_UNROLLED macro */
			val[0]*=scale_factors[row];
			++val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_complex_C__tN_r1_c1_uu_sU_dI_uG(float complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float complex *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel, documented by the generator as \f$A \leftarrow A\cdot P, P_{ii}=s_{i}\f$, where \f$A \neq A^T\f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float complex, with rsb_coo_idx_t column indices.
	 *
	 * Rows br (included) to bc (excluded) are visited; for row i, each k in
	 * [bpntr[i],bpntr[i+1]) multiplies the next entry of VA by scale_factors[i].
	 * Entries of VA are consumed consecutively, starting from VA[0].
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not read by this body.
	 * \return \rsb_errval_inp_param_msg (this body only ever returns RSB_ERR_NO_ERROR)
	 */

	float complex *val=VA;	/* cursor over the values, one step per visited nonzero */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		/* SCALE KERNEL HERE */
		for(nz=bpntr[row];nz<row_end;++nz)
		{
			/* column index fetched as in the generated template; this variant does not use it */
			const rsb_coo_idx_t col=bindx[nz];

			(void)col;
			/* unrolled body from the RSB_M4_ROW_SCALE_FUNCTION_BODY_UNROLLED macro */
			val[0]*=scale_factors[row];
			++val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_complex_H__tN_r1_c1_uu_sU_dE_uG(float complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float complex *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel, documented by the generator as \f$A \leftarrow A\cdot P, P_{ii}=s_{i}\f$, where \f$A \neq A^T\f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float complex, with rsb_half_idx_t column indices.
	 *
	 * Rows br (included) to bc (excluded) are visited; for row i, each k in
	 * [bpntr[i],bpntr[i+1]) multiplies the next entry of VA by scale_factors[i].
	 * Entries of VA are consumed consecutively, starting from VA[0].
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not read by this body.
	 * \return \rsb_errval_inp_param_msg (this body only ever returns RSB_ERR_NO_ERROR)
	 */

	float complex *val=VA;	/* cursor over the values, one step per visited nonzero */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		/* SCALE KERNEL HERE */
		for(nz=bpntr[row];nz<row_end;++nz)
		{
			/* column index fetched as in the generated template; this variant does not use it */
			const rsb_coo_idx_t col=bindx[nz];

			(void)col;
			/* unrolled body from the RSB_M4_ROW_SCALE_FUNCTION_BODY_UNROLLED macro */
			val[0]*=scale_factors[row];
			++val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_complex_H__tN_r1_c1_uu_sU_dI_uG(float complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float complex *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel, documented by the generator as \f$A \leftarrow A\cdot P, P_{ii}=s_{i}\f$, where \f$A \neq A^T\f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float complex, with rsb_half_idx_t column indices.
	 *
	 * Rows br (included) to bc (excluded) are visited; for row i, each k in
	 * [bpntr[i],bpntr[i+1]) multiplies the next entry of VA by scale_factors[i].
	 * Entries of VA are consumed consecutively, starting from VA[0].
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not read by this body.
	 * \return \rsb_errval_inp_param_msg (this body only ever returns RSB_ERR_NO_ERROR)
	 */

	float complex *val=VA;	/* cursor over the values, one step per visited nonzero */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		/* SCALE KERNEL HERE */
		for(nz=bpntr[row];nz<row_end;++nz)
		{
			/* column index fetched as in the generated template; this variant does not use it */
			const rsb_coo_idx_t col=bindx[nz];

			(void)col;
			/* unrolled body from the RSB_M4_ROW_SCALE_FUNCTION_BODY_UNROLLED macro */
			val[0]*=scale_factors[row];
			++val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_complex_C__tT_r1_c1_uu_sU_dE_uG(float complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float complex *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel, documented by the generator as \f$A \leftarrow A\cdot P, P_{ii}=s_{i}\f$, where \f$A \neq A^T\f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float complex, with rsb_coo_idx_t column indices.
	 *
	 * Rows br (included) to bc (excluded) are visited; for row i, each k in
	 * [bpntr[i],bpntr[i+1]) multiplies the next entry of VA by
	 * scale_factors[bindx[k]], i.e. the factor is selected by column index.
	 * Entries of VA are consumed consecutively, starting from VA[0].
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not read by this body.
	 * \return \rsb_errval_inp_param_msg (this body only ever returns RSB_ERR_NO_ERROR)
	 */

	float complex *val=VA;	/* cursor over the values, one step per visited nonzero */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		/* SCALE KERNEL HERE */
		for(nz=bpntr[row];nz<row_end;++nz)
		{
			const rsb_coo_idx_t col=bindx[nz];

			/* unrolled body from the RSB_M4_ROW_SCALE_FUNCTION_BODY_UNROLLED macro */
			val[0]*=scale_factors[col];
			++val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_complex_C__tT_r1_c1_uu_sU_dI_uG(float complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float complex *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel, documented by the generator as \f$A \leftarrow A\cdot P, P_{ii}=s_{i}\f$, where \f$A \neq A^T\f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float complex, with rsb_coo_idx_t column indices.
	 *
	 * Rows br (included) to bc (excluded) are visited; for row i, each k in
	 * [bpntr[i],bpntr[i+1]) multiplies the next entry of VA by
	 * scale_factors[bindx[k]], i.e. the factor is selected by column index.
	 * Entries of VA are consumed consecutively, starting from VA[0].
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not read by this body.
	 * \return \rsb_errval_inp_param_msg (this body only ever returns RSB_ERR_NO_ERROR)
	 */

	float complex *val=VA;	/* cursor over the values, one step per visited nonzero */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		/* SCALE KERNEL HERE */
		for(nz=bpntr[row];nz<row_end;++nz)
		{
			const rsb_coo_idx_t col=bindx[nz];

			/* unrolled body from the RSB_M4_ROW_SCALE_FUNCTION_BODY_UNROLLED macro */
			val[0]*=scale_factors[col];
			++val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_complex_H__tT_r1_c1_uu_sU_dE_uG(float complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float complex *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel, documented by the generator as \f$A \leftarrow A\cdot P, P_{ii}=s_{i}\f$, where \f$A \neq A^T\f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float complex, with rsb_half_idx_t column indices.
	 *
	 * Rows br (included) to bc (excluded) are visited; for row i, each k in
	 * [bpntr[i],bpntr[i+1]) multiplies the next entry of VA by
	 * scale_factors[bindx[k]], i.e. the factor is selected by column index.
	 * Entries of VA are consumed consecutively, starting from VA[0].
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not read by this body.
	 * \return \rsb_errval_inp_param_msg (this body only ever returns RSB_ERR_NO_ERROR)
	 */

	float complex *val=VA;	/* cursor over the values, one step per visited nonzero */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		/* SCALE KERNEL HERE */
		for(nz=bpntr[row];nz<row_end;++nz)
		{
			/* the half-width index is widened to rsb_coo_idx_t before use */
			const rsb_coo_idx_t col=bindx[nz];

			/* unrolled body from the RSB_M4_ROW_SCALE_FUNCTION_BODY_UNROLLED macro */
			val[0]*=scale_factors[col];
			++val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_complex_H__tT_r1_c1_uu_sU_dI_uG(float complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float complex *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel, documented by the generator as \f$A \leftarrow A\cdot P, P_{ii}=s_{i}\f$, where \f$A \neq A^T\f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float complex, with rsb_half_idx_t column indices.
	 *
	 * Rows br (included) to bc (excluded) are visited; for row i, each k in
	 * [bpntr[i],bpntr[i+1]) multiplies the next entry of VA by
	 * scale_factors[bindx[k]], i.e. the factor is selected by column index.
	 * Entries of VA are consumed consecutively, starting from VA[0].
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not read by this body.
	 * \return \rsb_errval_inp_param_msg (this body only ever returns RSB_ERR_NO_ERROR)
	 */

	float complex *val=VA;	/* cursor over the values, one step per visited nonzero */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		/* SCALE KERNEL HERE */
		for(nz=bpntr[row];nz<row_end;++nz)
		{
			/* the half-width index is widened to rsb_coo_idx_t before use */
			const rsb_coo_idx_t col=bindx[nz];

			/* unrolled body from the RSB_M4_ROW_SCALE_FUNCTION_BODY_UNROLLED macro */
			val[0]*=scale_factors[col];
			++val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_complex_C__tC_r1_c1_uu_sU_dE_uG(float complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float complex *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel, documented by the generator as \f$A \leftarrow A\cdot P, P_{ii}=s_{i}\f$, where \f$A \neq A^T\f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float complex, with rsb_coo_idx_t column indices.
	 *
	 * Rows br (included) to bc (excluded) are visited; for row i, each k in
	 * [bpntr[i],bpntr[i+1]) multiplies the next entry of VA by
	 * scale_factors[bindx[k]], i.e. the factor is selected by column index.
	 * No conjugation is applied to either operand in this body.
	 * Entries of VA are consumed consecutively, starting from VA[0].
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not read by this body.
	 * \return \rsb_errval_inp_param_msg (this body only ever returns RSB_ERR_NO_ERROR)
	 */

	float complex *val=VA;	/* cursor over the values, one step per visited nonzero */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		/* SCALE KERNEL HERE */
		for(nz=bpntr[row];nz<row_end;++nz)
		{
			const rsb_coo_idx_t col=bindx[nz];

			/* unrolled body from the RSB_M4_ROW_SCALE_FUNCTION_BODY_UNROLLED macro */
			val[0]*=scale_factors[col];
			++val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_complex_C__tC_r1_c1_uu_sU_dI_uG(float complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float complex *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel, documented by the generator as \f$A \leftarrow A\cdot P, P_{ii}=s_{i}\f$, where \f$A \neq A^T\f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float complex, with rsb_coo_idx_t column indices.
	 *
	 * Rows br (included) to bc (excluded) are visited; for row i, each k in
	 * [bpntr[i],bpntr[i+1]) multiplies the next entry of VA by
	 * scale_factors[bindx[k]], i.e. the factor is selected by column index.
	 * No conjugation is applied to either operand in this body.
	 * Entries of VA are consumed consecutively, starting from VA[0].
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not read by this body.
	 * \return \rsb_errval_inp_param_msg (this body only ever returns RSB_ERR_NO_ERROR)
	 */

	float complex *val=VA;	/* cursor over the values, one step per visited nonzero */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		/* SCALE KERNEL HERE */
		for(nz=bpntr[row];nz<row_end;++nz)
		{
			const rsb_coo_idx_t col=bindx[nz];

			/* unrolled body from the RSB_M4_ROW_SCALE_FUNCTION_BODY_UNROLLED macro */
			val[0]*=scale_factors[col];
			++val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_complex_H__tC_r1_c1_uu_sU_dE_uG(float complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float complex *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel, documented by the generator as \f$A \leftarrow A\cdot P, P_{ii}=s_{i}\f$, where \f$A \neq A^T\f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float complex, with rsb_half_idx_t column indices.
	 *
	 * Rows br (included) to bc (excluded) are visited; for row i, each k in
	 * [bpntr[i],bpntr[i+1]) multiplies the next entry of VA by
	 * scale_factors[bindx[k]], i.e. the factor is selected by column index.
	 * No conjugation is applied to either operand in this body.
	 * Entries of VA are consumed consecutively, starting from VA[0].
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not read by this body.
	 * \return \rsb_errval_inp_param_msg (this body only ever returns RSB_ERR_NO_ERROR)
	 */

	float complex *val=VA;	/* cursor over the values, one step per visited nonzero */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		/* SCALE KERNEL HERE */
		for(nz=bpntr[row];nz<row_end;++nz)
		{
			/* the half-width index is widened to rsb_coo_idx_t before use */
			const rsb_coo_idx_t col=bindx[nz];

			/* unrolled body from the RSB_M4_ROW_SCALE_FUNCTION_BODY_UNROLLED macro */
			val[0]*=scale_factors[col];
			++val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_complex_H__tC_r1_c1_uu_sU_dI_uG(float complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float complex *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel, documented by the generator as \f$A \leftarrow A\cdot P, P_{ii}=s_{i}\f$, where \f$A \neq A^T\f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float complex, with rsb_half_idx_t column indices.
	 *
	 * Rows br (included) to bc (excluded) are visited; for row i, each k in
	 * [bpntr[i],bpntr[i+1]) multiplies the next entry of VA by
	 * scale_factors[bindx[k]], i.e. the factor is selected by column index.
	 * No conjugation is applied to either operand in this body.
	 * Entries of VA are consumed consecutively, starting from VA[0].
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not read by this body.
	 * \return \rsb_errval_inp_param_msg (this body only ever returns RSB_ERR_NO_ERROR)
	 */

	float complex *val=VA;	/* cursor over the values, one step per visited nonzero */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		/* SCALE KERNEL HERE */
		for(nz=bpntr[row];nz<row_end;++nz)
		{
			/* the half-width index is widened to rsb_coo_idx_t before use */
			const rsb_coo_idx_t col=bindx[nz];

			/* unrolled body from the RSB_M4_ROW_SCALE_FUNCTION_BODY_UNROLLED macro */
			val[0]*=scale_factors[col];
			++val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_complex_C__tN_r1_c1_uu_sS_dE_uG(float complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float complex *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel, documented by the generator as \f$A \leftarrow A\cdot P, P_{ii}=s_{i}\f$, where \f$A == A^T\f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float complex, with rsb_coo_idx_t column indices.
	 *
	 * Rows br (included) to bc (excluded) are visited; for row i, each k in
	 * [bpntr[i],bpntr[i+1]) multiplies the next entry of VA by scale_factors[i].
	 * Entries of VA are consumed consecutively, starting from VA[0].
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not read by this body.
	 * \return \rsb_errval_inp_param_msg (this body only ever returns RSB_ERR_NO_ERROR)
	 */

	float complex *val=VA;	/* cursor over the values, one step per visited nonzero */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		/* SCALE KERNEL HERE */
		for(nz=bpntr[row];nz<row_end;++nz)
		{
			/* column index fetched as in the generated template; this variant does not use it */
			const rsb_coo_idx_t col=bindx[nz];

			(void)col;
			/* unrolled body from the RSB_M4_ROW_SCALE_FUNCTION_BODY_UNROLLED macro */
			val[0]*=scale_factors[row];
			++val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_complex_C__tN_r1_c1_uu_sS_dI_uG(float complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float complex *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel, documented by the generator as \f$A \leftarrow A\cdot P, P_{ii}=s_{i}\f$, where \f$A == A^T\f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float complex, with rsb_coo_idx_t column indices.
	 *
	 * Rows br (included) to bc (excluded) are visited; for row i, each k in
	 * [bpntr[i],bpntr[i+1]) multiplies the next entry of VA by scale_factors[i].
	 * Entries of VA are consumed consecutively, starting from VA[0].
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not read by this body.
	 * \return \rsb_errval_inp_param_msg (this body only ever returns RSB_ERR_NO_ERROR)
	 */

	float complex *val=VA;	/* cursor over the values, one step per visited nonzero */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		/* SCALE KERNEL HERE */
		for(nz=bpntr[row];nz<row_end;++nz)
		{
			/* column index fetched as in the generated template; this variant does not use it */
			const rsb_coo_idx_t col=bindx[nz];

			(void)col;
			/* unrolled body from the RSB_M4_ROW_SCALE_FUNCTION_BODY_UNROLLED macro */
			val[0]*=scale_factors[row];
			++val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_complex_H__tN_r1_c1_uu_sS_dE_uG(float complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float complex *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel, documented by the generator as \f$A \leftarrow A\cdot P, P_{ii}=s_{i}\f$, where \f$A == A^T\f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float complex, with rsb_half_idx_t column indices.
	 *
	 * Rows br (included) to bc (excluded) are visited; for row i, each k in
	 * [bpntr[i],bpntr[i+1]) multiplies the next entry of VA by scale_factors[i].
	 * Entries of VA are consumed consecutively, starting from VA[0].
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not read by this body.
	 * \return \rsb_errval_inp_param_msg (this body only ever returns RSB_ERR_NO_ERROR)
	 */

	float complex *val=VA;	/* cursor over the values, one step per visited nonzero */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		/* SCALE KERNEL HERE */
		for(nz=bpntr[row];nz<row_end;++nz)
		{
			/* column index fetched as in the generated template; this variant does not use it */
			const rsb_coo_idx_t col=bindx[nz];

			(void)col;
			/* unrolled body from the RSB_M4_ROW_SCALE_FUNCTION_BODY_UNROLLED macro */
			val[0]*=scale_factors[row];
			++val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_complex_H__tN_r1_c1_uu_sS_dI_uG(float complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float complex *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel, documented by the generator as \f$A \leftarrow A\cdot P, P_{ii}=s_{i}\f$, where \f$A == A^T\f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float complex, with rsb_half_idx_t column indices.
	 *
	 * Rows br (included) to bc (excluded) are visited; for row i, each k in
	 * [bpntr[i],bpntr[i+1]) multiplies the next entry of VA by scale_factors[i].
	 * Entries of VA are consumed consecutively, starting from VA[0].
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not read by this body.
	 * \return \rsb_errval_inp_param_msg (this body only ever returns RSB_ERR_NO_ERROR)
	 */

	float complex *val=VA;	/* cursor over the values, one step per visited nonzero */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		/* SCALE KERNEL HERE */
		for(nz=bpntr[row];nz<row_end;++nz)
		{
			/* column index fetched as in the generated template; this variant does not use it */
			const rsb_coo_idx_t col=bindx[nz];

			(void)col;
			/* unrolled body from the RSB_M4_ROW_SCALE_FUNCTION_BODY_UNROLLED macro */
			val[0]*=scale_factors[row];
			++val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_complex_C__tT_r1_c1_uu_sS_dE_uG(float complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float complex *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel, documented by the generator as \f$A \leftarrow A\cdot P, P_{ii}=s_{i}\f$, where \f$A == A^T\f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float complex, with rsb_coo_idx_t column indices.
	 *
	 * Rows br (included) to bc (excluded) are visited; for row i, each k in
	 * [bpntr[i],bpntr[i+1]) multiplies the next entry of VA by
	 * scale_factors[bindx[k]], i.e. the factor is selected by column index.
	 * Entries of VA are consumed consecutively, starting from VA[0].
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not read by this body.
	 * \return \rsb_errval_inp_param_msg (this body only ever returns RSB_ERR_NO_ERROR)
	 */

	float complex *val=VA;	/* cursor over the values, one step per visited nonzero */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		/* SCALE KERNEL HERE */
		for(nz=bpntr[row];nz<row_end;++nz)
		{
			const rsb_coo_idx_t col=bindx[nz];

			/* unrolled body from the RSB_M4_ROW_SCALE_FUNCTION_BODY_UNROLLED macro */
			val[0]*=scale_factors[col];
			++val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_complex_C__tT_r1_c1_uu_sS_dI_uG(float complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float complex *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel, documented by the generator as \f$A \leftarrow A\cdot P, P_{ii}=s_{i}\f$, where \f$A == A^T\f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float complex, with rsb_coo_idx_t column indices.
	 *
	 * Rows br (included) to bc (excluded) are visited; for row i, each k in
	 * [bpntr[i],bpntr[i+1]) multiplies the next entry of VA by
	 * scale_factors[bindx[k]], i.e. the factor is selected by column index.
	 * Entries of VA are consumed consecutively, starting from VA[0].
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not read by this body.
	 * \return \rsb_errval_inp_param_msg (this body only ever returns RSB_ERR_NO_ERROR)
	 */

	float complex *val=VA;	/* cursor over the values, one step per visited nonzero */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		/* SCALE KERNEL HERE */
		for(nz=bpntr[row];nz<row_end;++nz)
		{
			const rsb_coo_idx_t col=bindx[nz];

			/* unrolled body from the RSB_M4_ROW_SCALE_FUNCTION_BODY_UNROLLED macro */
			val[0]*=scale_factors[col];
			++val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_complex_H__tT_r1_c1_uu_sS_dE_uG(float complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float complex *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel, documented by the generator as \f$A \leftarrow A\cdot P, P_{ii}=s_{i}\f$, where \f$A == A^T\f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float complex, with rsb_half_idx_t column indices.
	 *
	 * Rows br (included) to bc (excluded) are visited; for row i, each k in
	 * [bpntr[i],bpntr[i+1]) multiplies the next entry of VA by
	 * scale_factors[bindx[k]], i.e. the factor is selected by column index.
	 * Entries of VA are consumed consecutively, starting from VA[0].
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not read by this body.
	 * \return \rsb_errval_inp_param_msg (this body only ever returns RSB_ERR_NO_ERROR)
	 */

	float complex *val=VA;	/* cursor over the values, one step per visited nonzero */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		/* SCALE KERNEL HERE */
		for(nz=bpntr[row];nz<row_end;++nz)
		{
			/* the half-width index is widened to rsb_coo_idx_t before use */
			const rsb_coo_idx_t col=bindx[nz];

			/* unrolled body from the RSB_M4_ROW_SCALE_FUNCTION_BODY_UNROLLED macro */
			val[0]*=scale_factors[col];
			++val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_complex_H__tT_r1_c1_uu_sS_dI_uG(float complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float complex *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel, documented by the generator as \f$A \leftarrow A\cdot P, P_{ii}=s_{i}\f$, where \f$A == A^T\f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float complex, with rsb_half_idx_t column indices.
	 *
	 * Rows br (included) to bc (excluded) are visited; for row i, each k in
	 * [bpntr[i],bpntr[i+1]) multiplies the next entry of VA by
	 * scale_factors[bindx[k]], i.e. the factor is selected by column index.
	 * Entries of VA are consumed consecutively, starting from VA[0].
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not read by this body.
	 * \return \rsb_errval_inp_param_msg (this body only ever returns RSB_ERR_NO_ERROR)
	 */

	float complex *val=VA;	/* cursor over the values, one step per visited nonzero */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		/* SCALE KERNEL HERE */
		for(nz=bpntr[row];nz<row_end;++nz)
		{
			/* the half-width index is widened to rsb_coo_idx_t before use */
			const rsb_coo_idx_t col=bindx[nz];

			/* unrolled body from the RSB_M4_ROW_SCALE_FUNCTION_BODY_UNROLLED macro */
			val[0]*=scale_factors[col];
			++val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_complex_C__tC_r1_c1_uu_sS_dE_uG(float complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float complex *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel: \f$A \leftarrow A\cdot P, P_{ii}=s_{i}\f$, variant for \f$A == A^T\f$.
	 * Each stored value is multiplied by scale_factors[j], j being the column
	 * index read from bindx for that value.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float complex, with rsb_coo_idx_t column indices.
	 * Rows br (inclusive) through bc (exclusive) are visited; values are taken
	 * from VA consecutively, starting at its first element.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (as written, always RSB_ERR_NO_ERROR)
	 */
	float complex *val = VA;	/* cursor over the stored values */
	rsb_coo_idx_t row = 0, col = 0;
	rsb_nnz_idx_t nz = 0;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_begin = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		/* SCALE KERNEL: one multiplication per stored index of this row */
		nz = row_begin;
		while (nz < row_end)
		{
			col = bindx[nz];
			*val *= scale_factors[col];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_complex_C__tC_r1_c1_uu_sS_dI_uG(float complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float complex *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel: \f$A \leftarrow A\cdot P, P_{ii}=s_{i}\f$, variant for \f$A == A^T\f$.
	 * Each stored value is multiplied by scale_factors[j], j being the column
	 * index read from bindx for that value.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float complex, with rsb_coo_idx_t column indices.
	 * Rows br (inclusive) through bc (exclusive) are visited; values are taken
	 * from VA consecutively, starting at its first element.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (as written, always RSB_ERR_NO_ERROR)
	 */
	float complex *val = VA;	/* cursor over the stored values */
	rsb_coo_idx_t row = 0, col = 0;
	rsb_nnz_idx_t nz = 0;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_begin = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		/* SCALE KERNEL: one multiplication per stored index of this row */
		nz = row_begin;
		while (nz < row_end)
		{
			col = bindx[nz];
			*val *= scale_factors[col];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_complex_H__tC_r1_c1_uu_sS_dE_uG(float complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float complex *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel: \f$A \leftarrow A\cdot P, P_{ii}=s_{i}\f$, variant for \f$A == A^T\f$.
	 * Each stored value is multiplied by scale_factors[j], j being the column
	 * index read from bindx for that value.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float complex, with rsb_half_idx_t column indices.
	 * Rows br (inclusive) through bc (exclusive) are visited; values are taken
	 * from VA consecutively, starting at its first element.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (as written, always RSB_ERR_NO_ERROR)
	 */
	float complex *val = VA;	/* cursor over the stored values */
	rsb_coo_idx_t row = 0, col = 0;
	rsb_nnz_idx_t nz = 0;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_begin = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		/* SCALE KERNEL: one multiplication per stored index of this row */
		nz = row_begin;
		while (nz < row_end)
		{
			col = bindx[nz];
			*val *= scale_factors[col];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_complex_H__tC_r1_c1_uu_sS_dI_uG(float complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float complex *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel: \f$A \leftarrow A\cdot P, P_{ii}=s_{i}\f$, variant for \f$A == A^T\f$.
	 * Each stored value is multiplied by scale_factors[j], j being the column
	 * index read from bindx for that value.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float complex, with rsb_half_idx_t column indices.
	 * Rows br (inclusive) through bc (exclusive) are visited; values are taken
	 * from VA consecutively, starting at its first element.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (as written, always RSB_ERR_NO_ERROR)
	 */
	float complex *val = VA;	/* cursor over the stored values */
	rsb_coo_idx_t row = 0, col = 0;
	rsb_nnz_idx_t nz = 0;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_begin = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		/* SCALE KERNEL: one multiplication per stored index of this row */
		nz = row_begin;
		while (nz < row_end)
		{
			col = bindx[nz];
			*val *= scale_factors[col];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_complex_C__tN_r1_c1_uu_sH_dE_uG(float complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float complex *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel, variant for \f$A == A^H\f$.
	 * Each stored value is multiplied by scale_factors[i], i being the index
	 * of the row currently visited.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float complex, with rsb_coo_idx_t column indices.
	 * Rows br (inclusive) through bc (exclusive) are visited; values are taken
	 * from VA consecutively, starting at its first element.
	 * bindx is read, but its values do not affect the result.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (as written, always RSB_ERR_NO_ERROR)
	 */
	float complex *val = VA;	/* cursor over the stored values */
	rsb_coo_idx_t row = 0, col = 0;
	rsb_nnz_idx_t nz = 0;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_begin = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		/* SCALE KERNEL: one multiplication per stored index of this row */
		nz = row_begin;
		while (nz < row_end)
		{
			col = bindx[nz];	/* column index is loaded but not used by this variant */
			*val *= scale_factors[row];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_complex_C__tN_r1_c1_uu_sH_dI_uG(float complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float complex *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel, variant for \f$A == A^H\f$.
	 * Each stored value is multiplied by scale_factors[i], i being the index
	 * of the row currently visited.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float complex, with rsb_coo_idx_t column indices.
	 * Rows br (inclusive) through bc (exclusive) are visited; values are taken
	 * from VA consecutively, starting at its first element.
	 * bindx is read, but its values do not affect the result.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (as written, always RSB_ERR_NO_ERROR)
	 */
	float complex *val = VA;	/* cursor over the stored values */
	rsb_coo_idx_t row = 0, col = 0;
	rsb_nnz_idx_t nz = 0;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_begin = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		/* SCALE KERNEL: one multiplication per stored index of this row */
		nz = row_begin;
		while (nz < row_end)
		{
			col = bindx[nz];	/* column index is loaded but not used by this variant */
			*val *= scale_factors[row];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_complex_H__tN_r1_c1_uu_sH_dE_uG(float complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float complex *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel, variant for \f$A == A^H\f$.
	 * Each stored value is multiplied by scale_factors[i], i being the index
	 * of the row currently visited.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float complex, with rsb_half_idx_t column indices.
	 * Rows br (inclusive) through bc (exclusive) are visited; values are taken
	 * from VA consecutively, starting at its first element.
	 * bindx is read, but its values do not affect the result.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (as written, always RSB_ERR_NO_ERROR)
	 */
	float complex *val = VA;	/* cursor over the stored values */
	rsb_coo_idx_t row = 0, col = 0;
	rsb_nnz_idx_t nz = 0;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_begin = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		/* SCALE KERNEL: one multiplication per stored index of this row */
		nz = row_begin;
		while (nz < row_end)
		{
			col = bindx[nz];	/* column index is loaded but not used by this variant */
			*val *= scale_factors[row];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_complex_H__tN_r1_c1_uu_sH_dI_uG(float complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float complex *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel, variant for \f$A == A^H\f$.
	 * Each stored value is multiplied by scale_factors[i], i being the index
	 * of the row currently visited.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float complex, with rsb_half_idx_t column indices.
	 * Rows br (inclusive) through bc (exclusive) are visited; values are taken
	 * from VA consecutively, starting at its first element.
	 * bindx is read, but its values do not affect the result.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (as written, always RSB_ERR_NO_ERROR)
	 */
	float complex *val = VA;	/* cursor over the stored values */
	rsb_coo_idx_t row = 0, col = 0;
	rsb_nnz_idx_t nz = 0;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_begin = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		/* SCALE KERNEL: one multiplication per stored index of this row */
		nz = row_begin;
		while (nz < row_end)
		{
			col = bindx[nz];	/* column index is loaded but not used by this variant */
			*val *= scale_factors[row];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_complex_C__tT_r1_c1_uu_sH_dE_uG(float complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float complex *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel: \f$A \leftarrow A\cdot P, P_{ii}=s_{i}\f$, variant for \f$A == A^H\f$.
	 * Each stored value is multiplied by scale_factors[j], j being the column
	 * index read from bindx for that value.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float complex, with rsb_coo_idx_t column indices.
	 * Rows br (inclusive) through bc (exclusive) are visited; values are taken
	 * from VA consecutively, starting at its first element.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (as written, always RSB_ERR_NO_ERROR)
	 */
	float complex *val = VA;	/* cursor over the stored values */
	rsb_coo_idx_t row = 0, col = 0;
	rsb_nnz_idx_t nz = 0;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_begin = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		/* SCALE KERNEL: one multiplication per stored index of this row */
		nz = row_begin;
		while (nz < row_end)
		{
			col = bindx[nz];
			*val *= scale_factors[col];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_complex_C__tT_r1_c1_uu_sH_dI_uG(float complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float complex *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel: \f$A \leftarrow A\cdot P, P_{ii}=s_{i}\f$, variant for \f$A == A^H\f$.
	 * Each stored value is multiplied by scale_factors[j], j being the column
	 * index read from bindx for that value.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float complex, with rsb_coo_idx_t column indices.
	 * Rows br (inclusive) through bc (exclusive) are visited; values are taken
	 * from VA consecutively, starting at its first element.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (as written, always RSB_ERR_NO_ERROR)
	 */
	float complex *val = VA;	/* cursor over the stored values */
	rsb_coo_idx_t row = 0, col = 0;
	rsb_nnz_idx_t nz = 0;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_begin = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		/* SCALE KERNEL: one multiplication per stored index of this row */
		nz = row_begin;
		while (nz < row_end)
		{
			col = bindx[nz];
			*val *= scale_factors[col];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_complex_H__tT_r1_c1_uu_sH_dE_uG(float complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float complex *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel: \f$A \leftarrow A\cdot P, P_{ii}=s_{i}\f$, variant for \f$A == A^H\f$.
	 * Each stored value is multiplied by scale_factors[j], j being the column
	 * index read from bindx for that value.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float complex, with rsb_half_idx_t column indices.
	 * Rows br (inclusive) through bc (exclusive) are visited; values are taken
	 * from VA consecutively, starting at its first element.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (as written, always RSB_ERR_NO_ERROR)
	 */
	float complex *val = VA;	/* cursor over the stored values */
	rsb_coo_idx_t row = 0, col = 0;
	rsb_nnz_idx_t nz = 0;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_begin = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		/* SCALE KERNEL: one multiplication per stored index of this row */
		nz = row_begin;
		while (nz < row_end)
		{
			col = bindx[nz];
			*val *= scale_factors[col];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_complex_H__tT_r1_c1_uu_sH_dI_uG(float complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float complex *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel: \f$A \leftarrow A\cdot P, P_{ii}=s_{i}\f$, variant for \f$A == A^H\f$.
	 * Each stored value is multiplied by scale_factors[j], j being the column
	 * index read from bindx for that value.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float complex, with rsb_half_idx_t column indices.
	 * Rows br (inclusive) through bc (exclusive) are visited; values are taken
	 * from VA consecutively, starting at its first element.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (as written, always RSB_ERR_NO_ERROR)
	 */
	float complex *val = VA;	/* cursor over the stored values */
	rsb_coo_idx_t row = 0, col = 0;
	rsb_nnz_idx_t nz = 0;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_begin = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		/* SCALE KERNEL: one multiplication per stored index of this row */
		nz = row_begin;
		while (nz < row_end)
		{
			col = bindx[nz];
			*val *= scale_factors[col];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_complex_C__tC_r1_c1_uu_sH_dE_uG(float complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float complex *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel: \f$A \leftarrow A\cdot P, P_{ii}=s_{i}\f$, variant for \f$A == A^H\f$.
	 * Each stored value is multiplied by scale_factors[j], j being the column
	 * index read from bindx for that value.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float complex, with rsb_coo_idx_t column indices.
	 * Rows br (inclusive) through bc (exclusive) are visited; values are taken
	 * from VA consecutively, starting at its first element.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (as written, always RSB_ERR_NO_ERROR)
	 */
	float complex *val = VA;	/* cursor over the stored values */
	rsb_coo_idx_t row = 0, col = 0;
	rsb_nnz_idx_t nz = 0;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_begin = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		/* SCALE KERNEL: one multiplication per stored index of this row */
		nz = row_begin;
		while (nz < row_end)
		{
			col = bindx[nz];
			*val *= scale_factors[col];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_complex_C__tC_r1_c1_uu_sH_dI_uG(float complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float complex *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel: \f$A \leftarrow A\cdot P, P_{ii}=s_{i}\f$, variant for \f$A == A^H\f$.
	 * Each stored value is multiplied by scale_factors[j], j being the column
	 * index read from bindx for that value.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float complex, with rsb_coo_idx_t column indices.
	 * Rows br (inclusive) through bc (exclusive) are visited; values are taken
	 * from VA consecutively, starting at its first element.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (as written, always RSB_ERR_NO_ERROR)
	 */
	float complex *val = VA;	/* cursor over the stored values */
	rsb_coo_idx_t row = 0, col = 0;
	rsb_nnz_idx_t nz = 0;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_begin = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		/* SCALE KERNEL: one multiplication per stored index of this row */
		nz = row_begin;
		while (nz < row_end)
		{
			col = bindx[nz];
			*val *= scale_factors[col];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_complex_H__tC_r1_c1_uu_sH_dE_uG(float complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float complex *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel: \f$A \leftarrow A\cdot P, P_{ii}=s_{i}\f$, variant for \f$A == A^H\f$.
	 * Each stored value is multiplied by scale_factors[j], j being the column
	 * index read from bindx for that value.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type float complex, with rsb_half_idx_t column indices.
	 * Rows br (inclusive) through bc (exclusive) are visited; values are taken
	 * from VA consecutively, starting at its first element.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (as written, always RSB_ERR_NO_ERROR)
	 */
	float complex *val = VA;	/* cursor over the stored values */
	rsb_coo_idx_t row = 0, col = 0;
	rsb_nnz_idx_t nz = 0;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_begin = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		/* SCALE KERNEL: one multiplication per stored index of this row */
		nz = row_begin;
		while (nz < row_end)
		{
			col = bindx[nz];
			*val *= scale_factors[col];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_float_complex_H__tC_r1_c1_uu_sH_dI_uG(float complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float complex *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel: \f$A \leftarrow A\cdot P, P_{ii}=s_{i}\f$, variant for \f$A == A^H\f$.
	 * Each stored value is multiplied by scale_factors[j], j being the column
	 * index read from bindx for that value.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type float complex, with rsb_half_idx_t column indices.
	 * Rows br (inclusive) through bc (exclusive) are visited; values are taken
	 * from VA consecutively, starting at its first element.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (as written, always RSB_ERR_NO_ERROR)
	 */
	float complex *val = VA;	/* cursor over the stored values */
	rsb_coo_idx_t row = 0, col = 0;
	rsb_nnz_idx_t nz = 0;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_begin = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		/* SCALE KERNEL: one multiplication per stored index of this row */
		nz = row_begin;
		while (nz < row_end)
		{
			col = bindx[nz];
			*val *= scale_factors[col];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_complex_C__tN_r1_c1_uu_sU_dE_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation step for \f$ \|A\|_{\infty} \f$, variant for \f$A \neq A^T\f$: the modulus (cabs) of each
	 * stored value is added to row_sums[i+roff], i being the row currently
	 * visited. No maximum is taken here.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double complex, with rsb_coo_idx_t column indices.
	 * Rows br (inclusive) through bc (exclusive) are visited; values are taken
	 * from VA consecutively, starting at its first element.
	 * bindx is read, but its values do not affect the result.
	 * Mdim, mdim, indptr, rpntr, cpntr, coff and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (as written, always RSB_ERR_NO_ERROR)
	 */
	const double complex *val = VA;	/* cursor over the stored values */
	rsb_coo_idx_t row = 0, col = 0;
	rsb_nnz_idx_t nz = 0;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_begin = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		/* INFTY_NORM KERNEL: one accumulation per stored index of this row */
		nz = row_begin;
		while (nz < row_end)
		{
			double complex *dest = NULL;

			col = bindx[nz];	/* column index is loaded but not used by this variant */
			dest = row_sums + row;
			{
				/* FIXME (from the generator): THE FOLLOWING CODE IS NOT CORRECT */
				/* NOTE : should better use some intrinsic here. */
				const double complex magnitude = cabs(*val);

				dest[roff] += magnitude;
			}
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_complex_C__tN_r1_c1_uu_sU_dI_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation step for \f$ \|A\|_{\infty} \f$, variant for \f$A \neq A^T\f$: the modulus (cabs) of each
	 * stored value is added to row_sums[i+roff], i being the row currently
	 * visited. No maximum is taken here.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double complex, with rsb_coo_idx_t column indices.
	 * Rows br (inclusive) through bc (exclusive) are visited; values are taken
	 * from VA consecutively, starting at its first element.
	 * bindx is read, but its values do not affect the result.
	 * Mdim, mdim, indptr, rpntr, cpntr, coff and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (as written, always RSB_ERR_NO_ERROR)
	 */
	const double complex *val = VA;	/* cursor over the stored values */
	rsb_coo_idx_t row = 0, col = 0;
	rsb_nnz_idx_t nz = 0;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_begin = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		/* INFTY_NORM KERNEL: one accumulation per stored index of this row */
		nz = row_begin;
		while (nz < row_end)
		{
			double complex *dest = NULL;

			col = bindx[nz];	/* column index is loaded but not used by this variant */
			dest = row_sums + row;
			{
				/* FIXME (from the generator): THE FOLLOWING CODE IS NOT CORRECT */
				/* NOTE : should better use some intrinsic here. */
				const double complex magnitude = cabs(*val);

				dest[roff] += magnitude;
			}
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_complex_H__tN_r1_c1_uu_sU_dE_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation step for \f$ \|A\|_{\infty} \f$, variant for \f$A \neq A^T\f$: the modulus (cabs) of each
	 * stored value is added to row_sums[i+roff], i being the row currently
	 * visited. No maximum is taken here.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double complex, with rsb_half_idx_t column indices.
	 * Rows br (inclusive) through bc (exclusive) are visited; values are taken
	 * from VA consecutively, starting at its first element.
	 * bindx is read, but its values do not affect the result.
	 * Mdim, mdim, indptr, rpntr, cpntr, coff and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (as written, always RSB_ERR_NO_ERROR)
	 */
	const double complex *val = VA;	/* cursor over the stored values */
	rsb_coo_idx_t row = 0, col = 0;
	rsb_nnz_idx_t nz = 0;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_begin = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		/* INFTY_NORM KERNEL: one accumulation per stored index of this row */
		nz = row_begin;
		while (nz < row_end)
		{
			double complex *dest = NULL;

			col = bindx[nz];	/* column index is loaded but not used by this variant */
			dest = row_sums + row;
			{
				/* FIXME (from the generator): THE FOLLOWING CODE IS NOT CORRECT */
				/* NOTE : should better use some intrinsic here. */
				const double complex magnitude = cabs(*val);

				dest[roff] += magnitude;
			}
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_complex_H__tN_r1_c1_uu_sU_dI_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation step for \f$ \|A\|_{\infty} \f$, variant for \f$A \neq A^T\f$: the modulus (cabs) of each
	 * stored value is added to row_sums[i+roff], i being the row currently
	 * visited. No maximum is taken here.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double complex, with rsb_half_idx_t column indices.
	 * Rows br (inclusive) through bc (exclusive) are visited; values are taken
	 * from VA consecutively, starting at its first element.
	 * bindx is read, but its values do not affect the result.
	 * Mdim, mdim, indptr, rpntr, cpntr, coff and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (as written, always RSB_ERR_NO_ERROR)
	 */
	const double complex *val = VA;	/* cursor over the stored values */
	rsb_coo_idx_t row = 0, col = 0;
	rsb_nnz_idx_t nz = 0;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_begin = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		/* INFTY_NORM KERNEL: one accumulation per stored index of this row */
		nz = row_begin;
		while (nz < row_end)
		{
			double complex *dest = NULL;

			col = bindx[nz];	/* column index is loaded but not used by this variant */
			dest = row_sums + row;
			{
				/* FIXME (from the generator): THE FOLLOWING CODE IS NOT CORRECT */
				/* NOTE : should better use some intrinsic here. */
				const double complex magnitude = cabs(*val);

				dest[roff] += magnitude;
			}
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_complex_C__tT_r1_c1_uu_sU_dE_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation step for \f$ \|A\|_{\infty} \f$, variant for \f$A \neq A^T\f$: the modulus (cabs) of each
	 * stored value is added to row_sums[j+coff], j being the column index read
	 * from bindx for that value. No maximum is taken here.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double complex, with rsb_coo_idx_t column indices.
	 * Rows br (inclusive) through bc (exclusive) are visited; values are taken
	 * from VA consecutively, starting at its first element.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (as written, always RSB_ERR_NO_ERROR)
	 */
	const double complex *val = VA;	/* cursor over the stored values */
	rsb_coo_idx_t row = 0, col = 0;
	rsb_nnz_idx_t nz = 0;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_begin = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		/* INFTY_NORM KERNEL: one accumulation per stored index of this row */
		nz = row_begin;
		while (nz < row_end)
		{
			double complex *dest = NULL;

			col = bindx[nz];
			dest = row_sums + col;
			{
				/* FIXME (from the generator): THE FOLLOWING CODE IS NOT CORRECT */
				/* NOTE : should better use some intrinsic here. */
				const double complex magnitude = cabs(*val);

				dest[coff] += magnitude;
			}
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_complex_C__tT_r1_c1_uu_sU_dI_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation step for \f$ \|A\|_{\infty} \f$, variant for \f$A \neq A^T\f$: the modulus (cabs) of each
	 * stored value is added to row_sums[j+coff], j being the column index read
	 * from bindx for that value. No maximum is taken here.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double complex, with rsb_coo_idx_t column indices.
	 * Rows br (inclusive) through bc (exclusive) are visited; values are taken
	 * from VA consecutively, starting at its first element.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (as written, always RSB_ERR_NO_ERROR)
	 */
	const double complex *val = VA;	/* cursor over the stored values */
	rsb_coo_idx_t row = 0, col = 0;
	rsb_nnz_idx_t nz = 0;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_begin = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		/* INFTY_NORM KERNEL: one accumulation per stored index of this row */
		nz = row_begin;
		while (nz < row_end)
		{
			double complex *dest = NULL;

			col = bindx[nz];
			dest = row_sums + col;
			{
				/* FIXME (from the generator): THE FOLLOWING CODE IS NOT CORRECT */
				/* NOTE : should better use some intrinsic here. */
				const double complex magnitude = cabs(*val);

				dest[coff] += magnitude;
			}
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_complex_H__tT_r1_c1_uu_sU_dE_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation step for \f$ \|A\|_{\infty} \f$, variant for \f$A \neq A^T\f$: the modulus (cabs) of each
	 * stored value is added to row_sums[j+coff], j being the column index read
	 * from bindx for that value. No maximum is taken here.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double complex, with rsb_half_idx_t column indices.
	 * Rows br (inclusive) through bc (exclusive) are visited; values are taken
	 * from VA consecutively, starting at its first element.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (as written, always RSB_ERR_NO_ERROR)
	 */
	const double complex *val = VA;	/* cursor over the stored values */
	rsb_coo_idx_t row = 0, col = 0;
	rsb_nnz_idx_t nz = 0;

	for (row = br; RSB_LIKELY(row < bc); ++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_begin = bpntr[row];
		const rsb_nnz_idx_t row_end = bpntr[row + 1];

		/* INFTY_NORM KERNEL: one accumulation per stored index of this row */
		nz = row_begin;
		while (nz < row_end)
		{
			double complex *dest = NULL;

			col = bindx[nz];
			dest = row_sums + col;
			{
				/* FIXME (from the generator): THE FOLLOWING CODE IS NOT CORRECT */
				/* NOTE : should better use some intrinsic here. */
				const double complex magnitude = cabs(*val);

				dest[coff] += magnitude;
			}
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_complex_H__tT_r1_c1_uu_sU_dI_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Partial kernel for \f$ \|A\|_{\infty} \f$, with A taken as \f$ A \neq A^T \f$: each value
	 * \f$ v \f$ stored in rows br..bc-1 at column \f$ j \f$ is accumulated as
	 * \f$ row\_sums_{coff+j} \leftarrow row\_sums_{coff+j} + |v| \f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double complex, with rsb_half_idx_t column indices.
	 * Values are consumed sequentially starting at VA[0]; the diagonal gets no special treatment,
	 * and Mdim, mdim, indptr, rpntr, cpntr, roff and flags are not used by this body.
	 * NOTE(review): the sequential walk presumably relies on bpntr[br] being 0 -- confirm with the caller.
	 * \return \rsb_errval_inp_param_msg (this body always returns RSB_ERR_NO_ERROR)
	 */
	/* FIXME : THE FOLLOWING CODE IS NOT CORRECT (warning inherited from the generating template) */
	/* shape of the RSB_M4_INFTY_NORM_FUNCTION_BODY_UNROLLED macro output, 1 x 1 block */
	const double complex *val=VA;	/* next unread value, advanced once per visited nonzero */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_begin=bpntr[row],row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz=row_begin;nz<row_end;++nz,++val)
		{
			const rsb_coo_idx_t col=bindx[nz];	/* half-width index widened to rsb_coo_idx_t */
			double complex *target=row_sums+col;
			/* NOTE : should better use some intrinsic here. */
			const double complex magnitude=cabs(*val);

			target[coff]+=magnitude;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_complex_C__tC_r1_c1_uu_sU_dE_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Partial kernel for \f$ \|A\|_{\infty} \f$, with A taken as \f$ A \neq A^T \f$: each value
	 * \f$ v \f$ stored in rows br..bc-1 at column \f$ j \f$ is accumulated as
	 * \f$ row\_sums_{coff+j} \leftarrow row\_sums_{coff+j} + |\bar{v}| \f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double complex, with rsb_coo_idx_t column indices.
	 * Values are consumed sequentially starting at VA[0]; the diagonal gets no special treatment,
	 * and Mdim, mdim, indptr, rpntr, cpntr, roff and flags are not used by this body.
	 * NOTE(review): the sequential walk presumably relies on bpntr[br] being 0 -- confirm with the caller.
	 * \return \rsb_errval_inp_param_msg (this body always returns RSB_ERR_NO_ERROR)
	 */
	/* FIXME : THE FOLLOWING CODE IS NOT CORRECT (warning inherited from the generating template) */
	/* shape of the RSB_M4_INFTY_NORM_FUNCTION_BODY_UNROLLED macro output, 1 x 1 block */
	const double complex *val=VA;	/* next unread value, advanced once per visited nonzero */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_begin=bpntr[row],row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz=row_begin;nz<row_end;++nz,++val)
		{
			const rsb_coo_idx_t col=bindx[nz];
			double complex *target=row_sums+col;
			/* NOTE : should better use some intrinsic here. */
			const double complex conjugated=conj(*val);	/* conjugation kept as emitted by the template */
			const double complex magnitude=cabs(conjugated);

			target[coff]+=magnitude;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_complex_C__tC_r1_c1_uu_sU_dI_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Partial kernel for \f$ \|A\|_{\infty} \f$, with A taken as \f$ A \neq A^T \f$: each value
	 * \f$ v \f$ stored in rows br..bc-1 at column \f$ j \f$ is accumulated as
	 * \f$ row\_sums_{coff+j} \leftarrow row\_sums_{coff+j} + |\bar{v}| \f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double complex, with rsb_coo_idx_t column indices.
	 * Values are consumed sequentially starting at VA[0]; the diagonal gets no special treatment,
	 * and Mdim, mdim, indptr, rpntr, cpntr, roff and flags are not used by this body.
	 * NOTE(review): the sequential walk presumably relies on bpntr[br] being 0 -- confirm with the caller.
	 * \return \rsb_errval_inp_param_msg (this body always returns RSB_ERR_NO_ERROR)
	 */
	/* FIXME : THE FOLLOWING CODE IS NOT CORRECT (warning inherited from the generating template) */
	/* shape of the RSB_M4_INFTY_NORM_FUNCTION_BODY_UNROLLED macro output, 1 x 1 block */
	const double complex *val=VA;	/* next unread value, advanced once per visited nonzero */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_begin=bpntr[row],row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz=row_begin;nz<row_end;++nz,++val)
		{
			const rsb_coo_idx_t col=bindx[nz];
			double complex *target=row_sums+col;
			/* NOTE : should better use some intrinsic here. */
			const double complex conjugated=conj(*val);	/* conjugation kept as emitted by the template */
			const double complex magnitude=cabs(conjugated);

			target[coff]+=magnitude;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_complex_H__tC_r1_c1_uu_sU_dE_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Partial kernel for \f$ \|A\|_{\infty} \f$, with A taken as \f$ A \neq A^T \f$: each value
	 * \f$ v \f$ stored in rows br..bc-1 at column \f$ j \f$ is accumulated as
	 * \f$ row\_sums_{coff+j} \leftarrow row\_sums_{coff+j} + |\bar{v}| \f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double complex, with rsb_half_idx_t column indices.
	 * Values are consumed sequentially starting at VA[0]; the diagonal gets no special treatment,
	 * and Mdim, mdim, indptr, rpntr, cpntr, roff and flags are not used by this body.
	 * NOTE(review): the sequential walk presumably relies on bpntr[br] being 0 -- confirm with the caller.
	 * \return \rsb_errval_inp_param_msg (this body always returns RSB_ERR_NO_ERROR)
	 */
	/* FIXME : THE FOLLOWING CODE IS NOT CORRECT (warning inherited from the generating template) */
	/* shape of the RSB_M4_INFTY_NORM_FUNCTION_BODY_UNROLLED macro output, 1 x 1 block */
	const double complex *val=VA;	/* next unread value, advanced once per visited nonzero */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_begin=bpntr[row],row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz=row_begin;nz<row_end;++nz,++val)
		{
			const rsb_coo_idx_t col=bindx[nz];	/* half-width index widened to rsb_coo_idx_t */
			double complex *target=row_sums+col;
			/* NOTE : should better use some intrinsic here. */
			const double complex conjugated=conj(*val);	/* conjugation kept as emitted by the template */
			const double complex magnitude=cabs(conjugated);

			target[coff]+=magnitude;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_complex_H__tC_r1_c1_uu_sU_dI_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Partial kernel for \f$ \|A\|_{\infty} \f$, with A taken as \f$ A \neq A^T \f$: each value
	 * \f$ v \f$ stored in rows br..bc-1 at column \f$ j \f$ is accumulated as
	 * \f$ row\_sums_{coff+j} \leftarrow row\_sums_{coff+j} + |\bar{v}| \f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double complex, with rsb_half_idx_t column indices.
	 * Values are consumed sequentially starting at VA[0]; the diagonal gets no special treatment,
	 * and Mdim, mdim, indptr, rpntr, cpntr, roff and flags are not used by this body.
	 * NOTE(review): the sequential walk presumably relies on bpntr[br] being 0 -- confirm with the caller.
	 * \return \rsb_errval_inp_param_msg (this body always returns RSB_ERR_NO_ERROR)
	 */
	/* FIXME : THE FOLLOWING CODE IS NOT CORRECT (warning inherited from the generating template) */
	/* shape of the RSB_M4_INFTY_NORM_FUNCTION_BODY_UNROLLED macro output, 1 x 1 block */
	const double complex *val=VA;	/* next unread value, advanced once per visited nonzero */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_begin=bpntr[row],row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz=row_begin;nz<row_end;++nz,++val)
		{
			const rsb_coo_idx_t col=bindx[nz];	/* half-width index widened to rsb_coo_idx_t */
			double complex *target=row_sums+col;
			/* NOTE : should better use some intrinsic here. */
			const double complex conjugated=conj(*val);	/* conjugation kept as emitted by the template */
			const double complex magnitude=cabs(conjugated);

			target[coff]+=magnitude;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_complex_C__tN_r1_c1_uu_sS_dE_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Partial kernel for \f$ \|A\|_{\infty} \f$, with A taken as \f$ A = A^T \f$: each value
	 * \f$ v \f$ stored in row \f$ i \f$ (br..bc-1), column \f$ j \f$ is accumulated as
	 * \f$ row\_sums_{roff+i} \leftarrow row\_sums_{roff+i} + |v| \f$ and, unless roff==coff and
	 * \f$ i = j \f$, also as \f$ row\_sums_{coff+j} \leftarrow row\_sums_{coff+j} + |v| \f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double complex, with rsb_coo_idx_t column indices.
	 * Values are consumed sequentially starting at VA[0];
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used by this body.
	 * NOTE(review): the sequential walk presumably relies on bpntr[br] being 0 -- confirm with the caller.
	 * \return \rsb_errval_inp_param_msg (this body always returns RSB_ERR_NO_ERROR)
	 */
	/* FIXME : THE FOLLOWING CODE IS NOT CORRECT (warning inherited from the generating template) */
	/* shape of the RSB_M4_INFTY_NORM_FUNCTION_BODY_UNROLLED macro output, 1 x 1 block */
	const double complex *val=VA;	/* next unread value, advanced once per visited nonzero */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_begin=bpntr[row],row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz=row_begin;nz<row_end;++nz,++val)
		{
			const rsb_coo_idx_t col=bindx[nz];
			double complex *target=row_sums+row;
			/* NOTE : should better use some intrinsic here. */
			const double complex magnitude=cabs(*val);

			target[roff]+=magnitude;
			/* mirrored contribution; skipped only when roff==coff and row==col, so that entry counts once */
			if(!(roff==coff && row==col))
				row_sums[coff+col]+=magnitude;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_complex_C__tN_r1_c1_uu_sS_dI_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Partial kernel for \f$ \|A\|_{\infty} \f$, with A taken as \f$ A = A^T \f$: each value
	 * \f$ v \f$ stored in row \f$ i \f$ (br..bc-1), column \f$ j \f$ is accumulated as
	 * \f$ row\_sums_{roff+i} \leftarrow row\_sums_{roff+i} + |v| \f$ and, unless roff==coff and
	 * \f$ i = j \f$, also as \f$ row\_sums_{coff+j} \leftarrow row\_sums_{coff+j} + |v| \f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double complex, with rsb_coo_idx_t column indices.
	 * Values are consumed sequentially starting at VA[0];
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used by this body.
	 * NOTE(review): the sequential walk presumably relies on bpntr[br] being 0 -- confirm with the caller.
	 * \return \rsb_errval_inp_param_msg (this body always returns RSB_ERR_NO_ERROR)
	 */
	/* FIXME : THE FOLLOWING CODE IS NOT CORRECT (warning inherited from the generating template) */
	/* shape of the RSB_M4_INFTY_NORM_FUNCTION_BODY_UNROLLED macro output, 1 x 1 block */
	const double complex *val=VA;	/* next unread value, advanced once per visited nonzero */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_begin=bpntr[row],row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz=row_begin;nz<row_end;++nz,++val)
		{
			const rsb_coo_idx_t col=bindx[nz];
			double complex *target=row_sums+row;
			/* NOTE : should better use some intrinsic here. */
			const double complex magnitude=cabs(*val);

			target[roff]+=magnitude;
			/* mirrored contribution; skipped only when roff==coff and row==col, so that entry counts once */
			if(!(roff==coff && row==col))
				row_sums[coff+col]+=magnitude;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_complex_H__tN_r1_c1_uu_sS_dE_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Partial kernel for \f$ \|A\|_{\infty} \f$, with A taken as \f$ A = A^T \f$: each value
	 * \f$ v \f$ stored in row \f$ i \f$ (br..bc-1), column \f$ j \f$ is accumulated as
	 * \f$ row\_sums_{roff+i} \leftarrow row\_sums_{roff+i} + |v| \f$ and, unless roff==coff and
	 * \f$ i = j \f$, also as \f$ row\_sums_{coff+j} \leftarrow row\_sums_{coff+j} + |v| \f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double complex, with rsb_half_idx_t column indices.
	 * Values are consumed sequentially starting at VA[0];
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used by this body.
	 * NOTE(review): the sequential walk presumably relies on bpntr[br] being 0 -- confirm with the caller.
	 * \return \rsb_errval_inp_param_msg (this body always returns RSB_ERR_NO_ERROR)
	 */
	/* FIXME : THE FOLLOWING CODE IS NOT CORRECT (warning inherited from the generating template) */
	/* shape of the RSB_M4_INFTY_NORM_FUNCTION_BODY_UNROLLED macro output, 1 x 1 block */
	const double complex *val=VA;	/* next unread value, advanced once per visited nonzero */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_begin=bpntr[row],row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz=row_begin;nz<row_end;++nz,++val)
		{
			const rsb_coo_idx_t col=bindx[nz];	/* half-width index widened to rsb_coo_idx_t */
			double complex *target=row_sums+row;
			/* NOTE : should better use some intrinsic here. */
			const double complex magnitude=cabs(*val);

			target[roff]+=magnitude;
			/* mirrored contribution; skipped only when roff==coff and row==col, so that entry counts once */
			if(!(roff==coff && row==col))
				row_sums[coff+col]+=magnitude;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_complex_H__tN_r1_c1_uu_sS_dI_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Partial kernel for \f$ \|A\|_{\infty} \f$, with A taken as \f$ A = A^T \f$: each value
	 * \f$ v \f$ stored in row \f$ i \f$ (br..bc-1), column \f$ j \f$ is accumulated as
	 * \f$ row\_sums_{roff+i} \leftarrow row\_sums_{roff+i} + |v| \f$ and, unless roff==coff and
	 * \f$ i = j \f$, also as \f$ row\_sums_{coff+j} \leftarrow row\_sums_{coff+j} + |v| \f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double complex, with rsb_half_idx_t column indices.
	 * Values are consumed sequentially starting at VA[0];
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used by this body.
	 * NOTE(review): the sequential walk presumably relies on bpntr[br] being 0 -- confirm with the caller.
	 * \return \rsb_errval_inp_param_msg (this body always returns RSB_ERR_NO_ERROR)
	 */
	/* FIXME : THE FOLLOWING CODE IS NOT CORRECT (warning inherited from the generating template) */
	/* shape of the RSB_M4_INFTY_NORM_FUNCTION_BODY_UNROLLED macro output, 1 x 1 block */
	const double complex *val=VA;	/* next unread value, advanced once per visited nonzero */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_begin=bpntr[row],row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz=row_begin;nz<row_end;++nz,++val)
		{
			const rsb_coo_idx_t col=bindx[nz];	/* half-width index widened to rsb_coo_idx_t */
			double complex *target=row_sums+row;
			/* NOTE : should better use some intrinsic here. */
			const double complex magnitude=cabs(*val);

			target[roff]+=magnitude;
			/* mirrored contribution; skipped only when roff==coff and row==col, so that entry counts once */
			if(!(roff==coff && row==col))
				row_sums[coff+col]+=magnitude;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_complex_C__tT_r1_c1_uu_sS_dE_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Partial kernel for \f$ \|A\|_{\infty} \f$, with A taken as \f$ A = A^T \f$: each value
	 * \f$ v \f$ stored in row \f$ i \f$ (br..bc-1), column \f$ j \f$ is accumulated as
	 * \f$ row\_sums_{coff+j} \leftarrow row\_sums_{coff+j} + |v| \f$ and, unless roff==coff and
	 * \f$ i = j \f$, also as \f$ row\_sums_{roff+i} \leftarrow row\_sums_{roff+i} + |v| \f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double complex, with rsb_coo_idx_t column indices.
	 * Values are consumed sequentially starting at VA[0];
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used by this body.
	 * NOTE(review): the sequential walk presumably relies on bpntr[br] being 0 -- confirm with the caller.
	 * \return \rsb_errval_inp_param_msg (this body always returns RSB_ERR_NO_ERROR)
	 */
	/* FIXME : THE FOLLOWING CODE IS NOT CORRECT (warning inherited from the generating template) */
	/* shape of the RSB_M4_INFTY_NORM_FUNCTION_BODY_UNROLLED macro output, 1 x 1 block */
	const double complex *val=VA;	/* next unread value, advanced once per visited nonzero */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_begin=bpntr[row],row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz=row_begin;nz<row_end;++nz,++val)
		{
			const rsb_coo_idx_t col=bindx[nz];
			double complex *target=row_sums+col;
			/* NOTE : should better use some intrinsic here. */
			const double complex magnitude=cabs(*val);

			target[coff]+=magnitude;
			/* mirrored contribution; skipped only when roff==coff and row==col, so that entry counts once */
			if(!(roff==coff && row==col))
				row_sums[roff+row]+=magnitude;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_complex_C__tT_r1_c1_uu_sS_dI_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Partial kernel for \f$ \|A\|_{\infty} \f$, with A taken as \f$ A = A^T \f$: each value
	 * \f$ v \f$ stored in row \f$ i \f$ (br..bc-1), column \f$ j \f$ is accumulated as
	 * \f$ row\_sums_{coff+j} \leftarrow row\_sums_{coff+j} + |v| \f$ and, unless roff==coff and
	 * \f$ i = j \f$, also as \f$ row\_sums_{roff+i} \leftarrow row\_sums_{roff+i} + |v| \f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double complex, with rsb_coo_idx_t column indices.
	 * Values are consumed sequentially starting at VA[0];
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used by this body.
	 * NOTE(review): the sequential walk presumably relies on bpntr[br] being 0 -- confirm with the caller.
	 * \return \rsb_errval_inp_param_msg (this body always returns RSB_ERR_NO_ERROR)
	 */
	/* FIXME : THE FOLLOWING CODE IS NOT CORRECT (warning inherited from the generating template) */
	/* shape of the RSB_M4_INFTY_NORM_FUNCTION_BODY_UNROLLED macro output, 1 x 1 block */
	const double complex *val=VA;	/* next unread value, advanced once per visited nonzero */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_begin=bpntr[row],row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz=row_begin;nz<row_end;++nz,++val)
		{
			const rsb_coo_idx_t col=bindx[nz];
			double complex *target=row_sums+col;
			/* NOTE : should better use some intrinsic here. */
			const double complex magnitude=cabs(*val);

			target[coff]+=magnitude;
			/* mirrored contribution; skipped only when roff==coff and row==col, so that entry counts once */
			if(!(roff==coff && row==col))
				row_sums[roff+row]+=magnitude;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_complex_H__tT_r1_c1_uu_sS_dE_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Partial kernel for \f$ \|A\|_{\infty} \f$, with A taken as \f$ A = A^T \f$: each value
	 * \f$ v \f$ stored in row \f$ i \f$ (br..bc-1), column \f$ j \f$ is accumulated as
	 * \f$ row\_sums_{coff+j} \leftarrow row\_sums_{coff+j} + |v| \f$ and, unless roff==coff and
	 * \f$ i = j \f$, also as \f$ row\_sums_{roff+i} \leftarrow row\_sums_{roff+i} + |v| \f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double complex, with rsb_half_idx_t column indices.
	 * Values are consumed sequentially starting at VA[0];
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used by this body.
	 * NOTE(review): the sequential walk presumably relies on bpntr[br] being 0 -- confirm with the caller.
	 * \return \rsb_errval_inp_param_msg (this body always returns RSB_ERR_NO_ERROR)
	 */
	/* FIXME : THE FOLLOWING CODE IS NOT CORRECT (warning inherited from the generating template) */
	/* shape of the RSB_M4_INFTY_NORM_FUNCTION_BODY_UNROLLED macro output, 1 x 1 block */
	const double complex *val=VA;	/* next unread value, advanced once per visited nonzero */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_begin=bpntr[row],row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz=row_begin;nz<row_end;++nz,++val)
		{
			const rsb_coo_idx_t col=bindx[nz];	/* half-width index widened to rsb_coo_idx_t */
			double complex *target=row_sums+col;
			/* NOTE : should better use some intrinsic here. */
			const double complex magnitude=cabs(*val);

			target[coff]+=magnitude;
			/* mirrored contribution; skipped only when roff==coff and row==col, so that entry counts once */
			if(!(roff==coff && row==col))
				row_sums[roff+row]+=magnitude;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_complex_H__tT_r1_c1_uu_sS_dI_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Partial kernel for \f$ \|A\|_{\infty} \f$, with A taken as \f$ A = A^T \f$: each value
	 * \f$ v \f$ stored in row \f$ i \f$ (br..bc-1), column \f$ j \f$ is accumulated as
	 * \f$ row\_sums_{coff+j} \leftarrow row\_sums_{coff+j} + |v| \f$ and, unless roff==coff and
	 * \f$ i = j \f$, also as \f$ row\_sums_{roff+i} \leftarrow row\_sums_{roff+i} + |v| \f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double complex, with rsb_half_idx_t column indices.
	 * Values are consumed sequentially starting at VA[0];
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used by this body.
	 * NOTE(review): the sequential walk presumably relies on bpntr[br] being 0 -- confirm with the caller.
	 * \return \rsb_errval_inp_param_msg (this body always returns RSB_ERR_NO_ERROR)
	 */
	/* FIXME : THE FOLLOWING CODE IS NOT CORRECT (warning inherited from the generating template) */
	/* shape of the RSB_M4_INFTY_NORM_FUNCTION_BODY_UNROLLED macro output, 1 x 1 block */
	const double complex *val=VA;	/* next unread value, advanced once per visited nonzero */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_begin=bpntr[row],row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz=row_begin;nz<row_end;++nz,++val)
		{
			const rsb_coo_idx_t col=bindx[nz];	/* half-width index widened to rsb_coo_idx_t */
			double complex *target=row_sums+col;
			/* NOTE : should better use some intrinsic here. */
			const double complex magnitude=cabs(*val);

			target[coff]+=magnitude;
			/* mirrored contribution; skipped only when roff==coff and row==col, so that entry counts once */
			if(!(roff==coff && row==col))
				row_sums[roff+row]+=magnitude;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_complex_C__tC_r1_c1_uu_sS_dE_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Partial kernel for \f$ \|A\|_{\infty} \f$, with A taken as \f$ A = A^T \f$: each value
	 * \f$ v \f$ stored in row \f$ i \f$ (br..bc-1), column \f$ j \f$ is accumulated as
	 * \f$ row\_sums_{coff+j} \leftarrow row\_sums_{coff+j} + |\bar{v}| \f$ and, unless roff==coff and
	 * \f$ i = j \f$, also as \f$ row\_sums_{roff+i} \leftarrow row\_sums_{roff+i} + |\bar{v}| \f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double complex, with rsb_coo_idx_t column indices.
	 * Values are consumed sequentially starting at VA[0];
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used by this body.
	 * NOTE(review): the sequential walk presumably relies on bpntr[br] being 0 -- confirm with the caller.
	 * \return \rsb_errval_inp_param_msg (this body always returns RSB_ERR_NO_ERROR)
	 */
	/* FIXME : THE FOLLOWING CODE IS NOT CORRECT (warning inherited from the generating template) */
	/* shape of the RSB_M4_INFTY_NORM_FUNCTION_BODY_UNROLLED macro output, 1 x 1 block */
	const double complex *val=VA;	/* next unread value, advanced once per visited nonzero */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_begin=bpntr[row],row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz=row_begin;nz<row_end;++nz,++val)
		{
			const rsb_coo_idx_t col=bindx[nz];
			double complex *target=row_sums+col;
			/* NOTE : should better use some intrinsic here. */
			const double complex conjugated=conj(*val);	/* conjugation kept as emitted by the template */
			const double complex magnitude=cabs(conjugated);

			target[coff]+=magnitude;
			/* mirrored contribution; skipped only when roff==coff and row==col, so that entry counts once */
			if(!(roff==coff && row==col))
				row_sums[roff+row]+=magnitude;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_complex_C__tC_r1_c1_uu_sS_dI_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Partial kernel for \f$ \|A\|_{\infty} \f$, with A taken as \f$ A = A^T \f$: each value
	 * \f$ v \f$ stored in row \f$ i \f$ (br..bc-1), column \f$ j \f$ is accumulated as
	 * \f$ row\_sums_{coff+j} \leftarrow row\_sums_{coff+j} + |\bar{v}| \f$ and, unless roff==coff and
	 * \f$ i = j \f$, also as \f$ row\_sums_{roff+i} \leftarrow row\_sums_{roff+i} + |\bar{v}| \f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double complex, with rsb_coo_idx_t column indices.
	 * Values are consumed sequentially starting at VA[0];
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used by this body.
	 * NOTE(review): the sequential walk presumably relies on bpntr[br] being 0 -- confirm with the caller.
	 * \return \rsb_errval_inp_param_msg (this body always returns RSB_ERR_NO_ERROR)
	 */
	/* FIXME : THE FOLLOWING CODE IS NOT CORRECT (warning inherited from the generating template) */
	/* shape of the RSB_M4_INFTY_NORM_FUNCTION_BODY_UNROLLED macro output, 1 x 1 block */
	const double complex *val=VA;	/* next unread value, advanced once per visited nonzero */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_begin=bpntr[row],row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz=row_begin;nz<row_end;++nz,++val)
		{
			const rsb_coo_idx_t col=bindx[nz];
			double complex *target=row_sums+col;
			/* NOTE : should better use some intrinsic here. */
			const double complex conjugated=conj(*val);	/* conjugation kept as emitted by the template */
			const double complex magnitude=cabs(conjugated);

			target[coff]+=magnitude;
			/* mirrored contribution; skipped only when roff==coff and row==col, so that entry counts once */
			if(!(roff==coff && row==col))
				row_sums[roff+row]+=magnitude;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_complex_H__tC_r1_c1_uu_sS_dE_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Partial kernel for \f$ \|A\|_{\infty} \f$, with A taken as \f$ A = A^T \f$: each value
	 * \f$ v \f$ stored in row \f$ i \f$ (br..bc-1), column \f$ j \f$ is accumulated as
	 * \f$ row\_sums_{coff+j} \leftarrow row\_sums_{coff+j} + |\bar{v}| \f$ and, unless roff==coff and
	 * \f$ i = j \f$, also as \f$ row\_sums_{roff+i} \leftarrow row\_sums_{roff+i} + |\bar{v}| \f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double complex, with rsb_half_idx_t column indices.
	 * Values are consumed sequentially starting at VA[0];
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used by this body.
	 * NOTE(review): the sequential walk presumably relies on bpntr[br] being 0 -- confirm with the caller.
	 * \return \rsb_errval_inp_param_msg (this body always returns RSB_ERR_NO_ERROR)
	 */
	/* FIXME : THE FOLLOWING CODE IS NOT CORRECT (warning inherited from the generating template) */
	/* shape of the RSB_M4_INFTY_NORM_FUNCTION_BODY_UNROLLED macro output, 1 x 1 block */
	const double complex *val=VA;	/* next unread value, advanced once per visited nonzero */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_begin=bpntr[row],row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz=row_begin;nz<row_end;++nz,++val)
		{
			const rsb_coo_idx_t col=bindx[nz];	/* half-width index widened to rsb_coo_idx_t */
			double complex *target=row_sums+col;
			/* NOTE : should better use some intrinsic here. */
			const double complex conjugated=conj(*val);	/* conjugation kept as emitted by the template */
			const double complex magnitude=cabs(conjugated);

			target[coff]+=magnitude;
			/* mirrored contribution; skipped only when roff==coff and row==col, so that entry counts once */
			if(!(roff==coff && row==col))
				row_sums[roff+row]+=magnitude;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_complex_H__tC_r1_c1_uu_sS_dI_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Partial kernel for \f$ \|A\|_{\infty} \f$, with A taken as \f$ A = A^T \f$: each value
	 * \f$ v \f$ stored in row \f$ i \f$ (br..bc-1), column \f$ j \f$ is accumulated as
	 * \f$ row\_sums_{coff+j} \leftarrow row\_sums_{coff+j} + |\bar{v}| \f$ and, unless roff==coff and
	 * \f$ i = j \f$, also as \f$ row\_sums_{roff+i} \leftarrow row\_sums_{roff+i} + |\bar{v}| \f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double complex, with rsb_half_idx_t column indices.
	 * Values are consumed sequentially starting at VA[0];
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used by this body.
	 * NOTE(review): the sequential walk presumably relies on bpntr[br] being 0 -- confirm with the caller.
	 * \return \rsb_errval_inp_param_msg (this body always returns RSB_ERR_NO_ERROR)
	 */
	/* FIXME : THE FOLLOWING CODE IS NOT CORRECT (warning inherited from the generating template) */
	/* shape of the RSB_M4_INFTY_NORM_FUNCTION_BODY_UNROLLED macro output, 1 x 1 block */
	const double complex *val=VA;	/* next unread value, advanced once per visited nonzero */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_begin=bpntr[row],row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz=row_begin;nz<row_end;++nz,++val)
		{
			const rsb_coo_idx_t col=bindx[nz];	/* half-width index widened to rsb_coo_idx_t */
			double complex *target=row_sums+col;
			/* NOTE : should better use some intrinsic here. */
			const double complex conjugated=conj(*val);	/* conjugation kept as emitted by the template */
			const double complex magnitude=cabs(conjugated);

			target[coff]+=magnitude;
			/* mirrored contribution; skipped only when roff==coff and row==col, so that entry counts once */
			if(!(roff==coff && row==col))
				row_sums[roff+row]+=magnitude;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_complex_C__tN_r1_c1_uu_sH_dE_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Partial kernel for \f$ \|A\|_{\infty} \f$, with A taken as \f$ A = A^H \f$: each value
	 * \f$ v \f$ stored in row \f$ i \f$ (br..bc-1), column \f$ j \f$ is accumulated as
	 * \f$ row\_sums_{roff+i} \leftarrow row\_sums_{roff+i} + |v| \f$ and, unless roff==coff and
	 * \f$ i = j \f$, also as \f$ row\_sums_{coff+j} \leftarrow row\_sums_{coff+j} + |v| \f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double complex, with rsb_coo_idx_t column indices.
	 * Values are consumed sequentially starting at VA[0];
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used by this body.
	 * NOTE(review): the sequential walk presumably relies on bpntr[br] being 0 -- confirm with the caller.
	 * \return \rsb_errval_inp_param_msg (this body always returns RSB_ERR_NO_ERROR)
	 */
	/* FIXME : THE FOLLOWING CODE IS NOT CORRECT (warning inherited from the generating template) */
	/* shape of the RSB_M4_INFTY_NORM_FUNCTION_BODY_UNROLLED macro output, 1 x 1 block */
	const double complex *val=VA;	/* next unread value, advanced once per visited nonzero */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_begin=bpntr[row],row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz=row_begin;nz<row_end;++nz,++val)
		{
			const rsb_coo_idx_t col=bindx[nz];
			double complex *target=row_sums+row;
			/* NOTE : should better use some intrinsic here. */
			const double complex magnitude=cabs(*val);

			target[roff]+=magnitude;
			/* mirrored contribution; skipped only when roff==coff and row==col, so that entry counts once */
			if(!(roff==coff && row==col))
				row_sums[coff+col]+=magnitude;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_complex_C__tN_r1_c1_uu_sH_dI_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Partial kernel for \f$ \|A\|_{\infty} \f$, with A taken as \f$ A = A^H \f$: each value
	 * \f$ v \f$ stored in row \f$ i \f$ (br..bc-1), column \f$ j \f$ is accumulated as
	 * \f$ row\_sums_{roff+i} \leftarrow row\_sums_{roff+i} + |v| \f$ and, unless roff==coff and
	 * \f$ i = j \f$, also as \f$ row\_sums_{coff+j} \leftarrow row\_sums_{coff+j} + |v| \f$.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double complex, with rsb_coo_idx_t column indices.
	 * Values are consumed sequentially starting at VA[0];
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not used by this body.
	 * NOTE(review): the sequential walk presumably relies on bpntr[br] being 0 -- confirm with the caller.
	 * \return \rsb_errval_inp_param_msg (this body always returns RSB_ERR_NO_ERROR)
	 */
	/* FIXME : THE FOLLOWING CODE IS NOT CORRECT (warning inherited from the generating template) */
	/* shape of the RSB_M4_INFTY_NORM_FUNCTION_BODY_UNROLLED macro output, 1 x 1 block */
	const double complex *val=VA;	/* next unread value, advanced once per visited nonzero */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_begin=bpntr[row],row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz=row_begin;nz<row_end;++nz,++val)
		{
			const rsb_coo_idx_t col=bindx[nz];
			double complex *target=row_sums+row;
			/* NOTE : should better use some intrinsic here. */
			const double complex magnitude=cabs(*val);

			target[roff]+=magnitude;
			/* mirrored contribution; skipped only when roff==coff and row==col, so that entry counts once */
			if(!(roff==coff && row==col))
				row_sums[coff+col]+=magnitude;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_complex_H__tN_r1_c1_uu_sH_dE_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity norm partial kernel (accumulates moduli into row_sums), for A == A^H.
	 * Matrix A is expected blocked 1 x 1, in BCSR format, diagonal explicit, of type double complex, with rsb_half_idx_t column indices.
	 *
	 * Each stored value, read sequentially starting at VA, adds its modulus to
	 * row_sums[roff+row]; unless roff==coff and row==col it is also added to
	 * row_sums[coff+col], where col is the matching bindx entry
	 * (converted to rsb_coo_idx_t).
	 * Rows are visited from br up to (excluding) bc; the generator marks this
	 * as experimental, for the bounded box patch.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not referenced.
	 * FIXME (inherited from the generator): this kernel body is flagged as not correct.
	 * \return RSB_ERR_NO_ERROR in every case.
	 */
	const double complex *val = VA;	/* advances by one per stored nonzero */
	rsb_coo_idx_t row;

	for(row = br; RSB_LIKELY(row < bc); ++row)
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		while(nz < nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			double complex *const slot = row_sums + row;
			double complex modulus = 0;

			modulus += cabs(*val);
			slot[roff] += modulus;
			/* mirrored contribution, skipped only on the diagonal */
			if(!(roff == coff && row == col))
				row_sums[coff + col] += modulus;
			++nz;
			++val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_complex_H__tN_r1_c1_uu_sH_dI_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity norm partial kernel (accumulates moduli into row_sums), for A == A^H.
	 * Matrix A is expected blocked 1 x 1, in BCSR format, diagonal implicit, of type double complex, with rsb_half_idx_t column indices.
	 *
	 * Each stored value, read sequentially starting at VA, adds its modulus to
	 * row_sums[roff+row]; unless roff==coff and row==col it is also added to
	 * row_sums[coff+col], where col is the matching bindx entry
	 * (converted to rsb_coo_idx_t).
	 * Rows are visited from br up to (excluding) bc; the generator marks this
	 * as experimental, for the bounded box patch.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not referenced.
	 * FIXME (inherited from the generator): this kernel body is flagged as not correct.
	 * \return RSB_ERR_NO_ERROR in every case.
	 */
	const double complex *val = VA;	/* advances by one per stored nonzero */
	rsb_coo_idx_t row;

	for(row = br; RSB_LIKELY(row < bc); ++row)
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		while(nz < nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			double complex *const slot = row_sums + row;
			double complex modulus = 0;

			modulus += cabs(*val);
			slot[roff] += modulus;
			/* mirrored contribution, skipped only on the diagonal */
			if(!(roff == coff && row == col))
				row_sums[coff + col] += modulus;
			++nz;
			++val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_complex_C__tT_r1_c1_uu_sH_dE_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity norm partial kernel (accumulates moduli into row_sums), for A == A^H;
	 * column-indexed (tT in the function name) flavour.
	 * Matrix A is expected blocked 1 x 1, in BCSR format, diagonal explicit, of type double complex, with rsb_coo_idx_t column indices.
	 *
	 * Each stored value, read sequentially starting at VA, adds its modulus to
	 * row_sums[coff+col], col being the matching bindx entry; unless roff==coff
	 * and row==col it is also added to row_sums[roff+row].
	 * Rows are visited from br up to (excluding) bc; the generator marks this
	 * as experimental, for the bounded box patch.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not referenced.
	 * FIXME (inherited from the generator): this kernel body is flagged as not correct.
	 * \return RSB_ERR_NO_ERROR in every case.
	 */
	const double complex *val = VA;	/* advances by one per stored nonzero */
	rsb_coo_idx_t row;

	for(row = br; RSB_LIKELY(row < bc); ++row)
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		while(nz < nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			double complex *const slot = row_sums + col;
			double complex modulus = 0;

			modulus += cabs(*val);
			slot[coff] += modulus;
			/* mirrored contribution, skipped only on the diagonal */
			if(!(roff == coff && row == col))
				row_sums[roff + row] += modulus;
			++nz;
			++val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_complex_C__tT_r1_c1_uu_sH_dI_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity norm partial kernel (accumulates moduli into row_sums), for A == A^H;
	 * column-indexed (tT in the function name) flavour.
	 * Matrix A is expected blocked 1 x 1, in BCSR format, diagonal implicit, of type double complex, with rsb_coo_idx_t column indices.
	 *
	 * Each stored value, read sequentially starting at VA, adds its modulus to
	 * row_sums[coff+col], col being the matching bindx entry; unless roff==coff
	 * and row==col it is also added to row_sums[roff+row].
	 * Rows are visited from br up to (excluding) bc; the generator marks this
	 * as experimental, for the bounded box patch.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not referenced.
	 * FIXME (inherited from the generator): this kernel body is flagged as not correct.
	 * \return RSB_ERR_NO_ERROR in every case.
	 */
	const double complex *val = VA;	/* advances by one per stored nonzero */
	rsb_coo_idx_t row;

	for(row = br; RSB_LIKELY(row < bc); ++row)
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		while(nz < nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			double complex *const slot = row_sums + col;
			double complex modulus = 0;

			modulus += cabs(*val);
			slot[coff] += modulus;
			/* mirrored contribution, skipped only on the diagonal */
			if(!(roff == coff && row == col))
				row_sums[roff + row] += modulus;
			++nz;
			++val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_complex_H__tT_r1_c1_uu_sH_dE_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity norm partial kernel (accumulates moduli into row_sums), for A == A^H;
	 * column-indexed (tT in the function name) flavour.
	 * Matrix A is expected blocked 1 x 1, in BCSR format, diagonal explicit, of type double complex, with rsb_half_idx_t column indices.
	 *
	 * Each stored value, read sequentially starting at VA, adds its modulus to
	 * row_sums[coff+col], col being the matching bindx entry (converted to
	 * rsb_coo_idx_t); unless roff==coff and row==col it is also added to
	 * row_sums[roff+row].
	 * Rows are visited from br up to (excluding) bc; the generator marks this
	 * as experimental, for the bounded box patch.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not referenced.
	 * FIXME (inherited from the generator): this kernel body is flagged as not correct.
	 * \return RSB_ERR_NO_ERROR in every case.
	 */
	const double complex *val = VA;	/* advances by one per stored nonzero */
	rsb_coo_idx_t row;

	for(row = br; RSB_LIKELY(row < bc); ++row)
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		while(nz < nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			double complex *const slot = row_sums + col;
			double complex modulus = 0;

			modulus += cabs(*val);
			slot[coff] += modulus;
			/* mirrored contribution, skipped only on the diagonal */
			if(!(roff == coff && row == col))
				row_sums[roff + row] += modulus;
			++nz;
			++val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_complex_H__tT_r1_c1_uu_sH_dI_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity norm partial kernel (accumulates moduli into row_sums), for A == A^H;
	 * column-indexed (tT in the function name) flavour.
	 * Matrix A is expected blocked 1 x 1, in BCSR format, diagonal implicit, of type double complex, with rsb_half_idx_t column indices.
	 *
	 * Each stored value, read sequentially starting at VA, adds its modulus to
	 * row_sums[coff+col], col being the matching bindx entry (converted to
	 * rsb_coo_idx_t); unless roff==coff and row==col it is also added to
	 * row_sums[roff+row].
	 * Rows are visited from br up to (excluding) bc; the generator marks this
	 * as experimental, for the bounded box patch.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not referenced.
	 * FIXME (inherited from the generator): this kernel body is flagged as not correct.
	 * \return RSB_ERR_NO_ERROR in every case.
	 */
	const double complex *val = VA;	/* advances by one per stored nonzero */
	rsb_coo_idx_t row;

	for(row = br; RSB_LIKELY(row < bc); ++row)
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		while(nz < nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			double complex *const slot = row_sums + col;
			double complex modulus = 0;

			modulus += cabs(*val);
			slot[coff] += modulus;
			/* mirrored contribution, skipped only on the diagonal */
			if(!(roff == coff && row == col))
				row_sums[roff + row] += modulus;
			++nz;
			++val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_complex_C__tC_r1_c1_uu_sH_dE_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity norm partial kernel (accumulates moduli into row_sums), for A == A^H;
	 * column-indexed, conjugating (tC in the function name) flavour.
	 * Matrix A is expected blocked 1 x 1, in BCSR format, diagonal explicit, of type double complex, with rsb_coo_idx_t column indices.
	 *
	 * Each stored value, read sequentially starting at VA, adds the modulus of
	 * its conjugate to row_sums[coff+col], col being the matching bindx entry;
	 * unless roff==coff and row==col it is also added to row_sums[roff+row].
	 * Rows are visited from br up to (excluding) bc; the generator marks this
	 * as experimental, for the bounded box patch.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not referenced.
	 * FIXME (inherited from the generator): this kernel body is flagged as not correct.
	 * \return RSB_ERR_NO_ERROR in every case.
	 */
	const double complex *val = VA;	/* advances by one per stored nonzero */
	rsb_coo_idx_t row;

	for(row = br; RSB_LIKELY(row < bc); ++row)
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		while(nz < nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			double complex *const slot = row_sums + col;
			double complex modulus = 0;

			modulus += cabs(conj(*val));
			slot[coff] += modulus;
			/* mirrored contribution, skipped only on the diagonal */
			if(!(roff == coff && row == col))
				row_sums[roff + row] += modulus;
			++nz;
			++val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_complex_C__tC_r1_c1_uu_sH_dI_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity norm partial kernel (accumulates moduli into row_sums), for A == A^H;
	 * column-indexed, conjugating (tC in the function name) flavour.
	 * Matrix A is expected blocked 1 x 1, in BCSR format, diagonal implicit, of type double complex, with rsb_coo_idx_t column indices.
	 *
	 * Each stored value, read sequentially starting at VA, adds the modulus of
	 * its conjugate to row_sums[coff+col], col being the matching bindx entry;
	 * unless roff==coff and row==col it is also added to row_sums[roff+row].
	 * Rows are visited from br up to (excluding) bc; the generator marks this
	 * as experimental, for the bounded box patch.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not referenced.
	 * FIXME (inherited from the generator): this kernel body is flagged as not correct.
	 * \return RSB_ERR_NO_ERROR in every case.
	 */
	const double complex *val = VA;	/* advances by one per stored nonzero */
	rsb_coo_idx_t row;

	for(row = br; RSB_LIKELY(row < bc); ++row)
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		while(nz < nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			double complex *const slot = row_sums + col;
			double complex modulus = 0;

			modulus += cabs(conj(*val));
			slot[coff] += modulus;
			/* mirrored contribution, skipped only on the diagonal */
			if(!(roff == coff && row == col))
				row_sums[roff + row] += modulus;
			++nz;
			++val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_complex_H__tC_r1_c1_uu_sH_dE_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity norm partial kernel (accumulates moduli into row_sums), for A == A^H;
	 * column-indexed, conjugating (tC in the function name) flavour.
	 * Matrix A is expected blocked 1 x 1, in BCSR format, diagonal explicit, of type double complex, with rsb_half_idx_t column indices.
	 *
	 * Each stored value, read sequentially starting at VA, adds the modulus of
	 * its conjugate to row_sums[coff+col], col being the matching bindx entry
	 * (converted to rsb_coo_idx_t); unless roff==coff and row==col it is also
	 * added to row_sums[roff+row].
	 * Rows are visited from br up to (excluding) bc; the generator marks this
	 * as experimental, for the bounded box patch.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not referenced.
	 * FIXME (inherited from the generator): this kernel body is flagged as not correct.
	 * \return RSB_ERR_NO_ERROR in every case.
	 */
	const double complex *val = VA;	/* advances by one per stored nonzero */
	rsb_coo_idx_t row;

	for(row = br; RSB_LIKELY(row < bc); ++row)
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		while(nz < nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			double complex *const slot = row_sums + col;
			double complex modulus = 0;

			modulus += cabs(conj(*val));
			slot[coff] += modulus;
			/* mirrored contribution, skipped only on the diagonal */
			if(!(roff == coff && row == col))
				row_sums[roff + row] += modulus;
			++nz;
			++val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_complex_H__tC_r1_c1_uu_sH_dI_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity norm partial kernel (accumulates moduli into row_sums), for A == A^H;
	 * column-indexed, conjugating (tC in the function name) flavour.
	 * Matrix A is expected blocked 1 x 1, in BCSR format, diagonal implicit, of type double complex, with rsb_half_idx_t column indices.
	 *
	 * Each stored value, read sequentially starting at VA, adds the modulus of
	 * its conjugate to row_sums[coff+col], col being the matching bindx entry
	 * (converted to rsb_coo_idx_t); unless roff==coff and row==col it is also
	 * added to row_sums[roff+row].
	 * Rows are visited from br up to (excluding) bc; the generator marks this
	 * as experimental, for the bounded box patch.
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not referenced.
	 * FIXME (inherited from the generator): this kernel body is flagged as not correct.
	 * \return RSB_ERR_NO_ERROR in every case.
	 */
	const double complex *val = VA;	/* advances by one per stored nonzero */
	rsb_coo_idx_t row;

	for(row = br; RSB_LIKELY(row < bc); ++row)
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		while(nz < nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];
			double complex *const slot = row_sums + col;
			double complex modulus = 0;

			modulus += cabs(conj(*val));
			slot[coff] += modulus;
			/* mirrored contribution, skipped only on the diagonal */
			if(!(roff == coff && row == col))
				row_sums[roff + row] += modulus;
			++nz;
			++val;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_complex_C__tN_r1_c1_uu_sU_dE_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel (accumulates stored values into row_sums), for A \neq A^T.
	 * Matrix A is expected blocked 1 x 1, in BCSR format, diagonal explicit, of type double complex, with rsb_coo_idx_t column indices.
	 *
	 * Each stored value, read sequentially starting at VA, is added to
	 * row_sums[roff+row]; the column index is not needed by this variant.
	 * Rows are visited from br up to (excluding) bc; the generator marks this
	 * as experimental, for the bounded box patch.
	 * Mdim, mdim, bindx, indptr, rpntr, cpntr, coff and flags do not influence the result.
	 * FIXME (inherited from the generator): this kernel body is flagged as not correct.
	 * \return RSB_ERR_NO_ERROR in every case.
	 */
	const double complex *val = VA;	/* advances by one per stored nonzero */
	rsb_coo_idx_t row;

	for(row = br; RSB_LIKELY(row < bc); ++row)
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		for(; nz < nz_end; ++nz, ++val)
		{
			double complex *const slot = row_sums + row;
			double complex term = 0;	/* zero-started accumulator, as generated */

			term += *val;
			slot[roff] += term;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_complex_C__tN_r1_c1_uu_sU_dI_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel (accumulates stored values into row_sums), for A \neq A^T.
	 * Matrix A is expected blocked 1 x 1, in BCSR format, diagonal implicit, of type double complex, with rsb_coo_idx_t column indices.
	 *
	 * Each stored value, read sequentially starting at VA, is added to
	 * row_sums[roff+row]; the column index is not needed by this variant.
	 * Rows are visited from br up to (excluding) bc; the generator marks this
	 * as experimental, for the bounded box patch.
	 * Mdim, mdim, bindx, indptr, rpntr, cpntr, coff and flags do not influence the result.
	 * FIXME (inherited from the generator): this kernel body is flagged as not correct.
	 * \return RSB_ERR_NO_ERROR in every case.
	 */
	const double complex *val = VA;	/* advances by one per stored nonzero */
	rsb_coo_idx_t row;

	for(row = br; RSB_LIKELY(row < bc); ++row)
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		for(; nz < nz_end; ++nz, ++val)
		{
			double complex *const slot = row_sums + row;
			double complex term = 0;	/* zero-started accumulator, as generated */

			term += *val;
			slot[roff] += term;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_complex_H__tN_r1_c1_uu_sU_dE_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel (accumulates stored values into row_sums), for A \neq A^T.
	 * Matrix A is expected blocked 1 x 1, in BCSR format, diagonal explicit, of type double complex, with rsb_half_idx_t column indices.
	 *
	 * Each stored value, read sequentially starting at VA, is added to
	 * row_sums[roff+row]; the column index is not needed by this variant.
	 * Rows are visited from br up to (excluding) bc; the generator marks this
	 * as experimental, for the bounded box patch.
	 * Mdim, mdim, bindx, indptr, rpntr, cpntr, coff and flags do not influence the result.
	 * FIXME (inherited from the generator): this kernel body is flagged as not correct.
	 * \return RSB_ERR_NO_ERROR in every case.
	 */
	const double complex *val = VA;	/* advances by one per stored nonzero */
	rsb_coo_idx_t row;

	for(row = br; RSB_LIKELY(row < bc); ++row)
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		for(; nz < nz_end; ++nz, ++val)
		{
			double complex *const slot = row_sums + row;
			double complex term = 0;	/* zero-started accumulator, as generated */

			term += *val;
			slot[roff] += term;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_complex_H__tN_r1_c1_uu_sU_dI_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel (accumulates stored values into row_sums), for A \neq A^T.
	 * Matrix A is expected blocked 1 x 1, in BCSR format, diagonal implicit, of type double complex, with rsb_half_idx_t column indices.
	 *
	 * Each stored value, read sequentially starting at VA, is added to
	 * row_sums[roff+row]; the column index is not needed by this variant.
	 * Rows are visited from br up to (excluding) bc; the generator marks this
	 * as experimental, for the bounded box patch.
	 * Mdim, mdim, bindx, indptr, rpntr, cpntr, coff and flags do not influence the result.
	 * FIXME (inherited from the generator): this kernel body is flagged as not correct.
	 * \return RSB_ERR_NO_ERROR in every case.
	 */
	const double complex *val = VA;	/* advances by one per stored nonzero */
	rsb_coo_idx_t row;

	for(row = br; RSB_LIKELY(row < bc); ++row)
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		for(; nz < nz_end; ++nz, ++val)
		{
			double complex *const slot = row_sums + row;
			double complex term = 0;	/* zero-started accumulator, as generated */

			term += *val;
			slot[roff] += term;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_complex_C__tT_r1_c1_uu_sU_dE_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel (accumulates stored values into row_sums), for A \neq A^T;
	 * column-indexed (tT in the function name) flavour.
	 * Matrix A is expected blocked 1 x 1, in BCSR format, diagonal explicit, of type double complex, with rsb_coo_idx_t column indices.
	 *
	 * Each stored value, read sequentially starting at VA, is added to
	 * row_sums[coff+col], col being the matching bindx entry.
	 * Rows are visited from br up to (excluding) bc; the generator marks this
	 * as experimental, for the bounded box patch.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff and flags are not referenced.
	 * FIXME (inherited from the generator): this kernel body is flagged as not correct.
	 * \return RSB_ERR_NO_ERROR in every case.
	 */
	const double complex *val = VA;	/* advances by one per stored nonzero */
	rsb_coo_idx_t row;

	for(row = br; RSB_LIKELY(row < bc); ++row)
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		for(; nz < nz_end; ++nz, ++val)
		{
			const rsb_coo_idx_t col = bindx[nz];
			double complex *const slot = row_sums + col;
			double complex term = 0;	/* zero-started accumulator, as generated */

			term += *val;
			slot[coff] += term;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_complex_C__tT_r1_c1_uu_sU_dI_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel (accumulates stored values into row_sums), for A \neq A^T;
	 * column-indexed (tT in the function name) flavour.
	 * Matrix A is expected blocked 1 x 1, in BCSR format, diagonal implicit, of type double complex, with rsb_coo_idx_t column indices.
	 *
	 * Each stored value, read sequentially starting at VA, is added to
	 * row_sums[coff+col], col being the matching bindx entry.
	 * Rows are visited from br up to (excluding) bc; the generator marks this
	 * as experimental, for the bounded box patch.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff and flags are not referenced.
	 * FIXME (inherited from the generator): this kernel body is flagged as not correct.
	 * \return RSB_ERR_NO_ERROR in every case.
	 */
	const double complex *val = VA;	/* advances by one per stored nonzero */
	rsb_coo_idx_t row;

	for(row = br; RSB_LIKELY(row < bc); ++row)
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		for(; nz < nz_end; ++nz, ++val)
		{
			const rsb_coo_idx_t col = bindx[nz];
			double complex *const slot = row_sums + col;
			double complex term = 0;	/* zero-started accumulator, as generated */

			term += *val;
			slot[coff] += term;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_complex_H__tT_r1_c1_uu_sU_dE_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel (accumulates stored values into row_sums), for A \neq A^T;
	 * column-indexed (tT in the function name) flavour.
	 * Matrix A is expected blocked 1 x 1, in BCSR format, diagonal explicit, of type double complex, with rsb_half_idx_t column indices.
	 *
	 * Each stored value, read sequentially starting at VA, is added to
	 * row_sums[coff+col], col being the matching bindx entry (converted to
	 * rsb_coo_idx_t).
	 * Rows are visited from br up to (excluding) bc; the generator marks this
	 * as experimental, for the bounded box patch.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff and flags are not referenced.
	 * FIXME (inherited from the generator): this kernel body is flagged as not correct.
	 * \return RSB_ERR_NO_ERROR in every case.
	 */
	const double complex *val = VA;	/* advances by one per stored nonzero */
	rsb_coo_idx_t row;

	for(row = br; RSB_LIKELY(row < bc); ++row)
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		for(; nz < nz_end; ++nz, ++val)
		{
			const rsb_coo_idx_t col = bindx[nz];
			double complex *const slot = row_sums + col;
			double complex term = 0;	/* zero-started accumulator, as generated */

			term += *val;
			slot[coff] += term;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_complex_H__tT_r1_c1_uu_sU_dI_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel (accumulates stored values into row_sums), for A \neq A^T;
	 * column-indexed (tT in the function name) flavour.
	 * Matrix A is expected blocked 1 x 1, in BCSR format, diagonal implicit, of type double complex, with rsb_half_idx_t column indices.
	 *
	 * Each stored value, read sequentially starting at VA, is added to
	 * row_sums[coff+col], col being the matching bindx entry (converted to
	 * rsb_coo_idx_t).
	 * Rows are visited from br up to (excluding) bc; the generator marks this
	 * as experimental, for the bounded box patch.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff and flags are not referenced.
	 * FIXME (inherited from the generator): this kernel body is flagged as not correct.
	 * \return RSB_ERR_NO_ERROR in every case.
	 */
	const double complex *val = VA;	/* advances by one per stored nonzero */
	rsb_coo_idx_t row;

	for(row = br; RSB_LIKELY(row < bc); ++row)
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		for(; nz < nz_end; ++nz, ++val)
		{
			const rsb_coo_idx_t col = bindx[nz];
			double complex *const slot = row_sums + col;
			double complex term = 0;	/* zero-started accumulator, as generated */

			term += *val;
			slot[coff] += term;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_complex_C__tC_r1_c1_uu_sU_dE_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel (accumulates stored values into row_sums), for A \neq A^T;
	 * column-indexed, conjugating (tC in the function name) flavour.
	 * Matrix A is expected blocked 1 x 1, in BCSR format, diagonal explicit, of type double complex, with rsb_coo_idx_t column indices.
	 *
	 * The conjugate of each stored value, read sequentially starting at VA, is
	 * added to row_sums[coff+col], col being the matching bindx entry.
	 * Rows are visited from br up to (excluding) bc; the generator marks this
	 * as experimental, for the bounded box patch.
	 * Mdim, mdim, indptr, rpntr, cpntr, roff and flags are not referenced.
	 * FIXME (inherited from the generator): this kernel body is flagged as not correct.
	 * \return RSB_ERR_NO_ERROR in every case.
	 */
	const double complex *val = VA;	/* advances by one per stored nonzero */
	rsb_coo_idx_t row;

	for(row = br; RSB_LIKELY(row < bc); ++row)
	{
		rsb_nnz_idx_t nz = bpntr[row];
		const rsb_nnz_idx_t nz_end = bpntr[row+1];

		for(; nz < nz_end; ++nz, ++val)
		{
			const rsb_coo_idx_t col = bindx[nz];
			double complex *const slot = row_sums + col;
			double complex term = 0;	/* zero-started accumulator, as generated */

			term += conj(*val);
			slot[coff] += term;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_complex_C__tC_r1_c1_uu_sU_dI_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel for the conjugate transposed operand (\f$ A^H \f$) of an unsymmetric matrix A.
	 * Each stored value \f$ v \f$ with local column index \c j contributes
	 * \f$ row\_sums_{coff+j} \leftarrow row\_sums_{coff+j} + \overline{v} \f$.
	 * Variant generated for: 1 x 1 blocking, BCSR format, diagonal implicit, double complex values, rsb_coo_idx_t column indices.
	 * Only the stored entries are processed: nothing is added here on behalf of an implicit diagonal.
	 * Local rows from \c br (included) up to \c bc (excluded) are visited, \c bpntr delimiting the entries of each row in \c bindx;
	 * the values are read sequentially, starting from \c VA[0].
	 * \c Mdim, \c mdim, \c indptr, \c rpntr, \c cpntr, \c roff and \c flags are not read here.
	 * \note The code generator tagged this kernel body with "FIXME : THE FOLLOWING CODE IS NOT CORRECT", without stating why.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */
	const double complex *val = VA;	/* advances by one value per visited entry */
	rsb_coo_idx_t row = br;

	while(RSB_LIKELY(row < bc))
	{
		const rsb_nnz_idx_t first = bpntr[row], last = bpntr[row+1];
		rsb_nnz_idx_t pos;

		for(pos = first; pos < last; ++pos, ++val)
		{
			const rsb_coo_idx_t col = bindx[pos];
			double complex * const dst = row_sums + col + coff;
			double complex acc = 0;	/* accumulator form kept, as emitted by the unrolled generator */

			acc += conj(*val);
			*dst += acc;
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_complex_H__tC_r1_c1_uu_sU_dE_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel for the conjugate transposed operand (\f$ A^H \f$) of an unsymmetric matrix A.
	 * Each stored value \f$ v \f$ with local column index \c j contributes
	 * \f$ row\_sums_{coff+j} \leftarrow row\_sums_{coff+j} + \overline{v} \f$.
	 * Variant generated for: 1 x 1 blocking, BCSR format, diagonal explicit, double complex values, rsb_half_idx_t column indices.
	 * Local rows from \c br (included) up to \c bc (excluded) are visited, \c bpntr delimiting the entries of each row in \c bindx;
	 * the values are read sequentially, starting from \c VA[0].
	 * \c Mdim, \c mdim, \c indptr, \c rpntr, \c cpntr, \c roff and \c flags are not read here.
	 * \note The code generator tagged this kernel body with "FIXME : THE FOLLOWING CODE IS NOT CORRECT", without stating why.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */
	const double complex *val = VA;	/* advances by one value per visited entry */
	rsb_coo_idx_t row = br;

	while(RSB_LIKELY(row < bc))
	{
		const rsb_nnz_idx_t first = bpntr[row], last = bpntr[row+1];
		rsb_nnz_idx_t pos;

		for(pos = first; pos < last; ++pos, ++val)
		{
			const rsb_coo_idx_t col = bindx[pos];	/* half-width index widened to rsb_coo_idx_t */
			double complex * const dst = row_sums + col + coff;
			double complex acc = 0;	/* accumulator form kept, as emitted by the unrolled generator */

			acc += conj(*val);
			*dst += acc;
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_complex_H__tC_r1_c1_uu_sU_dI_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel for the conjugate transposed operand (\f$ A^H \f$) of an unsymmetric matrix A.
	 * Each stored value \f$ v \f$ with local column index \c j contributes
	 * \f$ row\_sums_{coff+j} \leftarrow row\_sums_{coff+j} + \overline{v} \f$.
	 * Variant generated for: 1 x 1 blocking, BCSR format, diagonal implicit, double complex values, rsb_half_idx_t column indices.
	 * Only the stored entries are processed: nothing is added here on behalf of an implicit diagonal.
	 * Local rows from \c br (included) up to \c bc (excluded) are visited, \c bpntr delimiting the entries of each row in \c bindx;
	 * the values are read sequentially, starting from \c VA[0].
	 * \c Mdim, \c mdim, \c indptr, \c rpntr, \c cpntr, \c roff and \c flags are not read here.
	 * \note The code generator tagged this kernel body with "FIXME : THE FOLLOWING CODE IS NOT CORRECT", without stating why.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */
	const double complex *val = VA;	/* advances by one value per visited entry */
	rsb_coo_idx_t row = br;

	while(RSB_LIKELY(row < bc))
	{
		const rsb_nnz_idx_t first = bpntr[row], last = bpntr[row+1];
		rsb_nnz_idx_t pos;

		for(pos = first; pos < last; ++pos, ++val)
		{
			const rsb_coo_idx_t col = bindx[pos];	/* half-width index widened to rsb_coo_idx_t */
			double complex * const dst = row_sums + col + coff;
			double complex acc = 0;	/* accumulator form kept, as emitted by the unrolled generator */

			acc += conj(*val);
			*dst += acc;
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_complex_C__tN_r1_c1_uu_sS_dE_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel for the untransposed operand of a symmetric matrix A (\f$ A == A^T \f$).
	 * Each stored value \f$ v \f$ at local row \c i and local column \c j is added to
	 * \f$ row\_sums_{roff+i} \f$ and, unless it lies on the global diagonal (\c roff==coff and \c i==j),
	 * also to \f$ row\_sums_{coff+j} \f$ on behalf of its mirrored counterpart.
	 * Variant generated for: 1 x 1 blocking, BCSR format, diagonal explicit, double complex values, rsb_coo_idx_t column indices.
	 * Local rows from \c br (included) up to \c bc (excluded) are visited, \c bpntr delimiting the entries of each row in \c bindx;
	 * the values are read sequentially, starting from \c VA[0].
	 * \c Mdim, \c mdim, \c indptr, \c rpntr, \c cpntr and \c flags are not read here.
	 * \note The code generator tagged this kernel body with "FIXME : THE FOLLOWING CODE IS NOT CORRECT", without stating why.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */
	const double complex *val = VA;	/* advances by one value per visited entry */
	rsb_coo_idx_t row = br;

	while(RSB_LIKELY(row < bc))
	{
		const rsb_nnz_idx_t first = bpntr[row], last = bpntr[row+1];
		rsb_nnz_idx_t pos;

		for(pos = first; pos < last; ++pos, ++val)
		{
			const rsb_coo_idx_t col = bindx[pos];
			double complex * const own = row_sums + row + roff;
			double complex acc = 0;	/* accumulator form kept, as emitted by the unrolled generator */

			acc += *val;
			*own += acc;

			/* mirrored contribution, skipped only for entries on the global diagonal */
			if(!(roff == coff && row == col))
				row_sums[coff + col] += acc;
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_complex_C__tN_r1_c1_uu_sS_dI_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel for the untransposed operand of a symmetric matrix A (\f$ A == A^T \f$).
	 * Each stored value \f$ v \f$ at local row \c i and local column \c j is added to
	 * \f$ row\_sums_{roff+i} \f$ and, unless it lies on the global diagonal (\c roff==coff and \c i==j),
	 * also to \f$ row\_sums_{coff+j} \f$ on behalf of its mirrored counterpart.
	 * Variant generated for: 1 x 1 blocking, BCSR format, diagonal implicit, double complex values, rsb_coo_idx_t column indices.
	 * Only the stored entries are processed: nothing is added here on behalf of an implicit diagonal.
	 * Local rows from \c br (included) up to \c bc (excluded) are visited, \c bpntr delimiting the entries of each row in \c bindx;
	 * the values are read sequentially, starting from \c VA[0].
	 * \c Mdim, \c mdim, \c indptr, \c rpntr, \c cpntr and \c flags are not read here.
	 * \note The code generator tagged this kernel body with "FIXME : THE FOLLOWING CODE IS NOT CORRECT", without stating why.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */
	const double complex *val = VA;	/* advances by one value per visited entry */
	rsb_coo_idx_t row = br;

	while(RSB_LIKELY(row < bc))
	{
		const rsb_nnz_idx_t first = bpntr[row], last = bpntr[row+1];
		rsb_nnz_idx_t pos;

		for(pos = first; pos < last; ++pos, ++val)
		{
			const rsb_coo_idx_t col = bindx[pos];
			double complex * const own = row_sums + row + roff;
			double complex acc = 0;	/* accumulator form kept, as emitted by the unrolled generator */

			acc += *val;
			*own += acc;

			/* mirrored contribution, skipped only for entries on the global diagonal */
			if(!(roff == coff && row == col))
				row_sums[coff + col] += acc;
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_complex_H__tN_r1_c1_uu_sS_dE_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel for the untransposed operand of a symmetric matrix A (\f$ A == A^T \f$).
	 * Each stored value \f$ v \f$ at local row \c i and local column \c j is added to
	 * \f$ row\_sums_{roff+i} \f$ and, unless it lies on the global diagonal (\c roff==coff and \c i==j),
	 * also to \f$ row\_sums_{coff+j} \f$ on behalf of its mirrored counterpart.
	 * Variant generated for: 1 x 1 blocking, BCSR format, diagonal explicit, double complex values, rsb_half_idx_t column indices.
	 * Local rows from \c br (included) up to \c bc (excluded) are visited, \c bpntr delimiting the entries of each row in \c bindx;
	 * the values are read sequentially, starting from \c VA[0].
	 * \c Mdim, \c mdim, \c indptr, \c rpntr, \c cpntr and \c flags are not read here.
	 * \note The code generator tagged this kernel body with "FIXME : THE FOLLOWING CODE IS NOT CORRECT", without stating why.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */
	const double complex *val = VA;	/* advances by one value per visited entry */
	rsb_coo_idx_t row = br;

	while(RSB_LIKELY(row < bc))
	{
		const rsb_nnz_idx_t first = bpntr[row], last = bpntr[row+1];
		rsb_nnz_idx_t pos;

		for(pos = first; pos < last; ++pos, ++val)
		{
			const rsb_coo_idx_t col = bindx[pos];	/* half-width index widened to rsb_coo_idx_t */
			double complex * const own = row_sums + row + roff;
			double complex acc = 0;	/* accumulator form kept, as emitted by the unrolled generator */

			acc += *val;
			*own += acc;

			/* mirrored contribution, skipped only for entries on the global diagonal */
			if(!(roff == coff && row == col))
				row_sums[coff + col] += acc;
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_complex_H__tN_r1_c1_uu_sS_dI_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel for the untransposed operand of a symmetric matrix A (\f$ A == A^T \f$).
	 * Each stored value \f$ v \f$ at local row \c i and local column \c j is added to
	 * \f$ row\_sums_{roff+i} \f$ and, unless it lies on the global diagonal (\c roff==coff and \c i==j),
	 * also to \f$ row\_sums_{coff+j} \f$ on behalf of its mirrored counterpart.
	 * Variant generated for: 1 x 1 blocking, BCSR format, diagonal implicit, double complex values, rsb_half_idx_t column indices.
	 * Only the stored entries are processed: nothing is added here on behalf of an implicit diagonal.
	 * Local rows from \c br (included) up to \c bc (excluded) are visited, \c bpntr delimiting the entries of each row in \c bindx;
	 * the values are read sequentially, starting from \c VA[0].
	 * \c Mdim, \c mdim, \c indptr, \c rpntr, \c cpntr and \c flags are not read here.
	 * \note The code generator tagged this kernel body with "FIXME : THE FOLLOWING CODE IS NOT CORRECT", without stating why.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */
	const double complex *val = VA;	/* advances by one value per visited entry */
	rsb_coo_idx_t row = br;

	while(RSB_LIKELY(row < bc))
	{
		const rsb_nnz_idx_t first = bpntr[row], last = bpntr[row+1];
		rsb_nnz_idx_t pos;

		for(pos = first; pos < last; ++pos, ++val)
		{
			const rsb_coo_idx_t col = bindx[pos];	/* half-width index widened to rsb_coo_idx_t */
			double complex * const own = row_sums + row + roff;
			double complex acc = 0;	/* accumulator form kept, as emitted by the unrolled generator */

			acc += *val;
			*own += acc;

			/* mirrored contribution, skipped only for entries on the global diagonal */
			if(!(roff == coff && row == col))
				row_sums[coff + col] += acc;
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_complex_C__tT_r1_c1_uu_sS_dE_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel for the transposed operand (\f$ A^T \f$) of a symmetric matrix A (\f$ A == A^T \f$).
	 * Each stored value \f$ v \f$ at local row \c i and local column \c j is added to
	 * \f$ row\_sums_{coff+j} \f$ and, unless it lies on the global diagonal (\c roff==coff and \c i==j),
	 * also to \f$ row\_sums_{roff+i} \f$ on behalf of its mirrored counterpart.
	 * Variant generated for: 1 x 1 blocking, BCSR format, diagonal explicit, double complex values, rsb_coo_idx_t column indices.
	 * Local rows from \c br (included) up to \c bc (excluded) are visited, \c bpntr delimiting the entries of each row in \c bindx;
	 * the values are read sequentially, starting from \c VA[0].
	 * \c Mdim, \c mdim, \c indptr, \c rpntr, \c cpntr and \c flags are not read here.
	 * \note The code generator tagged this kernel body with "FIXME : THE FOLLOWING CODE IS NOT CORRECT", without stating why.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */
	const double complex *val = VA;	/* advances by one value per visited entry */
	rsb_coo_idx_t row = br;

	while(RSB_LIKELY(row < bc))
	{
		const rsb_nnz_idx_t first = bpntr[row], last = bpntr[row+1];
		rsb_nnz_idx_t pos;

		for(pos = first; pos < last; ++pos, ++val)
		{
			const rsb_coo_idx_t col = bindx[pos];
			double complex * const dst = row_sums + col + coff;
			double complex acc = 0;	/* accumulator form kept, as emitted by the unrolled generator */

			acc += *val;
			*dst += acc;

			/* mirrored contribution, skipped only for entries on the global diagonal */
			if(!(roff == coff && row == col))
				row_sums[roff + row] += acc;
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_complex_C__tT_r1_c1_uu_sS_dI_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel for the transposed operand (\f$ A^T \f$) of a symmetric matrix A (\f$ A == A^T \f$).
	 * Each stored value \f$ v \f$ at local row \c i and local column \c j is added to
	 * \f$ row\_sums_{coff+j} \f$ and, unless it lies on the global diagonal (\c roff==coff and \c i==j),
	 * also to \f$ row\_sums_{roff+i} \f$ on behalf of its mirrored counterpart.
	 * Variant generated for: 1 x 1 blocking, BCSR format, diagonal implicit, double complex values, rsb_coo_idx_t column indices.
	 * Only the stored entries are processed: nothing is added here on behalf of an implicit diagonal.
	 * Local rows from \c br (included) up to \c bc (excluded) are visited, \c bpntr delimiting the entries of each row in \c bindx;
	 * the values are read sequentially, starting from \c VA[0].
	 * \c Mdim, \c mdim, \c indptr, \c rpntr, \c cpntr and \c flags are not read here.
	 * \note The code generator tagged this kernel body with "FIXME : THE FOLLOWING CODE IS NOT CORRECT", without stating why.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */
	const double complex *val = VA;	/* advances by one value per visited entry */
	rsb_coo_idx_t row = br;

	while(RSB_LIKELY(row < bc))
	{
		const rsb_nnz_idx_t first = bpntr[row], last = bpntr[row+1];
		rsb_nnz_idx_t pos;

		for(pos = first; pos < last; ++pos, ++val)
		{
			const rsb_coo_idx_t col = bindx[pos];
			double complex * const dst = row_sums + col + coff;
			double complex acc = 0;	/* accumulator form kept, as emitted by the unrolled generator */

			acc += *val;
			*dst += acc;

			/* mirrored contribution, skipped only for entries on the global diagonal */
			if(!(roff == coff && row == col))
				row_sums[roff + row] += acc;
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_complex_H__tT_r1_c1_uu_sS_dE_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel for the transposed operand (\f$ A^T \f$) of a symmetric matrix A (\f$ A == A^T \f$).
	 * Each stored value \f$ v \f$ at local row \c i and local column \c j is added to
	 * \f$ row\_sums_{coff+j} \f$ and, unless it lies on the global diagonal (\c roff==coff and \c i==j),
	 * also to \f$ row\_sums_{roff+i} \f$ on behalf of its mirrored counterpart.
	 * Variant generated for: 1 x 1 blocking, BCSR format, diagonal explicit, double complex values, rsb_half_idx_t column indices.
	 * Local rows from \c br (included) up to \c bc (excluded) are visited, \c bpntr delimiting the entries of each row in \c bindx;
	 * the values are read sequentially, starting from \c VA[0].
	 * \c Mdim, \c mdim, \c indptr, \c rpntr, \c cpntr and \c flags are not read here.
	 * \note The code generator tagged this kernel body with "FIXME : THE FOLLOWING CODE IS NOT CORRECT", without stating why.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */
	const double complex *val = VA;	/* advances by one value per visited entry */
	rsb_coo_idx_t row = br;

	while(RSB_LIKELY(row < bc))
	{
		const rsb_nnz_idx_t first = bpntr[row], last = bpntr[row+1];
		rsb_nnz_idx_t pos;

		for(pos = first; pos < last; ++pos, ++val)
		{
			const rsb_coo_idx_t col = bindx[pos];	/* half-width index widened to rsb_coo_idx_t */
			double complex * const dst = row_sums + col + coff;
			double complex acc = 0;	/* accumulator form kept, as emitted by the unrolled generator */

			acc += *val;
			*dst += acc;

			/* mirrored contribution, skipped only for entries on the global diagonal */
			if(!(roff == coff && row == col))
				row_sums[roff + row] += acc;
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_complex_H__tT_r1_c1_uu_sS_dI_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel for the transposed operand (\f$ A^T \f$) of a symmetric matrix A (\f$ A == A^T \f$).
	 * Each stored value \f$ v \f$ at local row \c i and local column \c j is added to
	 * \f$ row\_sums_{coff+j} \f$ and, unless it lies on the global diagonal (\c roff==coff and \c i==j),
	 * also to \f$ row\_sums_{roff+i} \f$ on behalf of its mirrored counterpart.
	 * Variant generated for: 1 x 1 blocking, BCSR format, diagonal implicit, double complex values, rsb_half_idx_t column indices.
	 * Only the stored entries are processed: nothing is added here on behalf of an implicit diagonal.
	 * Local rows from \c br (included) up to \c bc (excluded) are visited, \c bpntr delimiting the entries of each row in \c bindx;
	 * the values are read sequentially, starting from \c VA[0].
	 * \c Mdim, \c mdim, \c indptr, \c rpntr, \c cpntr and \c flags are not read here.
	 * \note The code generator tagged this kernel body with "FIXME : THE FOLLOWING CODE IS NOT CORRECT", without stating why.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */
	const double complex *val = VA;	/* advances by one value per visited entry */
	rsb_coo_idx_t row = br;

	while(RSB_LIKELY(row < bc))
	{
		const rsb_nnz_idx_t first = bpntr[row], last = bpntr[row+1];
		rsb_nnz_idx_t pos;

		for(pos = first; pos < last; ++pos, ++val)
		{
			const rsb_coo_idx_t col = bindx[pos];	/* half-width index widened to rsb_coo_idx_t */
			double complex * const dst = row_sums + col + coff;
			double complex acc = 0;	/* accumulator form kept, as emitted by the unrolled generator */

			acc += *val;
			*dst += acc;

			/* mirrored contribution, skipped only for entries on the global diagonal */
			if(!(roff == coff && row == col))
				row_sums[roff + row] += acc;
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_complex_C__tC_r1_c1_uu_sS_dE_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel for the conjugate transposed operand (\f$ A^H \f$) of a symmetric matrix A (\f$ A == A^T \f$).
	 * The conjugate \f$ \overline{v} \f$ of each stored value at local row \c i and local column \c j is added to
	 * \f$ row\_sums_{coff+j} \f$ and, unless the entry lies on the global diagonal (\c roff==coff and \c i==j),
	 * also to \f$ row\_sums_{roff+i} \f$ on behalf of its mirrored counterpart.
	 * Variant generated for: 1 x 1 blocking, BCSR format, diagonal explicit, double complex values, rsb_coo_idx_t column indices.
	 * Local rows from \c br (included) up to \c bc (excluded) are visited, \c bpntr delimiting the entries of each row in \c bindx;
	 * the values are read sequentially, starting from \c VA[0].
	 * \c Mdim, \c mdim, \c indptr, \c rpntr, \c cpntr and \c flags are not read here.
	 * \note The code generator tagged this kernel body with "FIXME : THE FOLLOWING CODE IS NOT CORRECT", without stating why.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */
	const double complex *val = VA;	/* advances by one value per visited entry */
	rsb_coo_idx_t row = br;

	while(RSB_LIKELY(row < bc))
	{
		const rsb_nnz_idx_t first = bpntr[row], last = bpntr[row+1];
		rsb_nnz_idx_t pos;

		for(pos = first; pos < last; ++pos, ++val)
		{
			const rsb_coo_idx_t col = bindx[pos];
			double complex * const dst = row_sums + col + coff;
			double complex acc = 0;	/* accumulator form kept, as emitted by the unrolled generator */

			acc += conj(*val);
			*dst += acc;

			/* mirrored contribution, skipped only for entries on the global diagonal */
			if(!(roff == coff && row == col))
				row_sums[roff + row] += acc;
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_complex_C__tC_r1_c1_uu_sS_dI_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel for the conjugate transposed operand (\f$ A^H \f$) of a symmetric matrix A (\f$ A == A^T \f$).
	 * The conjugate \f$ \overline{v} \f$ of each stored value at local row \c i and local column \c j is added to
	 * \f$ row\_sums_{coff+j} \f$ and, unless the entry lies on the global diagonal (\c roff==coff and \c i==j),
	 * also to \f$ row\_sums_{roff+i} \f$ on behalf of its mirrored counterpart.
	 * Variant generated for: 1 x 1 blocking, BCSR format, diagonal implicit, double complex values, rsb_coo_idx_t column indices.
	 * Only the stored entries are processed: nothing is added here on behalf of an implicit diagonal.
	 * Local rows from \c br (included) up to \c bc (excluded) are visited, \c bpntr delimiting the entries of each row in \c bindx;
	 * the values are read sequentially, starting from \c VA[0].
	 * \c Mdim, \c mdim, \c indptr, \c rpntr, \c cpntr and \c flags are not read here.
	 * \note The code generator tagged this kernel body with "FIXME : THE FOLLOWING CODE IS NOT CORRECT", without stating why.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */
	const double complex *val = VA;	/* advances by one value per visited entry */
	rsb_coo_idx_t row = br;

	while(RSB_LIKELY(row < bc))
	{
		const rsb_nnz_idx_t first = bpntr[row], last = bpntr[row+1];
		rsb_nnz_idx_t pos;

		for(pos = first; pos < last; ++pos, ++val)
		{
			const rsb_coo_idx_t col = bindx[pos];
			double complex * const dst = row_sums + col + coff;
			double complex acc = 0;	/* accumulator form kept, as emitted by the unrolled generator */

			acc += conj(*val);
			*dst += acc;

			/* mirrored contribution, skipped only for entries on the global diagonal */
			if(!(roff == coff && row == col))
				row_sums[roff + row] += acc;
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_complex_H__tC_r1_c1_uu_sS_dE_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel for the conjugate transposed operand (\f$ A^H \f$) of a symmetric matrix A (\f$ A == A^T \f$).
	 * The conjugate \f$ \overline{v} \f$ of each stored value at local row \c i and local column \c j is added to
	 * \f$ row\_sums_{coff+j} \f$ and, unless the entry lies on the global diagonal (\c roff==coff and \c i==j),
	 * also to \f$ row\_sums_{roff+i} \f$ on behalf of its mirrored counterpart.
	 * Variant generated for: 1 x 1 blocking, BCSR format, diagonal explicit, double complex values, rsb_half_idx_t column indices.
	 * Local rows from \c br (included) up to \c bc (excluded) are visited, \c bpntr delimiting the entries of each row in \c bindx;
	 * the values are read sequentially, starting from \c VA[0].
	 * \c Mdim, \c mdim, \c indptr, \c rpntr, \c cpntr and \c flags are not read here.
	 * \note The code generator tagged this kernel body with "FIXME : THE FOLLOWING CODE IS NOT CORRECT", without stating why.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */
	const double complex *val = VA;	/* advances by one value per visited entry */
	rsb_coo_idx_t row = br;

	while(RSB_LIKELY(row < bc))
	{
		const rsb_nnz_idx_t first = bpntr[row], last = bpntr[row+1];
		rsb_nnz_idx_t pos;

		for(pos = first; pos < last; ++pos, ++val)
		{
			const rsb_coo_idx_t col = bindx[pos];	/* half-width index widened to rsb_coo_idx_t */
			double complex * const dst = row_sums + col + coff;
			double complex acc = 0;	/* accumulator form kept, as emitted by the unrolled generator */

			acc += conj(*val);
			*dst += acc;

			/* mirrored contribution, skipped only for entries on the global diagonal */
			if(!(roff == coff && row == col))
				row_sums[roff + row] += acc;
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_complex_H__tC_r1_c1_uu_sS_dI_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel for the conjugate transposed operand (\f$ A^H \f$) of a symmetric matrix A (\f$ A == A^T \f$).
	 * The conjugate \f$ \overline{v} \f$ of each stored value at local row \c i and local column \c j is added to
	 * \f$ row\_sums_{coff+j} \f$ and, unless the entry lies on the global diagonal (\c roff==coff and \c i==j),
	 * also to \f$ row\_sums_{roff+i} \f$ on behalf of its mirrored counterpart.
	 * Variant generated for: 1 x 1 blocking, BCSR format, diagonal implicit, double complex values, rsb_half_idx_t column indices.
	 * Only the stored entries are processed: nothing is added here on behalf of an implicit diagonal.
	 * Local rows from \c br (included) up to \c bc (excluded) are visited, \c bpntr delimiting the entries of each row in \c bindx;
	 * the values are read sequentially, starting from \c VA[0].
	 * \c Mdim, \c mdim, \c indptr, \c rpntr, \c cpntr and \c flags are not read here.
	 * \note The code generator tagged this kernel body with "FIXME : THE FOLLOWING CODE IS NOT CORRECT", without stating why.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */
	const double complex *val = VA;	/* advances by one value per visited entry */
	rsb_coo_idx_t row = br;

	while(RSB_LIKELY(row < bc))
	{
		const rsb_nnz_idx_t first = bpntr[row], last = bpntr[row+1];
		rsb_nnz_idx_t pos;

		for(pos = first; pos < last; ++pos, ++val)
		{
			const rsb_coo_idx_t col = bindx[pos];	/* half-width index widened to rsb_coo_idx_t */
			double complex * const dst = row_sums + col + coff;
			double complex acc = 0;	/* accumulator form kept, as emitted by the unrolled generator */

			acc += conj(*val);
			*dst += acc;

			/* mirrored contribution, skipped only for entries on the global diagonal */
			if(!(roff == coff && row == col))
				row_sums[roff + row] += acc;
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_complex_C__tN_r1_c1_uu_sH_dE_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel for the untransposed operand of a hermitian matrix A (\f$ A == A^H \f$).
	 * Each stored value \f$ v \f$ at local row \c i and local column \c j is added to
	 * \f$ row\_sums_{roff+i} \f$; unless the entry lies on the global diagonal (\c roff==coff and \c i==j),
	 * its mirrored counterpart \f$ \overline{v} \f$ is added to \f$ row\_sums_{coff+j} \f$.
	 * Variant generated for: 1 x 1 blocking, BCSR format, diagonal explicit, double complex values, rsb_coo_idx_t column indices.
	 * Local rows from \c br (included) up to \c bc (excluded) are visited, \c bpntr delimiting the entries of each row in \c bindx;
	 * the values are read sequentially, starting from \c VA[0].
	 * \c Mdim, \c mdim, \c indptr, \c rpntr, \c cpntr and \c flags are not read here.
	 * \note The generated code used to add the mirrored contribution unconjugated (as in the symmetric
	 * kernels) and was tagged "FIXME : THE FOLLOWING CODE IS NOT CORRECT"; the mirrored term is now conjugated.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */
	const double complex *val = VA;	/* advances by one value per visited entry */
	rsb_coo_idx_t row = br;

	while(RSB_LIKELY(row < bc))
	{
		const rsb_nnz_idx_t first = bpntr[row], last = bpntr[row+1];
		rsb_nnz_idx_t pos;

		for(pos = first; pos < last; ++pos, ++val)
		{
			const rsb_coo_idx_t col = bindx[pos];
			double complex * const own = row_sums + row + roff;
			double complex acc = 0;

			acc += *val;
			*own += acc;

			/* A == A^H: the implied entry at (j,i) is the conjugate of the stored (i,j) one */
			if(roff != coff || row != col)
				row_sums[coff + col] += conj(acc);
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_complex_C__tN_r1_c1_uu_sH_dI_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel for the untransposed operand of a hermitian matrix A (\f$ A == A^H \f$).
	 * Each stored value \f$ v \f$ at local row \c i and local column \c j is added to
	 * \f$ row\_sums_{roff+i} \f$; unless the entry lies on the global diagonal (\c roff==coff and \c i==j),
	 * its mirrored counterpart \f$ \overline{v} \f$ is added to \f$ row\_sums_{coff+j} \f$.
	 * Variant generated for: 1 x 1 blocking, BCSR format, diagonal implicit, double complex values, rsb_coo_idx_t column indices.
	 * Only the stored entries are processed: nothing is added here on behalf of an implicit diagonal.
	 * Local rows from \c br (included) up to \c bc (excluded) are visited, \c bpntr delimiting the entries of each row in \c bindx;
	 * the values are read sequentially, starting from \c VA[0].
	 * \c Mdim, \c mdim, \c indptr, \c rpntr, \c cpntr and \c flags are not read here.
	 * \note The generated code used to add the mirrored contribution unconjugated (as in the symmetric
	 * kernels) and was tagged "FIXME : THE FOLLOWING CODE IS NOT CORRECT"; the mirrored term is now conjugated.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */
	const double complex *val = VA;	/* advances by one value per visited entry */
	rsb_coo_idx_t row = br;

	while(RSB_LIKELY(row < bc))
	{
		const rsb_nnz_idx_t first = bpntr[row], last = bpntr[row+1];
		rsb_nnz_idx_t pos;

		for(pos = first; pos < last; ++pos, ++val)
		{
			const rsb_coo_idx_t col = bindx[pos];
			double complex * const own = row_sums + row + roff;
			double complex acc = 0;

			acc += *val;
			*own += acc;

			/* A == A^H: the implied entry at (j,i) is the conjugate of the stored (i,j) one */
			if(roff != coff || row != col)
				row_sums[coff + col] += conj(acc);
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_complex_H__tN_r1_c1_uu_sH_dE_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel for the untransposed operand of a hermitian matrix A (\f$ A == A^H \f$).
	 * Each stored value \f$ v \f$ at local row \c i and local column \c j is added to
	 * \f$ row\_sums_{roff+i} \f$; unless the entry lies on the global diagonal (\c roff==coff and \c i==j),
	 * its mirrored counterpart \f$ \overline{v} \f$ is added to \f$ row\_sums_{coff+j} \f$.
	 * Variant generated for: 1 x 1 blocking, BCSR format, diagonal explicit, double complex values, rsb_half_idx_t column indices.
	 * Local rows from \c br (included) up to \c bc (excluded) are visited, \c bpntr delimiting the entries of each row in \c bindx;
	 * the values are read sequentially, starting from \c VA[0].
	 * \c Mdim, \c mdim, \c indptr, \c rpntr, \c cpntr and \c flags are not read here.
	 * \note The generated code used to add the mirrored contribution unconjugated (as in the symmetric
	 * kernels) and was tagged "FIXME : THE FOLLOWING CODE IS NOT CORRECT"; the mirrored term is now conjugated.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */
	const double complex *val = VA;	/* advances by one value per visited entry */
	rsb_coo_idx_t row = br;

	while(RSB_LIKELY(row < bc))
	{
		const rsb_nnz_idx_t first = bpntr[row], last = bpntr[row+1];
		rsb_nnz_idx_t pos;

		for(pos = first; pos < last; ++pos, ++val)
		{
			const rsb_coo_idx_t col = bindx[pos];	/* half-width index widened to rsb_coo_idx_t */
			double complex * const own = row_sums + row + roff;
			double complex acc = 0;

			acc += *val;
			*own += acc;

			/* A == A^H: the implied entry at (j,i) is the conjugate of the stored (i,j) one */
			if(roff != coff || row != col)
				row_sums[coff + col] += conj(acc);
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_complex_H__tN_r1_c1_uu_sH_dI_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel for the untransposed operand of a hermitian matrix A (\f$ A == A^H \f$).
	 * Each stored value \f$ v \f$ at local row \c i and local column \c j is added to
	 * \f$ row\_sums_{roff+i} \f$; unless the entry lies on the global diagonal (\c roff==coff and \c i==j),
	 * its mirrored counterpart \f$ \overline{v} \f$ is added to \f$ row\_sums_{coff+j} \f$.
	 * Variant generated for: 1 x 1 blocking, BCSR format, diagonal implicit, double complex values, rsb_half_idx_t column indices.
	 * Only the stored entries are processed: nothing is added here on behalf of an implicit diagonal.
	 * Local rows from \c br (included) up to \c bc (excluded) are visited, \c bpntr delimiting the entries of each row in \c bindx;
	 * the values are read sequentially, starting from \c VA[0].
	 * \c Mdim, \c mdim, \c indptr, \c rpntr, \c cpntr and \c flags are not read here.
	 * \note The generated code used to add the mirrored contribution unconjugated (as in the symmetric
	 * kernels) and was tagged "FIXME : THE FOLLOWING CODE IS NOT CORRECT"; the mirrored term is now conjugated.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */
	const double complex *val = VA;	/* advances by one value per visited entry */
	rsb_coo_idx_t row = br;

	while(RSB_LIKELY(row < bc))
	{
		const rsb_nnz_idx_t first = bpntr[row], last = bpntr[row+1];
		rsb_nnz_idx_t pos;

		for(pos = first; pos < last; ++pos, ++val)
		{
			const rsb_coo_idx_t col = bindx[pos];	/* half-width index widened to rsb_coo_idx_t */
			double complex * const own = row_sums + row + roff;
			double complex acc = 0;

			acc += *val;
			*own += acc;

			/* A == A^H: the implied entry at (j,i) is the conjugate of the stored (i,j) one */
			if(roff != coff || row != col)
				row_sums[coff + col] += conj(acc);
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_complex_C__tT_r1_c1_uu_sH_dE_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel, transposed case.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, hermitian (A == A^H),
	 * diagonal explicit, of type double complex, with rsb_coo_idx_t column indices.
	 *
	 * For each stored entry at row i and column j=bindx[k], its value is added
	 * to row_sums[coff+j]; unless roff==coff and i==j, the same value is also
	 * added to row_sums[roff+i] (presumably on behalf of the mirrored entry,
	 * which is not stored; note that it is added unconjugated).
	 *
	 * Rows br (inclusive) to bc (exclusive) are visited; values are read
	 * sequentially starting at VA[0].
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not accessed.
	 *
	 * FIXME: the generator flags this accumulation code as not correct.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	const double complex *val = VA;	/* advances by one per stored entry */
	rsb_coo_idx_t row = br;

	while(RSB_LIKELY(row<bc))	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t nz_end = bpntr[row+1];
		rsb_nnz_idx_t nz;

		/* ROWSSUMS KERNEL HERE */
		for(nz=bpntr[row]; nz<nz_end; ++nz, ++val)
		{
			const rsb_coo_idx_t col = bindx[nz];
			double complex * const col_sum = row_sums+col;
			double complex term = 0;

			/* NOTE : should better use some intrinsic here. */
			term += val[0];
			col_sum[coff] += term;
			if(roff==coff && row==col)
				continue;	/* same offsets and same indices: counted once */
			row_sums[roff+row] += term;
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_complex_C__tT_r1_c1_uu_sH_dI_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel, transposed case.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, hermitian (A == A^H),
	 * diagonal implicit, of type double complex, with rsb_coo_idx_t column indices.
	 *
	 * For each stored entry at row i and column j=bindx[k], its value is added
	 * to row_sums[coff+j]; unless roff==coff and i==j, the same value is also
	 * added to row_sums[roff+i] (presumably on behalf of the mirrored entry,
	 * which is not stored; note that it is added unconjugated).
	 *
	 * Rows br (inclusive) to bc (exclusive) are visited; values are read
	 * sequentially starting at VA[0].
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not accessed.
	 *
	 * FIXME: the generator flags this accumulation code as not correct.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	const double complex *val = VA;	/* advances by one per stored entry */
	rsb_coo_idx_t row = br;

	while(RSB_LIKELY(row<bc))	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t nz_end = bpntr[row+1];
		rsb_nnz_idx_t nz;

		/* ROWSSUMS KERNEL HERE */
		for(nz=bpntr[row]; nz<nz_end; ++nz, ++val)
		{
			const rsb_coo_idx_t col = bindx[nz];
			double complex * const col_sum = row_sums+col;
			double complex term = 0;

			/* NOTE : should better use some intrinsic here. */
			term += val[0];
			col_sum[coff] += term;
			if(roff==coff && row==col)
				continue;	/* same offsets and same indices: counted once */
			row_sums[roff+row] += term;
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_complex_H__tT_r1_c1_uu_sH_dE_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel, transposed case.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, hermitian (A == A^H),
	 * diagonal explicit, of type double complex, with rsb_half_idx_t column indices.
	 *
	 * For each stored entry at row i and column j=bindx[k], its value is added
	 * to row_sums[coff+j]; unless roff==coff and i==j, the same value is also
	 * added to row_sums[roff+i] (presumably on behalf of the mirrored entry,
	 * which is not stored; note that it is added unconjugated).
	 *
	 * Rows br (inclusive) to bc (exclusive) are visited; values are read
	 * sequentially starting at VA[0].
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not accessed.
	 *
	 * FIXME: the generator flags this accumulation code as not correct.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	const double complex *val = VA;	/* advances by one per stored entry */
	rsb_coo_idx_t row = br;

	while(RSB_LIKELY(row<bc))	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t nz_end = bpntr[row+1];
		rsb_nnz_idx_t nz;

		/* ROWSSUMS KERNEL HERE */
		for(nz=bpntr[row]; nz<nz_end; ++nz, ++val)
		{
			const rsb_coo_idx_t col = bindx[nz];
			double complex * const col_sum = row_sums+col;
			double complex term = 0;

			/* NOTE : should better use some intrinsic here. */
			term += val[0];
			col_sum[coff] += term;
			if(roff==coff && row==col)
				continue;	/* same offsets and same indices: counted once */
			row_sums[roff+row] += term;
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_complex_H__tT_r1_c1_uu_sH_dI_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel, transposed case.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, hermitian (A == A^H),
	 * diagonal implicit, of type double complex, with rsb_half_idx_t column indices.
	 *
	 * For each stored entry at row i and column j=bindx[k], its value is added
	 * to row_sums[coff+j]; unless roff==coff and i==j, the same value is also
	 * added to row_sums[roff+i] (presumably on behalf of the mirrored entry,
	 * which is not stored; note that it is added unconjugated).
	 *
	 * Rows br (inclusive) to bc (exclusive) are visited; values are read
	 * sequentially starting at VA[0].
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not accessed.
	 *
	 * FIXME: the generator flags this accumulation code as not correct.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	const double complex *val = VA;	/* advances by one per stored entry */
	rsb_coo_idx_t row = br;

	while(RSB_LIKELY(row<bc))	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t nz_end = bpntr[row+1];
		rsb_nnz_idx_t nz;

		/* ROWSSUMS KERNEL HERE */
		for(nz=bpntr[row]; nz<nz_end; ++nz, ++val)
		{
			const rsb_coo_idx_t col = bindx[nz];
			double complex * const col_sum = row_sums+col;
			double complex term = 0;

			/* NOTE : should better use some intrinsic here. */
			term += val[0];
			col_sum[coff] += term;
			if(roff==coff && row==col)
				continue;	/* same offsets and same indices: counted once */
			row_sums[roff+row] += term;
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_complex_C__tC_r1_c1_uu_sH_dE_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel, conjugate transposed case.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, hermitian (A == A^H),
	 * diagonal explicit, of type double complex, with rsb_coo_idx_t column indices.
	 *
	 * For each stored entry at row i and column j=bindx[k], the conjugate of
	 * its value is added to row_sums[coff+j]; unless roff==coff and i==j, the
	 * same conjugated value is also added to row_sums[roff+i] (presumably on
	 * behalf of the mirrored entry, which is not stored).
	 *
	 * Rows br (inclusive) to bc (exclusive) are visited; values are read
	 * sequentially starting at VA[0].
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not accessed.
	 *
	 * FIXME: the generator flags this accumulation code as not correct.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	const double complex *val = VA;	/* advances by one per stored entry */
	rsb_coo_idx_t row = br;

	while(RSB_LIKELY(row<bc))	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t nz_end = bpntr[row+1];
		rsb_nnz_idx_t nz;

		/* ROWSSUMS KERNEL HERE */
		for(nz=bpntr[row]; nz<nz_end; ++nz, ++val)
		{
			const rsb_coo_idx_t col = bindx[nz];
			double complex * const col_sum = row_sums+col;
			double complex term = 0;

			/* NOTE : should better use some intrinsic here. */
			term += conj(val[0]);
			col_sum[coff] += term;
			if(roff==coff && row==col)
				continue;	/* same offsets and same indices: counted once */
			row_sums[roff+row] += term;
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_complex_C__tC_r1_c1_uu_sH_dI_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel, conjugate transposed case.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, hermitian (A == A^H),
	 * diagonal implicit, of type double complex, with rsb_coo_idx_t column indices.
	 *
	 * For each stored entry at row i and column j=bindx[k], the conjugate of
	 * its value is added to row_sums[coff+j]; unless roff==coff and i==j, the
	 * same conjugated value is also added to row_sums[roff+i] (presumably on
	 * behalf of the mirrored entry, which is not stored).
	 *
	 * Rows br (inclusive) to bc (exclusive) are visited; values are read
	 * sequentially starting at VA[0].
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not accessed.
	 *
	 * FIXME: the generator flags this accumulation code as not correct.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	const double complex *val = VA;	/* advances by one per stored entry */
	rsb_coo_idx_t row = br;

	while(RSB_LIKELY(row<bc))	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t nz_end = bpntr[row+1];
		rsb_nnz_idx_t nz;

		/* ROWSSUMS KERNEL HERE */
		for(nz=bpntr[row]; nz<nz_end; ++nz, ++val)
		{
			const rsb_coo_idx_t col = bindx[nz];
			double complex * const col_sum = row_sums+col;
			double complex term = 0;

			/* NOTE : should better use some intrinsic here. */
			term += conj(val[0]);
			col_sum[coff] += term;
			if(roff==coff && row==col)
				continue;	/* same offsets and same indices: counted once */
			row_sums[roff+row] += term;
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_complex_H__tC_r1_c1_uu_sH_dE_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel, conjugate transposed case.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, hermitian (A == A^H),
	 * diagonal explicit, of type double complex, with rsb_half_idx_t column indices.
	 *
	 * For each stored entry at row i and column j=bindx[k], the conjugate of
	 * its value is added to row_sums[coff+j]; unless roff==coff and i==j, the
	 * same conjugated value is also added to row_sums[roff+i] (presumably on
	 * behalf of the mirrored entry, which is not stored).
	 *
	 * Rows br (inclusive) to bc (exclusive) are visited; values are read
	 * sequentially starting at VA[0].
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not accessed.
	 *
	 * FIXME: the generator flags this accumulation code as not correct.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	const double complex *val = VA;	/* advances by one per stored entry */
	rsb_coo_idx_t row = br;

	while(RSB_LIKELY(row<bc))	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t nz_end = bpntr[row+1];
		rsb_nnz_idx_t nz;

		/* ROWSSUMS KERNEL HERE */
		for(nz=bpntr[row]; nz<nz_end; ++nz, ++val)
		{
			const rsb_coo_idx_t col = bindx[nz];
			double complex * const col_sum = row_sums+col;
			double complex term = 0;

			/* NOTE : should better use some intrinsic here. */
			term += conj(val[0]);
			col_sum[coff] += term;
			if(roff==coff && row==col)
				continue;	/* same offsets and same indices: counted once */
			row_sums[roff+row] += term;
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_rowssums_double_complex_H__tC_r1_c1_uu_sH_dI_uG(const double complex * VA, double complex * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums kernel, conjugate transposed case.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, hermitian (A == A^H),
	 * diagonal implicit, of type double complex, with rsb_half_idx_t column indices.
	 *
	 * For each stored entry at row i and column j=bindx[k], the conjugate of
	 * its value is added to row_sums[coff+j]; unless roff==coff and i==j, the
	 * same conjugated value is also added to row_sums[roff+i] (presumably on
	 * behalf of the mirrored entry, which is not stored).
	 *
	 * Rows br (inclusive) to bc (exclusive) are visited; values are read
	 * sequentially starting at VA[0].
	 * Mdim, mdim, indptr, rpntr, cpntr and flags are not accessed.
	 *
	 * FIXME: the generator flags this accumulation code as not correct.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	const double complex *val = VA;	/* advances by one per stored entry */
	rsb_coo_idx_t row = br;

	while(RSB_LIKELY(row<bc))	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t nz_end = bpntr[row+1];
		rsb_nnz_idx_t nz;

		/* ROWSSUMS KERNEL HERE */
		for(nz=bpntr[row]; nz<nz_end; ++nz, ++val)
		{
			const rsb_coo_idx_t col = bindx[nz];
			double complex * const col_sum = row_sums+col;
			double complex term = 0;

			/* NOTE : should better use some intrinsic here. */
			term += conj(val[0]);
			col_sum[coff] += term;
			if(roff==coff && row==col)
				continue;	/* same offsets and same indices: counted once */
			row_sums[roff+row] += term;
		}
		++row;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_complex_C__tN_r1_c1_uu_sU_dE_uG(double complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double complex *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Scaling kernel, untransposed case: every stored value of row i is
	 * multiplied in place by scale_factors[i].
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, with no symmetry
	 * (A \neq A^T), diagonal explicit, of type double complex, with rsb_coo_idx_t column indices.
	 *
	 * Rows br (inclusive) to bc (exclusive) are visited; values are updated
	 * sequentially starting at VA[0].
	 * Only VA, bpntr, br, bc and scale_factors are accessed.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	double complex *val = VA;	/* advances by one per stored entry */
	rsb_coo_idx_t row;

	for(row=br; RSB_LIKELY(row<bc); ++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t nz_end = bpntr[row+1];
		rsb_nnz_idx_t nz = bpntr[row];

		/* SCALE KERNEL HERE */
		while(nz<nz_end)
		{
			/* the factor is re-read for each entry, as in the generated code */
			val[0] *= scale_factors[row];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_complex_C__tN_r1_c1_uu_sU_dI_uG(double complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double complex *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Scaling kernel, untransposed case: every stored value of row i is
	 * multiplied in place by scale_factors[i].
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, with no symmetry
	 * (A \neq A^T), diagonal implicit, of type double complex, with rsb_coo_idx_t column indices.
	 *
	 * Rows br (inclusive) to bc (exclusive) are visited; values are updated
	 * sequentially starting at VA[0].
	 * Only VA, bpntr, br, bc and scale_factors are accessed.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	double complex *val = VA;	/* advances by one per stored entry */
	rsb_coo_idx_t row;

	for(row=br; RSB_LIKELY(row<bc); ++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t nz_end = bpntr[row+1];
		rsb_nnz_idx_t nz = bpntr[row];

		/* SCALE KERNEL HERE */
		while(nz<nz_end)
		{
			/* the factor is re-read for each entry, as in the generated code */
			val[0] *= scale_factors[row];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_complex_H__tN_r1_c1_uu_sU_dE_uG(double complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double complex *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Scaling kernel, untransposed case: every stored value of row i is
	 * multiplied in place by scale_factors[i].
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, with no symmetry
	 * (A \neq A^T), diagonal explicit, of type double complex, with rsb_half_idx_t column indices.
	 *
	 * Rows br (inclusive) to bc (exclusive) are visited; values are updated
	 * sequentially starting at VA[0].
	 * Only VA, bpntr, br, bc and scale_factors are accessed.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	double complex *val = VA;	/* advances by one per stored entry */
	rsb_coo_idx_t row;

	for(row=br; RSB_LIKELY(row<bc); ++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t nz_end = bpntr[row+1];
		rsb_nnz_idx_t nz = bpntr[row];

		/* SCALE KERNEL HERE */
		while(nz<nz_end)
		{
			/* the factor is re-read for each entry, as in the generated code */
			val[0] *= scale_factors[row];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_complex_H__tN_r1_c1_uu_sU_dI_uG(double complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double complex *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Scaling kernel, untransposed case: every stored value of row i is
	 * multiplied in place by scale_factors[i].
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, with no symmetry
	 * (A \neq A^T), diagonal implicit, of type double complex, with rsb_half_idx_t column indices.
	 *
	 * Rows br (inclusive) to bc (exclusive) are visited; values are updated
	 * sequentially starting at VA[0].
	 * Only VA, bpntr, br, bc and scale_factors are accessed.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	double complex *val = VA;	/* advances by one per stored entry */
	rsb_coo_idx_t row;

	for(row=br; RSB_LIKELY(row<bc); ++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t nz_end = bpntr[row+1];
		rsb_nnz_idx_t nz = bpntr[row];

		/* SCALE KERNEL HERE */
		while(nz<nz_end)
		{
			/* the factor is re-read for each entry, as in the generated code */
			val[0] *= scale_factors[row];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_complex_C__tT_r1_c1_uu_sU_dE_uG(double complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double complex *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Scaling kernel, transposed case: every stored value is multiplied in
	 * place by the factor of its column, scale_factors[bindx[k]].
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, with no symmetry
	 * (A \neq A^T), diagonal explicit, of type double complex, with rsb_coo_idx_t column indices.
	 *
	 * Rows br (inclusive) to bc (exclusive) are visited; values are updated
	 * sequentially starting at VA[0].
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not accessed.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	double complex *val = VA;	/* advances by one per stored entry */
	rsb_coo_idx_t row;

	for(row=br; RSB_LIKELY(row<bc); ++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t nz_end = bpntr[row+1];
		rsb_nnz_idx_t nz = bpntr[row];

		/* SCALE KERNEL HERE */
		while(nz<nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];

			val[0] *= scale_factors[col];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_complex_C__tT_r1_c1_uu_sU_dI_uG(double complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double complex *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Scaling kernel, transposed case: every stored value is multiplied in
	 * place by the factor of its column, scale_factors[bindx[k]].
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, with no symmetry
	 * (A \neq A^T), diagonal implicit, of type double complex, with rsb_coo_idx_t column indices.
	 *
	 * Rows br (inclusive) to bc (exclusive) are visited; values are updated
	 * sequentially starting at VA[0].
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not accessed.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	double complex *val = VA;	/* advances by one per stored entry */
	rsb_coo_idx_t row;

	for(row=br; RSB_LIKELY(row<bc); ++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t nz_end = bpntr[row+1];
		rsb_nnz_idx_t nz = bpntr[row];

		/* SCALE KERNEL HERE */
		while(nz<nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];

			val[0] *= scale_factors[col];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_complex_H__tT_r1_c1_uu_sU_dE_uG(double complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double complex *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Scaling kernel, transposed case: every stored value is multiplied in
	 * place by the factor of its column, scale_factors[bindx[k]].
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, with no symmetry
	 * (A \neq A^T), diagonal explicit, of type double complex, with rsb_half_idx_t column indices.
	 *
	 * Rows br (inclusive) to bc (exclusive) are visited; values are updated
	 * sequentially starting at VA[0].
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not accessed.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	double complex *val = VA;	/* advances by one per stored entry */
	rsb_coo_idx_t row;

	for(row=br; RSB_LIKELY(row<bc); ++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t nz_end = bpntr[row+1];
		rsb_nnz_idx_t nz = bpntr[row];

		/* SCALE KERNEL HERE */
		while(nz<nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];

			val[0] *= scale_factors[col];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_complex_H__tT_r1_c1_uu_sU_dI_uG(double complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double complex *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Scaling kernel, transposed case: every stored value is multiplied in
	 * place by the factor of its column, scale_factors[bindx[k]].
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, with no symmetry
	 * (A \neq A^T), diagonal implicit, of type double complex, with rsb_half_idx_t column indices.
	 *
	 * Rows br (inclusive) to bc (exclusive) are visited; values are updated
	 * sequentially starting at VA[0].
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not accessed.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	double complex *val = VA;	/* advances by one per stored entry */
	rsb_coo_idx_t row;

	for(row=br; RSB_LIKELY(row<bc); ++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t nz_end = bpntr[row+1];
		rsb_nnz_idx_t nz = bpntr[row];

		/* SCALE KERNEL HERE */
		while(nz<nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];

			val[0] *= scale_factors[col];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_complex_C__tC_r1_c1_uu_sU_dE_uG(double complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double complex *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Scaling kernel, conjugate transposed case: every stored value is
	 * multiplied in place by the factor of its column, scale_factors[bindx[k]].
	 * Neither the values nor the factors are conjugated here.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, with no symmetry
	 * (A \neq A^T), diagonal explicit, of type double complex, with rsb_coo_idx_t column indices.
	 *
	 * Rows br (inclusive) to bc (exclusive) are visited; values are updated
	 * sequentially starting at VA[0].
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not accessed.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	double complex *val = VA;	/* advances by one per stored entry */
	rsb_coo_idx_t row;

	for(row=br; RSB_LIKELY(row<bc); ++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t nz_end = bpntr[row+1];
		rsb_nnz_idx_t nz = bpntr[row];

		/* SCALE KERNEL HERE */
		while(nz<nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];

			val[0] *= scale_factors[col];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_complex_C__tC_r1_c1_uu_sU_dI_uG(double complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double complex *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Scaling kernel, conjugate transposed case: every stored value is
	 * multiplied in place by the factor of its column, scale_factors[bindx[k]].
	 * Neither the values nor the factors are conjugated here.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, with no symmetry
	 * (A \neq A^T), diagonal implicit, of type double complex, with rsb_coo_idx_t column indices.
	 *
	 * Rows br (inclusive) to bc (exclusive) are visited; values are updated
	 * sequentially starting at VA[0].
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not accessed.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	double complex *val = VA;	/* advances by one per stored entry */
	rsb_coo_idx_t row;

	for(row=br; RSB_LIKELY(row<bc); ++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t nz_end = bpntr[row+1];
		rsb_nnz_idx_t nz = bpntr[row];

		/* SCALE KERNEL HERE */
		while(nz<nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];

			val[0] *= scale_factors[col];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_complex_H__tC_r1_c1_uu_sU_dE_uG(double complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double complex *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Scaling kernel, conjugate transposed case: every stored value is
	 * multiplied in place by the factor of its column, scale_factors[bindx[k]].
	 * Neither the values nor the factors are conjugated here.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, with no symmetry
	 * (A \neq A^T), diagonal explicit, of type double complex, with rsb_half_idx_t column indices.
	 *
	 * Rows br (inclusive) to bc (exclusive) are visited; values are updated
	 * sequentially starting at VA[0].
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not accessed.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	double complex *val = VA;	/* advances by one per stored entry */
	rsb_coo_idx_t row;

	for(row=br; RSB_LIKELY(row<bc); ++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t nz_end = bpntr[row+1];
		rsb_nnz_idx_t nz = bpntr[row];

		/* SCALE KERNEL HERE */
		while(nz<nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];

			val[0] *= scale_factors[col];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_complex_H__tC_r1_c1_uu_sU_dI_uG(double complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double complex *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Scaling kernel, conjugate transposed case: every stored value is
	 * multiplied in place by the factor of its column, scale_factors[bindx[k]].
	 * Neither the values nor the factors are conjugated here.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, with no symmetry
	 * (A \neq A^T), diagonal implicit, of type double complex, with rsb_half_idx_t column indices.
	 *
	 * Rows br (inclusive) to bc (exclusive) are visited; values are updated
	 * sequentially starting at VA[0].
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not accessed.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	double complex *val = VA;	/* advances by one per stored entry */
	rsb_coo_idx_t row;

	for(row=br; RSB_LIKELY(row<bc); ++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t nz_end = bpntr[row+1];
		rsb_nnz_idx_t nz = bpntr[row];

		/* SCALE KERNEL HERE */
		while(nz<nz_end)
		{
			const rsb_coo_idx_t col = bindx[nz];

			val[0] *= scale_factors[col];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_complex_C__tN_r1_c1_uu_sS_dE_uG(double complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double complex *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Scaling kernel, untransposed case: every stored value of row i is
	 * multiplied in place by scale_factors[i].
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, symmetric
	 * (A == A^T), diagonal explicit, of type double complex, with rsb_coo_idx_t column indices.
	 *
	 * Rows br (inclusive) to bc (exclusive) are visited; values are updated
	 * sequentially starting at VA[0].
	 * Only VA, bpntr, br, bc and scale_factors are accessed.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	double complex *val = VA;	/* advances by one per stored entry */
	rsb_coo_idx_t row;

	for(row=br; RSB_LIKELY(row<bc); ++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t nz_end = bpntr[row+1];
		rsb_nnz_idx_t nz = bpntr[row];

		/* SCALE KERNEL HERE */
		while(nz<nz_end)
		{
			/* the factor is re-read for each entry, as in the generated code */
			val[0] *= scale_factors[row];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_complex_C__tN_r1_c1_uu_sS_dI_uG(double complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double complex *scale_factors)
{

	/**
	 * \ingroup rsb_doc_kernels
	 * Scaling kernel, untransposed case: every stored value of row i is
	 * multiplied in place by scale_factors[i].
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, symmetric
	 * (A == A^T), diagonal implicit, of type double complex, with rsb_coo_idx_t column indices.
	 *
	 * Rows br (inclusive) to bc (exclusive) are visited; values are updated
	 * sequentially starting at VA[0].
	 * Only VA, bpntr, br, bc and scale_factors are accessed.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	double complex *val = VA;	/* advances by one per stored entry */
	rsb_coo_idx_t row;

	for(row=br; RSB_LIKELY(row<bc); ++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t nz_end = bpntr[row+1];
		rsb_nnz_idx_t nz = bpntr[row];

		/* SCALE KERNEL HERE */
		while(nz<nz_end)
		{
			/* the factor is re-read for each entry, as in the generated code */
			val[0] *= scale_factors[row];
			++val;
			++nz;
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_complex_H__tN_r1_c1_uu_sS_dE_uG(double complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double complex *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the stored values by rows: \f$a_{ij} \leftarrow s_{i} a_{ij}\f$,
	 * with \f$s\f$ = scale_factors, where A == A^T.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double complex, with rsb_half_idx_t column indices.
	 *
	 * Rows br (inclusive) through bc (exclusive) are visited; bpntr[r] and
	 * bpntr[r+1] delimit the entries of row r. The value cursor starts at VA
	 * and moves forward by one element per visited entry (it is not re-derived
	 * from bpntr).
	 * Column indices are not needed for a row scaling, so the (half-width)
	 * bindx array is not read; Mdim, mdim, indptr, rpntr, cpntr, roff, coff
	 * and flags are unused as well.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */
	double complex *val=VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		/* every entry of this row gets the same factor */
		for(nz=bpntr[row];nz<row_end;++nz,++val)
			*val *= scale_factors[row];
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_complex_H__tN_r1_c1_uu_sS_dI_uG(double complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double complex *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the stored values by rows: \f$a_{ij} \leftarrow s_{i} a_{ij}\f$,
	 * with \f$s\f$ = scale_factors, where A == A^T.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double complex, with rsb_half_idx_t column indices.
	 *
	 * Only the stored entries are touched. Rows br (inclusive) through bc
	 * (exclusive) are visited; bpntr[r] and bpntr[r+1] delimit the entries of
	 * row r. The value cursor starts at VA and moves forward by one element
	 * per visited entry (it is not re-derived from bpntr).
	 * Column indices are not needed for a row scaling, so the (half-width)
	 * bindx array is not read; Mdim, mdim, indptr, rpntr, cpntr, roff, coff
	 * and flags are unused as well.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */
	double complex *val=VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		/* every entry of this row gets the same factor */
		for(nz=bpntr[row];nz<row_end;++nz,++val)
			*val *= scale_factors[row];
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_complex_C__tT_r1_c1_uu_sS_dE_uG(double complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double complex *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the stored values by columns: \f$a_{ij} \leftarrow a_{ij} s_{j}\f$,
	 * i.e. \f$A \leftarrow A\cdot P, P_{jj}=s_{j}\f$, where A == A^T.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double complex, with rsb_coo_idx_t column indices.
	 *
	 * Rows br (inclusive) through bc (exclusive) are visited; bpntr[r] and
	 * bpntr[r+1] delimit the entries of row r, and bindx[] gives the column of
	 * each entry, which selects the factor in scale_factors.
	 * The value cursor starts at VA and moves forward by one element per
	 * visited entry (it is not re-derived from bpntr).
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not used here.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */
	double complex *val=VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz=bpntr[row];nz<row_end;++nz,++val)
		{
			const rsb_coo_idx_t col=bindx[nz];

			/* the factor depends on the entry's column only */
			*val *= scale_factors[col];
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_complex_C__tT_r1_c1_uu_sS_dI_uG(double complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double complex *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the stored values by columns: \f$a_{ij} \leftarrow a_{ij} s_{j}\f$,
	 * i.e. \f$A \leftarrow A\cdot P, P_{jj}=s_{j}\f$, where A == A^T.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double complex, with rsb_coo_idx_t column indices.
	 *
	 * Only the stored entries are touched. Rows br (inclusive) through bc
	 * (exclusive) are visited; bpntr[r] and bpntr[r+1] delimit the entries of
	 * row r, and bindx[] gives the column of each entry, which selects the
	 * factor in scale_factors.
	 * The value cursor starts at VA and moves forward by one element per
	 * visited entry (it is not re-derived from bpntr).
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not used here.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */
	double complex *val=VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz=bpntr[row];nz<row_end;++nz,++val)
		{
			const rsb_coo_idx_t col=bindx[nz];

			/* the factor depends on the entry's column only */
			*val *= scale_factors[col];
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_complex_H__tT_r1_c1_uu_sS_dE_uG(double complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double complex *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the stored values by columns: \f$a_{ij} \leftarrow a_{ij} s_{j}\f$,
	 * i.e. \f$A \leftarrow A\cdot P, P_{jj}=s_{j}\f$, where A == A^T.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double complex, with rsb_half_idx_t column indices.
	 *
	 * Rows br (inclusive) through bc (exclusive) are visited; bpntr[r] and
	 * bpntr[r+1] delimit the entries of row r, and the half-width bindx[]
	 * gives the column of each entry, which selects the factor in
	 * scale_factors (the index is widened to rsb_coo_idx_t before use).
	 * The value cursor starts at VA and moves forward by one element per
	 * visited entry (it is not re-derived from bpntr).
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not used here.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */
	double complex *val=VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz=bpntr[row];nz<row_end;++nz,++val)
		{
			const rsb_coo_idx_t col=bindx[nz];

			/* the factor depends on the entry's column only */
			*val *= scale_factors[col];
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_complex_H__tT_r1_c1_uu_sS_dI_uG(double complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double complex *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the stored values by columns: \f$a_{ij} \leftarrow a_{ij} s_{j}\f$,
	 * i.e. \f$A \leftarrow A\cdot P, P_{jj}=s_{j}\f$, where A == A^T.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double complex, with rsb_half_idx_t column indices.
	 *
	 * Only the stored entries are touched. Rows br (inclusive) through bc
	 * (exclusive) are visited; bpntr[r] and bpntr[r+1] delimit the entries of
	 * row r, and the half-width bindx[] gives the column of each entry, which
	 * selects the factor in scale_factors (the index is widened to
	 * rsb_coo_idx_t before use).
	 * The value cursor starts at VA and moves forward by one element per
	 * visited entry (it is not re-derived from bpntr).
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not used here.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */
	double complex *val=VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz=bpntr[row];nz<row_end;++nz,++val)
		{
			const rsb_coo_idx_t col=bindx[nz];

			/* the factor depends on the entry's column only */
			*val *= scale_factors[col];
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_complex_C__tC_r1_c1_uu_sS_dE_uG(double complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double complex *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the stored values by columns: \f$a_{ij} \leftarrow a_{ij} s_{j}\f$,
	 * i.e. \f$A \leftarrow A\cdot P, P_{jj}=s_{j}\f$, where A == A^T.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double complex, with rsb_coo_idx_t column indices.
	 *
	 * Rows br (inclusive) through bc (exclusive) are visited; bpntr[r] and
	 * bpntr[r+1] delimit the entries of row r, and bindx[] gives the column of
	 * each entry, which selects the factor in scale_factors. Factors are
	 * applied as given: no conjugation is performed in this kernel.
	 * The value cursor starts at VA and moves forward by one element per
	 * visited entry (it is not re-derived from bpntr).
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not used here.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */
	double complex *val=VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz=bpntr[row];nz<row_end;++nz,++val)
		{
			const rsb_coo_idx_t col=bindx[nz];

			/* the factor depends on the entry's column only */
			*val *= scale_factors[col];
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_complex_C__tC_r1_c1_uu_sS_dI_uG(double complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double complex *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the stored values by columns: \f$a_{ij} \leftarrow a_{ij} s_{j}\f$,
	 * i.e. \f$A \leftarrow A\cdot P, P_{jj}=s_{j}\f$, where A == A^T.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double complex, with rsb_coo_idx_t column indices.
	 *
	 * Only the stored entries are touched. Rows br (inclusive) through bc
	 * (exclusive) are visited; bpntr[r] and bpntr[r+1] delimit the entries of
	 * row r, and bindx[] gives the column of each entry, which selects the
	 * factor in scale_factors. Factors are applied as given: no conjugation
	 * is performed in this kernel.
	 * The value cursor starts at VA and moves forward by one element per
	 * visited entry (it is not re-derived from bpntr).
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not used here.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */
	double complex *val=VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz=bpntr[row];nz<row_end;++nz,++val)
		{
			const rsb_coo_idx_t col=bindx[nz];

			/* the factor depends on the entry's column only */
			*val *= scale_factors[col];
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_complex_H__tC_r1_c1_uu_sS_dE_uG(double complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double complex *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the stored values by columns: \f$a_{ij} \leftarrow a_{ij} s_{j}\f$,
	 * i.e. \f$A \leftarrow A\cdot P, P_{jj}=s_{j}\f$, where A == A^T.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double complex, with rsb_half_idx_t column indices.
	 *
	 * Rows br (inclusive) through bc (exclusive) are visited; bpntr[r] and
	 * bpntr[r+1] delimit the entries of row r, and the half-width bindx[]
	 * gives the column of each entry, which selects the factor in
	 * scale_factors (the index is widened to rsb_coo_idx_t before use).
	 * Factors are applied as given: no conjugation is performed in this kernel.
	 * The value cursor starts at VA and moves forward by one element per
	 * visited entry (it is not re-derived from bpntr).
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not used here.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */
	double complex *val=VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz=bpntr[row];nz<row_end;++nz,++val)
		{
			const rsb_coo_idx_t col=bindx[nz];

			/* the factor depends on the entry's column only */
			*val *= scale_factors[col];
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_complex_H__tC_r1_c1_uu_sS_dI_uG(double complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double complex *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the stored values by columns: \f$a_{ij} \leftarrow a_{ij} s_{j}\f$,
	 * i.e. \f$A \leftarrow A\cdot P, P_{jj}=s_{j}\f$, where A == A^T.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double complex, with rsb_half_idx_t column indices.
	 *
	 * Only the stored entries are touched. Rows br (inclusive) through bc
	 * (exclusive) are visited; bpntr[r] and bpntr[r+1] delimit the entries of
	 * row r, and the half-width bindx[] gives the column of each entry, which
	 * selects the factor in scale_factors (the index is widened to
	 * rsb_coo_idx_t before use). Factors are applied as given: no conjugation
	 * is performed in this kernel.
	 * The value cursor starts at VA and moves forward by one element per
	 * visited entry (it is not re-derived from bpntr).
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not used here.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */
	double complex *val=VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz=bpntr[row];nz<row_end;++nz,++val)
		{
			const rsb_coo_idx_t col=bindx[nz];

			/* the factor depends on the entry's column only */
			*val *= scale_factors[col];
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_complex_C__tN_r1_c1_uu_sH_dE_uG(double complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double complex *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the stored values by rows: \f$a_{ij} \leftarrow s_{i} a_{ij}\f$,
	 * with \f$s\f$ = scale_factors, where A == A^H.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double complex, with rsb_coo_idx_t column indices.
	 *
	 * Rows br (inclusive) through bc (exclusive) are visited; bpntr[r] and
	 * bpntr[r+1] delimit the entries of row r. The value cursor starts at VA
	 * and moves forward by one element per visited entry (it is not re-derived
	 * from bpntr).
	 * Column indices are not needed for a row scaling, so bindx is not read;
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are unused as well.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */
	double complex *val=VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		/* every entry of this row gets the same factor */
		for(nz=bpntr[row];nz<row_end;++nz,++val)
			*val *= scale_factors[row];
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_complex_C__tN_r1_c1_uu_sH_dI_uG(double complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double complex *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the stored values by rows: \f$a_{ij} \leftarrow s_{i} a_{ij}\f$,
	 * with \f$s\f$ = scale_factors, where A == A^H.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double complex, with rsb_coo_idx_t column indices.
	 *
	 * Only the stored entries are touched. Rows br (inclusive) through bc
	 * (exclusive) are visited; bpntr[r] and bpntr[r+1] delimit the entries of
	 * row r. The value cursor starts at VA and moves forward by one element
	 * per visited entry (it is not re-derived from bpntr).
	 * Column indices are not needed for a row scaling, so bindx is not read;
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are unused as well.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */
	double complex *val=VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		/* every entry of this row gets the same factor */
		for(nz=bpntr[row];nz<row_end;++nz,++val)
			*val *= scale_factors[row];
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_complex_H__tN_r1_c1_uu_sH_dE_uG(double complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double complex *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the stored values by rows: \f$a_{ij} \leftarrow s_{i} a_{ij}\f$,
	 * with \f$s\f$ = scale_factors, where A == A^H.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double complex, with rsb_half_idx_t column indices.
	 *
	 * Rows br (inclusive) through bc (exclusive) are visited; bpntr[r] and
	 * bpntr[r+1] delimit the entries of row r. The value cursor starts at VA
	 * and moves forward by one element per visited entry (it is not re-derived
	 * from bpntr).
	 * Column indices are not needed for a row scaling, so the (half-width)
	 * bindx array is not read; Mdim, mdim, indptr, rpntr, cpntr, roff, coff
	 * and flags are unused as well.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */
	double complex *val=VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		/* every entry of this row gets the same factor */
		for(nz=bpntr[row];nz<row_end;++nz,++val)
			*val *= scale_factors[row];
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_complex_H__tN_r1_c1_uu_sH_dI_uG(double complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double complex *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the stored values by rows: \f$a_{ij} \leftarrow s_{i} a_{ij}\f$,
	 * with \f$s\f$ = scale_factors, where A == A^H.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double complex, with rsb_half_idx_t column indices.
	 *
	 * Only the stored entries are touched. Rows br (inclusive) through bc
	 * (exclusive) are visited; bpntr[r] and bpntr[r+1] delimit the entries of
	 * row r. The value cursor starts at VA and moves forward by one element
	 * per visited entry (it is not re-derived from bpntr).
	 * Column indices are not needed for a row scaling, so the (half-width)
	 * bindx array is not read; Mdim, mdim, indptr, rpntr, cpntr, roff, coff
	 * and flags are unused as well.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */
	double complex *val=VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		/* every entry of this row gets the same factor */
		for(nz=bpntr[row];nz<row_end;++nz,++val)
			*val *= scale_factors[row];
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_complex_C__tT_r1_c1_uu_sH_dE_uG(double complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double complex *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the stored values by columns: \f$a_{ij} \leftarrow a_{ij} s_{j}\f$,
	 * i.e. \f$A \leftarrow A\cdot P, P_{jj}=s_{j}\f$, where A == A^H.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double complex, with rsb_coo_idx_t column indices.
	 *
	 * Rows br (inclusive) through bc (exclusive) are visited; bpntr[r] and
	 * bpntr[r+1] delimit the entries of row r, and bindx[] gives the column of
	 * each entry, which selects the factor in scale_factors.
	 * The value cursor starts at VA and moves forward by one element per
	 * visited entry (it is not re-derived from bpntr).
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not used here.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */
	double complex *val=VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz=bpntr[row];nz<row_end;++nz,++val)
		{
			const rsb_coo_idx_t col=bindx[nz];

			/* the factor depends on the entry's column only */
			*val *= scale_factors[col];
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_complex_C__tT_r1_c1_uu_sH_dI_uG(double complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double complex *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the stored values by columns: \f$a_{ij} \leftarrow a_{ij} s_{j}\f$,
	 * i.e. \f$A \leftarrow A\cdot P, P_{jj}=s_{j}\f$, where A == A^H.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double complex, with rsb_coo_idx_t column indices.
	 *
	 * Only the stored entries are touched. Rows br (inclusive) through bc
	 * (exclusive) are visited; bpntr[r] and bpntr[r+1] delimit the entries of
	 * row r, and bindx[] gives the column of each entry, which selects the
	 * factor in scale_factors.
	 * The value cursor starts at VA and moves forward by one element per
	 * visited entry (it is not re-derived from bpntr).
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not used here.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */
	double complex *val=VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz=bpntr[row];nz<row_end;++nz,++val)
		{
			const rsb_coo_idx_t col=bindx[nz];

			/* the factor depends on the entry's column only */
			*val *= scale_factors[col];
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_complex_H__tT_r1_c1_uu_sH_dE_uG(double complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double complex *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the stored values by columns: \f$a_{ij} \leftarrow a_{ij} s_{j}\f$,
	 * i.e. \f$A \leftarrow A\cdot P, P_{jj}=s_{j}\f$, where A == A^H.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double complex, with rsb_half_idx_t column indices.
	 *
	 * Rows br (inclusive) through bc (exclusive) are visited; bpntr[r] and
	 * bpntr[r+1] delimit the entries of row r, and the half-width bindx[]
	 * gives the column of each entry, which selects the factor in
	 * scale_factors (the index is widened to rsb_coo_idx_t before use).
	 * The value cursor starts at VA and moves forward by one element per
	 * visited entry (it is not re-derived from bpntr).
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not used here.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */
	double complex *val=VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz=bpntr[row];nz<row_end;++nz,++val)
		{
			const rsb_coo_idx_t col=bindx[nz];

			/* the factor depends on the entry's column only */
			*val *= scale_factors[col];
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_complex_H__tT_r1_c1_uu_sH_dI_uG(double complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double complex *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the stored values by columns: \f$a_{ij} \leftarrow a_{ij} s_{j}\f$,
	 * i.e. \f$A \leftarrow A\cdot P, P_{jj}=s_{j}\f$, where A == A^H.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double complex, with rsb_half_idx_t column indices.
	 *
	 * Only the stored entries are touched. Rows br (inclusive) through bc
	 * (exclusive) are visited; bpntr[r] and bpntr[r+1] delimit the entries of
	 * row r, and the half-width bindx[] gives the column of each entry, which
	 * selects the factor in scale_factors (the index is widened to
	 * rsb_coo_idx_t before use).
	 * The value cursor starts at VA and moves forward by one element per
	 * visited entry (it is not re-derived from bpntr).
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not used here.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */
	double complex *val=VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz=bpntr[row];nz<row_end;++nz,++val)
		{
			const rsb_coo_idx_t col=bindx[nz];

			/* the factor depends on the entry's column only */
			*val *= scale_factors[col];
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_complex_C__tC_r1_c1_uu_sH_dE_uG(double complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double complex *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the stored values by columns: \f$a_{ij} \leftarrow a_{ij} s_{j}\f$,
	 * i.e. \f$A \leftarrow A\cdot P, P_{jj}=s_{j}\f$, where A == A^H.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double complex, with rsb_coo_idx_t column indices.
	 *
	 * Rows br (inclusive) through bc (exclusive) are visited; bpntr[r] and
	 * bpntr[r+1] delimit the entries of row r, and bindx[] gives the column of
	 * each entry, which selects the factor in scale_factors. Factors are
	 * applied as given: no conjugation is performed in this kernel.
	 * The value cursor starts at VA and moves forward by one element per
	 * visited entry (it is not re-derived from bpntr).
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not used here.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */
	double complex *val=VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz=bpntr[row];nz<row_end;++nz,++val)
		{
			const rsb_coo_idx_t col=bindx[nz];

			/* the factor depends on the entry's column only */
			*val *= scale_factors[col];
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_complex_C__tC_r1_c1_uu_sH_dI_uG(double complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double complex *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the stored values by columns: \f$a_{ij} \leftarrow a_{ij} s_{j}\f$,
	 * i.e. \f$A \leftarrow A\cdot P, P_{jj}=s_{j}\f$, where A == A^H.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double complex, with rsb_coo_idx_t column indices.
	 *
	 * Only the stored entries are touched. Rows br (inclusive) through bc
	 * (exclusive) are visited; bpntr[r] and bpntr[r+1] delimit the entries of
	 * row r, and bindx[] gives the column of each entry, which selects the
	 * factor in scale_factors. Factors are applied as given: no conjugation
	 * is performed in this kernel.
	 * The value cursor starts at VA and moves forward by one element per
	 * visited entry (it is not re-derived from bpntr).
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not used here.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */
	double complex *val=VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz=bpntr[row];nz<row_end;++nz,++val)
		{
			const rsb_coo_idx_t col=bindx[nz];

			/* the factor depends on the entry's column only */
			*val *= scale_factors[col];
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_complex_H__tC_r1_c1_uu_sH_dE_uG(double complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double complex *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the stored values by columns: \f$a_{ij} \leftarrow a_{ij} s_{j}\f$,
	 * i.e. \f$A \leftarrow A\cdot P, P_{jj}=s_{j}\f$, where A == A^H.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal explicit, of type double complex, with rsb_half_idx_t column indices.
	 *
	 * Rows br (inclusive) through bc (exclusive) are visited; bpntr[r] and
	 * bpntr[r+1] delimit the entries of row r, and the half-width bindx[]
	 * gives the column of each entry, which selects the factor in
	 * scale_factors (the index is widened to rsb_coo_idx_t before use).
	 * Factors are applied as given: no conjugation is performed in this kernel.
	 * The value cursor starts at VA and moves forward by one element per
	 * visited entry (it is not re-derived from bpntr).
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not used here.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */
	double complex *val=VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz=bpntr[row];nz<row_end;++nz,++val)
		{
			const rsb_coo_idx_t col=bindx[nz];

			/* the factor depends on the entry's column only */
			*val *= scale_factors[col];
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_scale_double_complex_H__tC_r1_c1_uu_sH_dI_uG(double complex * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double complex *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the stored values by columns: \f$a_{ij} \leftarrow a_{ij} s_{j}\f$,
	 * i.e. \f$A \leftarrow A\cdot P, P_{jj}=s_{j}\f$, where A == A^H.
	 * Matrix A should be blocked 1 x 1, stored in BCSR format, diagonal implicit, of type double complex, with rsb_half_idx_t column indices.
	 *
	 * Only the stored entries are touched. Rows br (inclusive) through bc
	 * (exclusive) are visited; bpntr[r] and bpntr[r+1] delimit the entries of
	 * row r, and the half-width bindx[] gives the column of each entry, which
	 * selects the factor in scale_factors (the index is widened to
	 * rsb_coo_idx_t before use). Factors are applied as given: no conjugation
	 * is performed in this kernel.
	 * The value cursor starts at VA and moves forward by one element per
	 * visited entry (it is not re-derived from bpntr).
	 * Mdim, mdim, indptr, rpntr, cpntr, roff, coff and flags are not used here.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */
	double complex *val=VA;	/* running cursor over the stored values */
	rsb_coo_idx_t row;

	for(row=br;RSB_LIKELY(row<bc);++row)	/* experimental, for the bounded box patch */
	{
		const rsb_nnz_idx_t row_end=bpntr[row+1];
		rsb_nnz_idx_t nz;

		for(nz=bpntr[row];nz<row_end;++nz,++val)
		{
			const rsb_coo_idx_t col=bindx[nz];

			/* the factor depends on the entry's column only */
			*val *= scale_factors[col];
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCSR_infty_norm_double_C_u_tN_sU_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "infty_norm" operation: picks the kernel
	 * specialized for the block size of the matrix and forwards every
	 * argument to it unchanged.
	 *
	 * The block size is taken from the first block (rpntr[1]-rpntr[0] rows,
	 * cpntr[1]-cpntr[0] columns) when both pointer arrays are given, and is
	 * assumed to be 1 x 1 otherwise (experimental, for the bounded box patch).
	 * Only 1 x 1 has an unrolled kernel here; any other size goes to the
	 * looping kernel if RSB_WANT_LOOPING_KERNELS is defined, and yields
	 * RSB_ERR_UNSUPPORTED_OPERATION if it is not.
	 *
	 * \return the error value of the dispatched kernel, or
	 * RSB_ERR_UNSUPPORTED_OPERATION when no kernel is available.
	 *
	 * Since this is strictly blocked code, you should allow the rhs and the out
	 * vector to accept a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_err_t errval = RSB_ERR_NO_ERROR;
	rsb_coo_idx_t columns=1,rows=1;	/* 1 x 1 unless the pointer arrays say otherwise */

	if(cpntr && rpntr)
	{
		columns=cpntr[1]-cpntr[0];
		rows   =rpntr[1]-rpntr[0];
	}

	if(rows==1 && columns==1)
	{
		/* 1 1 BCSR */
		errval = rsb__BCSR_infty_norm_double_C__tN_r1_c1_uu_sU_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}
	else
	{
		/* no unrolled kernel for this block size */
#ifdef RSB_WANT_LOOPING_KERNELS 
		errval = rsb__BCSR_infty_norm_double_C__tN_r1_c1_ul_sU_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
		errval = RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
	}

	return errval;
}





rsb_err_t rsb__BCSR_infty_norm_double_C_u_tN_sU_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "infty_norm" operation.
	 *
	 * The block shape is taken from the first two entries of rpntr and cpntr;
	 * if either array is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is forwarded to the `_uu_` kernel; any other shape is
	 * forwarded to the `_ul_` kernel when RSB_WANT_LOOPING_KERNELS is defined,
	 * and reported as RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Being strictly blocked code, the rhs and the out vectors should tolerate
	 * a small overflow, no bigger than, respectively,
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t block_rows = 1, block_cols = 1;	/* fallback blocking: experimental, for the bounded box patch */

	if(rpntr && cpntr)
	{
		block_cols = cpntr[1]-cpntr[0];
		block_rows = rpntr[1]-rpntr[0];
	}

	if(block_rows == 1 && block_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_double_C__tN_r1_c1_uu_sU_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* every other block shape */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_double_C__tN_r1_c1_ul_sU_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_double_H_u_tN_sU_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "infty_norm" operation.
	 *
	 * The block shape is taken from the first two entries of rpntr and cpntr;
	 * if either array is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is forwarded to the `_uu_` kernel; any other shape is
	 * forwarded to the `_ul_` kernel when RSB_WANT_LOOPING_KERNELS is defined,
	 * and reported as RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Being strictly blocked code, the rhs and the out vectors should tolerate
	 * a small overflow, no bigger than, respectively,
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t block_rows = 1, block_cols = 1;	/* fallback blocking: experimental, for the bounded box patch */

	if(rpntr && cpntr)
	{
		block_cols = cpntr[1]-cpntr[0];
		block_rows = rpntr[1]-rpntr[0];
	}

	if(block_rows == 1 && block_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_double_H__tN_r1_c1_uu_sU_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* every other block shape */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_double_H__tN_r1_c1_ul_sU_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_double_H_u_tN_sU_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "infty_norm" operation.
	 *
	 * The block shape is taken from the first two entries of rpntr and cpntr;
	 * if either array is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is forwarded to the `_uu_` kernel; any other shape is
	 * forwarded to the `_ul_` kernel when RSB_WANT_LOOPING_KERNELS is defined,
	 * and reported as RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Being strictly blocked code, the rhs and the out vectors should tolerate
	 * a small overflow, no bigger than, respectively,
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t block_rows = 1, block_cols = 1;	/* fallback blocking: experimental, for the bounded box patch */

	if(rpntr && cpntr)
	{
		block_cols = cpntr[1]-cpntr[0];
		block_rows = rpntr[1]-rpntr[0];
	}

	if(block_rows == 1 && block_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_double_H__tN_r1_c1_uu_sU_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* every other block shape */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_double_H__tN_r1_c1_ul_sU_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_double_C_u_tT_sU_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "infty_norm" operation.
	 *
	 * The block shape is taken from the first two entries of rpntr and cpntr;
	 * if either array is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is forwarded to the `_uu_` kernel; any other shape is
	 * forwarded to the `_ul_` kernel when RSB_WANT_LOOPING_KERNELS is defined,
	 * and reported as RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Being strictly blocked code, the rhs and the out vectors should tolerate
	 * a small overflow, no bigger than, respectively,
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t block_rows = 1, block_cols = 1;	/* fallback blocking: experimental, for the bounded box patch */

	if(rpntr && cpntr)
	{
		block_cols = cpntr[1]-cpntr[0];
		block_rows = rpntr[1]-rpntr[0];
	}

	if(block_rows == 1 && block_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_double_C__tT_r1_c1_uu_sU_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* every other block shape */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_double_C__tT_r1_c1_ul_sU_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_double_C_u_tT_sU_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "infty_norm" operation.
	 *
	 * The block shape is taken from the first two entries of rpntr and cpntr;
	 * if either array is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is forwarded to the `_uu_` kernel; any other shape is
	 * forwarded to the `_ul_` kernel when RSB_WANT_LOOPING_KERNELS is defined,
	 * and reported as RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Being strictly blocked code, the rhs and the out vectors should tolerate
	 * a small overflow, no bigger than, respectively,
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t block_rows = 1, block_cols = 1;	/* fallback blocking: experimental, for the bounded box patch */

	if(rpntr && cpntr)
	{
		block_cols = cpntr[1]-cpntr[0];
		block_rows = rpntr[1]-rpntr[0];
	}

	if(block_rows == 1 && block_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_double_C__tT_r1_c1_uu_sU_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* every other block shape */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_double_C__tT_r1_c1_ul_sU_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_double_H_u_tT_sU_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "infty_norm" operation.
	 *
	 * The block shape is taken from the first two entries of rpntr and cpntr;
	 * if either array is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is forwarded to the `_uu_` kernel; any other shape is
	 * forwarded to the `_ul_` kernel when RSB_WANT_LOOPING_KERNELS is defined,
	 * and reported as RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Being strictly blocked code, the rhs and the out vectors should tolerate
	 * a small overflow, no bigger than, respectively,
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t block_rows = 1, block_cols = 1;	/* fallback blocking: experimental, for the bounded box patch */

	if(rpntr && cpntr)
	{
		block_cols = cpntr[1]-cpntr[0];
		block_rows = rpntr[1]-rpntr[0];
	}

	if(block_rows == 1 && block_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_double_H__tT_r1_c1_uu_sU_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* every other block shape */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_double_H__tT_r1_c1_ul_sU_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_double_H_u_tT_sU_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "infty_norm" operation.
	 *
	 * The block shape is taken from the first two entries of rpntr and cpntr;
	 * if either array is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is forwarded to the `_uu_` kernel; any other shape is
	 * forwarded to the `_ul_` kernel when RSB_WANT_LOOPING_KERNELS is defined,
	 * and reported as RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Being strictly blocked code, the rhs and the out vectors should tolerate
	 * a small overflow, no bigger than, respectively,
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t block_rows = 1, block_cols = 1;	/* fallback blocking: experimental, for the bounded box patch */

	if(rpntr && cpntr)
	{
		block_cols = cpntr[1]-cpntr[0];
		block_rows = rpntr[1]-rpntr[0];
	}

	if(block_rows == 1 && block_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_double_H__tT_r1_c1_uu_sU_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* every other block shape */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_double_H__tT_r1_c1_ul_sU_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_double_C_u_tC_sU_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "infty_norm" operation.
	 *
	 * The block shape is taken from the first two entries of rpntr and cpntr;
	 * if either array is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is forwarded to the `_uu_` kernel; any other shape is
	 * forwarded to the `_ul_` kernel when RSB_WANT_LOOPING_KERNELS is defined,
	 * and reported as RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Being strictly blocked code, the rhs and the out vectors should tolerate
	 * a small overflow, no bigger than, respectively,
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t block_rows = 1, block_cols = 1;	/* fallback blocking: experimental, for the bounded box patch */

	if(rpntr && cpntr)
	{
		block_cols = cpntr[1]-cpntr[0];
		block_rows = rpntr[1]-rpntr[0];
	}

	if(block_rows == 1 && block_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_double_C__tC_r1_c1_uu_sU_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* every other block shape */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_double_C__tC_r1_c1_ul_sU_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_double_C_u_tC_sU_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "infty_norm" operation.
	 *
	 * The block shape is taken from the first two entries of rpntr and cpntr;
	 * if either array is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is forwarded to the `_uu_` kernel; any other shape is
	 * forwarded to the `_ul_` kernel when RSB_WANT_LOOPING_KERNELS is defined,
	 * and reported as RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Being strictly blocked code, the rhs and the out vectors should tolerate
	 * a small overflow, no bigger than, respectively,
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t block_rows = 1, block_cols = 1;	/* fallback blocking: experimental, for the bounded box patch */

	if(rpntr && cpntr)
	{
		block_cols = cpntr[1]-cpntr[0];
		block_rows = rpntr[1]-rpntr[0];
	}

	if(block_rows == 1 && block_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_double_C__tC_r1_c1_uu_sU_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* every other block shape */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_double_C__tC_r1_c1_ul_sU_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_double_H_u_tC_sU_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "infty_norm" operation.
	 *
	 * The block shape is taken from the first two entries of rpntr and cpntr;
	 * if either array is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is forwarded to the `_uu_` kernel; any other shape is
	 * forwarded to the `_ul_` kernel when RSB_WANT_LOOPING_KERNELS is defined,
	 * and reported as RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Being strictly blocked code, the rhs and the out vectors should tolerate
	 * a small overflow, no bigger than, respectively,
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t block_rows = 1, block_cols = 1;	/* fallback blocking: experimental, for the bounded box patch */

	if(rpntr && cpntr)
	{
		block_cols = cpntr[1]-cpntr[0];
		block_rows = rpntr[1]-rpntr[0];
	}

	if(block_rows == 1 && block_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_double_H__tC_r1_c1_uu_sU_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* every other block shape */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_double_H__tC_r1_c1_ul_sU_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_double_H_u_tC_sU_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "infty_norm" operation.
	 *
	 * The block shape is taken from the first two entries of rpntr and cpntr;
	 * if either array is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is forwarded to the `_uu_` kernel; any other shape is
	 * forwarded to the `_ul_` kernel when RSB_WANT_LOOPING_KERNELS is defined,
	 * and reported as RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Being strictly blocked code, the rhs and the out vectors should tolerate
	 * a small overflow, no bigger than, respectively,
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t block_rows = 1, block_cols = 1;	/* fallback blocking: experimental, for the bounded box patch */

	if(rpntr && cpntr)
	{
		block_cols = cpntr[1]-cpntr[0];
		block_rows = rpntr[1]-rpntr[0];
	}

	if(block_rows == 1 && block_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_double_H__tC_r1_c1_uu_sU_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* every other block shape */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_double_H__tC_r1_c1_ul_sU_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_double_C_u_tN_sS_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "infty_norm" operation.
	 *
	 * The block shape is taken from the first two entries of rpntr and cpntr;
	 * if either array is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is forwarded to the `_uu_` kernel; any other shape is
	 * forwarded to the `_ul_` kernel when RSB_WANT_LOOPING_KERNELS is defined,
	 * and reported as RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Being strictly blocked code, the rhs and the out vectors should tolerate
	 * a small overflow, no bigger than, respectively,
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t block_rows = 1, block_cols = 1;	/* fallback blocking: experimental, for the bounded box patch */

	if(rpntr && cpntr)
	{
		block_cols = cpntr[1]-cpntr[0];
		block_rows = rpntr[1]-rpntr[0];
	}

	if(block_rows == 1 && block_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_double_C__tN_r1_c1_uu_sS_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* every other block shape */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_double_C__tN_r1_c1_ul_sS_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_double_C_u_tN_sS_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "infty_norm" operation.
	 *
	 * The block shape is taken from the first two entries of rpntr and cpntr;
	 * if either array is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is forwarded to the `_uu_` kernel; any other shape is
	 * forwarded to the `_ul_` kernel when RSB_WANT_LOOPING_KERNELS is defined,
	 * and reported as RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Being strictly blocked code, the rhs and the out vectors should tolerate
	 * a small overflow, no bigger than, respectively,
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t block_rows = 1, block_cols = 1;	/* fallback blocking: experimental, for the bounded box patch */

	if(rpntr && cpntr)
	{
		block_cols = cpntr[1]-cpntr[0];
		block_rows = rpntr[1]-rpntr[0];
	}

	if(block_rows == 1 && block_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_double_C__tN_r1_c1_uu_sS_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* every other block shape */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_double_C__tN_r1_c1_ul_sS_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_double_H_u_tN_sS_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "infty_norm" operation.
	 *
	 * The block shape is taken from the first two entries of rpntr and cpntr;
	 * if either array is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is forwarded to the `_uu_` kernel; any other shape is
	 * forwarded to the `_ul_` kernel when RSB_WANT_LOOPING_KERNELS is defined,
	 * and reported as RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Being strictly blocked code, the rhs and the out vectors should tolerate
	 * a small overflow, no bigger than, respectively,
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t block_rows = 1, block_cols = 1;	/* fallback blocking: experimental, for the bounded box patch */

	if(rpntr && cpntr)
	{
		block_cols = cpntr[1]-cpntr[0];
		block_rows = rpntr[1]-rpntr[0];
	}

	if(block_rows == 1 && block_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_double_H__tN_r1_c1_uu_sS_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* every other block shape */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_double_H__tN_r1_c1_ul_sS_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_double_H_u_tN_sS_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "infty_norm" operation.
	 *
	 * The block shape is taken from the first two entries of rpntr and cpntr;
	 * if either array is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is forwarded to the `_uu_` kernel; any other shape is
	 * forwarded to the `_ul_` kernel when RSB_WANT_LOOPING_KERNELS is defined,
	 * and reported as RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Being strictly blocked code, the rhs and the out vectors should tolerate
	 * a small overflow, no bigger than, respectively,
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t block_rows = 1, block_cols = 1;	/* fallback blocking: experimental, for the bounded box patch */

	if(rpntr && cpntr)
	{
		block_cols = cpntr[1]-cpntr[0];
		block_rows = rpntr[1]-rpntr[0];
	}

	if(block_rows == 1 && block_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_double_H__tN_r1_c1_uu_sS_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* every other block shape */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_double_H__tN_r1_c1_ul_sS_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_double_C_u_tT_sS_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "infty_norm" matrix operation
	 * (bindx is of type rsb_coo_idx_t in this variant).
	 * It picks the kernel function fitting the block size in use and hands
	 * all of its own arguments over to it, untouched.
	 *
	 * The block size is read from the first two entries of rpntr and cpntr
	 * when both arrays are supplied; if either is missing, 1 x 1 is assumed.
	 * A 1 x 1 block size goes to the "_uu_" kernel. Any other block size goes
	 * to the "_ul_" kernel when RSB_WANT_LOOPING_KERNELS is defined, and
	 * yields RSB_ERR_UNSUPPORTED_OPERATION when it is not.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * This being strictly blocked code, the rhs and the out vector should be
	 * allowed a small overflow, no bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	const int have_pntrs = (cpntr && rpntr);
	/* the 1 x 1 fallback is experimental, for the bounded box patch */
	const rsb_coo_idx_t blk_cols = have_pntrs ? cpntr[1]-cpntr[0] : 1;
	const rsb_coo_idx_t blk_rows = have_pntrs ? rpntr[1]-rpntr[0] : 1;

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_double_C__tT_r1_c1_uu_sS_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* any block size other than 1 x 1 */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_double_C__tT_r1_c1_ul_sS_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_double_C_u_tT_sS_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "infty_norm" matrix operation
	 * (bindx is of type rsb_coo_idx_t in this variant).
	 * It picks the kernel function fitting the block size in use and hands
	 * all of its own arguments over to it, untouched.
	 *
	 * The block size is read from the first two entries of rpntr and cpntr
	 * when both arrays are supplied; if either is missing, 1 x 1 is assumed.
	 * A 1 x 1 block size goes to the "_uu_" kernel. Any other block size goes
	 * to the "_ul_" kernel when RSB_WANT_LOOPING_KERNELS is defined, and
	 * yields RSB_ERR_UNSUPPORTED_OPERATION when it is not.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * This being strictly blocked code, the rhs and the out vector should be
	 * allowed a small overflow, no bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	const int have_pntrs = (cpntr && rpntr);
	/* the 1 x 1 fallback is experimental, for the bounded box patch */
	const rsb_coo_idx_t blk_cols = have_pntrs ? cpntr[1]-cpntr[0] : 1;
	const rsb_coo_idx_t blk_rows = have_pntrs ? rpntr[1]-rpntr[0] : 1;

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_double_C__tT_r1_c1_uu_sS_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* any block size other than 1 x 1 */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_double_C__tT_r1_c1_ul_sS_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_double_H_u_tT_sS_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "infty_norm" matrix operation
	 * (bindx is of type rsb_half_idx_t in this variant).
	 * It picks the kernel function fitting the block size in use and hands
	 * all of its own arguments over to it, untouched.
	 *
	 * The block size is read from the first two entries of rpntr and cpntr
	 * when both arrays are supplied; if either is missing, 1 x 1 is assumed.
	 * A 1 x 1 block size goes to the "_uu_" kernel. Any other block size goes
	 * to the "_ul_" kernel when RSB_WANT_LOOPING_KERNELS is defined, and
	 * yields RSB_ERR_UNSUPPORTED_OPERATION when it is not.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * This being strictly blocked code, the rhs and the out vector should be
	 * allowed a small overflow, no bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	const int have_pntrs = (cpntr && rpntr);
	/* the 1 x 1 fallback is experimental, for the bounded box patch */
	const rsb_coo_idx_t blk_cols = have_pntrs ? cpntr[1]-cpntr[0] : 1;
	const rsb_coo_idx_t blk_rows = have_pntrs ? rpntr[1]-rpntr[0] : 1;

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_double_H__tT_r1_c1_uu_sS_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* any block size other than 1 x 1 */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_double_H__tT_r1_c1_ul_sS_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_double_H_u_tT_sS_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "infty_norm" matrix operation
	 * (bindx is of type rsb_half_idx_t in this variant).
	 * It picks the kernel function fitting the block size in use and hands
	 * all of its own arguments over to it, untouched.
	 *
	 * The block size is read from the first two entries of rpntr and cpntr
	 * when both arrays are supplied; if either is missing, 1 x 1 is assumed.
	 * A 1 x 1 block size goes to the "_uu_" kernel. Any other block size goes
	 * to the "_ul_" kernel when RSB_WANT_LOOPING_KERNELS is defined, and
	 * yields RSB_ERR_UNSUPPORTED_OPERATION when it is not.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * This being strictly blocked code, the rhs and the out vector should be
	 * allowed a small overflow, no bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	const int have_pntrs = (cpntr && rpntr);
	/* the 1 x 1 fallback is experimental, for the bounded box patch */
	const rsb_coo_idx_t blk_cols = have_pntrs ? cpntr[1]-cpntr[0] : 1;
	const rsb_coo_idx_t blk_rows = have_pntrs ? rpntr[1]-rpntr[0] : 1;

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_double_H__tT_r1_c1_uu_sS_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* any block size other than 1 x 1 */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_double_H__tT_r1_c1_ul_sS_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_double_C_u_tC_sS_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "infty_norm" matrix operation
	 * (bindx is of type rsb_coo_idx_t in this variant).
	 * It picks the kernel function fitting the block size in use and hands
	 * all of its own arguments over to it, untouched.
	 *
	 * The block size is read from the first two entries of rpntr and cpntr
	 * when both arrays are supplied; if either is missing, 1 x 1 is assumed.
	 * A 1 x 1 block size goes to the "_uu_" kernel. Any other block size goes
	 * to the "_ul_" kernel when RSB_WANT_LOOPING_KERNELS is defined, and
	 * yields RSB_ERR_UNSUPPORTED_OPERATION when it is not.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * This being strictly blocked code, the rhs and the out vector should be
	 * allowed a small overflow, no bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	const int have_pntrs = (cpntr && rpntr);
	/* the 1 x 1 fallback is experimental, for the bounded box patch */
	const rsb_coo_idx_t blk_cols = have_pntrs ? cpntr[1]-cpntr[0] : 1;
	const rsb_coo_idx_t blk_rows = have_pntrs ? rpntr[1]-rpntr[0] : 1;

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_double_C__tC_r1_c1_uu_sS_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* any block size other than 1 x 1 */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_double_C__tC_r1_c1_ul_sS_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_double_C_u_tC_sS_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "infty_norm" matrix operation
	 * (bindx is of type rsb_coo_idx_t in this variant).
	 * It picks the kernel function fitting the block size in use and hands
	 * all of its own arguments over to it, untouched.
	 *
	 * The block size is read from the first two entries of rpntr and cpntr
	 * when both arrays are supplied; if either is missing, 1 x 1 is assumed.
	 * A 1 x 1 block size goes to the "_uu_" kernel. Any other block size goes
	 * to the "_ul_" kernel when RSB_WANT_LOOPING_KERNELS is defined, and
	 * yields RSB_ERR_UNSUPPORTED_OPERATION when it is not.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * This being strictly blocked code, the rhs and the out vector should be
	 * allowed a small overflow, no bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	const int have_pntrs = (cpntr && rpntr);
	/* the 1 x 1 fallback is experimental, for the bounded box patch */
	const rsb_coo_idx_t blk_cols = have_pntrs ? cpntr[1]-cpntr[0] : 1;
	const rsb_coo_idx_t blk_rows = have_pntrs ? rpntr[1]-rpntr[0] : 1;

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_double_C__tC_r1_c1_uu_sS_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* any block size other than 1 x 1 */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_double_C__tC_r1_c1_ul_sS_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_double_H_u_tC_sS_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "infty_norm" matrix operation
	 * (bindx is of type rsb_half_idx_t in this variant).
	 * It picks the kernel function fitting the block size in use and hands
	 * all of its own arguments over to it, untouched.
	 *
	 * The block size is read from the first two entries of rpntr and cpntr
	 * when both arrays are supplied; if either is missing, 1 x 1 is assumed.
	 * A 1 x 1 block size goes to the "_uu_" kernel. Any other block size goes
	 * to the "_ul_" kernel when RSB_WANT_LOOPING_KERNELS is defined, and
	 * yields RSB_ERR_UNSUPPORTED_OPERATION when it is not.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * This being strictly blocked code, the rhs and the out vector should be
	 * allowed a small overflow, no bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	const int have_pntrs = (cpntr && rpntr);
	/* the 1 x 1 fallback is experimental, for the bounded box patch */
	const rsb_coo_idx_t blk_cols = have_pntrs ? cpntr[1]-cpntr[0] : 1;
	const rsb_coo_idx_t blk_rows = have_pntrs ? rpntr[1]-rpntr[0] : 1;

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_double_H__tC_r1_c1_uu_sS_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* any block size other than 1 x 1 */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_double_H__tC_r1_c1_ul_sS_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_double_H_u_tC_sS_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "infty_norm" matrix operation
	 * (bindx is of type rsb_half_idx_t in this variant).
	 * It picks the kernel function fitting the block size in use and hands
	 * all of its own arguments over to it, untouched.
	 *
	 * The block size is read from the first two entries of rpntr and cpntr
	 * when both arrays are supplied; if either is missing, 1 x 1 is assumed.
	 * A 1 x 1 block size goes to the "_uu_" kernel. Any other block size goes
	 * to the "_ul_" kernel when RSB_WANT_LOOPING_KERNELS is defined, and
	 * yields RSB_ERR_UNSUPPORTED_OPERATION when it is not.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * This being strictly blocked code, the rhs and the out vector should be
	 * allowed a small overflow, no bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	const int have_pntrs = (cpntr && rpntr);
	/* the 1 x 1 fallback is experimental, for the bounded box patch */
	const rsb_coo_idx_t blk_cols = have_pntrs ? cpntr[1]-cpntr[0] : 1;
	const rsb_coo_idx_t blk_rows = have_pntrs ? rpntr[1]-rpntr[0] : 1;

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_double_H__tC_r1_c1_uu_sS_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* any block size other than 1 x 1 */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_double_H__tC_r1_c1_ul_sS_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_double_C_u_tN_sH_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "infty_norm" matrix operation
	 * (bindx is of type rsb_coo_idx_t in this variant).
	 * It picks the kernel function fitting the block size in use and hands
	 * all of its own arguments over to it, untouched.
	 *
	 * The block size is read from the first two entries of rpntr and cpntr
	 * when both arrays are supplied; if either is missing, 1 x 1 is assumed.
	 * A 1 x 1 block size goes to the "_uu_" kernel. Any other block size goes
	 * to the "_ul_" kernel when RSB_WANT_LOOPING_KERNELS is defined, and
	 * yields RSB_ERR_UNSUPPORTED_OPERATION when it is not.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * This being strictly blocked code, the rhs and the out vector should be
	 * allowed a small overflow, no bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	const int have_pntrs = (cpntr && rpntr);
	/* the 1 x 1 fallback is experimental, for the bounded box patch */
	const rsb_coo_idx_t blk_cols = have_pntrs ? cpntr[1]-cpntr[0] : 1;
	const rsb_coo_idx_t blk_rows = have_pntrs ? rpntr[1]-rpntr[0] : 1;

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_double_C__tN_r1_c1_uu_sH_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* any block size other than 1 x 1 */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_double_C__tN_r1_c1_ul_sH_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_double_C_u_tN_sH_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "infty_norm" matrix operation
	 * (bindx is of type rsb_coo_idx_t in this variant).
	 * It picks the kernel function fitting the block size in use and hands
	 * all of its own arguments over to it, untouched.
	 *
	 * The block size is read from the first two entries of rpntr and cpntr
	 * when both arrays are supplied; if either is missing, 1 x 1 is assumed.
	 * A 1 x 1 block size goes to the "_uu_" kernel. Any other block size goes
	 * to the "_ul_" kernel when RSB_WANT_LOOPING_KERNELS is defined, and
	 * yields RSB_ERR_UNSUPPORTED_OPERATION when it is not.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * This being strictly blocked code, the rhs and the out vector should be
	 * allowed a small overflow, no bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	const int have_pntrs = (cpntr && rpntr);
	/* the 1 x 1 fallback is experimental, for the bounded box patch */
	const rsb_coo_idx_t blk_cols = have_pntrs ? cpntr[1]-cpntr[0] : 1;
	const rsb_coo_idx_t blk_rows = have_pntrs ? rpntr[1]-rpntr[0] : 1;

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_double_C__tN_r1_c1_uu_sH_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* any block size other than 1 x 1 */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_double_C__tN_r1_c1_ul_sH_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_double_H_u_tN_sH_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "infty_norm" matrix operation
	 * (bindx is of type rsb_half_idx_t in this variant).
	 * It picks the kernel function fitting the block size in use and hands
	 * all of its own arguments over to it, untouched.
	 *
	 * The block size is read from the first two entries of rpntr and cpntr
	 * when both arrays are supplied; if either is missing, 1 x 1 is assumed.
	 * A 1 x 1 block size goes to the "_uu_" kernel. Any other block size goes
	 * to the "_ul_" kernel when RSB_WANT_LOOPING_KERNELS is defined, and
	 * yields RSB_ERR_UNSUPPORTED_OPERATION when it is not.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * This being strictly blocked code, the rhs and the out vector should be
	 * allowed a small overflow, no bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	const int have_pntrs = (cpntr && rpntr);
	/* the 1 x 1 fallback is experimental, for the bounded box patch */
	const rsb_coo_idx_t blk_cols = have_pntrs ? cpntr[1]-cpntr[0] : 1;
	const rsb_coo_idx_t blk_rows = have_pntrs ? rpntr[1]-rpntr[0] : 1;

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_double_H__tN_r1_c1_uu_sH_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* any block size other than 1 x 1 */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_double_H__tN_r1_c1_ul_sH_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_double_H_u_tN_sH_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "infty_norm" matrix operation
	 * (bindx is of type rsb_half_idx_t in this variant).
	 * It picks the kernel function fitting the block size in use and hands
	 * all of its own arguments over to it, untouched.
	 *
	 * The block size is read from the first two entries of rpntr and cpntr
	 * when both arrays are supplied; if either is missing, 1 x 1 is assumed.
	 * A 1 x 1 block size goes to the "_uu_" kernel. Any other block size goes
	 * to the "_ul_" kernel when RSB_WANT_LOOPING_KERNELS is defined, and
	 * yields RSB_ERR_UNSUPPORTED_OPERATION when it is not.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * This being strictly blocked code, the rhs and the out vector should be
	 * allowed a small overflow, no bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	const int have_pntrs = (cpntr && rpntr);
	/* the 1 x 1 fallback is experimental, for the bounded box patch */
	const rsb_coo_idx_t blk_cols = have_pntrs ? cpntr[1]-cpntr[0] : 1;
	const rsb_coo_idx_t blk_rows = have_pntrs ? rpntr[1]-rpntr[0] : 1;

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_double_H__tN_r1_c1_uu_sH_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* any block size other than 1 x 1 */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_double_H__tN_r1_c1_ul_sH_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_double_C_u_tT_sH_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "infty_norm" matrix operation
	 * (bindx is of type rsb_coo_idx_t in this variant).
	 * It picks the kernel function fitting the block size in use and hands
	 * all of its own arguments over to it, untouched.
	 *
	 * The block size is read from the first two entries of rpntr and cpntr
	 * when both arrays are supplied; if either is missing, 1 x 1 is assumed.
	 * A 1 x 1 block size goes to the "_uu_" kernel. Any other block size goes
	 * to the "_ul_" kernel when RSB_WANT_LOOPING_KERNELS is defined, and
	 * yields RSB_ERR_UNSUPPORTED_OPERATION when it is not.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * This being strictly blocked code, the rhs and the out vector should be
	 * allowed a small overflow, no bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	const int have_pntrs = (cpntr && rpntr);
	/* the 1 x 1 fallback is experimental, for the bounded box patch */
	const rsb_coo_idx_t blk_cols = have_pntrs ? cpntr[1]-cpntr[0] : 1;
	const rsb_coo_idx_t blk_rows = have_pntrs ? rpntr[1]-rpntr[0] : 1;

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_double_C__tT_r1_c1_uu_sH_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* any block size other than 1 x 1 */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_double_C__tT_r1_c1_ul_sH_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_double_C_u_tT_sH_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "infty_norm" matrix operation
	 * (bindx is of type rsb_coo_idx_t in this variant).
	 * It picks the kernel function fitting the block size in use and hands
	 * all of its own arguments over to it, untouched.
	 *
	 * The block size is read from the first two entries of rpntr and cpntr
	 * when both arrays are supplied; if either is missing, 1 x 1 is assumed.
	 * A 1 x 1 block size goes to the "_uu_" kernel. Any other block size goes
	 * to the "_ul_" kernel when RSB_WANT_LOOPING_KERNELS is defined, and
	 * yields RSB_ERR_UNSUPPORTED_OPERATION when it is not.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * This being strictly blocked code, the rhs and the out vector should be
	 * allowed a small overflow, no bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	const int have_pntrs = (cpntr && rpntr);
	/* the 1 x 1 fallback is experimental, for the bounded box patch */
	const rsb_coo_idx_t blk_cols = have_pntrs ? cpntr[1]-cpntr[0] : 1;
	const rsb_coo_idx_t blk_rows = have_pntrs ? rpntr[1]-rpntr[0] : 1;

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_double_C__tT_r1_c1_uu_sH_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* any block size other than 1 x 1 */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_double_C__tT_r1_c1_ul_sH_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_double_H_u_tT_sH_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "infty_norm" matrix operation
	 * (bindx is of type rsb_half_idx_t in this variant).
	 * It picks the kernel function fitting the block size in use and hands
	 * all of its own arguments over to it, untouched.
	 *
	 * The block size is read from the first two entries of rpntr and cpntr
	 * when both arrays are supplied; if either is missing, 1 x 1 is assumed.
	 * A 1 x 1 block size goes to the "_uu_" kernel. Any other block size goes
	 * to the "_ul_" kernel when RSB_WANT_LOOPING_KERNELS is defined, and
	 * yields RSB_ERR_UNSUPPORTED_OPERATION when it is not.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * This being strictly blocked code, the rhs and the out vector should be
	 * allowed a small overflow, no bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	const int have_pntrs = (cpntr && rpntr);
	/* the 1 x 1 fallback is experimental, for the bounded box patch */
	const rsb_coo_idx_t blk_cols = have_pntrs ? cpntr[1]-cpntr[0] : 1;
	const rsb_coo_idx_t blk_rows = have_pntrs ? rpntr[1]-rpntr[0] : 1;

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_double_H__tT_r1_c1_uu_sH_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* any block size other than 1 x 1 */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_double_H__tT_r1_c1_ul_sH_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_double_H_u_tT_sH_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "infty_norm" matrix operation.
	 *
	 * The block shape is read from the first rpntr/cpntr intervals when both
	 * arrays are given, and defaults to 1 x 1 otherwise.  A 1 x 1 block is
	 * handed to the dedicated "uu" kernel; any other shape is handed to the
	 * "ul" kernel, which is referenced only if RSB_WANT_LOOPING_KERNELS is
	 * defined.
	 *
	 * \return the error value of the selected kernel, or
	 * RSB_ERR_UNSUPPORTED_OPERATION when no kernel is compiled in for the
	 * block shape (\rsb_errval_inp_param_msg)
	 *
	 * This being strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* experimental default, for the bounded box patch */

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)	/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_double_H__tT_r1_c1_uu_sH_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );

	/* every other block shape: looping kernel, if available */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_double_H__tT_r1_c1_ul_sH_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_double_C_u_tC_sH_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "infty_norm" matrix operation.
	 *
	 * The block shape is read from the first rpntr/cpntr intervals when both
	 * arrays are given, and defaults to 1 x 1 otherwise.  A 1 x 1 block is
	 * handed to the dedicated "uu" kernel; any other shape is handed to the
	 * "ul" kernel, which is referenced only if RSB_WANT_LOOPING_KERNELS is
	 * defined.
	 *
	 * \return the error value of the selected kernel, or
	 * RSB_ERR_UNSUPPORTED_OPERATION when no kernel is compiled in for the
	 * block shape (\rsb_errval_inp_param_msg)
	 *
	 * This being strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* experimental default, for the bounded box patch */

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)	/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_double_C__tC_r1_c1_uu_sH_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );

	/* every other block shape: looping kernel, if available */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_double_C__tC_r1_c1_ul_sH_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_double_C_u_tC_sH_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "infty_norm" matrix operation.
	 *
	 * The block shape is read from the first rpntr/cpntr intervals when both
	 * arrays are given, and defaults to 1 x 1 otherwise.  A 1 x 1 block is
	 * handed to the dedicated "uu" kernel; any other shape is handed to the
	 * "ul" kernel, which is referenced only if RSB_WANT_LOOPING_KERNELS is
	 * defined.
	 *
	 * \return the error value of the selected kernel, or
	 * RSB_ERR_UNSUPPORTED_OPERATION when no kernel is compiled in for the
	 * block shape (\rsb_errval_inp_param_msg)
	 *
	 * This being strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* experimental default, for the bounded box patch */

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)	/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_double_C__tC_r1_c1_uu_sH_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );

	/* every other block shape: looping kernel, if available */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_double_C__tC_r1_c1_ul_sH_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_double_H_u_tC_sH_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "infty_norm" matrix operation.
	 *
	 * The block shape is read from the first rpntr/cpntr intervals when both
	 * arrays are given, and defaults to 1 x 1 otherwise.  A 1 x 1 block is
	 * handed to the dedicated "uu" kernel; any other shape is handed to the
	 * "ul" kernel, which is referenced only if RSB_WANT_LOOPING_KERNELS is
	 * defined.
	 *
	 * \return the error value of the selected kernel, or
	 * RSB_ERR_UNSUPPORTED_OPERATION when no kernel is compiled in for the
	 * block shape (\rsb_errval_inp_param_msg)
	 *
	 * This being strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* experimental default, for the bounded box patch */

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)	/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_double_H__tC_r1_c1_uu_sH_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );

	/* every other block shape: looping kernel, if available */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_double_H__tC_r1_c1_ul_sH_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_double_H_u_tC_sH_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "infty_norm" matrix operation.
	 *
	 * The block shape is read from the first rpntr/cpntr intervals when both
	 * arrays are given, and defaults to 1 x 1 otherwise.  A 1 x 1 block is
	 * handed to the dedicated "uu" kernel; any other shape is handed to the
	 * "ul" kernel, which is referenced only if RSB_WANT_LOOPING_KERNELS is
	 * defined.
	 *
	 * \return the error value of the selected kernel, or
	 * RSB_ERR_UNSUPPORTED_OPERATION when no kernel is compiled in for the
	 * block shape (\rsb_errval_inp_param_msg)
	 *
	 * This being strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* experimental default, for the bounded box patch */

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)	/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_double_H__tC_r1_c1_uu_sH_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );

	/* every other block shape: looping kernel, if available */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_double_H__tC_r1_c1_ul_sH_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_double_C_u_tN_sU_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" matrix operation.
	 *
	 * The block shape is read from the first rpntr/cpntr intervals when both
	 * arrays are given, and defaults to 1 x 1 otherwise.  A 1 x 1 block is
	 * handed to the dedicated "uu" kernel; any other shape is handed to the
	 * "ul" kernel, which is referenced only if RSB_WANT_LOOPING_KERNELS is
	 * defined.
	 *
	 * \return the error value of the selected kernel, or
	 * RSB_ERR_UNSUPPORTED_OPERATION when no kernel is compiled in for the
	 * block shape (\rsb_errval_inp_param_msg)
	 *
	 * This being strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* experimental default, for the bounded box patch */

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)	/* 1 1 BCSR */
		return rsb__BCSR_rowssums_double_C__tN_r1_c1_uu_sU_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );

	/* every other block shape: looping kernel, if available */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_double_C__tN_r1_c1_ul_sU_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_double_C_u_tN_sU_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" matrix operation.
	 *
	 * The block shape is read from the first rpntr/cpntr intervals when both
	 * arrays are given, and defaults to 1 x 1 otherwise.  A 1 x 1 block is
	 * handed to the dedicated "uu" kernel; any other shape is handed to the
	 * "ul" kernel, which is referenced only if RSB_WANT_LOOPING_KERNELS is
	 * defined.
	 *
	 * \return the error value of the selected kernel, or
	 * RSB_ERR_UNSUPPORTED_OPERATION when no kernel is compiled in for the
	 * block shape (\rsb_errval_inp_param_msg)
	 *
	 * This being strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* experimental default, for the bounded box patch */

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)	/* 1 1 BCSR */
		return rsb__BCSR_rowssums_double_C__tN_r1_c1_uu_sU_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );

	/* every other block shape: looping kernel, if available */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_double_C__tN_r1_c1_ul_sU_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_double_H_u_tN_sU_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" matrix operation.
	 *
	 * The block shape is read from the first rpntr/cpntr intervals when both
	 * arrays are given, and defaults to 1 x 1 otherwise.  A 1 x 1 block is
	 * handed to the dedicated "uu" kernel; any other shape is handed to the
	 * "ul" kernel, which is referenced only if RSB_WANT_LOOPING_KERNELS is
	 * defined.
	 *
	 * \return the error value of the selected kernel, or
	 * RSB_ERR_UNSUPPORTED_OPERATION when no kernel is compiled in for the
	 * block shape (\rsb_errval_inp_param_msg)
	 *
	 * This being strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* experimental default, for the bounded box patch */

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)	/* 1 1 BCSR */
		return rsb__BCSR_rowssums_double_H__tN_r1_c1_uu_sU_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );

	/* every other block shape: looping kernel, if available */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_double_H__tN_r1_c1_ul_sU_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_double_H_u_tN_sU_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" matrix operation.
	 *
	 * The block shape is read from the first rpntr/cpntr intervals when both
	 * arrays are given, and defaults to 1 x 1 otherwise.  A 1 x 1 block is
	 * handed to the dedicated "uu" kernel; any other shape is handed to the
	 * "ul" kernel, which is referenced only if RSB_WANT_LOOPING_KERNELS is
	 * defined.
	 *
	 * \return the error value of the selected kernel, or
	 * RSB_ERR_UNSUPPORTED_OPERATION when no kernel is compiled in for the
	 * block shape (\rsb_errval_inp_param_msg)
	 *
	 * This being strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* experimental default, for the bounded box patch */

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)	/* 1 1 BCSR */
		return rsb__BCSR_rowssums_double_H__tN_r1_c1_uu_sU_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );

	/* every other block shape: looping kernel, if available */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_double_H__tN_r1_c1_ul_sU_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_double_C_u_tT_sU_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" matrix operation.
	 *
	 * The block shape is read from the first rpntr/cpntr intervals when both
	 * arrays are given, and defaults to 1 x 1 otherwise.  A 1 x 1 block is
	 * handed to the dedicated "uu" kernel; any other shape is handed to the
	 * "ul" kernel, which is referenced only if RSB_WANT_LOOPING_KERNELS is
	 * defined.
	 *
	 * \return the error value of the selected kernel, or
	 * RSB_ERR_UNSUPPORTED_OPERATION when no kernel is compiled in for the
	 * block shape (\rsb_errval_inp_param_msg)
	 *
	 * This being strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* experimental default, for the bounded box patch */

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)	/* 1 1 BCSR */
		return rsb__BCSR_rowssums_double_C__tT_r1_c1_uu_sU_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );

	/* every other block shape: looping kernel, if available */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_double_C__tT_r1_c1_ul_sU_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_double_C_u_tT_sU_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" matrix operation.
	 *
	 * The block shape is read from the first rpntr/cpntr intervals when both
	 * arrays are given, and defaults to 1 x 1 otherwise.  A 1 x 1 block is
	 * handed to the dedicated "uu" kernel; any other shape is handed to the
	 * "ul" kernel, which is referenced only if RSB_WANT_LOOPING_KERNELS is
	 * defined.
	 *
	 * \return the error value of the selected kernel, or
	 * RSB_ERR_UNSUPPORTED_OPERATION when no kernel is compiled in for the
	 * block shape (\rsb_errval_inp_param_msg)
	 *
	 * This being strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* experimental default, for the bounded box patch */

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)	/* 1 1 BCSR */
		return rsb__BCSR_rowssums_double_C__tT_r1_c1_uu_sU_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );

	/* every other block shape: looping kernel, if available */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_double_C__tT_r1_c1_ul_sU_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_double_H_u_tT_sU_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" matrix operation.
	 *
	 * The block shape is read from the first rpntr/cpntr intervals when both
	 * arrays are given, and defaults to 1 x 1 otherwise.  A 1 x 1 block is
	 * handed to the dedicated "uu" kernel; any other shape is handed to the
	 * "ul" kernel, which is referenced only if RSB_WANT_LOOPING_KERNELS is
	 * defined.
	 *
	 * \return the error value of the selected kernel, or
	 * RSB_ERR_UNSUPPORTED_OPERATION when no kernel is compiled in for the
	 * block shape (\rsb_errval_inp_param_msg)
	 *
	 * This being strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* experimental default, for the bounded box patch */

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)	/* 1 1 BCSR */
		return rsb__BCSR_rowssums_double_H__tT_r1_c1_uu_sU_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );

	/* every other block shape: looping kernel, if available */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_double_H__tT_r1_c1_ul_sU_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_double_H_u_tT_sU_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" matrix operation.
	 *
	 * The block shape is read from the first rpntr/cpntr intervals when both
	 * arrays are given, and defaults to 1 x 1 otherwise.  A 1 x 1 block is
	 * handed to the dedicated "uu" kernel; any other shape is handed to the
	 * "ul" kernel, which is referenced only if RSB_WANT_LOOPING_KERNELS is
	 * defined.
	 *
	 * \return the error value of the selected kernel, or
	 * RSB_ERR_UNSUPPORTED_OPERATION when no kernel is compiled in for the
	 * block shape (\rsb_errval_inp_param_msg)
	 *
	 * This being strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* experimental default, for the bounded box patch */

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)	/* 1 1 BCSR */
		return rsb__BCSR_rowssums_double_H__tT_r1_c1_uu_sU_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );

	/* every other block shape: looping kernel, if available */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_double_H__tT_r1_c1_ul_sU_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_double_C_u_tC_sU_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" matrix operation.
	 *
	 * The block shape is read from the first rpntr/cpntr intervals when both
	 * arrays are given, and defaults to 1 x 1 otherwise.  A 1 x 1 block is
	 * handed to the dedicated "uu" kernel; any other shape is handed to the
	 * "ul" kernel, which is referenced only if RSB_WANT_LOOPING_KERNELS is
	 * defined.
	 *
	 * \return the error value of the selected kernel, or
	 * RSB_ERR_UNSUPPORTED_OPERATION when no kernel is compiled in for the
	 * block shape (\rsb_errval_inp_param_msg)
	 *
	 * This being strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* experimental default, for the bounded box patch */

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)	/* 1 1 BCSR */
		return rsb__BCSR_rowssums_double_C__tC_r1_c1_uu_sU_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );

	/* every other block shape: looping kernel, if available */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_double_C__tC_r1_c1_ul_sU_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_double_C_u_tC_sU_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" matrix operation.
	 *
	 * The block shape is read from the first rpntr/cpntr intervals when both
	 * arrays are given, and defaults to 1 x 1 otherwise.  A 1 x 1 block is
	 * handed to the dedicated "uu" kernel; any other shape is handed to the
	 * "ul" kernel, which is referenced only if RSB_WANT_LOOPING_KERNELS is
	 * defined.
	 *
	 * \return the error value of the selected kernel, or
	 * RSB_ERR_UNSUPPORTED_OPERATION when no kernel is compiled in for the
	 * block shape (\rsb_errval_inp_param_msg)
	 *
	 * This being strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* experimental default, for the bounded box patch */

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)	/* 1 1 BCSR */
		return rsb__BCSR_rowssums_double_C__tC_r1_c1_uu_sU_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );

	/* every other block shape: looping kernel, if available */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_double_C__tC_r1_c1_ul_sU_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_double_H_u_tC_sU_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" operation: all arguments are
	 * forwarded, unchanged, to the kernel selected for the block size, and
	 * that kernel's return value is handed back to the caller.
	 *
	 * The block size is read from the first two entries of rpntr and cpntr
	 * when both arrays are supplied; if either is missing, a 1 x 1 block is
	 * assumed (experimental, for the bounded box patch).
	 *
	 * A 1 x 1 block is served by the r1_c1 "uu" kernel. Any other size goes
	 * to the r1_c1 "ul" kernel if RSB_WANT_LOOPING_KERNELS is defined, and
	 * yields RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Since this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_rowssums_double_H__tC_r1_c1_uu_sU_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* every other block size: looping kernel, if compiled in */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_double_H__tC_r1_c1_ul_sU_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_double_H_u_tC_sU_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" operation: all arguments are
	 * forwarded, unchanged, to the kernel selected for the block size, and
	 * that kernel's return value is handed back to the caller.
	 *
	 * The block size is read from the first two entries of rpntr and cpntr
	 * when both arrays are supplied; if either is missing, a 1 x 1 block is
	 * assumed (experimental, for the bounded box patch).
	 *
	 * A 1 x 1 block is served by the r1_c1 "uu" kernel. Any other size goes
	 * to the r1_c1 "ul" kernel if RSB_WANT_LOOPING_KERNELS is defined, and
	 * yields RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Since this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_rowssums_double_H__tC_r1_c1_uu_sU_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* every other block size: looping kernel, if compiled in */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_double_H__tC_r1_c1_ul_sU_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_double_C_u_tN_sS_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" operation: all arguments are
	 * forwarded, unchanged, to the kernel selected for the block size, and
	 * that kernel's return value is handed back to the caller.
	 *
	 * The block size is read from the first two entries of rpntr and cpntr
	 * when both arrays are supplied; if either is missing, a 1 x 1 block is
	 * assumed (experimental, for the bounded box patch).
	 *
	 * A 1 x 1 block is served by the r1_c1 "uu" kernel. Any other size goes
	 * to the r1_c1 "ul" kernel if RSB_WANT_LOOPING_KERNELS is defined, and
	 * yields RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Since this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_rowssums_double_C__tN_r1_c1_uu_sS_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* every other block size: looping kernel, if compiled in */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_double_C__tN_r1_c1_ul_sS_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_double_C_u_tN_sS_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" operation: all arguments are
	 * forwarded, unchanged, to the kernel selected for the block size, and
	 * that kernel's return value is handed back to the caller.
	 *
	 * The block size is read from the first two entries of rpntr and cpntr
	 * when both arrays are supplied; if either is missing, a 1 x 1 block is
	 * assumed (experimental, for the bounded box patch).
	 *
	 * A 1 x 1 block is served by the r1_c1 "uu" kernel. Any other size goes
	 * to the r1_c1 "ul" kernel if RSB_WANT_LOOPING_KERNELS is defined, and
	 * yields RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Since this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_rowssums_double_C__tN_r1_c1_uu_sS_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* every other block size: looping kernel, if compiled in */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_double_C__tN_r1_c1_ul_sS_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_double_H_u_tN_sS_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" operation: all arguments are
	 * forwarded, unchanged, to the kernel selected for the block size, and
	 * that kernel's return value is handed back to the caller.
	 *
	 * The block size is read from the first two entries of rpntr and cpntr
	 * when both arrays are supplied; if either is missing, a 1 x 1 block is
	 * assumed (experimental, for the bounded box patch).
	 *
	 * A 1 x 1 block is served by the r1_c1 "uu" kernel. Any other size goes
	 * to the r1_c1 "ul" kernel if RSB_WANT_LOOPING_KERNELS is defined, and
	 * yields RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Since this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_rowssums_double_H__tN_r1_c1_uu_sS_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* every other block size: looping kernel, if compiled in */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_double_H__tN_r1_c1_ul_sS_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_double_H_u_tN_sS_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" operation: all arguments are
	 * forwarded, unchanged, to the kernel selected for the block size, and
	 * that kernel's return value is handed back to the caller.
	 *
	 * The block size is read from the first two entries of rpntr and cpntr
	 * when both arrays are supplied; if either is missing, a 1 x 1 block is
	 * assumed (experimental, for the bounded box patch).
	 *
	 * A 1 x 1 block is served by the r1_c1 "uu" kernel. Any other size goes
	 * to the r1_c1 "ul" kernel if RSB_WANT_LOOPING_KERNELS is defined, and
	 * yields RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Since this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_rowssums_double_H__tN_r1_c1_uu_sS_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* every other block size: looping kernel, if compiled in */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_double_H__tN_r1_c1_ul_sS_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_double_C_u_tT_sS_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" operation: all arguments are
	 * forwarded, unchanged, to the kernel selected for the block size, and
	 * that kernel's return value is handed back to the caller.
	 *
	 * The block size is read from the first two entries of rpntr and cpntr
	 * when both arrays are supplied; if either is missing, a 1 x 1 block is
	 * assumed (experimental, for the bounded box patch).
	 *
	 * A 1 x 1 block is served by the r1_c1 "uu" kernel. Any other size goes
	 * to the r1_c1 "ul" kernel if RSB_WANT_LOOPING_KERNELS is defined, and
	 * yields RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Since this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_rowssums_double_C__tT_r1_c1_uu_sS_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* every other block size: looping kernel, if compiled in */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_double_C__tT_r1_c1_ul_sS_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_double_C_u_tT_sS_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" operation: all arguments are
	 * forwarded, unchanged, to the kernel selected for the block size, and
	 * that kernel's return value is handed back to the caller.
	 *
	 * The block size is read from the first two entries of rpntr and cpntr
	 * when both arrays are supplied; if either is missing, a 1 x 1 block is
	 * assumed (experimental, for the bounded box patch).
	 *
	 * A 1 x 1 block is served by the r1_c1 "uu" kernel. Any other size goes
	 * to the r1_c1 "ul" kernel if RSB_WANT_LOOPING_KERNELS is defined, and
	 * yields RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Since this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_rowssums_double_C__tT_r1_c1_uu_sS_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* every other block size: looping kernel, if compiled in */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_double_C__tT_r1_c1_ul_sS_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_double_H_u_tT_sS_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" operation: all arguments are
	 * forwarded, unchanged, to the kernel selected for the block size, and
	 * that kernel's return value is handed back to the caller.
	 *
	 * The block size is read from the first two entries of rpntr and cpntr
	 * when both arrays are supplied; if either is missing, a 1 x 1 block is
	 * assumed (experimental, for the bounded box patch).
	 *
	 * A 1 x 1 block is served by the r1_c1 "uu" kernel. Any other size goes
	 * to the r1_c1 "ul" kernel if RSB_WANT_LOOPING_KERNELS is defined, and
	 * yields RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Since this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_rowssums_double_H__tT_r1_c1_uu_sS_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* every other block size: looping kernel, if compiled in */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_double_H__tT_r1_c1_ul_sS_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_double_H_u_tT_sS_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" operation: all arguments are
	 * forwarded, unchanged, to the kernel selected for the block size, and
	 * that kernel's return value is handed back to the caller.
	 *
	 * The block size is read from the first two entries of rpntr and cpntr
	 * when both arrays are supplied; if either is missing, a 1 x 1 block is
	 * assumed (experimental, for the bounded box patch).
	 *
	 * A 1 x 1 block is served by the r1_c1 "uu" kernel. Any other size goes
	 * to the r1_c1 "ul" kernel if RSB_WANT_LOOPING_KERNELS is defined, and
	 * yields RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Since this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_rowssums_double_H__tT_r1_c1_uu_sS_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* every other block size: looping kernel, if compiled in */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_double_H__tT_r1_c1_ul_sS_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_double_C_u_tC_sS_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" operation: all arguments are
	 * forwarded, unchanged, to the kernel selected for the block size, and
	 * that kernel's return value is handed back to the caller.
	 *
	 * The block size is read from the first two entries of rpntr and cpntr
	 * when both arrays are supplied; if either is missing, a 1 x 1 block is
	 * assumed (experimental, for the bounded box patch).
	 *
	 * A 1 x 1 block is served by the r1_c1 "uu" kernel. Any other size goes
	 * to the r1_c1 "ul" kernel if RSB_WANT_LOOPING_KERNELS is defined, and
	 * yields RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Since this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_rowssums_double_C__tC_r1_c1_uu_sS_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* every other block size: looping kernel, if compiled in */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_double_C__tC_r1_c1_ul_sS_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_double_C_u_tC_sS_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" operation: all arguments are
	 * forwarded, unchanged, to the kernel selected for the block size, and
	 * that kernel's return value is handed back to the caller.
	 *
	 * The block size is read from the first two entries of rpntr and cpntr
	 * when both arrays are supplied; if either is missing, a 1 x 1 block is
	 * assumed (experimental, for the bounded box patch).
	 *
	 * A 1 x 1 block is served by the r1_c1 "uu" kernel. Any other size goes
	 * to the r1_c1 "ul" kernel if RSB_WANT_LOOPING_KERNELS is defined, and
	 * yields RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Since this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_rowssums_double_C__tC_r1_c1_uu_sS_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* every other block size: looping kernel, if compiled in */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_double_C__tC_r1_c1_ul_sS_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_double_H_u_tC_sS_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" operation: all arguments are
	 * forwarded, unchanged, to the kernel selected for the block size, and
	 * that kernel's return value is handed back to the caller.
	 *
	 * The block size is read from the first two entries of rpntr and cpntr
	 * when both arrays are supplied; if either is missing, a 1 x 1 block is
	 * assumed (experimental, for the bounded box patch).
	 *
	 * A 1 x 1 block is served by the r1_c1 "uu" kernel. Any other size goes
	 * to the r1_c1 "ul" kernel if RSB_WANT_LOOPING_KERNELS is defined, and
	 * yields RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Since this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_rowssums_double_H__tC_r1_c1_uu_sS_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* every other block size: looping kernel, if compiled in */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_double_H__tC_r1_c1_ul_sS_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_double_H_u_tC_sS_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" operation: all arguments are
	 * forwarded, unchanged, to the kernel selected for the block size, and
	 * that kernel's return value is handed back to the caller.
	 *
	 * The block size is read from the first two entries of rpntr and cpntr
	 * when both arrays are supplied; if either is missing, a 1 x 1 block is
	 * assumed (experimental, for the bounded box patch).
	 *
	 * A 1 x 1 block is served by the r1_c1 "uu" kernel. Any other size goes
	 * to the r1_c1 "ul" kernel if RSB_WANT_LOOPING_KERNELS is defined, and
	 * yields RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Since this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_rowssums_double_H__tC_r1_c1_uu_sS_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* every other block size: looping kernel, if compiled in */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_double_H__tC_r1_c1_ul_sS_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_double_C_u_tN_sH_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" operation: all arguments are
	 * forwarded, unchanged, to the kernel selected for the block size, and
	 * that kernel's return value is handed back to the caller.
	 *
	 * The block size is read from the first two entries of rpntr and cpntr
	 * when both arrays are supplied; if either is missing, a 1 x 1 block is
	 * assumed (experimental, for the bounded box patch).
	 *
	 * A 1 x 1 block is served by the r1_c1 "uu" kernel. Any other size goes
	 * to the r1_c1 "ul" kernel if RSB_WANT_LOOPING_KERNELS is defined, and
	 * yields RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Since this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_rowssums_double_C__tN_r1_c1_uu_sH_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* every other block size: looping kernel, if compiled in */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_double_C__tN_r1_c1_ul_sH_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_double_C_u_tN_sH_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "rowssums" operation.
	 *
	 * The block shape is taken from the first two entries of rpntr and cpntr
	 * when both arrays are given; if either is NULL, a 1 x 1 blocking is
	 * assumed. A 1 x 1 blocking is forwarded to the r1_c1 "uu" kernel; any
	 * other shape goes to the r1_c1 "ul" kernel, which is referenced only
	 * when RSB_WANT_LOOPING_KERNELS is defined.
	 *
	 * Returns the error value of the invoked kernel, or
	 * RSB_ERR_UNSUPPORTED_OPERATION when no kernel is available.
	 *
	 * Generator note: being strictly blocked code, the rhs and the out vector
	 * should accept a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* fallback blocking (experimental, for the bounded box patch) */

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	/* 1 1 BCSR */
	if(blk_rows == 1 && blk_cols == 1)
		return rsb__BCSR_rowssums_double_C__tN_r1_c1_uu_sH_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_double_C__tN_r1_c1_ul_sH_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_double_H_u_tN_sH_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "rowssums" operation.
	 *
	 * The block shape is taken from the first two entries of rpntr and cpntr
	 * when both arrays are given; if either is NULL, a 1 x 1 blocking is
	 * assumed. A 1 x 1 blocking is forwarded to the r1_c1 "uu" kernel; any
	 * other shape goes to the r1_c1 "ul" kernel, which is referenced only
	 * when RSB_WANT_LOOPING_KERNELS is defined.
	 *
	 * Returns the error value of the invoked kernel, or
	 * RSB_ERR_UNSUPPORTED_OPERATION when no kernel is available.
	 *
	 * Generator note: being strictly blocked code, the rhs and the out vector
	 * should accept a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* fallback blocking (experimental, for the bounded box patch) */

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	/* 1 1 BCSR */
	if(blk_rows == 1 && blk_cols == 1)
		return rsb__BCSR_rowssums_double_H__tN_r1_c1_uu_sH_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_double_H__tN_r1_c1_ul_sH_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_double_H_u_tN_sH_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "rowssums" operation.
	 *
	 * The block shape is taken from the first two entries of rpntr and cpntr
	 * when both arrays are given; if either is NULL, a 1 x 1 blocking is
	 * assumed. A 1 x 1 blocking is forwarded to the r1_c1 "uu" kernel; any
	 * other shape goes to the r1_c1 "ul" kernel, which is referenced only
	 * when RSB_WANT_LOOPING_KERNELS is defined.
	 *
	 * Returns the error value of the invoked kernel, or
	 * RSB_ERR_UNSUPPORTED_OPERATION when no kernel is available.
	 *
	 * Generator note: being strictly blocked code, the rhs and the out vector
	 * should accept a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* fallback blocking (experimental, for the bounded box patch) */

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	/* 1 1 BCSR */
	if(blk_rows == 1 && blk_cols == 1)
		return rsb__BCSR_rowssums_double_H__tN_r1_c1_uu_sH_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_double_H__tN_r1_c1_ul_sH_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_double_C_u_tT_sH_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "rowssums" operation.
	 *
	 * The block shape is taken from the first two entries of rpntr and cpntr
	 * when both arrays are given; if either is NULL, a 1 x 1 blocking is
	 * assumed. A 1 x 1 blocking is forwarded to the r1_c1 "uu" kernel; any
	 * other shape goes to the r1_c1 "ul" kernel, which is referenced only
	 * when RSB_WANT_LOOPING_KERNELS is defined.
	 *
	 * Returns the error value of the invoked kernel, or
	 * RSB_ERR_UNSUPPORTED_OPERATION when no kernel is available.
	 *
	 * Generator note: being strictly blocked code, the rhs and the out vector
	 * should accept a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* fallback blocking (experimental, for the bounded box patch) */

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	/* 1 1 BCSR */
	if(blk_rows == 1 && blk_cols == 1)
		return rsb__BCSR_rowssums_double_C__tT_r1_c1_uu_sH_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_double_C__tT_r1_c1_ul_sH_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_double_C_u_tT_sH_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "rowssums" operation.
	 *
	 * The block shape is taken from the first two entries of rpntr and cpntr
	 * when both arrays are given; if either is NULL, a 1 x 1 blocking is
	 * assumed. A 1 x 1 blocking is forwarded to the r1_c1 "uu" kernel; any
	 * other shape goes to the r1_c1 "ul" kernel, which is referenced only
	 * when RSB_WANT_LOOPING_KERNELS is defined.
	 *
	 * Returns the error value of the invoked kernel, or
	 * RSB_ERR_UNSUPPORTED_OPERATION when no kernel is available.
	 *
	 * Generator note: being strictly blocked code, the rhs and the out vector
	 * should accept a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* fallback blocking (experimental, for the bounded box patch) */

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	/* 1 1 BCSR */
	if(blk_rows == 1 && blk_cols == 1)
		return rsb__BCSR_rowssums_double_C__tT_r1_c1_uu_sH_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_double_C__tT_r1_c1_ul_sH_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_double_H_u_tT_sH_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "rowssums" operation.
	 *
	 * The block shape is taken from the first two entries of rpntr and cpntr
	 * when both arrays are given; if either is NULL, a 1 x 1 blocking is
	 * assumed. A 1 x 1 blocking is forwarded to the r1_c1 "uu" kernel; any
	 * other shape goes to the r1_c1 "ul" kernel, which is referenced only
	 * when RSB_WANT_LOOPING_KERNELS is defined.
	 *
	 * Returns the error value of the invoked kernel, or
	 * RSB_ERR_UNSUPPORTED_OPERATION when no kernel is available.
	 *
	 * Generator note: being strictly blocked code, the rhs and the out vector
	 * should accept a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* fallback blocking (experimental, for the bounded box patch) */

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	/* 1 1 BCSR */
	if(blk_rows == 1 && blk_cols == 1)
		return rsb__BCSR_rowssums_double_H__tT_r1_c1_uu_sH_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_double_H__tT_r1_c1_ul_sH_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_double_H_u_tT_sH_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "rowssums" operation.
	 *
	 * The block shape is taken from the first two entries of rpntr and cpntr
	 * when both arrays are given; if either is NULL, a 1 x 1 blocking is
	 * assumed. A 1 x 1 blocking is forwarded to the r1_c1 "uu" kernel; any
	 * other shape goes to the r1_c1 "ul" kernel, which is referenced only
	 * when RSB_WANT_LOOPING_KERNELS is defined.
	 *
	 * Returns the error value of the invoked kernel, or
	 * RSB_ERR_UNSUPPORTED_OPERATION when no kernel is available.
	 *
	 * Generator note: being strictly blocked code, the rhs and the out vector
	 * should accept a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* fallback blocking (experimental, for the bounded box patch) */

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	/* 1 1 BCSR */
	if(blk_rows == 1 && blk_cols == 1)
		return rsb__BCSR_rowssums_double_H__tT_r1_c1_uu_sH_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_double_H__tT_r1_c1_ul_sH_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_double_C_u_tC_sH_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "rowssums" operation.
	 *
	 * The block shape is taken from the first two entries of rpntr and cpntr
	 * when both arrays are given; if either is NULL, a 1 x 1 blocking is
	 * assumed. A 1 x 1 blocking is forwarded to the r1_c1 "uu" kernel; any
	 * other shape goes to the r1_c1 "ul" kernel, which is referenced only
	 * when RSB_WANT_LOOPING_KERNELS is defined.
	 *
	 * Returns the error value of the invoked kernel, or
	 * RSB_ERR_UNSUPPORTED_OPERATION when no kernel is available.
	 *
	 * Generator note: being strictly blocked code, the rhs and the out vector
	 * should accept a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* fallback blocking (experimental, for the bounded box patch) */

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	/* 1 1 BCSR */
	if(blk_rows == 1 && blk_cols == 1)
		return rsb__BCSR_rowssums_double_C__tC_r1_c1_uu_sH_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_double_C__tC_r1_c1_ul_sH_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_double_C_u_tC_sH_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "rowssums" operation.
	 *
	 * The block shape is taken from the first two entries of rpntr and cpntr
	 * when both arrays are given; if either is NULL, a 1 x 1 blocking is
	 * assumed. A 1 x 1 blocking is forwarded to the r1_c1 "uu" kernel; any
	 * other shape goes to the r1_c1 "ul" kernel, which is referenced only
	 * when RSB_WANT_LOOPING_KERNELS is defined.
	 *
	 * Returns the error value of the invoked kernel, or
	 * RSB_ERR_UNSUPPORTED_OPERATION when no kernel is available.
	 *
	 * Generator note: being strictly blocked code, the rhs and the out vector
	 * should accept a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* fallback blocking (experimental, for the bounded box patch) */

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	/* 1 1 BCSR */
	if(blk_rows == 1 && blk_cols == 1)
		return rsb__BCSR_rowssums_double_C__tC_r1_c1_uu_sH_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_double_C__tC_r1_c1_ul_sH_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_double_H_u_tC_sH_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "rowssums" operation.
	 *
	 * The block shape is taken from the first two entries of rpntr and cpntr
	 * when both arrays are given; if either is NULL, a 1 x 1 blocking is
	 * assumed. A 1 x 1 blocking is forwarded to the r1_c1 "uu" kernel; any
	 * other shape goes to the r1_c1 "ul" kernel, which is referenced only
	 * when RSB_WANT_LOOPING_KERNELS is defined.
	 *
	 * Returns the error value of the invoked kernel, or
	 * RSB_ERR_UNSUPPORTED_OPERATION when no kernel is available.
	 *
	 * Generator note: being strictly blocked code, the rhs and the out vector
	 * should accept a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* fallback blocking (experimental, for the bounded box patch) */

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	/* 1 1 BCSR */
	if(blk_rows == 1 && blk_cols == 1)
		return rsb__BCSR_rowssums_double_H__tC_r1_c1_uu_sH_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_double_H__tC_r1_c1_ul_sH_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_double_H_u_tC_sH_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "rowssums" operation.
	 *
	 * The block shape is taken from the first two entries of rpntr and cpntr
	 * when both arrays are given; if either is NULL, a 1 x 1 blocking is
	 * assumed. A 1 x 1 blocking is forwarded to the r1_c1 "uu" kernel; any
	 * other shape goes to the r1_c1 "ul" kernel, which is referenced only
	 * when RSB_WANT_LOOPING_KERNELS is defined.
	 *
	 * Returns the error value of the invoked kernel, or
	 * RSB_ERR_UNSUPPORTED_OPERATION when no kernel is available.
	 *
	 * Generator note: being strictly blocked code, the rhs and the out vector
	 * should accept a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* fallback blocking (experimental, for the bounded box patch) */

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	/* 1 1 BCSR */
	if(blk_rows == 1 && blk_cols == 1)
		return rsb__BCSR_rowssums_double_H__tC_r1_c1_uu_sH_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_double_H__tC_r1_c1_ul_sH_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_scale_double_C_u_tN_sU_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "scale" operation.
	 *
	 * The block shape is taken from the first two entries of rpntr and cpntr
	 * when both arrays are given; if either is NULL, a 1 x 1 blocking is
	 * assumed. A 1 x 1 blocking is forwarded to the r1_c1 "uu" kernel; any
	 * other shape goes to the r1_c1 "ul" kernel, which is referenced only
	 * when RSB_WANT_LOOPING_KERNELS is defined.
	 *
	 * Returns the error value of the invoked kernel, or
	 * RSB_ERR_UNSUPPORTED_OPERATION when no kernel is available.
	 *
	 * Generator note: being strictly blocked code, the rhs and the out vector
	 * should accept a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* fallback blocking (experimental, for the bounded box patch) */

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	/* 1 1 BCSR */
	if(blk_rows == 1 && blk_cols == 1)
		return rsb__BCSR_scale_double_C__tN_r1_c1_uu_sU_dE_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_scale_double_C__tN_r1_c1_ul_sU_dE_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_scale_double_C_u_tN_sU_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "scale" operation.
	 *
	 * The block shape is taken from the first two entries of rpntr and cpntr
	 * when both arrays are given; if either is NULL, a 1 x 1 blocking is
	 * assumed. A 1 x 1 blocking is forwarded to the r1_c1 "uu" kernel; any
	 * other shape goes to the r1_c1 "ul" kernel, which is referenced only
	 * when RSB_WANT_LOOPING_KERNELS is defined.
	 *
	 * Returns the error value of the invoked kernel, or
	 * RSB_ERR_UNSUPPORTED_OPERATION when no kernel is available.
	 *
	 * Generator note: being strictly blocked code, the rhs and the out vector
	 * should accept a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* fallback blocking (experimental, for the bounded box patch) */

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	/* 1 1 BCSR */
	if(blk_rows == 1 && blk_cols == 1)
		return rsb__BCSR_scale_double_C__tN_r1_c1_uu_sU_dI_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_scale_double_C__tN_r1_c1_ul_sU_dI_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_scale_double_H_u_tN_sU_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "scale" operation.
	 *
	 * The block shape is taken from the first two entries of rpntr and cpntr
	 * when both arrays are given; if either is NULL, a 1 x 1 blocking is
	 * assumed. A 1 x 1 blocking is forwarded to the r1_c1 "uu" kernel; any
	 * other shape goes to the r1_c1 "ul" kernel, which is referenced only
	 * when RSB_WANT_LOOPING_KERNELS is defined.
	 *
	 * Returns the error value of the invoked kernel, or
	 * RSB_ERR_UNSUPPORTED_OPERATION when no kernel is available.
	 *
	 * Generator note: being strictly blocked code, the rhs and the out vector
	 * should accept a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* fallback blocking (experimental, for the bounded box patch) */

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	/* 1 1 BCSR */
	if(blk_rows == 1 && blk_cols == 1)
		return rsb__BCSR_scale_double_H__tN_r1_c1_uu_sU_dE_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_scale_double_H__tN_r1_c1_ul_sU_dE_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_scale_double_H_u_tN_sU_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "scale" operation.
	 *
	 * The block shape is taken from the first two entries of rpntr and cpntr
	 * when both arrays are given; if either is NULL, a 1 x 1 blocking is
	 * assumed. A 1 x 1 blocking is forwarded to the r1_c1 "uu" kernel; any
	 * other shape goes to the r1_c1 "ul" kernel, which is referenced only
	 * when RSB_WANT_LOOPING_KERNELS is defined.
	 *
	 * Returns the error value of the invoked kernel, or
	 * RSB_ERR_UNSUPPORTED_OPERATION when no kernel is available.
	 *
	 * Generator note: being strictly blocked code, the rhs and the out vector
	 * should accept a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* fallback blocking (experimental, for the bounded box patch) */

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	/* 1 1 BCSR */
	if(blk_rows == 1 && blk_cols == 1)
		return rsb__BCSR_scale_double_H__tN_r1_c1_uu_sU_dI_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_scale_double_H__tN_r1_c1_ul_sU_dI_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_scale_double_C_u_tT_sU_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "scale" operation.
	 *
	 * The block shape is read from the first two entries of rpntr and cpntr
	 * (rpntr[1]-rpntr[0] rows by cpntr[1]-cpntr[0] columns). If either array
	 * is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is forwarded to the `_r1_c1_uu_` kernel; any other
	 * shape is forwarded to the `_r1_c1_ul_` kernel when
	 * RSB_WANT_LOOPING_KERNELS is defined, and is rejected with
	 * RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Note carried over from the generator template: since this is strictly
	 * blocked code, the rhs and the out vector should accept a small overflow
	 * not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* default: experimental, for the bounded box patch */

	if(cpntr && rpntr)
	{
		blk_cols = cpntr[1]-cpntr[0];
		blk_rows = rpntr[1]-rpntr[0];
	}

	/* 1 1 BCSR */
	if(blk_rows == 1 && blk_cols == 1)
		return rsb__BCSR_scale_double_C__tT_r1_c1_uu_sU_dE_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS 
	return rsb__BCSR_scale_double_C__tT_r1_c1_ul_sU_dE_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_scale_double_C_u_tT_sU_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "scale" operation.
	 *
	 * The block shape is read from the first two entries of rpntr and cpntr
	 * (rpntr[1]-rpntr[0] rows by cpntr[1]-cpntr[0] columns). If either array
	 * is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is forwarded to the `_r1_c1_uu_` kernel; any other
	 * shape is forwarded to the `_r1_c1_ul_` kernel when
	 * RSB_WANT_LOOPING_KERNELS is defined, and is rejected with
	 * RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Note carried over from the generator template: since this is strictly
	 * blocked code, the rhs and the out vector should accept a small overflow
	 * not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* default: experimental, for the bounded box patch */

	if(cpntr && rpntr)
	{
		blk_cols = cpntr[1]-cpntr[0];
		blk_rows = rpntr[1]-rpntr[0];
	}

	/* 1 1 BCSR */
	if(blk_rows == 1 && blk_cols == 1)
		return rsb__BCSR_scale_double_C__tT_r1_c1_uu_sU_dI_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS 
	return rsb__BCSR_scale_double_C__tT_r1_c1_ul_sU_dI_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_scale_double_H_u_tT_sU_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "scale" operation.
	 *
	 * The block shape is read from the first two entries of rpntr and cpntr
	 * (rpntr[1]-rpntr[0] rows by cpntr[1]-cpntr[0] columns). If either array
	 * is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is forwarded to the `_r1_c1_uu_` kernel; any other
	 * shape is forwarded to the `_r1_c1_ul_` kernel when
	 * RSB_WANT_LOOPING_KERNELS is defined, and is rejected with
	 * RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Note carried over from the generator template: since this is strictly
	 * blocked code, the rhs and the out vector should accept a small overflow
	 * not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* default: experimental, for the bounded box patch */

	if(cpntr && rpntr)
	{
		blk_cols = cpntr[1]-cpntr[0];
		blk_rows = rpntr[1]-rpntr[0];
	}

	/* 1 1 BCSR */
	if(blk_rows == 1 && blk_cols == 1)
		return rsb__BCSR_scale_double_H__tT_r1_c1_uu_sU_dE_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS 
	return rsb__BCSR_scale_double_H__tT_r1_c1_ul_sU_dE_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_scale_double_H_u_tT_sU_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "scale" operation.
	 *
	 * The block shape is read from the first two entries of rpntr and cpntr
	 * (rpntr[1]-rpntr[0] rows by cpntr[1]-cpntr[0] columns). If either array
	 * is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is forwarded to the `_r1_c1_uu_` kernel; any other
	 * shape is forwarded to the `_r1_c1_ul_` kernel when
	 * RSB_WANT_LOOPING_KERNELS is defined, and is rejected with
	 * RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Note carried over from the generator template: since this is strictly
	 * blocked code, the rhs and the out vector should accept a small overflow
	 * not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* default: experimental, for the bounded box patch */

	if(cpntr && rpntr)
	{
		blk_cols = cpntr[1]-cpntr[0];
		blk_rows = rpntr[1]-rpntr[0];
	}

	/* 1 1 BCSR */
	if(blk_rows == 1 && blk_cols == 1)
		return rsb__BCSR_scale_double_H__tT_r1_c1_uu_sU_dI_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS 
	return rsb__BCSR_scale_double_H__tT_r1_c1_ul_sU_dI_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_scale_double_C_u_tC_sU_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "scale" operation.
	 *
	 * The block shape is read from the first two entries of rpntr and cpntr
	 * (rpntr[1]-rpntr[0] rows by cpntr[1]-cpntr[0] columns). If either array
	 * is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is forwarded to the `_r1_c1_uu_` kernel; any other
	 * shape is forwarded to the `_r1_c1_ul_` kernel when
	 * RSB_WANT_LOOPING_KERNELS is defined, and is rejected with
	 * RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Note carried over from the generator template: since this is strictly
	 * blocked code, the rhs and the out vector should accept a small overflow
	 * not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* default: experimental, for the bounded box patch */

	if(cpntr && rpntr)
	{
		blk_cols = cpntr[1]-cpntr[0];
		blk_rows = rpntr[1]-rpntr[0];
	}

	/* 1 1 BCSR */
	if(blk_rows == 1 && blk_cols == 1)
		return rsb__BCSR_scale_double_C__tC_r1_c1_uu_sU_dE_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS 
	return rsb__BCSR_scale_double_C__tC_r1_c1_ul_sU_dE_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_scale_double_C_u_tC_sU_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "scale" operation.
	 *
	 * The block shape is read from the first two entries of rpntr and cpntr
	 * (rpntr[1]-rpntr[0] rows by cpntr[1]-cpntr[0] columns). If either array
	 * is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is forwarded to the `_r1_c1_uu_` kernel; any other
	 * shape is forwarded to the `_r1_c1_ul_` kernel when
	 * RSB_WANT_LOOPING_KERNELS is defined, and is rejected with
	 * RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Note carried over from the generator template: since this is strictly
	 * blocked code, the rhs and the out vector should accept a small overflow
	 * not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* default: experimental, for the bounded box patch */

	if(cpntr && rpntr)
	{
		blk_cols = cpntr[1]-cpntr[0];
		blk_rows = rpntr[1]-rpntr[0];
	}

	/* 1 1 BCSR */
	if(blk_rows == 1 && blk_cols == 1)
		return rsb__BCSR_scale_double_C__tC_r1_c1_uu_sU_dI_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS 
	return rsb__BCSR_scale_double_C__tC_r1_c1_ul_sU_dI_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_scale_double_H_u_tC_sU_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "scale" operation.
	 *
	 * The block shape is read from the first two entries of rpntr and cpntr
	 * (rpntr[1]-rpntr[0] rows by cpntr[1]-cpntr[0] columns). If either array
	 * is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is forwarded to the `_r1_c1_uu_` kernel; any other
	 * shape is forwarded to the `_r1_c1_ul_` kernel when
	 * RSB_WANT_LOOPING_KERNELS is defined, and is rejected with
	 * RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Note carried over from the generator template: since this is strictly
	 * blocked code, the rhs and the out vector should accept a small overflow
	 * not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* default: experimental, for the bounded box patch */

	if(cpntr && rpntr)
	{
		blk_cols = cpntr[1]-cpntr[0];
		blk_rows = rpntr[1]-rpntr[0];
	}

	/* 1 1 BCSR */
	if(blk_rows == 1 && blk_cols == 1)
		return rsb__BCSR_scale_double_H__tC_r1_c1_uu_sU_dE_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS 
	return rsb__BCSR_scale_double_H__tC_r1_c1_ul_sU_dE_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_scale_double_H_u_tC_sU_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "scale" operation.
	 *
	 * The block shape is read from the first two entries of rpntr and cpntr
	 * (rpntr[1]-rpntr[0] rows by cpntr[1]-cpntr[0] columns). If either array
	 * is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is forwarded to the `_r1_c1_uu_` kernel; any other
	 * shape is forwarded to the `_r1_c1_ul_` kernel when
	 * RSB_WANT_LOOPING_KERNELS is defined, and is rejected with
	 * RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Note carried over from the generator template: since this is strictly
	 * blocked code, the rhs and the out vector should accept a small overflow
	 * not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* default: experimental, for the bounded box patch */

	if(cpntr && rpntr)
	{
		blk_cols = cpntr[1]-cpntr[0];
		blk_rows = rpntr[1]-rpntr[0];
	}

	/* 1 1 BCSR */
	if(blk_rows == 1 && blk_cols == 1)
		return rsb__BCSR_scale_double_H__tC_r1_c1_uu_sU_dI_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS 
	return rsb__BCSR_scale_double_H__tC_r1_c1_ul_sU_dI_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_scale_double_C_u_tN_sS_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "scale" operation.
	 *
	 * The block shape is read from the first two entries of rpntr and cpntr
	 * (rpntr[1]-rpntr[0] rows by cpntr[1]-cpntr[0] columns). If either array
	 * is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is forwarded to the `_r1_c1_uu_` kernel; any other
	 * shape is forwarded to the `_r1_c1_ul_` kernel when
	 * RSB_WANT_LOOPING_KERNELS is defined, and is rejected with
	 * RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Note carried over from the generator template: since this is strictly
	 * blocked code, the rhs and the out vector should accept a small overflow
	 * not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* default: experimental, for the bounded box patch */

	if(cpntr && rpntr)
	{
		blk_cols = cpntr[1]-cpntr[0];
		blk_rows = rpntr[1]-rpntr[0];
	}

	/* 1 1 BCSR */
	if(blk_rows == 1 && blk_cols == 1)
		return rsb__BCSR_scale_double_C__tN_r1_c1_uu_sS_dE_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS 
	return rsb__BCSR_scale_double_C__tN_r1_c1_ul_sS_dE_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_scale_double_C_u_tN_sS_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "scale" operation.
	 *
	 * The block shape is read from the first two entries of rpntr and cpntr
	 * (rpntr[1]-rpntr[0] rows by cpntr[1]-cpntr[0] columns). If either array
	 * is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is forwarded to the `_r1_c1_uu_` kernel; any other
	 * shape is forwarded to the `_r1_c1_ul_` kernel when
	 * RSB_WANT_LOOPING_KERNELS is defined, and is rejected with
	 * RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Note carried over from the generator template: since this is strictly
	 * blocked code, the rhs and the out vector should accept a small overflow
	 * not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* default: experimental, for the bounded box patch */

	if(cpntr && rpntr)
	{
		blk_cols = cpntr[1]-cpntr[0];
		blk_rows = rpntr[1]-rpntr[0];
	}

	/* 1 1 BCSR */
	if(blk_rows == 1 && blk_cols == 1)
		return rsb__BCSR_scale_double_C__tN_r1_c1_uu_sS_dI_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS 
	return rsb__BCSR_scale_double_C__tN_r1_c1_ul_sS_dI_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_scale_double_H_u_tN_sS_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "scale" operation.
	 *
	 * The block shape is read from the first two entries of rpntr and cpntr
	 * (rpntr[1]-rpntr[0] rows by cpntr[1]-cpntr[0] columns). If either array
	 * is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is forwarded to the `_r1_c1_uu_` kernel; any other
	 * shape is forwarded to the `_r1_c1_ul_` kernel when
	 * RSB_WANT_LOOPING_KERNELS is defined, and is rejected with
	 * RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Note carried over from the generator template: since this is strictly
	 * blocked code, the rhs and the out vector should accept a small overflow
	 * not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* default: experimental, for the bounded box patch */

	if(cpntr && rpntr)
	{
		blk_cols = cpntr[1]-cpntr[0];
		blk_rows = rpntr[1]-rpntr[0];
	}

	/* 1 1 BCSR */
	if(blk_rows == 1 && blk_cols == 1)
		return rsb__BCSR_scale_double_H__tN_r1_c1_uu_sS_dE_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS 
	return rsb__BCSR_scale_double_H__tN_r1_c1_ul_sS_dE_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_scale_double_H_u_tN_sS_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "scale" operation.
	 *
	 * The block shape is read from the first two entries of rpntr and cpntr
	 * (rpntr[1]-rpntr[0] rows by cpntr[1]-cpntr[0] columns). If either array
	 * is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is forwarded to the `_r1_c1_uu_` kernel; any other
	 * shape is forwarded to the `_r1_c1_ul_` kernel when
	 * RSB_WANT_LOOPING_KERNELS is defined, and is rejected with
	 * RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Note carried over from the generator template: since this is strictly
	 * blocked code, the rhs and the out vector should accept a small overflow
	 * not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* default: experimental, for the bounded box patch */

	if(cpntr && rpntr)
	{
		blk_cols = cpntr[1]-cpntr[0];
		blk_rows = rpntr[1]-rpntr[0];
	}

	/* 1 1 BCSR */
	if(blk_rows == 1 && blk_cols == 1)
		return rsb__BCSR_scale_double_H__tN_r1_c1_uu_sS_dI_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS 
	return rsb__BCSR_scale_double_H__tN_r1_c1_ul_sS_dI_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_scale_double_C_u_tT_sS_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "scale" operation.
	 *
	 * The block shape is read from the first two entries of rpntr and cpntr
	 * (rpntr[1]-rpntr[0] rows by cpntr[1]-cpntr[0] columns). If either array
	 * is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is forwarded to the `_r1_c1_uu_` kernel; any other
	 * shape is forwarded to the `_r1_c1_ul_` kernel when
	 * RSB_WANT_LOOPING_KERNELS is defined, and is rejected with
	 * RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Note carried over from the generator template: since this is strictly
	 * blocked code, the rhs and the out vector should accept a small overflow
	 * not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* default: experimental, for the bounded box patch */

	if(cpntr && rpntr)
	{
		blk_cols = cpntr[1]-cpntr[0];
		blk_rows = rpntr[1]-rpntr[0];
	}

	/* 1 1 BCSR */
	if(blk_rows == 1 && blk_cols == 1)
		return rsb__BCSR_scale_double_C__tT_r1_c1_uu_sS_dE_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS 
	return rsb__BCSR_scale_double_C__tT_r1_c1_ul_sS_dE_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_scale_double_C_u_tT_sS_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "scale" operation.
	 *
	 * The block shape is read from the first two entries of rpntr and cpntr
	 * (rpntr[1]-rpntr[0] rows by cpntr[1]-cpntr[0] columns). If either array
	 * is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is forwarded to the `_r1_c1_uu_` kernel; any other
	 * shape is forwarded to the `_r1_c1_ul_` kernel when
	 * RSB_WANT_LOOPING_KERNELS is defined, and is rejected with
	 * RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Note carried over from the generator template: since this is strictly
	 * blocked code, the rhs and the out vector should accept a small overflow
	 * not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* default: experimental, for the bounded box patch */

	if(cpntr && rpntr)
	{
		blk_cols = cpntr[1]-cpntr[0];
		blk_rows = rpntr[1]-rpntr[0];
	}

	/* 1 1 BCSR */
	if(blk_rows == 1 && blk_cols == 1)
		return rsb__BCSR_scale_double_C__tT_r1_c1_uu_sS_dI_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS 
	return rsb__BCSR_scale_double_C__tT_r1_c1_ul_sS_dI_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_scale_double_H_u_tT_sS_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "scale" operation.
	 *
	 * The block shape is read from the first two entries of rpntr and cpntr
	 * (rpntr[1]-rpntr[0] rows by cpntr[1]-cpntr[0] columns). If either array
	 * is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is forwarded to the `_r1_c1_uu_` kernel; any other
	 * shape is forwarded to the `_r1_c1_ul_` kernel when
	 * RSB_WANT_LOOPING_KERNELS is defined, and is rejected with
	 * RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Note carried over from the generator template: since this is strictly
	 * blocked code, the rhs and the out vector should accept a small overflow
	 * not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* default: experimental, for the bounded box patch */

	if(cpntr && rpntr)
	{
		blk_cols = cpntr[1]-cpntr[0];
		blk_rows = rpntr[1]-rpntr[0];
	}

	/* 1 1 BCSR */
	if(blk_rows == 1 && blk_cols == 1)
		return rsb__BCSR_scale_double_H__tT_r1_c1_uu_sS_dE_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS 
	return rsb__BCSR_scale_double_H__tT_r1_c1_ul_sS_dE_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_scale_double_H_u_tT_sS_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "scale" operation.
	 *
	 * The block shape is read from the first entries of rpntr/cpntr when both
	 * are supplied; otherwise a 1 x 1 blocking is assumed.  A 1 x 1 blocking
	 * is forwarded to the r1_c1 "uu" kernel.  Any other shape is forwarded to
	 * the r1_c1 "ul" kernel when RSB_WANT_LOOPING_KERNELS is defined, and is
	 * rejected with RSB_ERR_UNSUPPORTED_OPERATION when it is not.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Generator note: since this is strictly blocked code, the rhs and the out
	 * vector should tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* 1 x 1 default: experimental, for the bounded box patch */

	if(cpntr && rpntr)
	{
		blk_cols = cpntr[1]-cpntr[0];
		blk_rows = rpntr[1]-rpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_scale_double_H__tT_r1_c1_uu_sS_dI_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
	}

	/* every other block shape */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_scale_double_H__tT_r1_c1_ul_sS_dI_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_scale_double_C_u_tC_sS_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "scale" operation.
	 *
	 * The block shape is read from the first entries of rpntr/cpntr when both
	 * are supplied; otherwise a 1 x 1 blocking is assumed.  A 1 x 1 blocking
	 * is forwarded to the r1_c1 "uu" kernel.  Any other shape is forwarded to
	 * the r1_c1 "ul" kernel when RSB_WANT_LOOPING_KERNELS is defined, and is
	 * rejected with RSB_ERR_UNSUPPORTED_OPERATION when it is not.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Generator note: since this is strictly blocked code, the rhs and the out
	 * vector should tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* 1 x 1 default: experimental, for the bounded box patch */

	if(cpntr && rpntr)
	{
		blk_cols = cpntr[1]-cpntr[0];
		blk_rows = rpntr[1]-rpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_scale_double_C__tC_r1_c1_uu_sS_dE_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
	}

	/* every other block shape */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_scale_double_C__tC_r1_c1_ul_sS_dE_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_scale_double_C_u_tC_sS_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "scale" operation.
	 *
	 * The block shape is read from the first entries of rpntr/cpntr when both
	 * are supplied; otherwise a 1 x 1 blocking is assumed.  A 1 x 1 blocking
	 * is forwarded to the r1_c1 "uu" kernel.  Any other shape is forwarded to
	 * the r1_c1 "ul" kernel when RSB_WANT_LOOPING_KERNELS is defined, and is
	 * rejected with RSB_ERR_UNSUPPORTED_OPERATION when it is not.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Generator note: since this is strictly blocked code, the rhs and the out
	 * vector should tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* 1 x 1 default: experimental, for the bounded box patch */

	if(cpntr && rpntr)
	{
		blk_cols = cpntr[1]-cpntr[0];
		blk_rows = rpntr[1]-rpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_scale_double_C__tC_r1_c1_uu_sS_dI_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
	}

	/* every other block shape */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_scale_double_C__tC_r1_c1_ul_sS_dI_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_scale_double_H_u_tC_sS_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "scale" operation.
	 *
	 * The block shape is read from the first entries of rpntr/cpntr when both
	 * are supplied; otherwise a 1 x 1 blocking is assumed.  A 1 x 1 blocking
	 * is forwarded to the r1_c1 "uu" kernel.  Any other shape is forwarded to
	 * the r1_c1 "ul" kernel when RSB_WANT_LOOPING_KERNELS is defined, and is
	 * rejected with RSB_ERR_UNSUPPORTED_OPERATION when it is not.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Generator note: since this is strictly blocked code, the rhs and the out
	 * vector should tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* 1 x 1 default: experimental, for the bounded box patch */

	if(cpntr && rpntr)
	{
		blk_cols = cpntr[1]-cpntr[0];
		blk_rows = rpntr[1]-rpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_scale_double_H__tC_r1_c1_uu_sS_dE_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
	}

	/* every other block shape */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_scale_double_H__tC_r1_c1_ul_sS_dE_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_scale_double_H_u_tC_sS_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "scale" operation.
	 *
	 * The block shape is read from the first entries of rpntr/cpntr when both
	 * are supplied; otherwise a 1 x 1 blocking is assumed.  A 1 x 1 blocking
	 * is forwarded to the r1_c1 "uu" kernel.  Any other shape is forwarded to
	 * the r1_c1 "ul" kernel when RSB_WANT_LOOPING_KERNELS is defined, and is
	 * rejected with RSB_ERR_UNSUPPORTED_OPERATION when it is not.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Generator note: since this is strictly blocked code, the rhs and the out
	 * vector should tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* 1 x 1 default: experimental, for the bounded box patch */

	if(cpntr && rpntr)
	{
		blk_cols = cpntr[1]-cpntr[0];
		blk_rows = rpntr[1]-rpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_scale_double_H__tC_r1_c1_uu_sS_dI_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
	}

	/* every other block shape */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_scale_double_H__tC_r1_c1_ul_sS_dI_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_scale_double_C_u_tN_sH_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "scale" operation.
	 *
	 * The block shape is read from the first entries of rpntr/cpntr when both
	 * are supplied; otherwise a 1 x 1 blocking is assumed.  A 1 x 1 blocking
	 * is forwarded to the r1_c1 "uu" kernel.  Any other shape is forwarded to
	 * the r1_c1 "ul" kernel when RSB_WANT_LOOPING_KERNELS is defined, and is
	 * rejected with RSB_ERR_UNSUPPORTED_OPERATION when it is not.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Generator note: since this is strictly blocked code, the rhs and the out
	 * vector should tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* 1 x 1 default: experimental, for the bounded box patch */

	if(cpntr && rpntr)
	{
		blk_cols = cpntr[1]-cpntr[0];
		blk_rows = rpntr[1]-rpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_scale_double_C__tN_r1_c1_uu_sH_dE_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
	}

	/* every other block shape */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_scale_double_C__tN_r1_c1_ul_sH_dE_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_scale_double_C_u_tN_sH_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "scale" operation.
	 *
	 * The block shape is read from the first entries of rpntr/cpntr when both
	 * are supplied; otherwise a 1 x 1 blocking is assumed.  A 1 x 1 blocking
	 * is forwarded to the r1_c1 "uu" kernel.  Any other shape is forwarded to
	 * the r1_c1 "ul" kernel when RSB_WANT_LOOPING_KERNELS is defined, and is
	 * rejected with RSB_ERR_UNSUPPORTED_OPERATION when it is not.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Generator note: since this is strictly blocked code, the rhs and the out
	 * vector should tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* 1 x 1 default: experimental, for the bounded box patch */

	if(cpntr && rpntr)
	{
		blk_cols = cpntr[1]-cpntr[0];
		blk_rows = rpntr[1]-rpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_scale_double_C__tN_r1_c1_uu_sH_dI_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
	}

	/* every other block shape */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_scale_double_C__tN_r1_c1_ul_sH_dI_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_scale_double_H_u_tN_sH_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "scale" operation.
	 *
	 * The block shape is read from the first entries of rpntr/cpntr when both
	 * are supplied; otherwise a 1 x 1 blocking is assumed.  A 1 x 1 blocking
	 * is forwarded to the r1_c1 "uu" kernel.  Any other shape is forwarded to
	 * the r1_c1 "ul" kernel when RSB_WANT_LOOPING_KERNELS is defined, and is
	 * rejected with RSB_ERR_UNSUPPORTED_OPERATION when it is not.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Generator note: since this is strictly blocked code, the rhs and the out
	 * vector should tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* 1 x 1 default: experimental, for the bounded box patch */

	if(cpntr && rpntr)
	{
		blk_cols = cpntr[1]-cpntr[0];
		blk_rows = rpntr[1]-rpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_scale_double_H__tN_r1_c1_uu_sH_dE_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
	}

	/* every other block shape */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_scale_double_H__tN_r1_c1_ul_sH_dE_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_scale_double_H_u_tN_sH_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "scale" operation.
	 *
	 * The block shape is read from the first entries of rpntr/cpntr when both
	 * are supplied; otherwise a 1 x 1 blocking is assumed.  A 1 x 1 blocking
	 * is forwarded to the r1_c1 "uu" kernel.  Any other shape is forwarded to
	 * the r1_c1 "ul" kernel when RSB_WANT_LOOPING_KERNELS is defined, and is
	 * rejected with RSB_ERR_UNSUPPORTED_OPERATION when it is not.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Generator note: since this is strictly blocked code, the rhs and the out
	 * vector should tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* 1 x 1 default: experimental, for the bounded box patch */

	if(cpntr && rpntr)
	{
		blk_cols = cpntr[1]-cpntr[0];
		blk_rows = rpntr[1]-rpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_scale_double_H__tN_r1_c1_uu_sH_dI_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
	}

	/* every other block shape */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_scale_double_H__tN_r1_c1_ul_sH_dI_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_scale_double_C_u_tT_sH_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "scale" operation.
	 *
	 * The block shape is read from the first entries of rpntr/cpntr when both
	 * are supplied; otherwise a 1 x 1 blocking is assumed.  A 1 x 1 blocking
	 * is forwarded to the r1_c1 "uu" kernel.  Any other shape is forwarded to
	 * the r1_c1 "ul" kernel when RSB_WANT_LOOPING_KERNELS is defined, and is
	 * rejected with RSB_ERR_UNSUPPORTED_OPERATION when it is not.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Generator note: since this is strictly blocked code, the rhs and the out
	 * vector should tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* 1 x 1 default: experimental, for the bounded box patch */

	if(cpntr && rpntr)
	{
		blk_cols = cpntr[1]-cpntr[0];
		blk_rows = rpntr[1]-rpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_scale_double_C__tT_r1_c1_uu_sH_dE_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
	}

	/* every other block shape */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_scale_double_C__tT_r1_c1_ul_sH_dE_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_scale_double_C_u_tT_sH_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "scale" operation.
	 *
	 * The block shape is read from the first entries of rpntr/cpntr when both
	 * are supplied; otherwise a 1 x 1 blocking is assumed.  A 1 x 1 blocking
	 * is forwarded to the r1_c1 "uu" kernel.  Any other shape is forwarded to
	 * the r1_c1 "ul" kernel when RSB_WANT_LOOPING_KERNELS is defined, and is
	 * rejected with RSB_ERR_UNSUPPORTED_OPERATION when it is not.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Generator note: since this is strictly blocked code, the rhs and the out
	 * vector should tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* 1 x 1 default: experimental, for the bounded box patch */

	if(cpntr && rpntr)
	{
		blk_cols = cpntr[1]-cpntr[0];
		blk_rows = rpntr[1]-rpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_scale_double_C__tT_r1_c1_uu_sH_dI_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
	}

	/* every other block shape */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_scale_double_C__tT_r1_c1_ul_sH_dI_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_scale_double_H_u_tT_sH_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "scale" operation.
	 *
	 * The block shape is read from the first entries of rpntr/cpntr when both
	 * are supplied; otherwise a 1 x 1 blocking is assumed.  A 1 x 1 blocking
	 * is forwarded to the r1_c1 "uu" kernel.  Any other shape is forwarded to
	 * the r1_c1 "ul" kernel when RSB_WANT_LOOPING_KERNELS is defined, and is
	 * rejected with RSB_ERR_UNSUPPORTED_OPERATION when it is not.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Generator note: since this is strictly blocked code, the rhs and the out
	 * vector should tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* 1 x 1 default: experimental, for the bounded box patch */

	if(cpntr && rpntr)
	{
		blk_cols = cpntr[1]-cpntr[0];
		blk_rows = rpntr[1]-rpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_scale_double_H__tT_r1_c1_uu_sH_dE_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
	}

	/* every other block shape */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_scale_double_H__tT_r1_c1_ul_sH_dE_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_scale_double_H_u_tT_sH_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "scale" operation.
	 *
	 * The block shape is read from the first entries of rpntr/cpntr when both
	 * are supplied; otherwise a 1 x 1 blocking is assumed.  A 1 x 1 blocking
	 * is forwarded to the r1_c1 "uu" kernel.  Any other shape is forwarded to
	 * the r1_c1 "ul" kernel when RSB_WANT_LOOPING_KERNELS is defined, and is
	 * rejected with RSB_ERR_UNSUPPORTED_OPERATION when it is not.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Generator note: since this is strictly blocked code, the rhs and the out
	 * vector should tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* 1 x 1 default: experimental, for the bounded box patch */

	if(cpntr && rpntr)
	{
		blk_cols = cpntr[1]-cpntr[0];
		blk_rows = rpntr[1]-rpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_scale_double_H__tT_r1_c1_uu_sH_dI_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
	}

	/* every other block shape */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_scale_double_H__tT_r1_c1_ul_sH_dI_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_scale_double_C_u_tC_sH_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "scale" operation.
	 *
	 * The block shape is read from the first entries of rpntr/cpntr when both
	 * are supplied; otherwise a 1 x 1 blocking is assumed.  A 1 x 1 blocking
	 * is forwarded to the r1_c1 "uu" kernel.  Any other shape is forwarded to
	 * the r1_c1 "ul" kernel when RSB_WANT_LOOPING_KERNELS is defined, and is
	 * rejected with RSB_ERR_UNSUPPORTED_OPERATION when it is not.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Generator note: since this is strictly blocked code, the rhs and the out
	 * vector should tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* 1 x 1 default: experimental, for the bounded box patch */

	if(cpntr && rpntr)
	{
		blk_cols = cpntr[1]-cpntr[0];
		blk_rows = rpntr[1]-rpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_scale_double_C__tC_r1_c1_uu_sH_dE_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
	}

	/* every other block shape */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_scale_double_C__tC_r1_c1_ul_sH_dE_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_scale_double_C_u_tC_sH_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "scale" operation.
	 *
	 * The block shape is read from the first entries of rpntr/cpntr when both
	 * are supplied; otherwise a 1 x 1 blocking is assumed.  A 1 x 1 blocking
	 * is forwarded to the r1_c1 "uu" kernel.  Any other shape is forwarded to
	 * the r1_c1 "ul" kernel when RSB_WANT_LOOPING_KERNELS is defined, and is
	 * rejected with RSB_ERR_UNSUPPORTED_OPERATION when it is not.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Generator note: since this is strictly blocked code, the rhs and the out
	 * vector should tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* 1 x 1 default: experimental, for the bounded box patch */

	if(cpntr && rpntr)
	{
		blk_cols = cpntr[1]-cpntr[0];
		blk_rows = rpntr[1]-rpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_scale_double_C__tC_r1_c1_uu_sH_dI_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
	}

	/* every other block shape */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_scale_double_C__tC_r1_c1_ul_sH_dI_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_scale_double_H_u_tC_sH_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "scale" operation.
	 *
	 * The block shape is read from the first two entries of rpntr and cpntr
	 * when both pointers are given; otherwise a 1 x 1 block is assumed.
	 * A 1 x 1 block is forwarded to the r1_c1 "uu" kernel.  Any other shape
	 * is forwarded to the r1_c1 "ul" (looping) kernel when
	 * RSB_WANT_LOOPING_KERNELS is defined, and is otherwise rejected with
	 * RSB_ERR_UNSUPPORTED_OPERATION.  All arguments are passed through
	 * unchanged.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Generator template note (kept from the original): since this is
	 * strictly blocked code, the rhs and the out vector should tolerate a
	 * small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* experimental default, for the bounded box patch */

	if(rpntr && cpntr)
	{
		blk_cols = cpntr[1]-cpntr[0];
		blk_rows = rpntr[1]-rpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_scale_double_H__tC_r1_c1_uu_sH_dE_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
	}

	/* any other block shape */
#ifdef RSB_WANT_LOOPING_KERNELS 
	return rsb__BCSR_scale_double_H__tC_r1_c1_ul_sH_dE_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_scale_double_H_u_tC_sH_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const double *scale_factors)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "scale" operation.
	 *
	 * The block shape is read from the first two entries of rpntr and cpntr
	 * when both pointers are given; otherwise a 1 x 1 block is assumed.
	 * A 1 x 1 block is forwarded to the r1_c1 "uu" kernel.  Any other shape
	 * is forwarded to the r1_c1 "ul" (looping) kernel when
	 * RSB_WANT_LOOPING_KERNELS is defined, and is otherwise rejected with
	 * RSB_ERR_UNSUPPORTED_OPERATION.  All arguments are passed through
	 * unchanged.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Generator template note (kept from the original): since this is
	 * strictly blocked code, the rhs and the out vector should tolerate a
	 * small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* experimental default, for the bounded box patch */

	if(rpntr && cpntr)
	{
		blk_cols = cpntr[1]-cpntr[0];
		blk_rows = rpntr[1]-rpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_scale_double_H__tC_r1_c1_uu_sH_dI_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
	}

	/* any other block shape */
#ifdef RSB_WANT_LOOPING_KERNELS 
	return rsb__BCSR_scale_double_H__tC_r1_c1_ul_sH_dI_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_float_C_u_tN_sU_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "infty_norm" operation.
	 *
	 * The block shape is read from the first two entries of rpntr and cpntr
	 * when both pointers are given; otherwise a 1 x 1 block is assumed.
	 * A 1 x 1 block is forwarded to the r1_c1 "uu" kernel.  Any other shape
	 * is forwarded to the r1_c1 "ul" (looping) kernel when
	 * RSB_WANT_LOOPING_KERNELS is defined, and is otherwise rejected with
	 * RSB_ERR_UNSUPPORTED_OPERATION.  All arguments are passed through
	 * unchanged.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Generator template note (kept from the original): since this is
	 * strictly blocked code, the rhs and the out vector should tolerate a
	 * small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* experimental default, for the bounded box patch */

	if(rpntr && cpntr)
	{
		blk_cols = cpntr[1]-cpntr[0];
		blk_rows = rpntr[1]-rpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_float_C__tN_r1_c1_uu_sU_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* any other block shape */
#ifdef RSB_WANT_LOOPING_KERNELS 
	return rsb__BCSR_infty_norm_float_C__tN_r1_c1_ul_sU_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_float_C_u_tN_sU_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "infty_norm" operation.
	 *
	 * The block shape is read from the first two entries of rpntr and cpntr
	 * when both pointers are given; otherwise a 1 x 1 block is assumed.
	 * A 1 x 1 block is forwarded to the r1_c1 "uu" kernel.  Any other shape
	 * is forwarded to the r1_c1 "ul" (looping) kernel when
	 * RSB_WANT_LOOPING_KERNELS is defined, and is otherwise rejected with
	 * RSB_ERR_UNSUPPORTED_OPERATION.  All arguments are passed through
	 * unchanged.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Generator template note (kept from the original): since this is
	 * strictly blocked code, the rhs and the out vector should tolerate a
	 * small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* experimental default, for the bounded box patch */

	if(rpntr && cpntr)
	{
		blk_cols = cpntr[1]-cpntr[0];
		blk_rows = rpntr[1]-rpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_float_C__tN_r1_c1_uu_sU_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* any other block shape */
#ifdef RSB_WANT_LOOPING_KERNELS 
	return rsb__BCSR_infty_norm_float_C__tN_r1_c1_ul_sU_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_float_H_u_tN_sU_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "infty_norm" operation.
	 *
	 * The block shape is read from the first two entries of rpntr and cpntr
	 * when both pointers are given; otherwise a 1 x 1 block is assumed.
	 * A 1 x 1 block is forwarded to the r1_c1 "uu" kernel.  Any other shape
	 * is forwarded to the r1_c1 "ul" (looping) kernel when
	 * RSB_WANT_LOOPING_KERNELS is defined, and is otherwise rejected with
	 * RSB_ERR_UNSUPPORTED_OPERATION.  All arguments are passed through
	 * unchanged.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Generator template note (kept from the original): since this is
	 * strictly blocked code, the rhs and the out vector should tolerate a
	 * small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* experimental default, for the bounded box patch */

	if(rpntr && cpntr)
	{
		blk_cols = cpntr[1]-cpntr[0];
		blk_rows = rpntr[1]-rpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_float_H__tN_r1_c1_uu_sU_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* any other block shape */
#ifdef RSB_WANT_LOOPING_KERNELS 
	return rsb__BCSR_infty_norm_float_H__tN_r1_c1_ul_sU_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_float_H_u_tN_sU_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "infty_norm" operation.
	 *
	 * The block shape is read from the first two entries of rpntr and cpntr
	 * when both pointers are given; otherwise a 1 x 1 block is assumed.
	 * A 1 x 1 block is forwarded to the r1_c1 "uu" kernel.  Any other shape
	 * is forwarded to the r1_c1 "ul" (looping) kernel when
	 * RSB_WANT_LOOPING_KERNELS is defined, and is otherwise rejected with
	 * RSB_ERR_UNSUPPORTED_OPERATION.  All arguments are passed through
	 * unchanged.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Generator template note (kept from the original): since this is
	 * strictly blocked code, the rhs and the out vector should tolerate a
	 * small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* experimental default, for the bounded box patch */

	if(rpntr && cpntr)
	{
		blk_cols = cpntr[1]-cpntr[0];
		blk_rows = rpntr[1]-rpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_float_H__tN_r1_c1_uu_sU_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* any other block shape */
#ifdef RSB_WANT_LOOPING_KERNELS 
	return rsb__BCSR_infty_norm_float_H__tN_r1_c1_ul_sU_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_float_C_u_tT_sU_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "infty_norm" operation.
	 *
	 * The block shape is read from the first two entries of rpntr and cpntr
	 * when both pointers are given; otherwise a 1 x 1 block is assumed.
	 * A 1 x 1 block is forwarded to the r1_c1 "uu" kernel.  Any other shape
	 * is forwarded to the r1_c1 "ul" (looping) kernel when
	 * RSB_WANT_LOOPING_KERNELS is defined, and is otherwise rejected with
	 * RSB_ERR_UNSUPPORTED_OPERATION.  All arguments are passed through
	 * unchanged.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Generator template note (kept from the original): since this is
	 * strictly blocked code, the rhs and the out vector should tolerate a
	 * small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* experimental default, for the bounded box patch */

	if(rpntr && cpntr)
	{
		blk_cols = cpntr[1]-cpntr[0];
		blk_rows = rpntr[1]-rpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_float_C__tT_r1_c1_uu_sU_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* any other block shape */
#ifdef RSB_WANT_LOOPING_KERNELS 
	return rsb__BCSR_infty_norm_float_C__tT_r1_c1_ul_sU_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_float_C_u_tT_sU_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "infty_norm" operation.
	 *
	 * The block shape is read from the first two entries of rpntr and cpntr
	 * when both pointers are given; otherwise a 1 x 1 block is assumed.
	 * A 1 x 1 block is forwarded to the r1_c1 "uu" kernel.  Any other shape
	 * is forwarded to the r1_c1 "ul" (looping) kernel when
	 * RSB_WANT_LOOPING_KERNELS is defined, and is otherwise rejected with
	 * RSB_ERR_UNSUPPORTED_OPERATION.  All arguments are passed through
	 * unchanged.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Generator template note (kept from the original): since this is
	 * strictly blocked code, the rhs and the out vector should tolerate a
	 * small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* experimental default, for the bounded box patch */

	if(rpntr && cpntr)
	{
		blk_cols = cpntr[1]-cpntr[0];
		blk_rows = rpntr[1]-rpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_float_C__tT_r1_c1_uu_sU_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* any other block shape */
#ifdef RSB_WANT_LOOPING_KERNELS 
	return rsb__BCSR_infty_norm_float_C__tT_r1_c1_ul_sU_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_float_H_u_tT_sU_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "infty_norm" operation.
	 *
	 * The block shape is read from the first two entries of rpntr and cpntr
	 * when both pointers are given; otherwise a 1 x 1 block is assumed.
	 * A 1 x 1 block is forwarded to the r1_c1 "uu" kernel.  Any other shape
	 * is forwarded to the r1_c1 "ul" (looping) kernel when
	 * RSB_WANT_LOOPING_KERNELS is defined, and is otherwise rejected with
	 * RSB_ERR_UNSUPPORTED_OPERATION.  All arguments are passed through
	 * unchanged.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Generator template note (kept from the original): since this is
	 * strictly blocked code, the rhs and the out vector should tolerate a
	 * small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* experimental default, for the bounded box patch */

	if(rpntr && cpntr)
	{
		blk_cols = cpntr[1]-cpntr[0];
		blk_rows = rpntr[1]-rpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_float_H__tT_r1_c1_uu_sU_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* any other block shape */
#ifdef RSB_WANT_LOOPING_KERNELS 
	return rsb__BCSR_infty_norm_float_H__tT_r1_c1_ul_sU_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_float_H_u_tT_sU_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "infty_norm" operation.
	 *
	 * The block shape is read from the first two entries of rpntr and cpntr
	 * when both pointers are given; otherwise a 1 x 1 block is assumed.
	 * A 1 x 1 block is forwarded to the r1_c1 "uu" kernel.  Any other shape
	 * is forwarded to the r1_c1 "ul" (looping) kernel when
	 * RSB_WANT_LOOPING_KERNELS is defined, and is otherwise rejected with
	 * RSB_ERR_UNSUPPORTED_OPERATION.  All arguments are passed through
	 * unchanged.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Generator template note (kept from the original): since this is
	 * strictly blocked code, the rhs and the out vector should tolerate a
	 * small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* experimental default, for the bounded box patch */

	if(rpntr && cpntr)
	{
		blk_cols = cpntr[1]-cpntr[0];
		blk_rows = rpntr[1]-rpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_float_H__tT_r1_c1_uu_sU_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* any other block shape */
#ifdef RSB_WANT_LOOPING_KERNELS 
	return rsb__BCSR_infty_norm_float_H__tT_r1_c1_ul_sU_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_float_C_u_tC_sU_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "infty_norm" operation.
	 *
	 * The block shape is read from the first two entries of rpntr and cpntr
	 * when both pointers are given; otherwise a 1 x 1 block is assumed.
	 * A 1 x 1 block is forwarded to the r1_c1 "uu" kernel.  Any other shape
	 * is forwarded to the r1_c1 "ul" (looping) kernel when
	 * RSB_WANT_LOOPING_KERNELS is defined, and is otherwise rejected with
	 * RSB_ERR_UNSUPPORTED_OPERATION.  All arguments are passed through
	 * unchanged.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Generator template note (kept from the original): since this is
	 * strictly blocked code, the rhs and the out vector should tolerate a
	 * small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* experimental default, for the bounded box patch */

	if(rpntr && cpntr)
	{
		blk_cols = cpntr[1]-cpntr[0];
		blk_rows = rpntr[1]-rpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_float_C__tC_r1_c1_uu_sU_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* any other block shape */
#ifdef RSB_WANT_LOOPING_KERNELS 
	return rsb__BCSR_infty_norm_float_C__tC_r1_c1_ul_sU_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_float_C_u_tC_sU_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "infty_norm" operation.
	 *
	 * The block shape is read from the first two entries of rpntr and cpntr
	 * when both pointers are given; otherwise a 1 x 1 block is assumed.
	 * A 1 x 1 block is forwarded to the r1_c1 "uu" kernel.  Any other shape
	 * is forwarded to the r1_c1 "ul" (looping) kernel when
	 * RSB_WANT_LOOPING_KERNELS is defined, and is otherwise rejected with
	 * RSB_ERR_UNSUPPORTED_OPERATION.  All arguments are passed through
	 * unchanged.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Generator template note (kept from the original): since this is
	 * strictly blocked code, the rhs and the out vector should tolerate a
	 * small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* experimental default, for the bounded box patch */

	if(rpntr && cpntr)
	{
		blk_cols = cpntr[1]-cpntr[0];
		blk_rows = rpntr[1]-rpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_float_C__tC_r1_c1_uu_sU_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* any other block shape */
#ifdef RSB_WANT_LOOPING_KERNELS 
	return rsb__BCSR_infty_norm_float_C__tC_r1_c1_ul_sU_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_float_H_u_tC_sU_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "infty_norm" operation.
	 *
	 * The block shape is read from the first two entries of rpntr and cpntr
	 * when both pointers are given; otherwise a 1 x 1 block is assumed.
	 * A 1 x 1 block is forwarded to the r1_c1 "uu" kernel.  Any other shape
	 * is forwarded to the r1_c1 "ul" (looping) kernel when
	 * RSB_WANT_LOOPING_KERNELS is defined, and is otherwise rejected with
	 * RSB_ERR_UNSUPPORTED_OPERATION.  All arguments are passed through
	 * unchanged.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Generator template note (kept from the original): since this is
	 * strictly blocked code, the rhs and the out vector should tolerate a
	 * small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* experimental default, for the bounded box patch */

	if(rpntr && cpntr)
	{
		blk_cols = cpntr[1]-cpntr[0];
		blk_rows = rpntr[1]-rpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_float_H__tC_r1_c1_uu_sU_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* any other block shape */
#ifdef RSB_WANT_LOOPING_KERNELS 
	return rsb__BCSR_infty_norm_float_H__tC_r1_c1_ul_sU_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_float_H_u_tC_sU_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "infty_norm" operation.
	 *
	 * The block shape is read from the first two entries of rpntr and cpntr
	 * when both pointers are given; otherwise a 1 x 1 block is assumed.
	 * A 1 x 1 block is forwarded to the r1_c1 "uu" kernel.  Any other shape
	 * is forwarded to the r1_c1 "ul" (looping) kernel when
	 * RSB_WANT_LOOPING_KERNELS is defined, and is otherwise rejected with
	 * RSB_ERR_UNSUPPORTED_OPERATION.  All arguments are passed through
	 * unchanged.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Generator template note (kept from the original): since this is
	 * strictly blocked code, the rhs and the out vector should tolerate a
	 * small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* experimental default, for the bounded box patch */

	if(rpntr && cpntr)
	{
		blk_cols = cpntr[1]-cpntr[0];
		blk_rows = rpntr[1]-rpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_float_H__tC_r1_c1_uu_sU_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* any other block shape */
#ifdef RSB_WANT_LOOPING_KERNELS 
	return rsb__BCSR_infty_norm_float_H__tC_r1_c1_ul_sU_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_float_C_u_tN_sS_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "infty_norm" operation.
	 *
	 * The block shape is read from the first two entries of rpntr and cpntr
	 * when both pointers are given; otherwise a 1 x 1 block is assumed.
	 * A 1 x 1 block is forwarded to the r1_c1 "uu" kernel.  Any other shape
	 * is forwarded to the r1_c1 "ul" (looping) kernel when
	 * RSB_WANT_LOOPING_KERNELS is defined, and is otherwise rejected with
	 * RSB_ERR_UNSUPPORTED_OPERATION.  All arguments are passed through
	 * unchanged.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Generator template note (kept from the original): since this is
	 * strictly blocked code, the rhs and the out vector should tolerate a
	 * small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* experimental default, for the bounded box patch */

	if(rpntr && cpntr)
	{
		blk_cols = cpntr[1]-cpntr[0];
		blk_rows = rpntr[1]-rpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_float_C__tN_r1_c1_uu_sS_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* any other block shape */
#ifdef RSB_WANT_LOOPING_KERNELS 
	return rsb__BCSR_infty_norm_float_C__tN_r1_c1_ul_sS_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_float_C_u_tN_sS_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "infty_norm" operation.
	 *
	 * The block size is taken from the first two entries of rpntr and cpntr;
	 * if either array is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is served by the r1_c1_uu kernel. Any other blocking is
	 * handed to the r1_c1_ul kernel when RSB_WANT_LOOPING_KERNELS is defined,
	 * and is rejected otherwise.
	 * All the arguments are forwarded unchanged to the selected kernel.
	 *
	 * \return the error value of the invoked kernel, or
	 * RSB_ERR_UNSUPPORTED_OPERATION if no kernel is compiled in for the blocking.
	 *
	 * Since this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	/* 1 x 1 default: experimental, for the bounded box patch */
	rsb_coo_idx_t blk_rows = 1;
	rsb_coo_idx_t blk_cols = 1;

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	/* 1 1 BCSR */
	if(blk_rows == 1 && blk_cols == 1)
		return rsb__BCSR_infty_norm_float_C__tN_r1_c1_uu_sS_dI_uG(VA, row_sums, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr, br, bc, roff, coff, flags);

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_float_C__tN_r1_c1_ul_sS_dI_uG(VA, row_sums, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr, br, bc, roff, coff, flags);
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_float_H_u_tN_sS_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "infty_norm" operation.
	 *
	 * The block size is taken from the first two entries of rpntr and cpntr;
	 * if either array is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is served by the r1_c1_uu kernel. Any other blocking is
	 * handed to the r1_c1_ul kernel when RSB_WANT_LOOPING_KERNELS is defined,
	 * and is rejected otherwise.
	 * All the arguments are forwarded unchanged to the selected kernel.
	 *
	 * \return the error value of the invoked kernel, or
	 * RSB_ERR_UNSUPPORTED_OPERATION if no kernel is compiled in for the blocking.
	 *
	 * Since this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	/* 1 x 1 default: experimental, for the bounded box patch */
	rsb_coo_idx_t blk_rows = 1;
	rsb_coo_idx_t blk_cols = 1;

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	/* 1 1 BCSR */
	if(blk_rows == 1 && blk_cols == 1)
		return rsb__BCSR_infty_norm_float_H__tN_r1_c1_uu_sS_dE_uG(VA, row_sums, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr, br, bc, roff, coff, flags);

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_float_H__tN_r1_c1_ul_sS_dE_uG(VA, row_sums, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr, br, bc, roff, coff, flags);
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_float_H_u_tN_sS_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "infty_norm" operation.
	 *
	 * The block size is taken from the first two entries of rpntr and cpntr;
	 * if either array is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is served by the r1_c1_uu kernel. Any other blocking is
	 * handed to the r1_c1_ul kernel when RSB_WANT_LOOPING_KERNELS is defined,
	 * and is rejected otherwise.
	 * All the arguments are forwarded unchanged to the selected kernel.
	 *
	 * \return the error value of the invoked kernel, or
	 * RSB_ERR_UNSUPPORTED_OPERATION if no kernel is compiled in for the blocking.
	 *
	 * Since this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	/* 1 x 1 default: experimental, for the bounded box patch */
	rsb_coo_idx_t blk_rows = 1;
	rsb_coo_idx_t blk_cols = 1;

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	/* 1 1 BCSR */
	if(blk_rows == 1 && blk_cols == 1)
		return rsb__BCSR_infty_norm_float_H__tN_r1_c1_uu_sS_dI_uG(VA, row_sums, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr, br, bc, roff, coff, flags);

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_float_H__tN_r1_c1_ul_sS_dI_uG(VA, row_sums, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr, br, bc, roff, coff, flags);
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_float_C_u_tT_sS_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "infty_norm" operation.
	 *
	 * The block size is taken from the first two entries of rpntr and cpntr;
	 * if either array is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is served by the r1_c1_uu kernel. Any other blocking is
	 * handed to the r1_c1_ul kernel when RSB_WANT_LOOPING_KERNELS is defined,
	 * and is rejected otherwise.
	 * All the arguments are forwarded unchanged to the selected kernel.
	 *
	 * \return the error value of the invoked kernel, or
	 * RSB_ERR_UNSUPPORTED_OPERATION if no kernel is compiled in for the blocking.
	 *
	 * Since this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	/* 1 x 1 default: experimental, for the bounded box patch */
	rsb_coo_idx_t blk_rows = 1;
	rsb_coo_idx_t blk_cols = 1;

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	/* 1 1 BCSR */
	if(blk_rows == 1 && blk_cols == 1)
		return rsb__BCSR_infty_norm_float_C__tT_r1_c1_uu_sS_dE_uG(VA, row_sums, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr, br, bc, roff, coff, flags);

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_float_C__tT_r1_c1_ul_sS_dE_uG(VA, row_sums, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr, br, bc, roff, coff, flags);
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_float_C_u_tT_sS_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "infty_norm" operation.
	 *
	 * The block size is taken from the first two entries of rpntr and cpntr;
	 * if either array is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is served by the r1_c1_uu kernel. Any other blocking is
	 * handed to the r1_c1_ul kernel when RSB_WANT_LOOPING_KERNELS is defined,
	 * and is rejected otherwise.
	 * All the arguments are forwarded unchanged to the selected kernel.
	 *
	 * \return the error value of the invoked kernel, or
	 * RSB_ERR_UNSUPPORTED_OPERATION if no kernel is compiled in for the blocking.
	 *
	 * Since this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	/* 1 x 1 default: experimental, for the bounded box patch */
	rsb_coo_idx_t blk_rows = 1;
	rsb_coo_idx_t blk_cols = 1;

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	/* 1 1 BCSR */
	if(blk_rows == 1 && blk_cols == 1)
		return rsb__BCSR_infty_norm_float_C__tT_r1_c1_uu_sS_dI_uG(VA, row_sums, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr, br, bc, roff, coff, flags);

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_float_C__tT_r1_c1_ul_sS_dI_uG(VA, row_sums, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr, br, bc, roff, coff, flags);
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_float_H_u_tT_sS_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "infty_norm" operation.
	 *
	 * The block size is taken from the first two entries of rpntr and cpntr;
	 * if either array is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is served by the r1_c1_uu kernel. Any other blocking is
	 * handed to the r1_c1_ul kernel when RSB_WANT_LOOPING_KERNELS is defined,
	 * and is rejected otherwise.
	 * All the arguments are forwarded unchanged to the selected kernel.
	 *
	 * \return the error value of the invoked kernel, or
	 * RSB_ERR_UNSUPPORTED_OPERATION if no kernel is compiled in for the blocking.
	 *
	 * Since this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	/* 1 x 1 default: experimental, for the bounded box patch */
	rsb_coo_idx_t blk_rows = 1;
	rsb_coo_idx_t blk_cols = 1;

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	/* 1 1 BCSR */
	if(blk_rows == 1 && blk_cols == 1)
		return rsb__BCSR_infty_norm_float_H__tT_r1_c1_uu_sS_dE_uG(VA, row_sums, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr, br, bc, roff, coff, flags);

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_float_H__tT_r1_c1_ul_sS_dE_uG(VA, row_sums, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr, br, bc, roff, coff, flags);
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_float_H_u_tT_sS_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "infty_norm" operation.
	 *
	 * The block size is taken from the first two entries of rpntr and cpntr;
	 * if either array is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is served by the r1_c1_uu kernel. Any other blocking is
	 * handed to the r1_c1_ul kernel when RSB_WANT_LOOPING_KERNELS is defined,
	 * and is rejected otherwise.
	 * All the arguments are forwarded unchanged to the selected kernel.
	 *
	 * \return the error value of the invoked kernel, or
	 * RSB_ERR_UNSUPPORTED_OPERATION if no kernel is compiled in for the blocking.
	 *
	 * Since this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	/* 1 x 1 default: experimental, for the bounded box patch */
	rsb_coo_idx_t blk_rows = 1;
	rsb_coo_idx_t blk_cols = 1;

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	/* 1 1 BCSR */
	if(blk_rows == 1 && blk_cols == 1)
		return rsb__BCSR_infty_norm_float_H__tT_r1_c1_uu_sS_dI_uG(VA, row_sums, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr, br, bc, roff, coff, flags);

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_float_H__tT_r1_c1_ul_sS_dI_uG(VA, row_sums, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr, br, bc, roff, coff, flags);
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_float_C_u_tC_sS_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "infty_norm" operation.
	 *
	 * The block size is taken from the first two entries of rpntr and cpntr;
	 * if either array is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is served by the r1_c1_uu kernel. Any other blocking is
	 * handed to the r1_c1_ul kernel when RSB_WANT_LOOPING_KERNELS is defined,
	 * and is rejected otherwise.
	 * All the arguments are forwarded unchanged to the selected kernel.
	 *
	 * \return the error value of the invoked kernel, or
	 * RSB_ERR_UNSUPPORTED_OPERATION if no kernel is compiled in for the blocking.
	 *
	 * Since this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	/* 1 x 1 default: experimental, for the bounded box patch */
	rsb_coo_idx_t blk_rows = 1;
	rsb_coo_idx_t blk_cols = 1;

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	/* 1 1 BCSR */
	if(blk_rows == 1 && blk_cols == 1)
		return rsb__BCSR_infty_norm_float_C__tC_r1_c1_uu_sS_dE_uG(VA, row_sums, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr, br, bc, roff, coff, flags);

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_float_C__tC_r1_c1_ul_sS_dE_uG(VA, row_sums, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr, br, bc, roff, coff, flags);
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_float_C_u_tC_sS_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "infty_norm" operation.
	 *
	 * The block size is taken from the first two entries of rpntr and cpntr;
	 * if either array is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is served by the r1_c1_uu kernel. Any other blocking is
	 * handed to the r1_c1_ul kernel when RSB_WANT_LOOPING_KERNELS is defined,
	 * and is rejected otherwise.
	 * All the arguments are forwarded unchanged to the selected kernel.
	 *
	 * \return the error value of the invoked kernel, or
	 * RSB_ERR_UNSUPPORTED_OPERATION if no kernel is compiled in for the blocking.
	 *
	 * Since this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	/* 1 x 1 default: experimental, for the bounded box patch */
	rsb_coo_idx_t blk_rows = 1;
	rsb_coo_idx_t blk_cols = 1;

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	/* 1 1 BCSR */
	if(blk_rows == 1 && blk_cols == 1)
		return rsb__BCSR_infty_norm_float_C__tC_r1_c1_uu_sS_dI_uG(VA, row_sums, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr, br, bc, roff, coff, flags);

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_float_C__tC_r1_c1_ul_sS_dI_uG(VA, row_sums, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr, br, bc, roff, coff, flags);
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_float_H_u_tC_sS_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "infty_norm" operation.
	 *
	 * The block size is taken from the first two entries of rpntr and cpntr;
	 * if either array is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is served by the r1_c1_uu kernel. Any other blocking is
	 * handed to the r1_c1_ul kernel when RSB_WANT_LOOPING_KERNELS is defined,
	 * and is rejected otherwise.
	 * All the arguments are forwarded unchanged to the selected kernel.
	 *
	 * \return the error value of the invoked kernel, or
	 * RSB_ERR_UNSUPPORTED_OPERATION if no kernel is compiled in for the blocking.
	 *
	 * Since this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	/* 1 x 1 default: experimental, for the bounded box patch */
	rsb_coo_idx_t blk_rows = 1;
	rsb_coo_idx_t blk_cols = 1;

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	/* 1 1 BCSR */
	if(blk_rows == 1 && blk_cols == 1)
		return rsb__BCSR_infty_norm_float_H__tC_r1_c1_uu_sS_dE_uG(VA, row_sums, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr, br, bc, roff, coff, flags);

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_float_H__tC_r1_c1_ul_sS_dE_uG(VA, row_sums, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr, br, bc, roff, coff, flags);
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_float_H_u_tC_sS_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "infty_norm" operation.
	 *
	 * The block size is taken from the first two entries of rpntr and cpntr;
	 * if either array is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is served by the r1_c1_uu kernel. Any other blocking is
	 * handed to the r1_c1_ul kernel when RSB_WANT_LOOPING_KERNELS is defined,
	 * and is rejected otherwise.
	 * All the arguments are forwarded unchanged to the selected kernel.
	 *
	 * \return the error value of the invoked kernel, or
	 * RSB_ERR_UNSUPPORTED_OPERATION if no kernel is compiled in for the blocking.
	 *
	 * Since this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	/* 1 x 1 default: experimental, for the bounded box patch */
	rsb_coo_idx_t blk_rows = 1;
	rsb_coo_idx_t blk_cols = 1;

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	/* 1 1 BCSR */
	if(blk_rows == 1 && blk_cols == 1)
		return rsb__BCSR_infty_norm_float_H__tC_r1_c1_uu_sS_dI_uG(VA, row_sums, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr, br, bc, roff, coff, flags);

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_float_H__tC_r1_c1_ul_sS_dI_uG(VA, row_sums, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr, br, bc, roff, coff, flags);
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_float_C_u_tN_sH_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "infty_norm" operation.
	 *
	 * The block size is taken from the first two entries of rpntr and cpntr;
	 * if either array is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is served by the r1_c1_uu kernel. Any other blocking is
	 * handed to the r1_c1_ul kernel when RSB_WANT_LOOPING_KERNELS is defined,
	 * and is rejected otherwise.
	 * All the arguments are forwarded unchanged to the selected kernel.
	 *
	 * \return the error value of the invoked kernel, or
	 * RSB_ERR_UNSUPPORTED_OPERATION if no kernel is compiled in for the blocking.
	 *
	 * Since this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	/* 1 x 1 default: experimental, for the bounded box patch */
	rsb_coo_idx_t blk_rows = 1;
	rsb_coo_idx_t blk_cols = 1;

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	/* 1 1 BCSR */
	if(blk_rows == 1 && blk_cols == 1)
		return rsb__BCSR_infty_norm_float_C__tN_r1_c1_uu_sH_dE_uG(VA, row_sums, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr, br, bc, roff, coff, flags);

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_float_C__tN_r1_c1_ul_sH_dE_uG(VA, row_sums, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr, br, bc, roff, coff, flags);
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_float_C_u_tN_sH_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "infty_norm" operation.
	 *
	 * The block size is taken from the first two entries of rpntr and cpntr;
	 * if either array is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is served by the r1_c1_uu kernel. Any other blocking is
	 * handed to the r1_c1_ul kernel when RSB_WANT_LOOPING_KERNELS is defined,
	 * and is rejected otherwise.
	 * All the arguments are forwarded unchanged to the selected kernel.
	 *
	 * \return the error value of the invoked kernel, or
	 * RSB_ERR_UNSUPPORTED_OPERATION if no kernel is compiled in for the blocking.
	 *
	 * Since this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	/* 1 x 1 default: experimental, for the bounded box patch */
	rsb_coo_idx_t blk_rows = 1;
	rsb_coo_idx_t blk_cols = 1;

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	/* 1 1 BCSR */
	if(blk_rows == 1 && blk_cols == 1)
		return rsb__BCSR_infty_norm_float_C__tN_r1_c1_uu_sH_dI_uG(VA, row_sums, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr, br, bc, roff, coff, flags);

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_float_C__tN_r1_c1_ul_sH_dI_uG(VA, row_sums, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr, br, bc, roff, coff, flags);
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_float_H_u_tN_sH_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "infty_norm" operation.
	 *
	 * The block size is taken from the first two entries of rpntr and cpntr;
	 * if either array is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is served by the r1_c1_uu kernel. Any other blocking is
	 * handed to the r1_c1_ul kernel when RSB_WANT_LOOPING_KERNELS is defined,
	 * and is rejected otherwise.
	 * All the arguments are forwarded unchanged to the selected kernel.
	 *
	 * \return the error value of the invoked kernel, or
	 * RSB_ERR_UNSUPPORTED_OPERATION if no kernel is compiled in for the blocking.
	 *
	 * Since this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	/* 1 x 1 default: experimental, for the bounded box patch */
	rsb_coo_idx_t blk_rows = 1;
	rsb_coo_idx_t blk_cols = 1;

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	/* 1 1 BCSR */
	if(blk_rows == 1 && blk_cols == 1)
		return rsb__BCSR_infty_norm_float_H__tN_r1_c1_uu_sH_dE_uG(VA, row_sums, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr, br, bc, roff, coff, flags);

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_float_H__tN_r1_c1_ul_sH_dE_uG(VA, row_sums, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr, br, bc, roff, coff, flags);
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_float_H_u_tN_sH_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "infty_norm" operation.
	 *
	 * The block size is taken from the first two entries of rpntr and cpntr;
	 * if either array is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is served by the r1_c1_uu kernel. Any other blocking is
	 * handed to the r1_c1_ul kernel when RSB_WANT_LOOPING_KERNELS is defined,
	 * and is rejected otherwise.
	 * All the arguments are forwarded unchanged to the selected kernel.
	 *
	 * \return the error value of the invoked kernel, or
	 * RSB_ERR_UNSUPPORTED_OPERATION if no kernel is compiled in for the blocking.
	 *
	 * Since this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	/* 1 x 1 default: experimental, for the bounded box patch */
	rsb_coo_idx_t blk_rows = 1;
	rsb_coo_idx_t blk_cols = 1;

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	/* 1 1 BCSR */
	if(blk_rows == 1 && blk_cols == 1)
		return rsb__BCSR_infty_norm_float_H__tN_r1_c1_uu_sH_dI_uG(VA, row_sums, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr, br, bc, roff, coff, flags);

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_float_H__tN_r1_c1_ul_sH_dI_uG(VA, row_sums, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr, br, bc, roff, coff, flags);
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_float_C_u_tT_sH_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "infty_norm" operation.
	 *
	 * The blocking is taken from the leading entries of rpntr and cpntr
	 * (rows = rpntr[1]-rpntr[0], columns = cpntr[1]-cpntr[0]); if either
	 * array is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is handed to the "_uu_" kernel. Any other blocking is
	 * handed to the "_ul_" kernel when RSB_WANT_LOOPING_KERNELS is defined,
	 * and is answered with RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * As this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow, not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* fallback blocking: experimental, for the bounded box patch */

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_float_C__tT_r1_c1_uu_sH_dE_uG(
			VA, row_sums, Mdim, mdim, bindx, bpntr, indptr,
			rpntr, cpntr, br, bc, roff, coff, flags);
	}

	/* every other blocking shares the same fallback */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_float_C__tT_r1_c1_ul_sH_dE_uG(
		VA, row_sums, Mdim, mdim, bindx, bpntr, indptr,
		rpntr, cpntr, br, bc, roff, coff, flags);
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_float_C_u_tT_sH_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "infty_norm" operation.
	 *
	 * The blocking is taken from the leading entries of rpntr and cpntr
	 * (rows = rpntr[1]-rpntr[0], columns = cpntr[1]-cpntr[0]); if either
	 * array is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is handed to the "_uu_" kernel. Any other blocking is
	 * handed to the "_ul_" kernel when RSB_WANT_LOOPING_KERNELS is defined,
	 * and is answered with RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * As this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow, not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* fallback blocking: experimental, for the bounded box patch */

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_float_C__tT_r1_c1_uu_sH_dI_uG(
			VA, row_sums, Mdim, mdim, bindx, bpntr, indptr,
			rpntr, cpntr, br, bc, roff, coff, flags);
	}

	/* every other blocking shares the same fallback */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_float_C__tT_r1_c1_ul_sH_dI_uG(
		VA, row_sums, Mdim, mdim, bindx, bpntr, indptr,
		rpntr, cpntr, br, bc, roff, coff, flags);
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_float_H_u_tT_sH_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "infty_norm" operation.
	 *
	 * The blocking is taken from the leading entries of rpntr and cpntr
	 * (rows = rpntr[1]-rpntr[0], columns = cpntr[1]-cpntr[0]); if either
	 * array is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is handed to the "_uu_" kernel. Any other blocking is
	 * handed to the "_ul_" kernel when RSB_WANT_LOOPING_KERNELS is defined,
	 * and is answered with RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * As this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow, not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* fallback blocking: experimental, for the bounded box patch */

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_float_H__tT_r1_c1_uu_sH_dE_uG(
			VA, row_sums, Mdim, mdim, bindx, bpntr, indptr,
			rpntr, cpntr, br, bc, roff, coff, flags);
	}

	/* every other blocking shares the same fallback */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_float_H__tT_r1_c1_ul_sH_dE_uG(
		VA, row_sums, Mdim, mdim, bindx, bpntr, indptr,
		rpntr, cpntr, br, bc, roff, coff, flags);
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_float_H_u_tT_sH_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "infty_norm" operation.
	 *
	 * The blocking is taken from the leading entries of rpntr and cpntr
	 * (rows = rpntr[1]-rpntr[0], columns = cpntr[1]-cpntr[0]); if either
	 * array is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is handed to the "_uu_" kernel. Any other blocking is
	 * handed to the "_ul_" kernel when RSB_WANT_LOOPING_KERNELS is defined,
	 * and is answered with RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * As this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow, not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* fallback blocking: experimental, for the bounded box patch */

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_float_H__tT_r1_c1_uu_sH_dI_uG(
			VA, row_sums, Mdim, mdim, bindx, bpntr, indptr,
			rpntr, cpntr, br, bc, roff, coff, flags);
	}

	/* every other blocking shares the same fallback */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_float_H__tT_r1_c1_ul_sH_dI_uG(
		VA, row_sums, Mdim, mdim, bindx, bpntr, indptr,
		rpntr, cpntr, br, bc, roff, coff, flags);
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_float_C_u_tC_sH_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "infty_norm" operation.
	 *
	 * The blocking is taken from the leading entries of rpntr and cpntr
	 * (rows = rpntr[1]-rpntr[0], columns = cpntr[1]-cpntr[0]); if either
	 * array is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is handed to the "_uu_" kernel. Any other blocking is
	 * handed to the "_ul_" kernel when RSB_WANT_LOOPING_KERNELS is defined,
	 * and is answered with RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * As this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow, not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* fallback blocking: experimental, for the bounded box patch */

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_float_C__tC_r1_c1_uu_sH_dE_uG(
			VA, row_sums, Mdim, mdim, bindx, bpntr, indptr,
			rpntr, cpntr, br, bc, roff, coff, flags);
	}

	/* every other blocking shares the same fallback */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_float_C__tC_r1_c1_ul_sH_dE_uG(
		VA, row_sums, Mdim, mdim, bindx, bpntr, indptr,
		rpntr, cpntr, br, bc, roff, coff, flags);
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_float_C_u_tC_sH_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "infty_norm" operation.
	 *
	 * The blocking is taken from the leading entries of rpntr and cpntr
	 * (rows = rpntr[1]-rpntr[0], columns = cpntr[1]-cpntr[0]); if either
	 * array is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is handed to the "_uu_" kernel. Any other blocking is
	 * handed to the "_ul_" kernel when RSB_WANT_LOOPING_KERNELS is defined,
	 * and is answered with RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * As this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow, not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* fallback blocking: experimental, for the bounded box patch */

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_float_C__tC_r1_c1_uu_sH_dI_uG(
			VA, row_sums, Mdim, mdim, bindx, bpntr, indptr,
			rpntr, cpntr, br, bc, roff, coff, flags);
	}

	/* every other blocking shares the same fallback */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_float_C__tC_r1_c1_ul_sH_dI_uG(
		VA, row_sums, Mdim, mdim, bindx, bpntr, indptr,
		rpntr, cpntr, br, bc, roff, coff, flags);
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_float_H_u_tC_sH_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "infty_norm" operation.
	 *
	 * The blocking is taken from the leading entries of rpntr and cpntr
	 * (rows = rpntr[1]-rpntr[0], columns = cpntr[1]-cpntr[0]); if either
	 * array is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is handed to the "_uu_" kernel. Any other blocking is
	 * handed to the "_ul_" kernel when RSB_WANT_LOOPING_KERNELS is defined,
	 * and is answered with RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * As this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow, not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* fallback blocking: experimental, for the bounded box patch */

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_float_H__tC_r1_c1_uu_sH_dE_uG(
			VA, row_sums, Mdim, mdim, bindx, bpntr, indptr,
			rpntr, cpntr, br, bc, roff, coff, flags);
	}

	/* every other blocking shares the same fallback */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_float_H__tC_r1_c1_ul_sH_dE_uG(
		VA, row_sums, Mdim, mdim, bindx, bpntr, indptr,
		rpntr, cpntr, br, bc, roff, coff, flags);
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_infty_norm_float_H_u_tC_sH_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "infty_norm" operation.
	 *
	 * The blocking is taken from the leading entries of rpntr and cpntr
	 * (rows = rpntr[1]-rpntr[0], columns = cpntr[1]-cpntr[0]); if either
	 * array is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is handed to the "_uu_" kernel. Any other blocking is
	 * handed to the "_ul_" kernel when RSB_WANT_LOOPING_KERNELS is defined,
	 * and is answered with RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * As this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow, not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* fallback blocking: experimental, for the bounded box patch */

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_infty_norm_float_H__tC_r1_c1_uu_sH_dI_uG(
			VA, row_sums, Mdim, mdim, bindx, bpntr, indptr,
			rpntr, cpntr, br, bc, roff, coff, flags);
	}

	/* every other blocking shares the same fallback */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_infty_norm_float_H__tC_r1_c1_ul_sH_dI_uG(
		VA, row_sums, Mdim, mdim, bindx, bpntr, indptr,
		rpntr, cpntr, br, bc, roff, coff, flags);
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_float_C_u_tN_sU_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "rowssums" operation.
	 *
	 * The blocking is taken from the leading entries of rpntr and cpntr
	 * (rows = rpntr[1]-rpntr[0], columns = cpntr[1]-cpntr[0]); if either
	 * array is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is handed to the "_uu_" kernel. Any other blocking is
	 * handed to the "_ul_" kernel when RSB_WANT_LOOPING_KERNELS is defined,
	 * and is answered with RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * As this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow, not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* fallback blocking: experimental, for the bounded box patch */

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_rowssums_float_C__tN_r1_c1_uu_sU_dE_uG(
			VA, row_sums, Mdim, mdim, bindx, bpntr, indptr,
			rpntr, cpntr, br, bc, roff, coff, flags);
	}

	/* every other blocking shares the same fallback */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_float_C__tN_r1_c1_ul_sU_dE_uG(
		VA, row_sums, Mdim, mdim, bindx, bpntr, indptr,
		rpntr, cpntr, br, bc, roff, coff, flags);
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_float_C_u_tN_sU_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "rowssums" operation.
	 *
	 * The blocking is taken from the leading entries of rpntr and cpntr
	 * (rows = rpntr[1]-rpntr[0], columns = cpntr[1]-cpntr[0]); if either
	 * array is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is handed to the "_uu_" kernel. Any other blocking is
	 * handed to the "_ul_" kernel when RSB_WANT_LOOPING_KERNELS is defined,
	 * and is answered with RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * As this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow, not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* fallback blocking: experimental, for the bounded box patch */

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_rowssums_float_C__tN_r1_c1_uu_sU_dI_uG(
			VA, row_sums, Mdim, mdim, bindx, bpntr, indptr,
			rpntr, cpntr, br, bc, roff, coff, flags);
	}

	/* every other blocking shares the same fallback */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_float_C__tN_r1_c1_ul_sU_dI_uG(
		VA, row_sums, Mdim, mdim, bindx, bpntr, indptr,
		rpntr, cpntr, br, bc, roff, coff, flags);
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_float_H_u_tN_sU_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "rowssums" operation.
	 *
	 * The blocking is taken from the leading entries of rpntr and cpntr
	 * (rows = rpntr[1]-rpntr[0], columns = cpntr[1]-cpntr[0]); if either
	 * array is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is handed to the "_uu_" kernel. Any other blocking is
	 * handed to the "_ul_" kernel when RSB_WANT_LOOPING_KERNELS is defined,
	 * and is answered with RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * As this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow, not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* fallback blocking: experimental, for the bounded box patch */

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_rowssums_float_H__tN_r1_c1_uu_sU_dE_uG(
			VA, row_sums, Mdim, mdim, bindx, bpntr, indptr,
			rpntr, cpntr, br, bc, roff, coff, flags);
	}

	/* every other blocking shares the same fallback */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_float_H__tN_r1_c1_ul_sU_dE_uG(
		VA, row_sums, Mdim, mdim, bindx, bpntr, indptr,
		rpntr, cpntr, br, bc, roff, coff, flags);
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_float_H_u_tN_sU_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "rowssums" operation.
	 *
	 * The blocking is taken from the leading entries of rpntr and cpntr
	 * (rows = rpntr[1]-rpntr[0], columns = cpntr[1]-cpntr[0]); if either
	 * array is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is handed to the "_uu_" kernel. Any other blocking is
	 * handed to the "_ul_" kernel when RSB_WANT_LOOPING_KERNELS is defined,
	 * and is answered with RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * As this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow, not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* fallback blocking: experimental, for the bounded box patch */

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_rowssums_float_H__tN_r1_c1_uu_sU_dI_uG(
			VA, row_sums, Mdim, mdim, bindx, bpntr, indptr,
			rpntr, cpntr, br, bc, roff, coff, flags);
	}

	/* every other blocking shares the same fallback */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_float_H__tN_r1_c1_ul_sU_dI_uG(
		VA, row_sums, Mdim, mdim, bindx, bpntr, indptr,
		rpntr, cpntr, br, bc, roff, coff, flags);
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_float_C_u_tT_sU_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "rowssums" operation.
	 *
	 * The blocking is taken from the leading entries of rpntr and cpntr
	 * (rows = rpntr[1]-rpntr[0], columns = cpntr[1]-cpntr[0]); if either
	 * array is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is handed to the "_uu_" kernel. Any other blocking is
	 * handed to the "_ul_" kernel when RSB_WANT_LOOPING_KERNELS is defined,
	 * and is answered with RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * As this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow, not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* fallback blocking: experimental, for the bounded box patch */

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_rowssums_float_C__tT_r1_c1_uu_sU_dE_uG(
			VA, row_sums, Mdim, mdim, bindx, bpntr, indptr,
			rpntr, cpntr, br, bc, roff, coff, flags);
	}

	/* every other blocking shares the same fallback */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_float_C__tT_r1_c1_ul_sU_dE_uG(
		VA, row_sums, Mdim, mdim, bindx, bpntr, indptr,
		rpntr, cpntr, br, bc, roff, coff, flags);
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_float_C_u_tT_sU_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "rowssums" operation.
	 *
	 * The blocking is taken from the leading entries of rpntr and cpntr
	 * (rows = rpntr[1]-rpntr[0], columns = cpntr[1]-cpntr[0]); if either
	 * array is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is handed to the "_uu_" kernel. Any other blocking is
	 * handed to the "_ul_" kernel when RSB_WANT_LOOPING_KERNELS is defined,
	 * and is answered with RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * As this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow, not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* fallback blocking: experimental, for the bounded box patch */

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_rowssums_float_C__tT_r1_c1_uu_sU_dI_uG(
			VA, row_sums, Mdim, mdim, bindx, bpntr, indptr,
			rpntr, cpntr, br, bc, roff, coff, flags);
	}

	/* every other blocking shares the same fallback */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_float_C__tT_r1_c1_ul_sU_dI_uG(
		VA, row_sums, Mdim, mdim, bindx, bpntr, indptr,
		rpntr, cpntr, br, bc, roff, coff, flags);
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_float_H_u_tT_sU_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block size dispatcher for the "rowssums" operation.
	 *
	 * The blocking is taken from the leading entries of rpntr and cpntr
	 * (rows = rpntr[1]-rpntr[0], columns = cpntr[1]-cpntr[0]); if either
	 * array is missing, a 1 x 1 blocking is assumed.
	 * A 1 x 1 blocking is handed to the "_uu_" kernel. Any other blocking is
	 * handed to the "_ul_" kernel when RSB_WANT_LOOPING_KERNELS is defined,
	 * and is answered with RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * As this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow, not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;	/* fallback blocking: experimental, for the bounded box patch */

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1]-rpntr[0];
		blk_cols = cpntr[1]-cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_rowssums_float_H__tT_r1_c1_uu_sU_dE_uG(
			VA, row_sums, Mdim, mdim, bindx, bpntr, indptr,
			rpntr, cpntr, br, bc, roff, coff, flags);
	}

	/* every other blocking shares the same fallback */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_float_H__tT_r1_c1_ul_sU_dE_uG(
		VA, row_sums, Mdim, mdim, bindx, bpntr, indptr,
		rpntr, cpntr, br, bc, roff, coff, flags);
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_float_H_u_tT_sU_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* emitted by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" operation: it looks at the
	 * size of the first block and hands every argument, unchanged, to the
	 * kernel matching that size.
	 *
	 *  - Block size is cpntr[1]-cpntr[0] columns by rpntr[1]-rpntr[0] rows;
	 *    when either cpntr or rpntr is NULL, 1 x 1 is assumed
	 *    (experimental, for the bounded box patch).
	 *  - A 1 x 1 block is served by the "_uu_" kernel.
	 *  - Any other size is served by the "_ul_" kernel, which is referenced
	 *    only if RSB_WANT_LOOPING_KERNELS is defined; otherwise no kernel is
	 *    called and RSB_ERR_UNSUPPORTED_OPERATION is returned.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Since this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t block_rows = 1, block_cols = 1;

	if(cpntr && rpntr)
	{
		block_cols = cpntr[1]-cpntr[0];
		block_rows = rpntr[1]-rpntr[0];
	}

	/* 1 1 BCSR */
	if(block_rows == 1 && block_cols == 1)
		return rsb__BCSR_rowssums_float_H__tT_r1_c1_uu_sU_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_float_H__tT_r1_c1_ul_sU_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_float_C_u_tC_sU_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* emitted by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" operation: it looks at the
	 * size of the first block and hands every argument, unchanged, to the
	 * kernel matching that size.
	 *
	 *  - Block size is cpntr[1]-cpntr[0] columns by rpntr[1]-rpntr[0] rows;
	 *    when either cpntr or rpntr is NULL, 1 x 1 is assumed
	 *    (experimental, for the bounded box patch).
	 *  - A 1 x 1 block is served by the "_uu_" kernel.
	 *  - Any other size is served by the "_ul_" kernel, which is referenced
	 *    only if RSB_WANT_LOOPING_KERNELS is defined; otherwise no kernel is
	 *    called and RSB_ERR_UNSUPPORTED_OPERATION is returned.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Since this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t block_rows = 1, block_cols = 1;

	if(cpntr && rpntr)
	{
		block_cols = cpntr[1]-cpntr[0];
		block_rows = rpntr[1]-rpntr[0];
	}

	/* 1 1 BCSR */
	if(block_rows == 1 && block_cols == 1)
		return rsb__BCSR_rowssums_float_C__tC_r1_c1_uu_sU_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_float_C__tC_r1_c1_ul_sU_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_float_C_u_tC_sU_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* emitted by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" operation: it looks at the
	 * size of the first block and hands every argument, unchanged, to the
	 * kernel matching that size.
	 *
	 *  - Block size is cpntr[1]-cpntr[0] columns by rpntr[1]-rpntr[0] rows;
	 *    when either cpntr or rpntr is NULL, 1 x 1 is assumed
	 *    (experimental, for the bounded box patch).
	 *  - A 1 x 1 block is served by the "_uu_" kernel.
	 *  - Any other size is served by the "_ul_" kernel, which is referenced
	 *    only if RSB_WANT_LOOPING_KERNELS is defined; otherwise no kernel is
	 *    called and RSB_ERR_UNSUPPORTED_OPERATION is returned.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Since this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t block_rows = 1, block_cols = 1;

	if(cpntr && rpntr)
	{
		block_cols = cpntr[1]-cpntr[0];
		block_rows = rpntr[1]-rpntr[0];
	}

	/* 1 1 BCSR */
	if(block_rows == 1 && block_cols == 1)
		return rsb__BCSR_rowssums_float_C__tC_r1_c1_uu_sU_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_float_C__tC_r1_c1_ul_sU_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_float_H_u_tC_sU_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* emitted by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" operation: it looks at the
	 * size of the first block and hands every argument, unchanged, to the
	 * kernel matching that size.
	 *
	 *  - Block size is cpntr[1]-cpntr[0] columns by rpntr[1]-rpntr[0] rows;
	 *    when either cpntr or rpntr is NULL, 1 x 1 is assumed
	 *    (experimental, for the bounded box patch).
	 *  - A 1 x 1 block is served by the "_uu_" kernel.
	 *  - Any other size is served by the "_ul_" kernel, which is referenced
	 *    only if RSB_WANT_LOOPING_KERNELS is defined; otherwise no kernel is
	 *    called and RSB_ERR_UNSUPPORTED_OPERATION is returned.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Since this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t block_rows = 1, block_cols = 1;

	if(cpntr && rpntr)
	{
		block_cols = cpntr[1]-cpntr[0];
		block_rows = rpntr[1]-rpntr[0];
	}

	/* 1 1 BCSR */
	if(block_rows == 1 && block_cols == 1)
		return rsb__BCSR_rowssums_float_H__tC_r1_c1_uu_sU_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_float_H__tC_r1_c1_ul_sU_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_float_H_u_tC_sU_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* emitted by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" operation: it looks at the
	 * size of the first block and hands every argument, unchanged, to the
	 * kernel matching that size.
	 *
	 *  - Block size is cpntr[1]-cpntr[0] columns by rpntr[1]-rpntr[0] rows;
	 *    when either cpntr or rpntr is NULL, 1 x 1 is assumed
	 *    (experimental, for the bounded box patch).
	 *  - A 1 x 1 block is served by the "_uu_" kernel.
	 *  - Any other size is served by the "_ul_" kernel, which is referenced
	 *    only if RSB_WANT_LOOPING_KERNELS is defined; otherwise no kernel is
	 *    called and RSB_ERR_UNSUPPORTED_OPERATION is returned.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Since this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t block_rows = 1, block_cols = 1;

	if(cpntr && rpntr)
	{
		block_cols = cpntr[1]-cpntr[0];
		block_rows = rpntr[1]-rpntr[0];
	}

	/* 1 1 BCSR */
	if(block_rows == 1 && block_cols == 1)
		return rsb__BCSR_rowssums_float_H__tC_r1_c1_uu_sU_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_float_H__tC_r1_c1_ul_sU_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_float_C_u_tN_sS_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* emitted by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" operation: it looks at the
	 * size of the first block and hands every argument, unchanged, to the
	 * kernel matching that size.
	 *
	 *  - Block size is cpntr[1]-cpntr[0] columns by rpntr[1]-rpntr[0] rows;
	 *    when either cpntr or rpntr is NULL, 1 x 1 is assumed
	 *    (experimental, for the bounded box patch).
	 *  - A 1 x 1 block is served by the "_uu_" kernel.
	 *  - Any other size is served by the "_ul_" kernel, which is referenced
	 *    only if RSB_WANT_LOOPING_KERNELS is defined; otherwise no kernel is
	 *    called and RSB_ERR_UNSUPPORTED_OPERATION is returned.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Since this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t block_rows = 1, block_cols = 1;

	if(cpntr && rpntr)
	{
		block_cols = cpntr[1]-cpntr[0];
		block_rows = rpntr[1]-rpntr[0];
	}

	/* 1 1 BCSR */
	if(block_rows == 1 && block_cols == 1)
		return rsb__BCSR_rowssums_float_C__tN_r1_c1_uu_sS_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_float_C__tN_r1_c1_ul_sS_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_float_C_u_tN_sS_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* emitted by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" operation: it looks at the
	 * size of the first block and hands every argument, unchanged, to the
	 * kernel matching that size.
	 *
	 *  - Block size is cpntr[1]-cpntr[0] columns by rpntr[1]-rpntr[0] rows;
	 *    when either cpntr or rpntr is NULL, 1 x 1 is assumed
	 *    (experimental, for the bounded box patch).
	 *  - A 1 x 1 block is served by the "_uu_" kernel.
	 *  - Any other size is served by the "_ul_" kernel, which is referenced
	 *    only if RSB_WANT_LOOPING_KERNELS is defined; otherwise no kernel is
	 *    called and RSB_ERR_UNSUPPORTED_OPERATION is returned.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Since this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t block_rows = 1, block_cols = 1;

	if(cpntr && rpntr)
	{
		block_cols = cpntr[1]-cpntr[0];
		block_rows = rpntr[1]-rpntr[0];
	}

	/* 1 1 BCSR */
	if(block_rows == 1 && block_cols == 1)
		return rsb__BCSR_rowssums_float_C__tN_r1_c1_uu_sS_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_float_C__tN_r1_c1_ul_sS_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_float_H_u_tN_sS_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* emitted by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" operation: it looks at the
	 * size of the first block and hands every argument, unchanged, to the
	 * kernel matching that size.
	 *
	 *  - Block size is cpntr[1]-cpntr[0] columns by rpntr[1]-rpntr[0] rows;
	 *    when either cpntr or rpntr is NULL, 1 x 1 is assumed
	 *    (experimental, for the bounded box patch).
	 *  - A 1 x 1 block is served by the "_uu_" kernel.
	 *  - Any other size is served by the "_ul_" kernel, which is referenced
	 *    only if RSB_WANT_LOOPING_KERNELS is defined; otherwise no kernel is
	 *    called and RSB_ERR_UNSUPPORTED_OPERATION is returned.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Since this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t block_rows = 1, block_cols = 1;

	if(cpntr && rpntr)
	{
		block_cols = cpntr[1]-cpntr[0];
		block_rows = rpntr[1]-rpntr[0];
	}

	/* 1 1 BCSR */
	if(block_rows == 1 && block_cols == 1)
		return rsb__BCSR_rowssums_float_H__tN_r1_c1_uu_sS_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_float_H__tN_r1_c1_ul_sS_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_float_H_u_tN_sS_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* emitted by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" operation: it looks at the
	 * size of the first block and hands every argument, unchanged, to the
	 * kernel matching that size.
	 *
	 *  - Block size is cpntr[1]-cpntr[0] columns by rpntr[1]-rpntr[0] rows;
	 *    when either cpntr or rpntr is NULL, 1 x 1 is assumed
	 *    (experimental, for the bounded box patch).
	 *  - A 1 x 1 block is served by the "_uu_" kernel.
	 *  - Any other size is served by the "_ul_" kernel, which is referenced
	 *    only if RSB_WANT_LOOPING_KERNELS is defined; otherwise no kernel is
	 *    called and RSB_ERR_UNSUPPORTED_OPERATION is returned.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Since this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t block_rows = 1, block_cols = 1;

	if(cpntr && rpntr)
	{
		block_cols = cpntr[1]-cpntr[0];
		block_rows = rpntr[1]-rpntr[0];
	}

	/* 1 1 BCSR */
	if(block_rows == 1 && block_cols == 1)
		return rsb__BCSR_rowssums_float_H__tN_r1_c1_uu_sS_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_float_H__tN_r1_c1_ul_sS_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_float_C_u_tT_sS_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* emitted by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" operation: it looks at the
	 * size of the first block and hands every argument, unchanged, to the
	 * kernel matching that size.
	 *
	 *  - Block size is cpntr[1]-cpntr[0] columns by rpntr[1]-rpntr[0] rows;
	 *    when either cpntr or rpntr is NULL, 1 x 1 is assumed
	 *    (experimental, for the bounded box patch).
	 *  - A 1 x 1 block is served by the "_uu_" kernel.
	 *  - Any other size is served by the "_ul_" kernel, which is referenced
	 *    only if RSB_WANT_LOOPING_KERNELS is defined; otherwise no kernel is
	 *    called and RSB_ERR_UNSUPPORTED_OPERATION is returned.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Since this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t block_rows = 1, block_cols = 1;

	if(cpntr && rpntr)
	{
		block_cols = cpntr[1]-cpntr[0];
		block_rows = rpntr[1]-rpntr[0];
	}

	/* 1 1 BCSR */
	if(block_rows == 1 && block_cols == 1)
		return rsb__BCSR_rowssums_float_C__tT_r1_c1_uu_sS_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_float_C__tT_r1_c1_ul_sS_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_float_C_u_tT_sS_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* emitted by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" operation: it looks at the
	 * size of the first block and hands every argument, unchanged, to the
	 * kernel matching that size.
	 *
	 *  - Block size is cpntr[1]-cpntr[0] columns by rpntr[1]-rpntr[0] rows;
	 *    when either cpntr or rpntr is NULL, 1 x 1 is assumed
	 *    (experimental, for the bounded box patch).
	 *  - A 1 x 1 block is served by the "_uu_" kernel.
	 *  - Any other size is served by the "_ul_" kernel, which is referenced
	 *    only if RSB_WANT_LOOPING_KERNELS is defined; otherwise no kernel is
	 *    called and RSB_ERR_UNSUPPORTED_OPERATION is returned.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Since this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t block_rows = 1, block_cols = 1;

	if(cpntr && rpntr)
	{
		block_cols = cpntr[1]-cpntr[0];
		block_rows = rpntr[1]-rpntr[0];
	}

	/* 1 1 BCSR */
	if(block_rows == 1 && block_cols == 1)
		return rsb__BCSR_rowssums_float_C__tT_r1_c1_uu_sS_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_float_C__tT_r1_c1_ul_sS_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_float_H_u_tT_sS_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* emitted by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" operation: it looks at the
	 * size of the first block and hands every argument, unchanged, to the
	 * kernel matching that size.
	 *
	 *  - Block size is cpntr[1]-cpntr[0] columns by rpntr[1]-rpntr[0] rows;
	 *    when either cpntr or rpntr is NULL, 1 x 1 is assumed
	 *    (experimental, for the bounded box patch).
	 *  - A 1 x 1 block is served by the "_uu_" kernel.
	 *  - Any other size is served by the "_ul_" kernel, which is referenced
	 *    only if RSB_WANT_LOOPING_KERNELS is defined; otherwise no kernel is
	 *    called and RSB_ERR_UNSUPPORTED_OPERATION is returned.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Since this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t block_rows = 1, block_cols = 1;

	if(cpntr && rpntr)
	{
		block_cols = cpntr[1]-cpntr[0];
		block_rows = rpntr[1]-rpntr[0];
	}

	/* 1 1 BCSR */
	if(block_rows == 1 && block_cols == 1)
		return rsb__BCSR_rowssums_float_H__tT_r1_c1_uu_sS_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_float_H__tT_r1_c1_ul_sS_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_float_H_u_tT_sS_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* emitted by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" operation: it looks at the
	 * size of the first block and hands every argument, unchanged, to the
	 * kernel matching that size.
	 *
	 *  - Block size is cpntr[1]-cpntr[0] columns by rpntr[1]-rpntr[0] rows;
	 *    when either cpntr or rpntr is NULL, 1 x 1 is assumed
	 *    (experimental, for the bounded box patch).
	 *  - A 1 x 1 block is served by the "_uu_" kernel.
	 *  - Any other size is served by the "_ul_" kernel, which is referenced
	 *    only if RSB_WANT_LOOPING_KERNELS is defined; otherwise no kernel is
	 *    called and RSB_ERR_UNSUPPORTED_OPERATION is returned.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Since this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t block_rows = 1, block_cols = 1;

	if(cpntr && rpntr)
	{
		block_cols = cpntr[1]-cpntr[0];
		block_rows = rpntr[1]-rpntr[0];
	}

	/* 1 1 BCSR */
	if(block_rows == 1 && block_cols == 1)
		return rsb__BCSR_rowssums_float_H__tT_r1_c1_uu_sS_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_float_H__tT_r1_c1_ul_sS_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_float_C_u_tC_sS_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* emitted by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" operation: it looks at the
	 * size of the first block and hands every argument, unchanged, to the
	 * kernel matching that size.
	 *
	 *  - Block size is cpntr[1]-cpntr[0] columns by rpntr[1]-rpntr[0] rows;
	 *    when either cpntr or rpntr is NULL, 1 x 1 is assumed
	 *    (experimental, for the bounded box patch).
	 *  - A 1 x 1 block is served by the "_uu_" kernel.
	 *  - Any other size is served by the "_ul_" kernel, which is referenced
	 *    only if RSB_WANT_LOOPING_KERNELS is defined; otherwise no kernel is
	 *    called and RSB_ERR_UNSUPPORTED_OPERATION is returned.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Since this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t block_rows = 1, block_cols = 1;

	if(cpntr && rpntr)
	{
		block_cols = cpntr[1]-cpntr[0];
		block_rows = rpntr[1]-rpntr[0];
	}

	/* 1 1 BCSR */
	if(block_rows == 1 && block_cols == 1)
		return rsb__BCSR_rowssums_float_C__tC_r1_c1_uu_sS_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_float_C__tC_r1_c1_ul_sS_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_float_C_u_tC_sS_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* emitted by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" operation: it looks at the
	 * size of the first block and hands every argument, unchanged, to the
	 * kernel matching that size.
	 *
	 *  - Block size is cpntr[1]-cpntr[0] columns by rpntr[1]-rpntr[0] rows;
	 *    when either cpntr or rpntr is NULL, 1 x 1 is assumed
	 *    (experimental, for the bounded box patch).
	 *  - A 1 x 1 block is served by the "_uu_" kernel.
	 *  - Any other size is served by the "_ul_" kernel, which is referenced
	 *    only if RSB_WANT_LOOPING_KERNELS is defined; otherwise no kernel is
	 *    called and RSB_ERR_UNSUPPORTED_OPERATION is returned.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Since this is strictly blocked code, the rhs and the out vector should
	 * tolerate a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t block_rows = 1, block_cols = 1;

	if(cpntr && rpntr)
	{
		block_cols = cpntr[1]-cpntr[0];
		block_rows = rpntr[1]-rpntr[0];
	}

	/* 1 1 BCSR */
	if(block_rows == 1 && block_cols == 1)
		return rsb__BCSR_rowssums_float_C__tC_r1_c1_uu_sS_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );

	/* every other blocking */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_float_C__tC_r1_c1_ul_sS_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_float_H_u_tC_sS_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" matrix operation.
	 *
	 * The block shape is read from the first two entries of rpntr and cpntr
	 * when both are supplied; if either is missing, a 1 x 1 shape is assumed
	 * (experimental, for the bounded box patch).
	 * A 1 x 1 shape is forwarded to the r1_c1 "uu" kernel. Any other shape is
	 * forwarded to the r1_c1 "ul" kernel when RSB_WANT_LOOPING_KERNELS is
	 * defined, and yields RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 * All arguments are passed through to the selected kernel unchanged.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Being strictly blocked code, the rhs and the out vector should tolerate
	 * a small overflow of at most, respectively,
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1] - rpntr[0];
		blk_cols = cpntr[1] - cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_rowssums_float_H__tC_r1_c1_uu_sS_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* every other block shape shares one fallback */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_float_H__tC_r1_c1_ul_sS_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_float_H_u_tC_sS_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" matrix operation.
	 *
	 * The block shape is read from the first two entries of rpntr and cpntr
	 * when both are supplied; if either is missing, a 1 x 1 shape is assumed
	 * (experimental, for the bounded box patch).
	 * A 1 x 1 shape is forwarded to the r1_c1 "uu" kernel. Any other shape is
	 * forwarded to the r1_c1 "ul" kernel when RSB_WANT_LOOPING_KERNELS is
	 * defined, and yields RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 * All arguments are passed through to the selected kernel unchanged.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Being strictly blocked code, the rhs and the out vector should tolerate
	 * a small overflow of at most, respectively,
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1] - rpntr[0];
		blk_cols = cpntr[1] - cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_rowssums_float_H__tC_r1_c1_uu_sS_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* every other block shape shares one fallback */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_float_H__tC_r1_c1_ul_sS_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_float_C_u_tN_sH_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" matrix operation.
	 *
	 * The block shape is read from the first two entries of rpntr and cpntr
	 * when both are supplied; if either is missing, a 1 x 1 shape is assumed
	 * (experimental, for the bounded box patch).
	 * A 1 x 1 shape is forwarded to the r1_c1 "uu" kernel. Any other shape is
	 * forwarded to the r1_c1 "ul" kernel when RSB_WANT_LOOPING_KERNELS is
	 * defined, and yields RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 * All arguments are passed through to the selected kernel unchanged.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Being strictly blocked code, the rhs and the out vector should tolerate
	 * a small overflow of at most, respectively,
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1] - rpntr[0];
		blk_cols = cpntr[1] - cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_rowssums_float_C__tN_r1_c1_uu_sH_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* every other block shape shares one fallback */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_float_C__tN_r1_c1_ul_sH_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_float_C_u_tN_sH_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" matrix operation.
	 *
	 * The block shape is read from the first two entries of rpntr and cpntr
	 * when both are supplied; if either is missing, a 1 x 1 shape is assumed
	 * (experimental, for the bounded box patch).
	 * A 1 x 1 shape is forwarded to the r1_c1 "uu" kernel. Any other shape is
	 * forwarded to the r1_c1 "ul" kernel when RSB_WANT_LOOPING_KERNELS is
	 * defined, and yields RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 * All arguments are passed through to the selected kernel unchanged.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Being strictly blocked code, the rhs and the out vector should tolerate
	 * a small overflow of at most, respectively,
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1] - rpntr[0];
		blk_cols = cpntr[1] - cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_rowssums_float_C__tN_r1_c1_uu_sH_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* every other block shape shares one fallback */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_float_C__tN_r1_c1_ul_sH_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_float_H_u_tN_sH_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" matrix operation.
	 *
	 * The block shape is read from the first two entries of rpntr and cpntr
	 * when both are supplied; if either is missing, a 1 x 1 shape is assumed
	 * (experimental, for the bounded box patch).
	 * A 1 x 1 shape is forwarded to the r1_c1 "uu" kernel. Any other shape is
	 * forwarded to the r1_c1 "ul" kernel when RSB_WANT_LOOPING_KERNELS is
	 * defined, and yields RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 * All arguments are passed through to the selected kernel unchanged.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Being strictly blocked code, the rhs and the out vector should tolerate
	 * a small overflow of at most, respectively,
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1] - rpntr[0];
		blk_cols = cpntr[1] - cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_rowssums_float_H__tN_r1_c1_uu_sH_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* every other block shape shares one fallback */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_float_H__tN_r1_c1_ul_sH_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_float_H_u_tN_sH_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" matrix operation.
	 *
	 * The block shape is read from the first two entries of rpntr and cpntr
	 * when both are supplied; if either is missing, a 1 x 1 shape is assumed
	 * (experimental, for the bounded box patch).
	 * A 1 x 1 shape is forwarded to the r1_c1 "uu" kernel. Any other shape is
	 * forwarded to the r1_c1 "ul" kernel when RSB_WANT_LOOPING_KERNELS is
	 * defined, and yields RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 * All arguments are passed through to the selected kernel unchanged.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Being strictly blocked code, the rhs and the out vector should tolerate
	 * a small overflow of at most, respectively,
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1] - rpntr[0];
		blk_cols = cpntr[1] - cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_rowssums_float_H__tN_r1_c1_uu_sH_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* every other block shape shares one fallback */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_float_H__tN_r1_c1_ul_sH_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_float_C_u_tT_sH_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" matrix operation.
	 *
	 * The block shape is read from the first two entries of rpntr and cpntr
	 * when both are supplied; if either is missing, a 1 x 1 shape is assumed
	 * (experimental, for the bounded box patch).
	 * A 1 x 1 shape is forwarded to the r1_c1 "uu" kernel. Any other shape is
	 * forwarded to the r1_c1 "ul" kernel when RSB_WANT_LOOPING_KERNELS is
	 * defined, and yields RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 * All arguments are passed through to the selected kernel unchanged.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Being strictly blocked code, the rhs and the out vector should tolerate
	 * a small overflow of at most, respectively,
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1] - rpntr[0];
		blk_cols = cpntr[1] - cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_rowssums_float_C__tT_r1_c1_uu_sH_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* every other block shape shares one fallback */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_float_C__tT_r1_c1_ul_sH_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_float_C_u_tT_sH_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" matrix operation.
	 *
	 * The block shape is read from the first two entries of rpntr and cpntr
	 * when both are supplied; if either is missing, a 1 x 1 shape is assumed
	 * (experimental, for the bounded box patch).
	 * A 1 x 1 shape is forwarded to the r1_c1 "uu" kernel. Any other shape is
	 * forwarded to the r1_c1 "ul" kernel when RSB_WANT_LOOPING_KERNELS is
	 * defined, and yields RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 * All arguments are passed through to the selected kernel unchanged.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Being strictly blocked code, the rhs and the out vector should tolerate
	 * a small overflow of at most, respectively,
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1] - rpntr[0];
		blk_cols = cpntr[1] - cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_rowssums_float_C__tT_r1_c1_uu_sH_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* every other block shape shares one fallback */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_float_C__tT_r1_c1_ul_sH_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_float_H_u_tT_sH_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" matrix operation.
	 *
	 * The block shape is read from the first two entries of rpntr and cpntr
	 * when both are supplied; if either is missing, a 1 x 1 shape is assumed
	 * (experimental, for the bounded box patch).
	 * A 1 x 1 shape is forwarded to the r1_c1 "uu" kernel. Any other shape is
	 * forwarded to the r1_c1 "ul" kernel when RSB_WANT_LOOPING_KERNELS is
	 * defined, and yields RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 * All arguments are passed through to the selected kernel unchanged.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Being strictly blocked code, the rhs and the out vector should tolerate
	 * a small overflow of at most, respectively,
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1] - rpntr[0];
		blk_cols = cpntr[1] - cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_rowssums_float_H__tT_r1_c1_uu_sH_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* every other block shape shares one fallback */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_float_H__tT_r1_c1_ul_sH_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_float_H_u_tT_sH_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" matrix operation.
	 *
	 * The block shape is read from the first two entries of rpntr and cpntr
	 * when both are supplied; if either is missing, a 1 x 1 shape is assumed
	 * (experimental, for the bounded box patch).
	 * A 1 x 1 shape is forwarded to the r1_c1 "uu" kernel. Any other shape is
	 * forwarded to the r1_c1 "ul" kernel when RSB_WANT_LOOPING_KERNELS is
	 * defined, and yields RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 * All arguments are passed through to the selected kernel unchanged.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Being strictly blocked code, the rhs and the out vector should tolerate
	 * a small overflow of at most, respectively,
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1] - rpntr[0];
		blk_cols = cpntr[1] - cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_rowssums_float_H__tT_r1_c1_uu_sH_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* every other block shape shares one fallback */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_float_H__tT_r1_c1_ul_sH_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_float_C_u_tC_sH_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" matrix operation.
	 *
	 * The block shape is read from the first two entries of rpntr and cpntr
	 * when both are supplied; if either is missing, a 1 x 1 shape is assumed
	 * (experimental, for the bounded box patch).
	 * A 1 x 1 shape is forwarded to the r1_c1 "uu" kernel. Any other shape is
	 * forwarded to the r1_c1 "ul" kernel when RSB_WANT_LOOPING_KERNELS is
	 * defined, and yields RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 * All arguments are passed through to the selected kernel unchanged.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Being strictly blocked code, the rhs and the out vector should tolerate
	 * a small overflow of at most, respectively,
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1] - rpntr[0];
		blk_cols = cpntr[1] - cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_rowssums_float_C__tC_r1_c1_uu_sH_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* every other block shape shares one fallback */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_float_C__tC_r1_c1_ul_sH_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_float_C_u_tC_sH_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" matrix operation.
	 *
	 * The block shape is read from the first two entries of rpntr and cpntr
	 * when both are supplied; if either is missing, a 1 x 1 shape is assumed
	 * (experimental, for the bounded box patch).
	 * A 1 x 1 shape is forwarded to the r1_c1 "uu" kernel. Any other shape is
	 * forwarded to the r1_c1 "ul" kernel when RSB_WANT_LOOPING_KERNELS is
	 * defined, and yields RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 * All arguments are passed through to the selected kernel unchanged.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Being strictly blocked code, the rhs and the out vector should tolerate
	 * a small overflow of at most, respectively,
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1] - rpntr[0];
		blk_cols = cpntr[1] - cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_rowssums_float_C__tC_r1_c1_uu_sH_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* every other block shape shares one fallback */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_float_C__tC_r1_c1_ul_sH_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_float_H_u_tC_sH_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" matrix operation.
	 *
	 * The block shape is read from the first two entries of rpntr and cpntr
	 * when both are supplied; if either is missing, a 1 x 1 shape is assumed
	 * (experimental, for the bounded box patch).
	 * A 1 x 1 shape is forwarded to the r1_c1 "uu" kernel. Any other shape is
	 * forwarded to the r1_c1 "ul" kernel when RSB_WANT_LOOPING_KERNELS is
	 * defined, and yields RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 * All arguments are passed through to the selected kernel unchanged.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Being strictly blocked code, the rhs and the out vector should tolerate
	 * a small overflow of at most, respectively,
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1] - rpntr[0];
		blk_cols = cpntr[1] - cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_rowssums_float_H__tC_r1_c1_uu_sH_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* every other block shape shares one fallback */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_float_H__tC_r1_c1_ul_sH_dE_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_rowssums_float_H_u_tC_sH_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "rowssums" matrix operation.
	 *
	 * The block shape is read from the first two entries of rpntr and cpntr
	 * when both are supplied; if either is missing, a 1 x 1 shape is assumed
	 * (experimental, for the bounded box patch).
	 * A 1 x 1 shape is forwarded to the r1_c1 "uu" kernel. Any other shape is
	 * forwarded to the r1_c1 "ul" kernel when RSB_WANT_LOOPING_KERNELS is
	 * defined, and yields RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 * All arguments are passed through to the selected kernel unchanged.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Being strictly blocked code, the rhs and the out vector should tolerate
	 * a small overflow of at most, respectively,
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1] - rpntr[0];
		blk_cols = cpntr[1] - cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_rowssums_float_H__tC_r1_c1_uu_sH_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
	}

	/* every other block shape shares one fallback */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_rowssums_float_H__tC_r1_c1_ul_sH_dI_uG( VA,row_sums,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_scale_float_C_u_tN_sU_dE_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float *scale_factors)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "scale" matrix operation.
	 *
	 * The block shape is read from the first two entries of rpntr and cpntr
	 * when both are supplied; if either is missing, a 1 x 1 shape is assumed
	 * (experimental, for the bounded box patch).
	 * A 1 x 1 shape is forwarded to the r1_c1 "uu" kernel. Any other shape is
	 * forwarded to the r1_c1 "ul" kernel when RSB_WANT_LOOPING_KERNELS is
	 * defined, and yields RSB_ERR_UNSUPPORTED_OPERATION otherwise.
	 * All arguments are passed through to the selected kernel unchanged.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Being strictly blocked code, the rhs and the out vector should tolerate
	 * a small overflow of at most, respectively,
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t blk_rows = 1, blk_cols = 1;

	if(rpntr && cpntr)
	{
		blk_rows = rpntr[1] - rpntr[0];
		blk_cols = cpntr[1] - cpntr[0];
	}

	if(blk_rows == 1 && blk_cols == 1)
	{
		/* 1 1 BCSR */
		return rsb__BCSR_scale_float_C__tN_r1_c1_uu_sU_dE_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
	}

	/* every other block shape shares one fallback */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_scale_float_C__tN_r1_c1_ul_sU_dE_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_scale_float_C_u_tN_sU_dI_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float *scale_factors)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "scale" operation (dI variant).
	 *
	 * The block shape is read from the first two entries of rpntr / cpntr
	 * (rows = rpntr[1]-rpntr[0], columns = cpntr[1]-cpntr[0]); if either
	 * pointer is NULL a 1 x 1 blocking is assumed.
	 *
	 * A 1 x 1 blocking is served by the unrolled ("uu") kernel.  Every other
	 * blocking is forwarded to the looping ("ul") kernel when
	 * RSB_WANT_LOOPING_KERNELS is defined; otherwise
	 * RSB_ERR_UNSUPPORTED_OPERATION is returned.  All arguments are passed
	 * to the selected kernel unchanged and its return value is handed back.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Note carried over from the generator template: since this is strictly
	 * blocked code, the rhs and the out vector should tolerate a small
	 * overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t block_cols = 1;	/* experimental default, for the bounded box patch */
	rsb_coo_idx_t block_rows = 1;

	if(cpntr && rpntr)
	{
		block_cols = cpntr[1]-cpntr[0];
		block_rows = rpntr[1]-rpntr[0];
	}

	/* 1 1 BCSR: the only blocking with an unrolled kernel */
	if(block_rows == 1 && block_cols == 1)
	{
		return rsb__BCSR_scale_float_C__tN_r1_c1_uu_sU_dI_uG(
			VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,
			br,bc,roff,coff,flags,scale_factors );
	}

	/* any other blocking: looping kernel if built in, error otherwise */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_scale_float_C__tN_r1_c1_ul_sU_dI_uG(
		VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,
		br,bc,roff,coff,flags,scale_factors );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_scale_float_H_u_tN_sU_dE_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float *scale_factors)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "scale" operation, taking
	 * rsb_half_idx_t indices in bindx.
	 *
	 * The block shape is read from the first two entries of rpntr / cpntr
	 * (rows = rpntr[1]-rpntr[0], columns = cpntr[1]-cpntr[0]); if either
	 * pointer is NULL a 1 x 1 blocking is assumed.
	 *
	 * A 1 x 1 blocking is served by the unrolled ("uu") kernel.  Every other
	 * blocking is forwarded to the looping ("ul") kernel when
	 * RSB_WANT_LOOPING_KERNELS is defined; otherwise
	 * RSB_ERR_UNSUPPORTED_OPERATION is returned.  All arguments are passed
	 * to the selected kernel unchanged and its return value is handed back.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Note carried over from the generator template: since this is strictly
	 * blocked code, the rhs and the out vector should tolerate a small
	 * overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t block_cols = 1;	/* experimental default, for the bounded box patch */
	rsb_coo_idx_t block_rows = 1;

	if(cpntr && rpntr)
	{
		block_cols = cpntr[1]-cpntr[0];
		block_rows = rpntr[1]-rpntr[0];
	}

	/* 1 1 BCSR: the only blocking with an unrolled kernel */
	if(block_rows == 1 && block_cols == 1)
	{
		return rsb__BCSR_scale_float_H__tN_r1_c1_uu_sU_dE_uG(
			VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,
			br,bc,roff,coff,flags,scale_factors );
	}

	/* any other blocking: looping kernel if built in, error otherwise */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_scale_float_H__tN_r1_c1_ul_sU_dE_uG(
		VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,
		br,bc,roff,coff,flags,scale_factors );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_scale_float_H_u_tN_sU_dI_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float *scale_factors)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * Block-size dispatcher for the "scale" operation (dI variant), taking
	 * rsb_half_idx_t indices in bindx.
	 *
	 * The block shape is read from the first two entries of rpntr / cpntr
	 * (rows = rpntr[1]-rpntr[0], columns = cpntr[1]-cpntr[0]); if either
	 * pointer is NULL a 1 x 1 blocking is assumed.
	 *
	 * A 1 x 1 blocking is served by the unrolled ("uu") kernel.  Every other
	 * blocking is forwarded to the looping ("ul") kernel when
	 * RSB_WANT_LOOPING_KERNELS is defined; otherwise
	 * RSB_ERR_UNSUPPORTED_OPERATION is returned.  All arguments are passed
	 * to the selected kernel unchanged and its return value is handed back.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Note carried over from the generator template: since this is strictly
	 * blocked code, the rhs and the out vector should tolerate a small
	 * overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_coo_idx_t block_cols = 1;	/* experimental default, for the bounded box patch */
	rsb_coo_idx_t block_rows = 1;

	if(cpntr && rpntr)
	{
		block_cols = cpntr[1]-cpntr[0];
		block_rows = rpntr[1]-rpntr[0];
	}

	/* 1 1 BCSR: the only blocking with an unrolled kernel */
	if(block_rows == 1 && block_cols == 1)
	{
		return rsb__BCSR_scale_float_H__tN_r1_c1_uu_sU_dI_uG(
			VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,
			br,bc,roff,coff,flags,scale_factors );
	}

	/* any other blocking: looping kernel if built in, error otherwise */
#ifdef RSB_WANT_LOOPING_KERNELS
	return rsb__BCSR_scale_float_H__tN_r1_c1_ul_sU_dI_uG(
		VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,
		br,bc,roff,coff,flags,scale_factors );
#else /* RSB_WANT_LOOPING_KERNELS */
	return RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
}





rsb_err_t rsb__BCSR_scale_float_C_u_tT_sU_dE_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags, const float *scale_factors)
{
	/* generated by the RSB_M4_BCSS_KERNEL_SIZE_DISPATCH_FUNCTION macro */
	/*
	 * This function will dispatch the specialized looped kernel function for 
	 * performing the desired matrix operation ("scale") for the current fixed
	 * block size.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * Since this is strictly blocked code, you should allow the rhs and the out
	 * vector to accept a small overflow not bigger, respectively, than
	 *       mod(blockrows-mod(matrixrows,blockrows),blockrows)
	 * and
	 *       mod(blockcols-mod(matrixcols,blockcols),blockcols)
	 */
	rsb_err_t errval = RSB_ERR_NO_ERROR;


	register rsb_coo_idx_t columns,rows;
	if(cpntr && rpntr)
	{
		columns=cpntr[1]-cpntr[0];
		rows   =rpntr[1]-rpntr[0];
	}
	else
		columns = rows=1;	/* experimental, for the bounded box patch */

switch(rows)
{
	case 1:
	{switch(columns)
	{
		case 1:/* 1 1 BCSR */
		errval = rsb__BCSR_scale_float_C__tT_r1_c1_uu_sU_dE_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
		break;
	default: 
#ifdef RSB_WANT_LOOPING_KERNELS 
		errval = rsb__BCSR_scale_float_C__tT_r1_c1_ul_sU_dE_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
#else /* RSB_WANT_LOOPING_KERNELS */
	errval = RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_LOOPING_KERNELS */
	}}
	break;
	default:
#ifdef RSB_WANT_LOOPING_KERNELS 
		errval = rsb__BCSR_scale_float_C__tT_r1_c1_ul_sU_dE_uG( VA,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,scale_factors );
#else /* RSB_WANT_LOOPING_KERNELS */
	errval = RSB_ERR