Source code for mriqc.reports.group

# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
# vi: set ft=python sts=4 ts=4 sw=4 et:
#
# Copyright 2021 The NiPreps Developers <nipreps@gmail.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# We support and encourage derived works from this project, please read
# about our expectations at
#
#     https://www.nipreps.org/community/licensing/
#
"""Encapsulates report generation functions."""

from itertools import product
from sys import version_info

import pandas as pd

from .. import config
from ..utils.misc import BIDS_COMP


def gen_html(csv_file, mod, csv_failed=None, out_file=None):
    """
    Generate the group-level HTML report for one modality.

    Parameters
    ----------
    csv_file : str or os.PathLike
        Table of image quality metrics (IQMs), one row per image.
        A file with the ``.csv`` suffix is read as comma-separated and the
        ``label`` column is built from the BIDS component columns it contains.
        Any other suffix is read as tab-separated, and its ``bids_name``
        column is renamed to ``label``.
    mod : str
        Key selecting the IQM grouping to plot; one of ``'T1w'``, ``'T2w'``,
        ``'bold'`` or ``'dwi'``.
    csv_failed : str or os.PathLike, optional
        Table of failed workflows. It is only read if the file exists.
    out_file : str or os.PathLike, optional
        Destination of the report. Defaults to ``group.html`` in the current
        working directory.

    Returns
    -------
    out_file
        The path the report was written to.

    Raises
    ------
    KeyError
        If ``mod`` is not one of the supported modalities.

    """
    import os.path as op
    from datetime import datetime, timezone
    from io import StringIO
    from pathlib import Path

    from niworkflows.data import Loader

    from mriqc.data.config import GroupTemplate

    from .. import __version__ as ver

    UTC = timezone.utc
    load_data = Loader('mriqc')

    # Accept plain strings as well as path objects.
    csv_file = Path(csv_file)

    if csv_file.suffix == '.csv':
        dataframe = pd.read_csv(
            csv_file, index_col=False, dtype={comp: object for comp in BIDS_COMP}
        )
    else:
        dataframe = pd.read_csv(csv_file, index_col=False, sep='\t', dtype={'bids_name': object})

    # BIDS components present in the table, in the (deterministic) order of
    # BIDS_COMP. Computed for both input formats so that the failed-workflows
    # handling below never hits an unbound name.
    present_columns = set(dataframe.columns)
    id_labels = [comp for comp in BIDS_COMP if comp in present_columns]

    if csv_file.suffix == '.csv':
        dataframe['label'] = dataframe[id_labels].apply(_format_labels, args=(id_labels,), axis=1)
    else:
        dataframe = dataframe.rename(index=str, columns={'bids_name': 'label'})

    # One id per ``efc_shell*`` column found in the table (1-based).
    shells_id = list(range(1, 1 + dataframe.columns.str.startswith('efc_shell').sum()))

    # Each entry is (list of IQM columns plotted together, units or None).
    QCGROUPS = {
        'T1w': [
            (['cjv'], None),
            (['cnr'], None),
            (['efc'], None),
            (['fber'], None),
            (['wm2max'], None),
            (['snr_csf', 'snr_gm', 'snr_wm'], None),
            (['snrd_csf', 'snrd_gm', 'snrd_wm'], None),
            (['fwhm_avg', 'fwhm_x', 'fwhm_y', 'fwhm_z'], 'vox'),
            (['qi_1', 'qi_2'], None),
            (['inu_range', 'inu_med'], None),
            (['icvs_csf', 'icvs_gm', 'icvs_wm'], None),
            (['rpve_csf', 'rpve_gm', 'rpve_wm'], None),
            (['tpm_overlap_csf', 'tpm_overlap_gm', 'tpm_overlap_wm'], None),
            (
                [
                    'summary_bg_mean',
                    'summary_bg_median',
                    'summary_bg_stdv',
                    'summary_bg_mad',
                    'summary_bg_k',
                    'summary_bg_p05',
                    'summary_bg_p95',
                ],
                None,
            ),
            (
                [
                    'summary_csf_mean',
                    'summary_csf_median',
                    'summary_csf_stdv',
                    'summary_csf_mad',
                    'summary_csf_k',
                    'summary_csf_p05',
                    'summary_csf_p95',
                ],
                None,
            ),
            (
                [
                    'summary_gm_mean',
                    'summary_gm_median',
                    'summary_gm_stdv',
                    'summary_gm_mad',
                    'summary_gm_k',
                    'summary_gm_p05',
                    'summary_gm_p95',
                ],
                None,
            ),
            (
                [
                    'summary_wm_mean',
                    'summary_wm_median',
                    'summary_wm_stdv',
                    'summary_wm_mad',
                    'summary_wm_k',
                    'summary_wm_p05',
                    'summary_wm_p95',
                ],
                None,
            ),
        ],
        'T2w': [
            (['cjv'], None),
            (['cnr'], None),
            (['efc'], None),
            (['fber'], None),
            (['wm2max'], None),
            (['snr_csf', 'snr_gm', 'snr_wm'], None),
            (['snrd_csf', 'snrd_gm', 'snrd_wm'], None),
            (['fwhm_avg', 'fwhm_x', 'fwhm_y', 'fwhm_z'], 'mm'),
            (['qi_1', 'qi_2'], None),
            (['inu_range', 'inu_med'], None),
            (['icvs_csf', 'icvs_gm', 'icvs_wm'], None),
            (['rpve_csf', 'rpve_gm', 'rpve_wm'], None),
            (['tpm_overlap_csf', 'tpm_overlap_gm', 'tpm_overlap_wm'], None),
            (
                [
                    'summary_bg_mean',
                    'summary_bg_stdv',
                    'summary_bg_k',
                    'summary_bg_p05',
                    'summary_bg_p95',
                ],
                None,
            ),
            (
                [
                    'summary_csf_mean',
                    'summary_csf_stdv',
                    'summary_csf_k',
                    'summary_csf_p05',
                    'summary_csf_p95',
                ],
                None,
            ),
            (
                [
                    'summary_gm_mean',
                    'summary_gm_stdv',
                    'summary_gm_k',
                    'summary_gm_p05',
                    'summary_gm_p95',
                ],
                None,
            ),
            (
                [
                    'summary_wm_mean',
                    'summary_wm_stdv',
                    'summary_wm_k',
                    'summary_wm_p05',
                    'summary_wm_p95',
                ],
                None,
            ),
        ],
        'bold': [
            (['efc'], None),
            (['fber'], None),
            (['fwhm', 'fwhm_x', 'fwhm_y', 'fwhm_z'], 'mm'),
            ([f'gsr_{axis}' for axis in ('x', 'y')], None),
            (['snr'], None),
            (['dvars_std', 'dvars_vstd'], None),
            (['dvars_nstd'], None),
            (['fd_mean'], 'mm'),
            (['fd_num'], '# timepoints'),
            (['fd_perc'], '% timepoints'),
            (['spikes_num'], '# slices'),
            (['dummy_trs'], '# TRs'),
            (['gcor'], None),
            (['tsnr'], None),
            (['aor'], None),
            (['aqi'], None),
            (
                [
                    'summary_bg_mean',
                    'summary_bg_stdv',
                    'summary_bg_k',
                    'summary_bg_p05',
                    'summary_bg_p95',
                ],
                None,
            ),
            (
                [
                    'summary_fg_mean',
                    'summary_fg_stdv',
                    'summary_fg_k',
                    'summary_fg_p05',
                    'summary_fg_p95',
                ],
                None,
            ),
        ],
        'dwi': [
            ([f'bdiffs_{sub}' for sub in ('max', 'mean', 'median', 'min')], 'rad'),
            ([f'efc_shell{i:02d}' for i in shells_id], None),
            (['fa_degenerate', 'fa_nans'], 'ppm'),
            ([f'fber_shell{i:02d}' for i in shells_id], None),
            (['fd_mean'], 'mm'),
            (['fd_num'], '# timepoints'),
            (['fd_perc'], '% timepoints'),
            (['ndc'], None),
            (['sigma_cc'], None),
            (['sigma_pca', 'sigma_piesno'], None),
            (
                ['snr_cc_shell0']
                + [f'snr_cc_shell{s}_{t}' for s, t in product(shells_id, ('best', 'worst'))],
                None,
            ),
            (['spikes_global'] + [f'spikes_slice_{ax}' for ax in 'ijk'], 'ppm'),
            (
                [
                    'summary_bg_p05',
                    'summary_fg_p05',
                    'summary_wm_p05',
                    'summary_bg_mean',
                    'summary_fg_mean',
                    'summary_wm_mean',
                    'summary_bg_median',
                    'summary_fg_median',
                    'summary_wm_median',
                ],
                'a.u.',
            ),
            (
                [
                    'summary_bg_p95',
                    'summary_fg_p95',
                    'summary_wm_p95',
                ],
                'a.u.',
            ),
            (
                [
                    'summary_bg_stdv',
                    'summary_fg_stdv',
                    'summary_wm_stdv',
                    'summary_bg_mad',
                    'summary_fg_mad',
                    'summary_wm_mad',
                ],
                None,
            ),
            (
                [
                    'summary_bg_k',
                ],
                None,
            ),
            (
                [
                    'summary_fg_k',
                    'summary_wm_k',
                ],
                None,
            ),
        ],
    }

    nPart = len(dataframe)

    failed = None
    if csv_failed is not None and op.isfile(csv_failed):
        config.loggers.cli.warning(f'Found failed-workflows table "{csv_failed}"')
        failed_df = pd.read_csv(csv_failed, index_col=False)
        failed_columns = set(failed_df.columns)
        cols = [col for col in id_labels if col in failed_columns]

        # Sorting by an empty key list is an error on some pandas versions.
        if cols:
            failed_df = failed_df.sort_values(by=cols)

        # TODO: ``failed`` is never populated, so the template always receives
        # None. The label formatter this step used to call (``myfmt``) is not
        # defined; the sorted table is kept ready for when it is restored.

    csv_groups = []
    datacols = dataframe.columns.tolist()
    for group, units in QCGROUPS[mod]:
        dfdict = {'iqm': [], 'value': [], 'label': [], 'units': []}

        for iqm in group:
            # IQMs absent from the table are silently skipped.
            if iqm not in datacols:
                continue

            values = dataframe[[iqm]].values.ravel().tolist()
            if values:
                dfdict['iqm'] += [iqm] * nPart
                dfdict['units'] += [units] * nPart
                dfdict['value'] += values
                dfdict['label'] += dataframe[['label']].values.ravel().tolist()

        # Save only if there are values
        if dfdict['value']:
            csv_df = pd.DataFrame(dfdict)
            csv_str = StringIO()
            csv_df[['iqm', 'value', 'label', 'units']].to_csv(csv_str, index=False)
            csv_groups.append(csv_str.getvalue())

    if out_file is None:
        out_file = op.abspath('group.html')

    tpl = GroupTemplate()
    tpl.generate_conf(
        {
            'modality': mod,
            'timestamp': datetime.now(tz=UTC).strftime('%Y-%m-%d, %H:%M'),
            'version': ver,
            'csv_groups': csv_groups,
            'failed': failed,
            'boxplots_js': load_data('data/reports/embed_resources/boxplots.js').read_text(),
            'd3_js': load_data('data/reports/embed_resources/d3.min.js').read_text(),
            'boxplots_css': load_data('data/reports/embed_resources/boxplots.css').read_text(),
        },
        out_file,
    )

    return out_file


def _format_labels(row, id_labels):
    """
    Build the participant label for one table row.

    Walks ``BIDS_COMP`` in its own order and, for every component listed in
    ``id_labels``, emits ``<prefix>-<value>`` using the value stored in
    ``row``; the pieces are joined with underscores.
    """
    pieces = []
    for col_id, prefix in BIDS_COMP.items():
        if col_id not in id_labels:
            continue
        value = row[[col_id]].values[0]
        pieces.append(f'{prefix}-{value}')
    return '_'.join(pieces)