Source code for analyze

"""
.. module:: analyze
   :synopsis: Extract data from chains and produce plots

.. moduleauthor:: Karim Benabed <benabed@iap.fr>
.. moduleauthor:: Benjamin Audren <benjamin.audren@epfl.ch>

Collection of functions needed to analyze the Markov chains.

This module defines as well a class :class:`Information`, that stores useful
quantities, and shortens the argument passing between the functions.

.. note::
    Some of the methods used in this module are directly adapted from the
    `CosmoPmc <http://www.cosmopmc.info>`_ code from Kilbinger et. al.

"""
import os
import math
import numpy as np
from itertools import count
# The root plotting module, to change options like font sizes, etc...
import matplotlib
# The following line suppresses the need for an X server
matplotlib.use("Agg")
# Module for handling display
import matplotlib.pyplot as plt
# Module to handle warnings from matplotlib
import warnings
import importlib
import io_mp
from itertools import ifilterfalse
from itertools import ifilter

# Defined to remove the burnin for all the points that were produced before the
# first time where -log-likelihood <= min-minus-log-likelihood+LOG_LKL_CUTOFF
LOG_LKL_CUTOFF = 3

NUM_COLORS = 4


[docs]def analyze(command_line):
    """
    Main function, does the entire analysis.

    It calls in turn all the other routines from this module. To limit the
    arguments of each function to a reasonnable size, a :class:`Information`
    instance is used. This instance is initialized in this function, then
    appended by the other routines.

    """
    # Check if the scipy module has the interpolate method correctly
    # installed (should be the case on every linux distribution with
    # standard numpy)
    try:
        from scipy.interpolate import interp1d
        Information.has_interpolate_module = True
    except ImportError:
        Information.has_interpolate_module = False
        warnings.warn(
            'No cubic interpolation done (no interpolate method found ' +
            'in scipy), only linear')

    # Determine how many different folders are asked through the 'info'
    # command, and create as many Information instances
    files = separate_files(command_line.files)

    # Create an instance of the Information class for each subgroup found in
    # the previous function. They will each hold all relevant information, and
    # be used as a compact way of exchanging information between functions
    information_instances = []
    for item in files:
        info = Information(command_line)
        information_instances.append(info)

        # Prepare the files, according to the case, load the log.param, and
        # prepare the output (plots folder, .covmat, .info and .log files).
        # After this step, info.files will contain all chains.
        status = prepare(item, info)
        # If the preparation step generated new files (for instance,
        # translating from NS or CH to Markov Chains) this routine should stop
        # now.
        if not status:
            return

        # Compute the mean, maximum of likelihood, 1-sigma variance for this
        # main folder. This will create the info.chain object, which contains
        # all the points computed stacked in one big array.
        convergence(info)

        # check if analyze() is called directly by the user, or by the mcmc loop during an updating phase
        try:
            # command_line.update is defined when called by the mcmc loop
            command_line.update
        except:
            # in case it was not defined (i.e. when analyze() is called directly by user), set it to False
            command_line.update = 0

        # compute covariance matrix, excepted when we are in update mode and convergence is too bad or too good
        if command_line.update and (np.amax(info.R) > 3. or np.amax(info.R) < 0.4):
            print '--> Not computing covariance matrix'
        else:
            try:
                if command_line.want_covmat:
                    print '--> Computing covariance matrix'
                    info.covar = compute_covariance_matrix(info)
                    # Writing it out in name_of_folder.covmat
                    io_mp.write_covariance_matrix(
                        info.covar, info.backup_names, info.cov_path)
            except:
                print '--> Computing covariance matrix failed'
                pass

        # Store an array, sorted_indices, containing the list of indices
        # corresponding to the line with the highest likelihood as the first
        # element, and then as decreasing likelihood
        info.sorted_indices = info.chain[:, 1].argsort(0)

        # Writing the best-fit model in name_of_folder.bestfit
        bestfit_line = [elem*info.scales[i, i] for i, elem in
                        enumerate(info.chain[info.sorted_indices[0], 2:])]
        io_mp.write_bestfit_file(bestfit_line, info.backup_names,
                                 info.best_fit_path)

    if not command_line.minimal:
        # Computing 1,2 and 3-sigma errors, and plot. This will create the
        # triangle and 1d plot by default.
        compute_posterior(information_instances)

        print '--> Writing .info and .tex files'
        for info in information_instances:
            info.write_information_files()

    # when called by MCMC in update mode, return R values so that they can be written for information in the chains
    if command_line.update:
        return info.R

[docs]def prepare(files, info):
    """
    Scan the whole input folder, and include all chains in it.

    Since you can decide to analyze some file(s), or a complete folder, this
    function first needs to separate between the two cases.

    .. warning::
        If someday you change the way the chains are named, remember to change
        here too, because this routine assumes the chains have a double
        underscore in their names.

    .. note::
        Only files ending with .txt will be selected, to keep compatibility
        with CosmoMC format

    .. note::
        New in version 2.0.0: if you ask to analyze a Nested Sampling
        sub-folder (i.e. something that ends in `NS` with capital letters), the
        analyze module will translate the output from Nested Sampling to
        standard chains for Monte Python, and stops. You can then run the
        `-- info` flag on the whole folder. **This procedure is not necessary
        if the run was complete, but only if the Nested Sampling run was killed
        before completion**.

    Parameters
    ----------
    files : list
        list of potentially only one element, containing the files to analyze.
        This can be only one file, or the encompassing folder, files
    info : Information instance
        Used to store the result

    """
    # First test if the folder is a Nested Sampling or CosmoHammer folder. If
    # so, call the module's own routine through the clean conversion function,
    # which will translate the output of this other sampling into MCMC chains
    # that can then be analyzed.
    modules = ['nested_sampling', 'cosmo_hammer']
    tags = ['NS', 'CH']
    for module_name, tag in zip(modules, tags):
        action_done = clean_conversion(module_name, tag, files[0])
        if action_done:
            return False

    # If the input command was an entire folder, then grab everything in it.
    # Too small files (below 600 octets) and subfolders are automatically
    # removed.
    folder, files, basename = recover_folder_and_files(files)

    info.files = files
    info.folder = folder
    info.basename = basename

    # Check if the log.param file exists
    parameter_file_path = os.path.join(folder, 'log.param')
    if os.path.isfile(parameter_file_path):
        if os.path.getsize(parameter_file_path) == 0:
            raise io_mp.AnalyzeError(
                "The log param file %s " % os.path.join(folder, 'log.param') +
                "seems empty")
    else:
        raise io_mp.AnalyzeError(
            "The log param file %s " % os.path.join(folder, 'log.param') +
            "is missing in the analyzed folder?")

    # If the folder has no subdirectory, then go for a simple infoname,
    # otherwise, call it with the last name
    basename = (os.path.basename(folder) if os.path.basename(folder) != '.'
                else os.path.basename(os.path.abspath(
                    os.path.join(folder, '..'))))

    info.v_info_path = os.path.join(folder, basename+'.v_info')
    info.h_info_path = os.path.join(folder, basename+'.h_info')
    info.tex_path = os.path.join(folder, basename+'.tex')
    info.cov_path = os.path.join(folder, basename+'.covmat')
    info.log_path = os.path.join(folder, basename+'.log')
    info.best_fit_path = os.path.join(folder, basename+'.bestfit')
    info.param_path = parameter_file_path

    return True


[docs]def convergence(info):
    """
    Compute convergence for the desired chains, using Gelman-Rubin diagnostic

    Chains have been stored in the info instance of :class:`Information`. Note
    that the G-R diagnostic can be computed for a single chain, albeit it will
    most probably give absurd results. To do so, it separates the chain into
    three subchains.

    """
    # Recovering parameter names and scales, creating tex names,
    extract_parameter_names(info)
    # Now that the number of parameters is known, the array containing bounds
    # can be initialised
    info.bounds = np.zeros((len(info.ref_names), len(info.levels), 2))

    # Circle through all files to find the global maximum of likelihood
    print '--> Finding global maximum of likelihood'
    find_maximum_of_likelihood(info)

    # Restarting the circling through files, this time removing the burnin,
    # given the maximum of likelihood previously found and the global variable
    # LOG_LKL_CUTOFF. spam now contains all the accepted points that were
    # explored once the chain moved within min_minus_lkl - LOG_LKL_CUTOFF.
    # If the user asks for a keep_fraction <1, this is also the place where
    # a fraction (1-keep_fraction) is removed at the beginning of each chain.
    print '--> Removing burn-in'
    spam = remove_bad_points(info)

    info.remap_parameters(spam)
    # Now that the list spam contains all the different chains removed of
    # their respective burn-in, proceed to the convergence computation

    # 2D arrays for mean and var, one column will contain the total (over
    # all chains) mean (resp. variance), and each other column the
    # respective chain mean (resp. chain variance). R only contains the
    # values for each parameter. Therefore, mean and var will have len(spam)+1
    # as a first dimension
    mean = np.zeros((len(spam)+1, info.number_parameters))
    var = np.zeros((len(spam)+1, info.number_parameters))
    R = np.zeros(info.number_parameters)

    # Store the total number of points, and the total in each chain
    total = np.zeros(len(spam)+1)
    for j in xrange(len(spam)):
        total[j+1] = spam[j][:, 0].sum()
    total[0] = total[1:].sum()

    # Compute mean and variance for each chain
    print '--> Computing mean values'
    compute_mean(mean, spam, total)

    print '--> Computing variance'
    compute_variance(var, mean, spam, total)

    print '--> Computing convergence criterium (Gelman-Rubin)'
    # Gelman Rubin Diagnostic:
    # Computes a quantity linked to the ratio of the mean of the variances of
    # the different chains (within), and the variance of the means (between)
    # Note: This is not strictly speaking the Gelman Rubin test, defined for
    # same-length MC chains. Our quantity is defined without the square root,
    # which should not change much the result: a small sqrt(R) will still be a
    # small R. The same convention is used in CosmoMC, except for the weighted
    # average: we decided to do the average taking into account that longer
    # chains should count more
    within = 0
    between = 0
    for i in xrange(np.shape(mean)[1]):
        for j in xrange(len(spam)):
            within += total[j+1]*var[j+1, i]
            between += total[j+1]*(mean[j+1, i]-mean[0, i])**2
        within /= total[0]
        between /= (total[0]-1)

        R[i] = between/within
        if i == 0:
            print ' -> R-1 is %.6f' % R[i], '\tfor ', info.ref_names[i]
        else:
            print '           %.6f' % R[i], '\tfor ', info.ref_names[i]

    # Log finally the total number of steps, and absolute loglikelihood
    with open(info.log_path, 'a') as log:
        log.write("--> Total    number    of    steps: %d\n" % (
            info.steps))
        log.write("--> Total number of accepted steps: %d\n" % (
            info.accepted_steps))
        log.write("--> Minimum of -logLike           : %.2f" % (
            info.min_minus_lkl))

    # Store the remaining members in the info instance, for further writing to
    # files, storing only the mean and total of all the chains taken together
    info.mean = mean[0]
    info.R = R
    info.total = total[0]

    # Create the main chain, which consists in all elements of spam
    # put together. This will serve for the plotting.
    info.chain = np.vstack(spam)


[docs]def compute_posterior(information_instances):
    """
    computes the marginalized posterior distributions, and optionnally plots
    them

    Parameters
    ----------
    information_instances : list
        list of information objects, initialised on the given folders, or list
        of file, in input. For each of these instance, plot the 1d and 2d
        posterior distribution, depending on the flags stored in the instances,
        comming from command line arguments or read from a file.
    """
    # For convenience, store as `conf` the first element of the list
    # information_instances, since it will be called often to check for
    # configuration parameters
    conf = information_instances[0]

    # Pre configuration of the output, note that changes to the font size
    # will occur later on as well, to obtain a nice scaling.
    matplotlib.rc('text', usetex=True)
    matplotlib.rc('font', size=11)
    matplotlib.rc('xtick', labelsize='8')
    matplotlib.rc('ytick', labelsize='8')

    # Recover max and min values for each instance, defining the a priori place
    # of ticks (in case of a comparison, this should change)
    for info in information_instances:
        info.define_ticks()
        # If plots/ folder in output folder does not exist, create it
        if os.path.isdir(os.path.join(info.folder, 'plots')) is False:
            os.mkdir(os.path.join(info.folder, 'plots'))

    # Determine the total number of parameters to plot, based on the list
    # without duplicates of the plotted parameters of all information instances
    plotted_parameters = []
    # For printing not in latex
    ref_names = []
    for info in information_instances:
        for index, name in enumerate(info.plotted_parameters):
            if name not in plotted_parameters:
                plotted_parameters.append(name)
                ref_names.append(info.ref_names[index])

    if len(plotted_parameters) == 0:
        raise io_mp.AnalyzeError(
            "You provided no parameters to analyze, probably by selecting"
            " wrong parameters names in the '--extra' file.")
    # Find the appropriate number of columns and lines for the 1d posterior
    # plot
    num_columns = int(round(math.sqrt(len(plotted_parameters))))
    num_lines = int(math.ceil(len(plotted_parameters)*1.0/num_columns))

    # For special needs, you can impose here a different number of columns and lines in the 1d plot
    # Here is a commented example:
    # if (len(plotted_parameters) == 10):
    #     num_columns = 5
    #     num_lines = 2

    # Create the figures
    # which will be 3*3 inches per subplot, quickly growing!
    if conf.plot:
        fig1d = plt.figure(num=1, figsize=(
            3*num_columns,
            3*num_lines), dpi=80)
    if conf.plot_2d:
        fig2d = plt.figure(num=2, figsize=(
            3*len(plotted_parameters),
            3*len(plotted_parameters)), dpi=80)

    # Create the name of the files, concatenating the basenames with
    # underscores.
    file_name = "_".join(
        [info.basename for info in information_instances])

    # Loop over all the plotted parameters
    # There will be two indices at all time, the one running over the plotted
    # parameters, `index`, and the one corresponding to the actual column in
    # the actual file, `native_index`. For instance, if you try to plot only
    # two columns of a several columns file, index will vary from 0 to 1, but
    # the corresponding native indices might be anything.
    # Obviously, since plotted parameters contain potentially names not
    # contained in some files (in case of a comparison), native index might be
    # undefined.
    # Defined the legends object, which will store the plot style, to display
    # at the level of the figure
    legends = [None for _ in range(len(information_instances))]
    if not conf.legendnames:
        legend_names = [info.basename.replace('_', ' ')
                        for info in information_instances]
    else:
        legend_names = conf.legendnames
    print '-----------------------------------------------'
    for index, name in enumerate(plotted_parameters):

        # Adding the subplots to the respective figures, this will correspond
        # to the diagonal on the triangle plot.
        if conf.plot_2d:
            ax2d = fig2d.add_subplot(
                len(plotted_parameters),
                len(plotted_parameters),
                index*(len(plotted_parameters)+1)+1,
                yticks=[])
            ax2d.set_color_cycle(conf.cm)
        if conf.plot:
            ax1d = fig1d.add_subplot(
                num_lines, num_columns, index+1, yticks=[])
            ax1d.set_color_cycle(conf.cm)

        # check for each instance if the name is part of the list of plotted
        # parameters, and if yes, store the native_index. If not, store a flag
        # to ignore any further plotting or computing issues concerning this
        # particular instance.
        for info in information_instances:
            try:
                info.native_index = info.ref_names.index(name)
                info.ignore_param = False
                standard_name = info.backup_names[info.native_index]
            except ValueError:
                info.ignore_param = True

        adjust_ticks(name, information_instances)

        # normalized histogram
        print ' -> Computing histograms for ', name
        for info in information_instances:
            if not info.ignore_param:
                info.hist, info.bin_edges = np.histogram(
                    info.chain[:, info.native_index+2], bins=info.bins,
                    weights=info.chain[:, 0], normed=False)
                info.bincenters = 0.5*(info.bin_edges[1:]+info.bin_edges[:-1])

                # interpolated histogram (if available)
                info.interp_hist, info.interp_grid = cubic_interpolation(
                    info, info.hist, info.bincenters)
                info.interp_hist /= np.max(info.interp_hist)

                # minimum credible interval (method by Jan Haman). Fails for
                # multimodal histograms #FIXME
                bounds = minimum_credible_intervals(info)
                info.bounds[info.native_index] = bounds

        # plotting
        for info in information_instances:
            if not info.ignore_param:
                if conf.plot_2d:
                    plot = ax2d.plot(
                        info.interp_grid, info.interp_hist,
                        linewidth=info.line_width, ls='-')
                    legends[info.id] = plot[0]
                    ax2d.set_xticks(info.ticks[info.native_index])
                    if conf.legend_style == 'top':
                        ax2d.set_title(
                            '%s=$%.{0}g^{{+%.{0}g}}_{{%.{0}g}}$'.format(
                                info.decimal) % (
                                info.tex_names[info.native_index],
                                info.mean[info.native_index],
                                info.bounds[info.native_index, 0, -1],
                                info.bounds[info.native_index, 0, 0]),
                            fontsize=info.fontsize)
                        ax2d.set_xticklabels(
                            ['%.{0}g'.format(info.decimal) % s
                             for s in info.ticks[info.native_index]],
                            fontsize=info.ticksize)
                    elif conf.legend_style == 'sides':
                        # Except for the last 1d plot (bottom line), don't
                        # print ticks
                        if index == len(plotted_parameters)-1:
                            ax2d.set_xticklabels(
                                ['%.{0}g'.format(info.decimal) % s
                                 for s in info.ticks[info.native_index]],
                                fontsize=info.ticksize)
                            ax2d.set_xlabel(
                                info.tex_names[info.native_index],
                                fontsize=info.fontsize)
                        else:
                            ax2d.set_xticklabels([])
                    ax2d.axis([info.x_range[info.native_index][0],
                               info.x_range[info.native_index][1],
                               0, 1.05])

                if conf.plot:
                    # Note the use of double curly brackets {{ }} to produce
                    # the desired LaTeX output. This is necessary because the
                    # format function would otherwise understand single
                    # brackets as fields.
                    ax1d.set_title(
                        '%s=$%.{0}g^{{+%.{0}g}}_{{%.{0}g}}$'.format(
                            info.decimal) % (
                            info.tex_names[info.native_index],
                            info.mean[info.native_index],
                            info.bounds[info.native_index, 0, -1],
                            info.bounds[info.native_index, 0, 0]),
                        fontsize=info.fontsize)
                    ax1d.set_xticks(info.ticks[info.native_index])
                    ax1d.set_xticklabels(
                        ['%.{0}g'.format(info.decimal) % s
                         for s in info.ticks[info.native_index]],
                        fontsize=info.ticksize)
                    ax1d.axis([info.x_range[info.native_index][0],
                               info.x_range[info.native_index][1],
                               0, 1.05])

                    ax1d.plot(
                        info.interp_grid, info.interp_hist,
                        lw=info.line_width, ls='-')

        # mean likelihood (optional, if comparison, it will not be printed)
        # The color cycle has to be reset, before
        if conf.plot_2d:
            ax2d.set_color_cycle(conf.cm)
        if conf.plot:
            ax1d.set_color_cycle(conf.cm)
        if conf.mean_likelihood:
            for info in information_instances:
                if not info.ignore_param:
                    try:
                        lkl_mean, _ = np.histogram(
                            info.chain[:, info.native_index+2],
                            bins=info.bin_edges,
                            normed=False,
                            weights=np.exp(
                                conf.min_minus_lkl-info.chain[:, 1])*info.chain[:, 0])
                        lkl_mean /= lkl_mean.max()
                        interp_lkl_mean, interp_grid = cubic_interpolation(
                            info, lkl_mean, info.bincenters)
                        if conf.plot_2d:
                            ax2d.plot(interp_grid, interp_lkl_mean,
                                      ls='--', lw=conf.line_width)
                        if conf.plot:
                            ax1d.plot(interp_grid, interp_lkl_mean,
                                      ls='--', lw=conf.line_width)
                    except:
                        print 'could not find likelihood contour for ',
                        print info.ref_parameters[info.native_index]

        if conf.subplot is True:
            if conf.plot_2d:
                extent2d = ax2d.get_window_extent().transformed(
                    fig2d.dpi_scale_trans.inverted())
                fig2d.savefig(os.path.join(
                    conf.folder, 'plots', file_name+'.'+conf.extension),
                    bbox_inches=extent2d.expanded(1.1, 1.4))
            if conf.plot:
                extent1d = ax1d.get_window_extent().transformed(
                    fig1d.dpi_scale_trans.inverted())
                fig1d.savefig(os.path.join(
                    conf.folder, 'plots', file_name+'.'+conf.extension),
                    bbox_inches=extent1d.expanded(1.1, 1.4))
            # Store the function in a file
            for info in information_instances:
                if not info.ignore_param:
                    hist_file_name = os.path.join(
                        info.folder, 'plots',
                        info.basename+'_%s.hist' % (
                            standard_name))
                    write_histogram(hist_file_name,
                                    info.interp_grid, info.interp_hist)

        # Now do the rest of the triangle plot
        if conf.plot_2d:
            for second_index in xrange(index):
                second_name = plotted_parameters[second_index]
                for info in information_instances:
                    if not info.ignore_param:
                        try:
                            info.native_second_index = info.ref_names.index(
                                plotted_parameters[second_index])
                            info.has_second_param = True
                            second_standard_name = info.backup_names[
                                info.native_second_index]
                        except ValueError:
                            info.has_second_param = False
                    else:
                        info.has_second_param = False
                ax2dsub = fig2d.add_subplot(
                    len(plotted_parameters),
                    len(plotted_parameters),
                    (index)*len(plotted_parameters)+second_index+1)
                for info in information_instances:
                    if info.has_second_param:
                        info.n, info.xedges, info.yedges = np.histogram2d(
                            info.chain[:, info.native_index+2],
                            info.chain[:, info.native_second_index+2],
                            weights=info.chain[:, 0],
                            bins=(info.bins, info.bins),
                            normed=False)
                        info.extent = [
                            info.x_range[info.native_second_index][0],
                            info.x_range[info.native_second_index][1],
                            info.x_range[info.native_index][0],
                            info.x_range[info.native_index][1]]
                        info.x_centers = 0.5*(info.xedges[1:]+info.xedges[:-1])
                        info.y_centers = 0.5*(info.yedges[1:]+info.yedges[:-1])

                        # plotting contours, using the ctr_level method (from Karim
                        # Benabed). Note that only the 1 and 2 sigma contours are
                        # displayed (due to the line with info.levels[:2])
                        try:
                            contours = ax2dsub.contourf(
                                info.y_centers, info.x_centers, info.n,
                                extent=info.extent, levels=ctr_level(
                                    info.n, info.levels[:2]),
                                zorder=4, cmap=info.cmaps[info.id],
                                alpha=info.alphas[info.id])
                        except Warning:
                            warnings.warn(
                                "The routine could not find the contour of the " +
                                "'%s-%s' 2d-plot" % (
                                    info.plotted_parameters[info.native_index],
                                    info.plotted_parameters[info.native_second_index]))

                        ax2dsub.set_xticks(info.ticks[info.native_second_index])
                        if index == len(plotted_parameters)-1:
                            ax2dsub.set_xticklabels(
                                ['%.{0}g'.format(info.decimal) % s for s in
                                 info.ticks[info.native_second_index]],
                                fontsize=info.ticksize)
                            if conf.legend_style == 'sides':
                                ax2dsub.set_xlabel(
                                    info.tex_names[info.native_second_index],
                                    fontsize=info.fontsize)
                        else:
                            ax2dsub.set_xticklabels([''])

                        ax2dsub.set_yticks(info.ticks[info.native_index])
                        if second_index == 0:
                            ax2dsub.set_yticklabels(
                                ['%.{0}g'.format(info.decimal) % s for s in
                                 info.ticks[info.native_index]],
                                fontsize=info.ticksize)
                        else:
                            ax2dsub.set_yticklabels([''])

                        if conf.legend_style == 'sides':
                            if second_index == 0:
                                ax2dsub.set_ylabel(
                                    info.tex_names[info.native_index],
                                    fontsize=info.fontsize)

                if conf.subplot is True:
                    # Store the individual 2d plots.
                    if conf.plot_2d:
                        area = ax2dsub.get_window_extent().transformed(
                            fig2d.dpi_scale_trans.inverted())
                        # Pad the saved area by 10% in the x-direction and 20% in
                        # the y-direction
                        fig2d.savefig(os.path.join(
                            conf.folder, 'plots',
                            file_name+'_2d_%s-%s.%s' % (
                                standard_name, second_standard_name,
                                conf.extension)),
                            bbox_inches=area.expanded(1.4, 1.4))

                    # store the coordinates of the points for further
                    # plotting.
                    store_contour_coordinates(
                        conf, standard_name, second_standard_name, contours)

                    for info in information_instances:
                        if not info.ignore_param and info.has_second_param:
                            info.hist_file_name = os.path.join(
                                info.folder, 'plots',
                                '{0}_2d_{1}-{2}.hist'.format(
                                    info.basename,
                                    standard_name,
                                    second_standard_name))
                    write_histogram_2d(
                        info.hist_file_name, info.x_centers, info.y_centers,
                        info.extent, info.n)

    print '-----------------------------------------------'
    if conf.plot:
        print '--> Saving figures to .{0} files'.format(info.extension)
        plot_name = '-vs-'.join([os.path.split(elem.folder)[-1]
                                for elem in information_instances])
        if conf.plot_2d:
            if len(legends) > 1:
                try:
                    fig2d.legend(legends, legend_names, 'upper right',
                                 fontsize=info.legendsize)
                except TypeError:
                    fig2d.legend(legends, legend_names, 'upper right',
                                 prop={'fontsize': info.legendsize})
            fig2d.tight_layout()
            fig2d.savefig(
                os.path.join(
                    conf.folder, 'plots', '{0}_triangle.{1}'.format(
                        plot_name, info.extension)),
                bbox_inches=0, )
        if conf.plot:
            fig1d.tight_layout()
            fig1d.savefig(
                os.path.join(
                    conf.folder, 'plots', '{0}_1d.{1}'.format(
                        plot_name, info.extension)),
                bbox_inches=0)


[docs]def ctr_level(histogram2d, lvl, infinite=False):
    """
    Extract the contours for the 2d plots (Karim Benabed)

    """

    hist = histogram2d.flatten()*1.
    hist.sort()
    cum_hist = np.cumsum(hist[::-1])
    cum_hist /= cum_hist[-1]

    alvl = np.searchsorted(cum_hist, lvl)[::-1]
    clist = [0]+[hist[-i] for i in alvl]+[hist.max()]
    if not infinite:
        return clist[1:]
    return clist


[docs]def minimum_credible_intervals(info):
    """
    Extract minimum credible intervals (method from Jan Haman) FIXME
    """
    histogram = info.hist
    bincenters = info.bincenters
    levels = info.levels

    bounds = np.zeros((len(levels), 2))
    j = 0
    delta = bincenters[1]-bincenters[0]
    left_edge = np.max(histogram[0] - 0.5*(histogram[1]-histogram[0]), 0.)
    right_edge = np.max(histogram[-1] + 0.5*(histogram[-1]-histogram[-2]), 0.)
    failed = False
    for level in levels:
        norm = float(
            (np.sum(histogram)-0.5*(histogram[0]+histogram[-1]))*delta)
        norm += 0.25*(left_edge+histogram[0])*delta
        norm += 0.25*(right_edge+histogram[-1])*delta
        water_level_up = np.max(histogram)*1.0
        water_level_down = np.min(histogram)*1.0
        top = 0.

        iterations = 0
        while (abs((top/norm)-level) > 0.0001) and not failed:
            top = 0.
            water_level = (water_level_up + water_level_down)/2.
            #ontop = [elem for elem in histogram if elem > water_level]
            indices = [i for i in range(len(histogram))
                       if histogram[i] > water_level]
            # check for multimodal posteriors
            if ((indices[-1]-indices[0]+1) != len(indices)):
                warnings.warn(
                    "could not derive minimum credible intervals " +
                    "for this multimodal posterior")
                failed = True
                break
            top = (np.sum(histogram[indices]) -
                   0.5*(histogram[indices[0]]+histogram[indices[-1]]))*(delta)

            # left
            if indices[0] > 0:
                top += (0.5*(water_level+histogram[indices[0]]) *
                        delta*(histogram[indices[0]]-water_level) /
                        (histogram[indices[0]]-histogram[indices[0]-1]))
            else:
                if (left_edge > water_level):
                    top += 0.25*(left_edge+histogram[indices[0]])*delta
                else:
                    top += (0.25*(water_level + histogram[indices[0]]) *
                            delta*(histogram[indices[0]]-water_level) /
                            (histogram[indices[0]]-left_edge))

            # right
            if indices[-1] < (len(histogram)-1):
                top += (0.5*(water_level + histogram[indices[-1]]) *
                        delta*(histogram[indices[-1]]-water_level) /
                        (histogram[indices[-1]]-histogram[indices[-1]+1]))
            else:
                if (right_edge > water_level):
                    top += 0.25*(right_edge+histogram[indices[-1]])*delta
                else:
                    top += (0.25*(water_level + histogram[indices[-1]]) *
                            delta * (histogram[indices[-1]]-water_level) /
                            (histogram[indices[-1]]-right_edge))

            if top/norm >= level:
                water_level_down = water_level
            else:
                water_level_up = water_level
            # safeguard, just in case
            iterations += 1
            if (iterations > 1000):
                warnings.warn(
                    "the loop to check for sigma deviations was " +
                    "taking too long to converge")
                break

        # min
        if indices[0] > 0:
            bounds[j][0] = bincenters[indices[0]] - delta*(histogram[indices[0]]-water_level)/(histogram[indices[0]]-histogram[indices[0]-1])
        else:
            if (left_edge > water_level):
                bounds[j][0] = bincenters[0]-0.5*delta
            else:
                bounds[j][0] = bincenters[indices[0]] - 0.5*delta*(histogram[indices[0]]-water_level)/(histogram[indices[0]]-left_edge)

        # max
        if indices[-1] < (len(histogram)-1):
            bounds[j][1] = bincenters[indices[-1]] + delta*(histogram[indices[-1]]-water_level)/(histogram[indices[-1]]-histogram[indices[-1]+1])
        else:
            if (right_edge > water_level):
                bounds[j][1] = bincenters[-1]+0.5*delta
            else:
                bounds[j][1] = bincenters[indices[-1]] + \
                    0.5*delta*(histogram[indices[-1]]-water_level) / \
                    (histogram[indices[-1]]-right_edge)

        j += 1

    for elem in bounds:
        for j in (0, 1):
            elem[j] -= info.mean[info.native_index]
    return bounds


[docs]def write_h(info_file, indices, name, string, quantity, modifiers=None):
    """
    Write one horizontal line of output

    """
    info_file.write('\n '+name+'\t: ')
    for i in indices:
        info_file.write(string % quantity[i]+'\t')


[docs]def cubic_interpolation(info, hist, bincenters):
    """
    Small routine to accomodate the absence of the interpolate module

    """
    if info.has_interpolate_module:
        interp_grid = np.linspace(
            bincenters[0], bincenters[-1], len(bincenters)*10)
        from scipy.interpolate import interp1d
        f = interp1d(bincenters, hist, kind='cubic')
        interp_hist = f(interp_grid)
        return interp_hist, interp_grid
    else:
        return hist, bincenters


[docs]def write_histogram(hist_file_name, x_centers, hist):
    """
    Store the posterior distribution to a file

    """
    with open(hist_file_name, 'w') as hist_file:
        hist_file.write("# 1d posterior distribution\n")
        hist_file.write("\n# x_centers\n")
        hist_file.write(", ".join(
            [str(elem) for elem in x_centers])+"\n")
        hist_file.write("\n# Histogram\n")
        hist_file.write(", ".join(
            [str(elem) for elem in hist])+"\n")
    print 'wrote ', hist_file_name


[docs]def read_histogram(histogram_path):
    """
    Recover a stored 1d posterior

    """
    with open(histogram_path, 'r') as hist_file:
        for line in hist_file:
            if line:
                if line.find("# x_centers") != -1:
                    x_centers = [float(elem) for elem in
                                 hist_file.next().split(",")]
                elif line.find("# Histogram") != -1:
                    hist = [float(elem) for elem in
                            hist_file.next().split(",")]
    x_centers = np.array(x_centers)
    hist = np.array(hist)

    return x_centers, hist


[docs]def write_histogram_2d(hist_file_name, x_centers, y_centers, extent, hist):
    """
    Store the histogram information to a file, to plot it later

    """
    with open(hist_file_name, 'w') as hist_file:
        hist_file.write("# Interpolated histogram\n")
        hist_file.write("\n# x_centers\n")
        hist_file.write(", ".join(
            [str(elem) for elem in x_centers])+"\n")

        hist_file.write("\n# y_centers\n")
        hist_file.write(", ".join(
            [str(elem) for elem in y_centers])+"\n")

        hist_file.write("\n# Extent\n")
        hist_file.write(", ".join(
            [str(elem) for elem in extent])+"\n")

        hist_file.write("\n# Histogram\n")
        for line in hist:
            hist_file.write(", ".join(
                [str(elem) for elem in line])+"\n")


[docs]def read_histogram_2d(histogram_path):
    """
    Read the histogram information that was stored in a file.

    To use it, call something like this:

    .. code::

        x_centers, y_centers, extent, hist = read_histogram_2d_from_file(path)
        fig, ax = plt.subplots()
        ax.contourf(
            y_centers, x_centers, hist, extent=extent,
            levels=ctr_level(hist, [0.68, 0.95]),
            zorder=5, cma=plt.cm.autumn_r)
        plt.show()

    """
    with open(histogram_path, 'r') as hist_file:
        length = 0
        for line in hist_file:
            if line:
                if line.find("# x_centers") != -1:
                    x_centers = [float(elem) for elem in
                                 hist_file.next().split(",")]
                    length = len(x_centers)
                elif line.find("# y_centers") != -1:
                    y_centers = [float(elem) for elem in
                                 hist_file.next().split(",")]
                elif line.find("# Extent") != -1:
                    extent = [float(elem) for elem in
                              hist_file.next().split(",")]
                elif line.find("# Histogram") != -1:
                    hist = []
                    for index in range(length):
                        hist.append([float(elem) for elem in
                                     hist_file.next().split(",")])
    x_centers = np.array(x_centers)
    y_centers = np.array(y_centers)
    extent = np.array(extent)
    hist = np.array(hist)

    return x_centers, y_centers, extent, hist


[docs]def clean_conversion(module_name, tag, folder):
    """
    Execute the methods "convert" from the different sampling algorithms

    Returns True if something was made, False otherwise
    """
    has_module = False
    subfolder_name = tag+"_subfolder"
    try:
        module = importlib.import_module(module_name)
        subfolder = getattr(module, subfolder_name)
        has_module = True
    except ImportError:
        # The module is not installed, the conversion can not take place
        pass

    if has_module and os.path.isdir(folder):
        # Remove any potential trailing slash
        folder = os.path.join(
            *[elem for elem in folder.split(os.path.sep) if elem])
        if folder.split(os.path.sep)[-1] == subfolder:
            try:
                getattr(module, 'from_%s_output_to_chains' % tag)(folder)
            except IOError:
                raise io_mp.AnalyzeError(
                    "You asked to analyze a %s folder which " % tag +
                    "seems to come from an unfinished run, or to be empty " +
                    "or corrupt. Please make sure the run went smoothly " +
                    "enough.")
            warnings.warn(
                "The content of the %s subfolder has been " % tag +
                "translated for Monte Python. Please run an "
                "analysis of the entire folder now.")
            return True
    else:
        return False


[docs]def separate_files(files):
    """
    Separate the input files in folder

    Given all input arguments to the command line files entry, separate them in
    a list of lists, grouping them by folders. The number of identified folders
    will determine the number of information instances to create
    """
    final_list = []
    temp = [files[0]]
    folder = (os.path.dirname(files[0]) if os.path.isfile(files[0])
              else files[0])
    if len(files) > 1:
        for elem in files[1:]:
            new_folder = (os.path.dirname(elem) if os.path.isfile(elem)
                          else elem)
            if new_folder == folder:
                temp.append(elem)
            else:
                folder = new_folder
                final_list.append(temp)
                temp = [elem]
    final_list.append(temp)

    return final_list


[docs]def recover_folder_and_files(files):
    """
    Distinguish the cases when analyze is called with files or folder

    Note that this takes place chronologically after the function
    `separate_files`"""
    # The following list defines the substring that a chain should contain for
    # the code to recognise it as a proper chain.
    substrings = ['.txt', '__']
    limit = 10
    # If the first element is a folder, grab all chain files inside
    if os.path.isdir(files[0]):
        folder = os.path.normpath(files[0])
        files = [os.path.join(folder, elem) for elem in os.listdir(folder)
                 if not os.path.isdir(os.path.join(folder, elem))
                 and not os.path.getsize(os.path.join(folder, elem)) < limit
                 and all([x in elem for x in substrings])]
    # Otherwise, extract the folder from the chain file-name.
    else:
        # If the name is completely wrong, say it
        if not os.path.exists(files[0]):
            raise io_mp.AnalyzeError(
                "You provided a non-existant folder/file to analyze")
        folder = os.path.relpath(
            os.path.dirname(os.path.realpath(files[0])), os.path.curdir)
        files = [os.path.join(folder, elem) for elem in os.listdir(folder)
                 if os.path.join(folder, elem) in np.copy(files)
                 and not os.path.isdir(os.path.join(folder, elem))
                 and not os.path.getsize(os.path.join(folder, elem)) < limit
                 and all([x in elem for x in substrings])]
    basename = os.path.basename(folder)
    return folder, files, basename


[docs]def extract_array(line):
    """
    Return the array on the RHS of the line

    >>> extract_array("toto = ['one', 'two']\n")
    ['one', 'two']
    >>> extract_array('toto = ["one", 0.2]\n')
    ['one', 0.2]

    """
    # Recover RHS of the equal sign, and remove surrounding spaces
    rhs = line.split('=')[-1].strip()
    # Remove array signs
    rhs = rhs.strip(']').lstrip('[')
    # Recover each element of the list
    sequence = [e.strip().strip('"').strip("'") for e in rhs.split(',')]
    for index, elem in enumerate(sequence):
        try:
            sequence[index] = int(elem)
        except ValueError:
            try:
                sequence[index] = float(elem)
            except ValueError:
                pass
    return sequence


[docs]def extract_dict(line):
    """
    Return the key and value of the dictionary element contained in line

    >>> extract_dict("something['toto'] = [0, 1, 2, -2, 'cosmo']")
    'toto', [0, 1, 2, -2, 'cosmo']

    """
    # recovering the array
    sequence = extract_array(line)
    # Recovering only the LHS
    lhs = line.split('=')[0].strip()
    # Recovering the name from the LHS
    name = lhs.split('[')[-1].strip(']')
    name = name.strip('"').strip("'")

    return name, sequence


[docs]def extract_parameter_names(info):
    """
    Reading the log.param, store in the Information instance the names
    """
    backup_names = []
    plotted_parameters = []
    boundaries = []
    ref_names = []
    tex_names = []
    scales = []
    with open(info.param_path, 'r') as param:
        for line in param:
            if line.find('#') == -1:
                if line.find('data.experiments') != -1:
                    info.experiments = extract_array(line)
                if line.find('data.parameters') != -1:
                    name, array = extract_dict(line)
                    original = name
                    # Rename the names according the .extra file (opt)
                    if name in info.to_change.iterkeys():
                        name = info.to_change[name]
                    # If the name corresponds to a varying parameter (fourth
                    # entry in the initial array being non-zero, or a derived
                    # parameter (could be designed as fixed, it does not make
                    # any difference)), then continue the process of analyzing.
                    if array[3] != 0 or array[5] == 'derived':
                        # The real name is always kept, to have still the class
                        # names in the covmat
                        backup_names.append(original)
                        # With the list "to_plot", we can potentially restrict
                        # the variables plotted. If it is empty, though, simply
                        # all parameters will be plotted.
                        if info.to_plot == []:
                            plotted_parameters.append(name)
                        else:
                            if name in info.to_plot:
                                plotted_parameters.append(name)

                        # Append to the boundaries array
                        boundaries.append([
                            None if elem == 'None' or (isinstance(elem, int)
                                                       and elem == -1)
                            else elem for elem in array[1:3]])
                        ref_names.append(name)
                        # Take care of the scales
                        scale = array[4]
                        if name in info.new_scales.iterkeys():
                            scale = info.new_scales[name]
                        scales.append(scale)

                        # Given the scale, decide for the pretty tex name
                        number = 1./scale
                        tex_names.append(
                            io_mp.get_tex_name(name, number=number))
    scales = np.diag(scales)

    info.ref_names = ref_names
    info.tex_names = tex_names
    info.boundaries = boundaries
    info.backup_names = backup_names
    info.scales = scales
    # Beware, the following two numbers are different. The first is the total
    # number of parameters stored in the chain, whereas the second is for
    # plotting purpose only.
    info.number_parameters = len(ref_names)
    info.plotted_parameters = plotted_parameters


[docs]def find_maximum_of_likelihood(info):
    """
    Finding the global maximum of likelihood

    min_minus_lkl will be appended with all the maximum likelihoods of files,
    then will be replaced by its own maximum. This way, the global
    maximum likelihood will be used as a reference, and not each chain's
    maximum.
    """
    min_minus_lkl = []
    for chain_file in info.files:
        # cheese will brutally contain everything (- log likelihood) in the
        # file chain_file being scanned.
        # This could potentially be faster with pandas, but is already quite
        # fast
        #
        # This would read the chains including comment lines:
        #cheese = (np.array([float(line.split()[1].strip())
        #                    for line in open(chain_file, 'r')]))
        #
        # This reads the chains excluding comment lines:
        with open(chain_file, 'r') as f:
            cheese = (np.array([float(line.split()[1].strip())
                                for line in ifilterfalse(iscomment,f)]))

        try:
            min_minus_lkl.append(cheese[:].min())
        except ValueError:
            pass
    # beware, it is the min because we are talking about
    # '- log likelihood'
    # Selecting only the true maximum.
    try:
        min_minus_lkl = min(min_minus_lkl)
    except ValueError:
        raise io_mp.AnalyzeError(
            "No decently sized chain was found in the desired folder. " +
            "Please wait to have more accepted point before trying " +
            "to analyze it.")

    info.min_minus_lkl = min_minus_lkl


[docs]def remove_bad_points(info):
    """
    Create an array with all the points from the chains, after removing non-markovian, burn-in and fixed fraction

    """
    # spam will brutally contain all the chains with sufficient number of
    # points, after the burn-in was removed.
    spam = list()

    # Recover the longest file name, for pleasing display
    max_name_length = max([len(e) for e in info.files])

    # Total number of steps done:
    steps = 0
    accepted_steps = 0

    # Open the log file
    log = open(info.log_path, 'w')

    for index, chain_file in enumerate(info.files):
        # To improve presentation, and print only once the full path of the
        # analyzed folder, we recover the length of the path name, and
        # create an empty complementary string of this length
        total_length = 18+max_name_length
        empty_length = 18+len(os.path.dirname(chain_file))+1

        basename = os.path.basename(chain_file)
        if index == 0:
            exec "print '--> Scanning file %-{0}s' % chain_file,".format(
                max_name_length)
        else:
            exec "print '%{0}s%-{1}s' % ('', basename),".format(
                empty_length, total_length-empty_length)
        # cheese will brutally contain everything in the chain chain_file being
        # scanned
        #
        # This would read the chains including comment lines:
        #cheese = (np.array([[float(elem) for elem in line.split()]
        #                    for line in open(chain_file, 'r')]))
        #
        # This read the chains excluding comment lines:
        with open(chain_file, 'r') as f:
            cheese = (np.array([[float(elem) for elem in line.split()]
                                for line in ifilterfalse(iscomment,f)]))
        # If the file contains a broken line with a different number of
        # elements, the previous array generation might fail, and will not have
        # the correct shape. Hence the following command will fail. To avoid
        # that, the error is caught.
        try:
            local_min_minus_lkl = cheese[:, 1].min()
        except IndexError:
            raise io_mp.AnalyzeError(
                "Error while scanning %s." % chain_file +
                " This file most probably contains "
                "an incomplete line, rendering the analysis impossible. "
                "I think that the following line(s) is(are) wrong:\n %s" % (
                    '\n '.join(
                        ['-> %s' % line for line in
                         open(chain_file, 'r') if
                         len(line.split()) != len(info.backup_names)+2])))
        line_count = float(sum(1 for line in open(chain_file, 'r')))

        # Logging the information obtained until now.
        number_of_steps = cheese[:, 0].sum()
        log.write("%s\t " % os.path.basename(chain_file))
        log.write(" Number of steps:%d\t" % number_of_steps)
        log.write(" Steps accepted:%d\t" % line_count)
        log.write(" acc = %.2g\t" % (float(line_count)/number_of_steps))
        log.write("min(-loglike) = %.2f\n" % local_min_minus_lkl)
        steps += number_of_steps
        accepted_steps += line_count

        # check if analyze() is called directly by the user, or by the mcmc loop during an updating phase
        try:
            # command_line.update is defined when called by the mcmc loop
            info.update
        except:
            # in case it was not defined (i.e. when analyze() is called directly by user), set it to False
            info.update = 0

        # Removing non-markovian part, burn-in, and fraction= (1 - keep-fraction)
        start = 0
        markovian=0
        try:
            # Read all comments in chains about times when proposal was updated
            # The last of these comments gives the number of lines to be skipped in the files
            if info.markovian and not info.update:
                with open(chain_file, 'r') as f:
                    for line in ifilter(iscomment,f):
                        start = int(line.split()[2])
                markovian = start

            # Remove burn-in, defined as all points until the likelhood reaches min_minus_lkl+LOG_LKL_CUTOFF
            while cheese[start, 1] > info.min_minus_lkl+LOG_LKL_CUTOFF:
                start += 1
            burnin = start-markovian

            # Remove fixed fraction as requested by user (usually not useful if non-markovian is also removed)
            if info.keep_fraction < 1:
                start = start + (1-info.keep_fraction)*(line_count - start)

            print ": Removed",
            if info.markovian:
                print "%d non-markovian points," % markovian,
            print "%d points of burn-in," % burnin,
            if info.keep_fraction < 1:
                print "and first %.0f percent," % (100.*(1-info.keep_fraction)),
            print "keep %d steps" % (line_count-start)

            # JL debug
            if markovian > 0:
                print "first chain line taken in consideration:"
                print cheese[markovian]

        except IndexError:
            print ': Removed everything: chain not converged'


        # ham contains cheese without the burn-in, if there are any points
        # left (more than 5)
        if np.shape(cheese)[0] > start+5:
            ham = np.copy(cheese[start::])

            # Deal with single file case
            if len(info.files) == 1:
                warnings.warn("Convergence computed for a single file")
                bacon = np.copy(cheese[::3, :])
                egg = np.copy(cheese[1::3, :])
                sausage = np.copy(cheese[2::3, :])

                spam.append(bacon)
                spam.append(egg)
                spam.append(sausage)
                continue

            # Adding resulting table to spam
            spam.append(ham)

    # Test the length of the list
    if len(spam) == 0:
        raise io_mp.AnalyzeError(
            "No decently sized chain was found. " +
            "Please wait a bit to analyze this folder")

    # Applying now new rules for scales, if the name is contained in the
    # referenced names
    for name in info.new_scales.iterkeys():
        try:
            index = info.ref_names.index(name)
            for i in xrange(len(spam)):
                spam[i][:, index+2] *= 1./info.scales[index, index]
        except ValueError:
            # there is nothing to do if the name is not contained in ref_names
            pass

    info.steps = steps
    info.accepted_steps = accepted_steps

    return spam


[docs]def compute_mean(mean, spam, total):
    """
    """
    for i in xrange(np.shape(mean)[1]):
        for j in xrange(len(spam)):
            submean = np.sum(spam[j][:, 0]*spam[j][:, i+2])
            mean[j+1, i] = submean / total[j+1]
            mean[0, i] += submean
        mean[0, i] /= total[0]


[docs]def compute_variance(var, mean, spam, total):
    """
    """
    for i in xrange(np.shape(var)[1]):
        for j in xrange(len(spam)):
            var[0, i] += np.sum(
                spam[j][:, 0]*(spam[j][:, i+2]-mean[0, i])**2)
            var[j+1, i] = np.sum(
                spam[j][:, 0]*(spam[j][:, i+2]-mean[j+1, i])**2) / \
                (total[j+1]-1)
        var[0, i] /= (total[0]-1)


[docs]def compute_covariance_matrix(info):
    """
    """
    covar = np.zeros((len(info.ref_names), len(info.ref_names)))
    for i in xrange(len(info.ref_names)):
        for j in xrange(i, len(info.ref_names)):
            covar[i, j] = (
                info.chain[:, 0]*(
                    (info.chain[:, i+2]-info.mean[i]) *
                    (info.chain[:, j+2]-info.mean[j]))).sum()
            if i != j:
                covar[j, i] = covar[i, j]
    covar /= info.total

    # Removing scale factors in order to store true parameter covariance
    covar = np.dot(info.scales.T, np.dot(covar, info.scales))

    return covar


[docs]def adjust_ticks(param, information_instances):
    """
    """
    if len(information_instances) == 1:
        return
    # Recovering all x_range and ticks entries from the concerned information
    # instances
    x_ranges = []
    ticks = []
    for info in information_instances:
        if not info.ignore_param:
            x_ranges.append(info.x_range[info.native_index])
            ticks.append(info.ticks[info.native_index])

    # The new x_range and tick should min/max all the existing ones
    new_x_range = np.array(
        [min([e[0] for e in x_ranges]), max([e[1] for e in x_ranges])])
    temp_ticks = np.array(
        [min([e[0] for e in ticks]), max([e[-1] for e in ticks])])

    new_ticks = np.linspace(temp_ticks[0],
                            temp_ticks[1],
                            info.ticknumber)

    for info in information_instances:
        if not info.ignore_param:
            info.x_range[info.native_index] = new_x_range
            info.ticks[info.native_index] = new_ticks


[docs]def store_contour_coordinates(info, name1, name2, contours):
    """docstring"""
    file_name = os.path.join(
        info.folder, 'plots', '{0}_2d_{1}-{2}.dat'.format(
            info.basename, name1, name2))

    with open(file_name, 'w') as plot_file:
        plot_file.write(
            '# contour for confidence level {0}\n'.format(
                info.levels[1]))
        for elem in contours.collections[0].get_paths():
            points = elem.vertices
            for k in range(np.shape(points)[0]):
                plot_file.write("%.8g\t %.8g\n" % (
                    points[k, 0], points[k, 1]))
                # stop to not include the inner contours
                if k != 0:
                    if all(points[k] == points[0]):
                        plot_file.write("\n")
                        break
        plot_file.write("\n\n")
        plot_file.write(
            '# contour for confidence level {0}\n'.format(
                info.levels[0]))
        for elem in contours.collections[1].get_paths():
            points = elem.vertices
            for k in range(np.shape(points)[0]):
                plot_file.write("%.8g\t %.8g\n" % (
                    points[k, 0], points[k, 1]))
                if k != 0:
                    if all(points[k] == points[0]):
                        plot_file.write("\n")
                        break
        plot_file.write("\n\n")

[docs]def iscomment(s):
    """
    Define what we call a comment in MontePython chain files
    """
    return s.startswith('#')

[docs]class Information(object):
    """
    Hold all information for analyzing runs

    """
    # Counting the number of instances, to choose the color map
    _ids = count(0)
    # Flag checking the absence or presence of the interp1d function
    has_interpolate_module = False

    # Global colormap for the 1d plots. Colours will get chosen from this.
    # Some old versions of matplotlib do not have CMRmap, so the colours will
    # be harcoded
    # Note that, as with the other customisation options, you can specify new
    # values for this in the extra plot_file.
    cm = [
        (0.,      0.,      0.,      1.),
        (0.30235, 0.15039, 0.74804, 1.),
        (0.99843, 0.25392, 0.14765, 1.),
        (0.90000, 0.75353, 0.10941, 1.)]

    # Define colormaps for the contour plots
    cmaps = [plt.cm.gray_r, plt.cm.Purples, plt.cm.Reds_r, plt.cm.Greens]
    alphas = [1.0, 0.8, 0.6, 0.4]

    def __init__(self, command_line, other=None):
        """
        The following initialization creates the three tables that can be
        customized in an extra plot_file (see :mod:`parser_mp`).

        Parameters
        ----------
        command_line : Namespace
            it contains the initialised command line arguments
        """
        self.to_change = {}
        """
        Dictionary whose keys are the old parameter names, and values are the
        new ones. For instance :code:`{'beta_plus_lambda':'beta+lambda'}`

        """
        self.to_plot = []
        """
        Array of names of parameters to plot. If left empty, all will be
        plotted.

        .. warning::
            If you changed a parameter name with :attr:`to_change`, you need to
            give the new name to this array

        """
        self.new_scales = {}
        """
        Dictionary that redefines some scales. The keys will be the parameter
        name, and the value its scale.

        """
        # Assign a unique id to this instance
        self.id = self._ids.next()

        # Defining the sigma contours (1, 2 and 3-sigma)
        self.levels = np.array([68.26, 95.4, 99.7])/100.

        # Follows a bunch of initialisation to provide default members
        self.ref_names, self.backup_names = [], []
        self.scales, self.plotted_parameters = [], []
        self.spam = []

        # Store directly all information from the command_line object into this
        # instance, except the protected members (begin and end with __)
        for elem in dir(command_line):
            if elem.find('__') == -1:
                setattr(self, elem, getattr(command_line, elem))

        # initialize the legend size to be the same as fontsize, but can be
        # altered in the extra file
        self.legendsize = self.fontsize
        self.legendnames = []

        # Read a potential file describing changes to be done for the parameter
        # names, and number of paramaters plotted (can be let empty, all will
        # then be plotted), but also the style of the plot. Note that this
        # overrides the command line options
        if command_line.optional_plot_file:
            plot_file_vars = {'info': self}
            execfile(command_line.optional_plot_file, plot_file_vars)

        # check and store keep_fraction
        if command_line.keep_fraction<=0 or command_line.keep_fraction>1:
            raise io_mp.AnalyzeError("after --keep-fraction you should pass a float >0 and <=1")
        self.keep_fraction = command_line.keep_fraction

[docs]    def remap_parameters(self, spam):
        """
        Perform substitutions of parameters for analyzing

        .. note::

            for arbitrary combinations of parameters, the prior will not
            necessarily be flat.

        """
        if hasattr(self, 'redefine'):
            for key, value in self.redefine.iteritems():
                # Check that the key was an original name
                if key in self.backup_names:
                    print ' /|\  Transforming', key, 'into', value
                    # We recover the indices of the key
                    index_to_change = self.backup_names.index(key)+2
                    print('/_o_\ The new variable will be called ' +
                          self.ref_names[self.backup_names.index(key)])
                    # Recover all indices of all variables present in the
                    # remapping
                    variable_names = [elem for elem in self.backup_names if
                                      value.find(elem) != -1]
                    indices = [self.backup_names.index(name)+2 for name in
                               variable_names]
                    # Now loop over all files in spam
                    for i in xrange(len(spam)):
                        # Assign variables to their values
                        for index, name in zip(indices, variable_names):
                            exec("%s = spam[i][:, %i]" % (name, index))
                        # Assign to the desired index the combination
                        exec("spam[i][:, %i] = %s" % (index_to_change, value))

[docs]    def define_ticks(self):
        """
        """
        self.max_values = self.chain[:, 2:].max(axis=0)
        self.min_values = self.chain[:, 2:].min(axis=0)
        self.span = (self.max_values-self.min_values)
        # Define the place of ticks, given the number of ticks desired, stored
        # in conf.ticknumber
        self.ticks = np.array(
            [np.linspace(self.min_values[i]+self.span[i]*0.1,
                         self.max_values[i]-self.span[i]*0.1,
                         self.ticknumber) for i in range(len(self.span))])
        # Define the x range (ticks start not exactly at the range boundary to
        # avoid display issues)
        self.x_range = np.array((self.min_values, self.max_values)).T

        # In case the exploration hit a boundary (as defined in the parameter
        # file), at the level of precision defined by the number of bins, the
        # ticks and x_range should be altered in order to display this
        # meaningful number instead.
        for i in range(np.shape(self.ticks)[0]):
            x_range = self.x_range[i]
            bounds = self.boundaries[i]
            # Left boundary
            if bounds[0] is not None:
                if abs(x_range[0]-bounds[0]) < self.span[i]/self.bins:
                    self.ticks[i][0] = bounds[0]
                    self.x_range[i][0] = bounds[0]
            # Right boundary
            if bounds[-1] is not None:
                if abs(x_range[-1]-bounds[-1]) < self.span[i]/self.bins:
                    self.ticks[i][-1] = bounds[-1]
                    self.x_range[i][-1] = bounds[-1]

[docs]    def write_information_files(self):

        # Store in info_names only the tex_names that were plotted, for this
        # instance, and in indices the corresponding list of indices. It also
        # removes the $ signs, for clarity
        self.info_names = [
            name for index, name in enumerate(self.tex_names) if
            self.ref_names[index] in self.plotted_parameters]
        self.indices = [self.tex_names.index(name) for name in self.info_names]
        self.tex_names = [name for index, name in enumerate(self.tex_names) if
            self.ref_names[index] in self.plotted_parameters]
        self.info_names = [name.replace('$', '') for name in self.info_names]

        # Define the bestfit array
        self.bestfit = np.zeros(len(self.ref_names))
        for i in xrange(len(self.ref_names)):
            self.bestfit[i] = self.chain[self.sorted_indices[0], :][2+i]

        # Write down to the .h_info file all necessary information
        self.write_h_info()
        self.write_v_info()
        self.write_tex()

[docs]    def write_h_info(self):

        with open(self.h_info_path, 'w') as h_info:
            h_info.write(' param names\t:  ')
            for name in self.info_names:
                h_info.write("%-14s" % name)

            write_h(h_info, self.indices, 'R-1 values', '% .6f', self.R)
            write_h(h_info, self.indices, 'Best Fit  ', '% .6e', self.bestfit)
            write_h(h_info, self.indices, 'mean      ', '% .6e', self.mean)
            write_h(h_info, self.indices, 'sigma     ', '% .6e',
                    (self.bounds[:, 0, 1]-self.bounds[:, 0, 0])/2.)
            h_info.write('\n')
            write_h(h_info, self.indices, '1-sigma - ', '% .6e',
                    self.bounds[:, 0, 0])
            write_h(h_info, self.indices, '1-sigma + ', '% .6e',
                    self.bounds[:, 0, 1])
            write_h(h_info, self.indices, '2-sigma - ', '% .6e',
                    self.bounds[:, 1, 0])
            write_h(h_info, self.indices, '2-sigma + ', '% .6e',
                    self.bounds[:, 1, 1])
            write_h(h_info, self.indices, '3-sigma - ', '% .6e',
                    self.bounds[:, 2, 0])
            write_h(h_info, self.indices, '3-sigma + ', '% .6e',
                    self.bounds[:, 2, 1])

            # bounds
            h_info.write('\n')
            write_h(h_info, self.indices, '1-sigma > ', '% .6e',
                    self.mean+self.bounds[:, 0, 0])
            write_h(h_info, self.indices, '1-sigma < ', '% .6e',
                    self.mean+self.bounds[:, 0, 1])
            write_h(h_info, self.indices, '2-sigma > ', '% .6e',
                    self.mean+self.bounds[:, 1, 0])
            write_h(h_info, self.indices, '2-sigma < ', '% .6e',
                    self.mean+self.bounds[:, 1, 1])
            write_h(h_info, self.indices, '3-sigma > ', '% .6e',
                    self.mean+self.bounds[:, 2, 0])
            write_h(h_info, self.indices, '3-sigma < ', '% .6e',
                    self.mean+self.bounds[:, 2, 1])

[docs]    def write_v_info(self):
        """Write vertical info file"""
        with open(self.v_info_path, 'w') as v_info:
            v_info.write('%-15s\t:  %-11s' % ('param names', 'R-1'))
            v_info.write(' '.join(['%-11s' % elem for elem in [
                'Best fit', 'mean', 'sigma', '1-sigma -', '1-sigma +',
                '2-sigma -', '2-sigma +', '1-sigma >', '1-sigma <',
                '2-sigma >', '2-sigma <']]))
            for index, name in zip(self.indices, self.info_names):
                v_info.write('\n%-15s\t: % .4e' % (name, self.R[index]))
                v_info.write(' '.join(['% .4e' % elem for elem in [
                    self.bestfit[index], self.mean[index],
                    (self.bounds[index, 0, 1]-self.bounds[index, 0, 0])/2.,
                    self.bounds[index, 0, 0], self.bounds[index, 0, 1],
                    self.bounds[index, 1, 0], self.bounds[index, 1, 1],
                    self.mean[index]+self.bounds[index, 0, 0],
                    self.mean[index]+self.bounds[index, 0, 1],
                    self.mean[index]+self.bounds[index, 1, 0],
                    self.mean[index]+self.bounds[index, 1, 1]]]))

[docs]    def write_tex(self):
        """Write a tex table containing the main results """
        with open(self.tex_path, 'w') as tex:
            tex.write("\\begin{tabular}{|l|c|c|c|c|} \n \\hline \n")
            tex.write("Param & best-fit & mean$\pm\sigma$ ")
            tex.write("& 95\% lower & 95\% upper \\\\ \\hline \n")
            for index, name in zip(self.indices, self.tex_names):
                tex.write("%s &" % name)
                tex.write("$%.4g$ & $%.4g_{%.2g}^{+%.2g}$ " % (
                    self.bestfit[index], self.mean[index],
                    self.bounds[index, 0, 0], self.bounds[index, 0, 1]))
                tex.write("& $%.4g$ & $%.4g$ \\\\ \n" % (
                    self.mean[index]+self.bounds[index, 1, 0],
                    self.mean[index]+self.bounds[index, 1, 1]))

            tex.write("\\hline \n \\end{tabular} \\\\ \n")
            tex.write("$-\ln{\cal L}_\mathrm{min} =%.6g$, " % (
                self.min_minus_lkl))
            tex.write("minimum $\chi^2=%.4g$ \\\\ \n" % (
                self.min_minus_lkl*2.))