Source code for hyswap.plots

"""Functions for plotting."""
import calendar
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from hyswap.percentiles import calculate_variable_percentile_thresholds_by_day
from hyswap.cumulative import calculate_daily_cumulative_values



[docs]
def plot_flow_duration_curve(
        values, exceedance_probabilities,
        observations=None, observation_probabilities=None,
        ax=None, title='Flow Duration Curve',
        xlab='Exceedance Probability\n' +
        '(Percentage of time indicated value was equaled or exceeded)',
        ylab='Discharge, ft3/s', grid=True,
        scatter_kwargs=None, **kwargs):
    """ Plot a flow duration curve.

    Flow duration curves are cumulative frequency curves that show the
    percentage of time measured discharge values are equaled or exceeded
    by all other discharge values in the dataset.

    Parameters
    ----------
    values : array-like
        Values to plot along y-axis.
    exceedance_probabilities : array-like
        Exceedance probabilities for each value, likely calculated from
        a function like :obj:`hyswap.exceedance.calculate_exceedance_probability_from_values_multiple`.
        Expressed as fractions; they are multiplied by 100 for plotting.
    observations : list, numpy.ndarray, optional
        List, numpy array or list-able set of flow observations. Optional, if
        not provided the observations are not plotted.
    observation_probabilities : list, numpy.ndarray, optional
        Exceedance probabilities corresponding to each observation, likely
        calculated from a function like
        :obj:`hyswap.exceedance.calculate_exceedance_probability_from_values_multiple`.
        Optional, if not provided observations are not plotted.
    ax : matplotlib.axes.Axes, optional
        Axes to plot on. If not provided, a new figure and axes will be
        created.
    title : str, optional
        Title for the plot. If not provided, the default title will be
        'Flow Duration Curve'.
    xlab : str, optional
        Label for the x-axis. If not provided, a default label will be used.
    ylab : str, optional
        Label for the y-axis. If not provided, a default label will be used.
    grid : bool, optional
        Whether to show grid lines on the plot. Default is True.
    scatter_kwargs : dict, optional
        Dictionary containing keyword arguments to pass to the observations
        plotting method, :meth:`matplotlib.axes.Axes.scatter`. If None
        (the default), no extra keyword arguments are passed.
    **kwargs
        Keyword arguments passed to :meth:`matplotlib.axes.Axes.plot`.

    Returns
    -------
    matplotlib.axes.Axes
        Axes object containing the plot.

    Examples
    --------
    Fetch some data from NWIS, calculate the exceedance probabilities and then
    make the flow duration curve.

    .. plot::
        :include-source:

        >>> df, _ = dataretrieval.nwis.get_dv(site='06892350',
        ...                                   parameterCd='00060',
        ...                                   start='1776-07-04',
        ...                                   end='2020-01-01')
        >>> values = np.linspace(df['00060_Mean'].min(),
        ...                      df['00060_Mean'].max(), 10000)
        >>> exceedance_probabilities = hyswap.exceedance.calculate_exceedance_probability_from_values_multiple(  # noqa
        ...     values, df['00060_Mean'])
        >>> ax = hyswap.plots.plot_flow_duration_curve(
        ...     values, exceedance_probabilities,
        ...     title='Flow Duration Curve for USGS Site 06892350')
        >>> plt.tight_layout()
        >>> plt.show()
    """
    # avoid a shared mutable default for the scatter keyword arguments
    if scatter_kwargs is None:
        scatter_kwargs = {}
    # Create axes if not provided
    if ax is None:
        _, ax = plt.subplots()
    # do plotting; convert to an array first so that a plain list is scaled
    # element-wise instead of being repeated 100 times
    ax.plot(np.asarray(exceedance_probabilities) * 100, values, **kwargs)
    if (observations is not None) and (observation_probabilities is not None):
        ax.scatter(np.asarray(observation_probabilities) * 100, observations,
                   **scatter_kwargs)
    ax.set_xlabel(xlab)
    ax.set_ylabel(ylab)
    ax.set_title(title)
    # set log scale for y axis
    ax.set_yscale('log')
    # set limits for axes
    ax.set_xlim(0.1, 99.9)
    # set ticks for axes
    # always use same ticks for x-axis
    ax.set_xticks([0.1, 5, 10, 25, 50, 75, 90, 95, 99.9])
    ax.set_xticklabels([
        '0.1', '5', '10', '25', '50', '75', '90', '95', '99.9'])
    # get the automatically chosen y-axis ticks
    yticks = ax.get_yticks()
    # get logs for min/max values rounded to next lowest/highest tick
    min_vals = np.log10(yticks[yticks <= np.min(values)])
    if len(min_vals) > 0:
        min_tick = min_vals[-1]
    else:
        # no automatic tick at or below the smallest value: start at 0.1
        min_tick = -1.0
    max_tick = np.log10(yticks[yticks >= np.max(values)][0])
    # one tick per power of ten between the bounding ticks
    yticks = list(10**np.arange(min_tick, max_tick+1))
    # comma-separated integers for ticks >= 1; ticks below 1 keep their
    # decimal form (int() would otherwise label e.g. 0.1 as '0')
    yticklabels = [f'{int(y):,}' if y >= 1 else f'{y:g}' for y in yticks]
    ax.set_yticks(yticks, labels=yticklabels)
    ax.set_ylim(np.min(yticks), np.max(yticks))
    # add grid lines
    if grid:
        ax.grid(which='both', axis='both', alpha=0.5)
    # return the axes
    return ax




[docs]
def plot_raster_hydrograph(df_formatted, ax=None,
                           title='Raster Hydrograph',
                           xlab='Month', ylab='Year',
                           cbarlab='Discharge, ft3/s',
                           **kwargs):
    """Plot a raster hydrograph.

    Raster hydrographs are pixel-based plots for visualizing and identifying
    variations and changes in large multidimensional data sets. Originally
    developed by Keim (2000), they were first applied in hydrology by
    Koehler (2004) as a means of highlighting inter-annual and intra-annual
    changes in streamflow. The raster hydrographs in hyswap, like those
    developed by Koehler, depict years on the y-axis and days along the
    x-axis. Users can choose to plot streamflow (actual values or log values),
    streamflow percentile, or streamflow class (from 1, for low flow, to 7
    for high flow), for Daily, 7-Day, 14-Day, and 28-Day streamflow. For a
    more comprehensive description of raster hydrographs, see Strandhagen
    et al. (2006).

    References:
    Keim, D.A. 2000. Designing pixel-oriented visualization techniques:
    theory and applications. IEEE Transactions on Visualization and
    Computer Graphics, 6(1), 59-78.

    Koehler, R. 2004. Raster Based Analysis and Visualization of Hydrologic
    Time Series. Ph.D  dissertation, University of Arizona. Tucson, AZ, 189 p.

    `Strandhagen, E., Marcus, W.A., and Meacham, J.E. 2006. Views of the
    rivers: representing streamflow of the greater Yellowstone ecosystem.
    Cartographic Perspectives, no. 55, 54-29.`__

    Parameters
    ----------
    df_formatted : pandas.DataFrame
        Formatted dataframe containing the raster hydrograph data. The
        index supplies the y-axis (year) labels and the column labels are
        parsed as 'MM-DD' strings to place the month ticks.
    ax : matplotlib.axes.Axes, optional
        Axes to plot on. If not provided, a new figure and axes will be
        created.
    title : str, optional
        Title for the plot. If not provided, the default title will be
        'Raster Hydrograph'.
    xlab : str, optional
        Label for the x-axis. If not provided, the default label will be
        'Month'.
    ylab : str, optional
        Label for the y-axis. If not provided, the default label will be
        'Year'.
    cbarlab : str, optional
        Label for the colorbar. If not provided, the default label will be
        'Discharge, ft3/s'.
    **kwargs
        Keyword arguments passed to :meth:`matplotlib.axes.Axes.imshow`.
        'cmap', 'aspect', 'interpolation', 'vmin', 'vmax' and 'norm' are
        given defaults here when not supplied.

    Returns
    -------
    matplotlib.axes.Axes
        Axes object containing the plot.

    Examples
    --------
    Fetch some data from NWIS, format it for a raster hydrograph plot and then
    make the raster hydrograph plot.

    .. plot::
        :include-source:

        >>> df, _ = dataretrieval.nwis.get_dv(site='09380000',
        ...                                   parameterCd='00060',
        ...                                   start='1960-01-01',
        ...                                   end='1970-12-31')
        >>> df_rh = hyswap.rasterhydrograph.format_data(df, '00060_Mean')
        >>> fig, ax = plt.subplots(figsize=(6, 6))
        >>> ax = hyswap.plots.plot_raster_hydrograph(
        ...     df_rh, ax=ax, title='Raster Hydrograph for USGS Site 09380000')
        >>> plt.tight_layout()
        >>> plt.show()
    """
    # Create axes if not provided
    if ax is None:
        _, ax = plt.subplots()
    # define min/max powers of ten bounding the data; the lower bound is
    # never allowed below 10**0
    data = df_formatted.to_numpy()
    min_10 = np.nanmax([np.floor(np.log10(np.nanmin(data))), 0])
    max_10 = np.ceil(np.log10(np.nanmax(data)))
    # pop some kwargs
    cmap = kwargs.pop('cmap', 'YlGnBu')
    aspect = kwargs.pop('aspect', 'auto')
    interpolation = kwargs.pop('interpolation', 'none')
    vmin = kwargs.pop('vmin', int(10**min_10))
    vmax = kwargs.pop('vmax', int(10**max_10))
    norm = kwargs.pop('norm', matplotlib.colors.LogNorm(vmin=vmin, vmax=vmax))
    # do plotting
    img = ax.imshow(df_formatted, aspect=aspect, cmap=cmap,
                    interpolation=interpolation, norm=norm, **kwargs)
    # set labels
    ax.set_xlabel(xlab)
    ax.set_ylabel(ylab)
    ax.set_title(title)
    # add colorbar
    cbar = plt.colorbar(img, ax=ax)
    # set colorbar ticks, dropping the first and last automatic ticks
    cticks = cbar.ax.get_yticks()
    cbar.ax.set_yticks(cticks[1:-1],
                       labels=[f'{int(v):,}' for v in cticks[1:-1]])
    # set colorbar label
    cbar.set_label(cbarlab)
    # cbar height to be same as axes
    cbar.ax.set_aspect('auto')
    # set yticks: unlabeled minor ticks on row edges, labeled major ticks
    # on row centers
    n_rows = len(df_formatted.index)
    ax.set_yticks(np.arange(-0.5, n_rows), [], minor=True)
    ax.set_yticks(np.arange(n_rows), df_formatted.index)
    # figure out how many labels to show - for example; every 4th label
    # dividing the number of y values by 20 seems to give a good multiple
    # for this plot size
    ylabels = ax.get_yaxis().get_ticklabels()
    show_label_multiple = len(ylabels) // 20
    # if there were less than 20 labels, you don't need to hide any
    # if there are more, hide all the extra labels so they don't overlap
    if show_label_multiple > 0:
        for i, label in enumerate(ylabels):
            if i % show_label_multiple != 0:
                label.set_visible(False)
    # parse the 'MM-DD' column labels once
    month_day_pairs = [col.split('-') for col in df_formatted.columns.values]
    months = [int(pair[0]) for pair in month_day_pairs]
    days = [int(pair[1]) for pair in month_day_pairs]
    # set unlabeled major xticks where the month changes
    month_transitions = np.where(np.diff(months) != 0)[0]
    ax.set_xticks([0] + list(month_transitions),
                  labels=[], minor=False)
    # set xticklabels to be month name at middle (15th) of each month; the
    # names are looked up from the tick positions themselves so the number
    # of labels always matches the number of ticks
    midway_pts = np.where(np.array(days) == 15)[0]
    month_names = [calendar.month_abbr[months[i]] for i in midway_pts]
    ax.set_xticks(midway_pts, labels=month_names, minor=True)
    # make minor ticks invisible
    ax.tick_params(which='minor', length=0)
    # return axes
    return ax




[docs]
def plot_duration_hydrograph(percentiles_by_day, df, data_column_name,
                             date_column_name=None,
                             pct_list=None,
                             data_label=None, ax=None,
                             disclaimer=False,
                             title="Duration Hydrograph",
                             ylab="Discharge, ft3/s",
                             xlab="Month-Year",
                             color_palette=None,
                             **kwargs):
    """Plot a duration hydrograph.

    The duration hydrograph is a graphical presentation of recent daily
    streamflow (discharge) observed at an individual USGS streamgage,
    plotted over the long-term statistics of streamflow for each day of
    the year at that station. Typically, the statistics (based on quality
    assured and approved data) include the maximum discharge recorded during
    the period of record for each day of the year; the 90th percentile flow
    for each day; the interquartile range (75th percentile on top and 25th
    percentile on the bottom); the 10th percentile flow for each day; and the
    minimum discharge recorded for each day. This function, however, allows
    the user to plot a custom list of percentiles.

    Note: For some streams, flow statistics may have been computed from
    mixed regulated and unregulated flows; this can affect depictions
    of flow conditions.

    Parameters
    ----------
    percentiles_by_day : pandas.DataFrame
        Dataframe containing the percentiles by month-day.
        Note that this plotting function is incompatible
        with percentiles calculated by day-of-year. Must contain the
        columns 'min' and 'max' plus one 'pXX' column (zero-padded to two
        digits, e.g. 'p05') for every plotted percentile.
    df : pandas.DataFrame
        Dataframe containing the data to plot. It is not modified.
    data_column_name : str
        Name of column containing data to plot.
    date_column_name : str, optional
        Name of column containing date information. If None, the index of
        `df` will be used. Defaults to None.
    pct_list : list, optional
        List of integers corresponding to the percentile values to be
        plotted. Values of 0 and 100 are ignored as unbiased plotting position
        formulas do not assign values to 0 or 100th percentile.
        Defaults to 5, 10, 25, 75, 90, 95. The list is not modified.
    data_label : str, optional
        Label for the data to plot. If not provided, the name of the data
        column is used.
    ax : matplotlib.axes.Axes, optional
        Axes to plot on. If not provided, a new figure and axes will be
        created.
    disclaimer : bool, optional
        If True, displays the disclaimer 'For some streams, flow
        statistics may have been computed from mixed regulated
        and unregulated flows; this can affect depictions of flow
        conditions.' below the x-axis.
    title : str, optional
        Title for the plot. If not provided, the default title will be
        'Duration Hydrograph'.
    ylab : str, optional
        Label for the y-axis. If not provided, the default label will be
        'Discharge, ft3/s'.
    xlab : str, optional
        Label for the x-axis. If not provided, the default label will be
        'Month-Year'.
    color_palette : list, optional
        List of colors to use for the lines or a string describing one of
        two built-in palettes: 'BrownBlue' or 'Rainbow'. If not provided,
        the 'BrownBlue' palette will be used. The max number of colors
        in this list is seven.
    **kwargs
        Keyword arguments for the percentile bands drawn with
        :meth:`matplotlib.axes.Axes.fill_between`. Only 'alpha'
        (default 0.5) and 'zorder' (default -20) are currently used.

    Returns
    -------
    matplotlib.axes.Axes
        Axes object containing the plot.

    Raises
    ------
    ValueError
        If `pct_list` contains no percentile other than 0 and 100, or if
        `percentiles_by_day` lacks a column required for the plot.

    Examples
    --------
    Fetch some data from NWIS and make a streamflow duration hydrograph plot.

    .. plot::
        :include-source:

        >>> df, _ = dataretrieval.nwis.get_dv(site='06892350',
        ...                                   parameterCd='00060',
        ...                                   start='1900-01-01',
        ...                                   end='2022-12-31')
        >>> pct_by_day = hyswap.percentiles.calculate_variable_percentile_thresholds_by_day(  # noqa: E501
        ...     df, '00060_Mean')
        >>> df_2022 = df[df.index.year == 2022]
        >>> fig, ax = plt.subplots(figsize=(12, 6))
        >>> ax = hyswap.plots.plot_duration_hydrograph(
        ...     pct_by_day, df_2022, '00060_Mean',
        ...     data_label='2022 Daily Mean Discharge',
        ...     ax=ax, title='Duration Hydrograph for USGS Site 06892350')
        >>> plt.tight_layout()
        >>> plt.show()
    """
    # build a sorted private copy of the percentile list, ignoring the 0 and
    # 100 levels, so neither the default nor the caller's list is mutated
    requested = [5, 10, 25, 75, 90, 95] if pct_list is None else pct_list
    pct_list = sorted(pct for pct in requested if pct not in (0, 100))
    if not pct_list:
        raise ValueError('pct_list must contain at least one percentile '
                         'other than 0 and 100')
    # check that every column needed for the bands is present in the
    # percentile threshold data
    pct_columns = ['p' + str(pct).zfill(2) for pct in pct_list]
    missing = [col for col in ['min'] + pct_columns + ['max']
               if col not in percentiles_by_day.columns]
    if missing:
        raise ValueError('one or more percent values are not in provided '
                         'percentile threshold data: ' + ', '.join(missing))

    # Create axes if not provided
    if ax is None:
        _, ax = plt.subplots()
    # pop some kwargs
    alpha = kwargs.pop('alpha', 0.5)
    zorder = kwargs.pop('zorder', -20)
    if data_label is None:
        label = df[data_column_name].name
    else:
        label = data_label
    # Add disclaimer if True
    if disclaimer is True:
        txt = 'For some streams, flow statistics may have been computed from mixed \nregulated and unregulated flows; this can affect depictions of flow conditions.'  # noqa: E501
    else:
        txt = ''
    # get colors
    if color_palette is None or color_palette == 'BrownBlue':
        color_palette = ['#8f4f1f', '#dcb668', '#ebd6ab', '#e9e9e9', '#aacee0',
                         '#5699c0', '#292f6b']
    if color_palette == 'Rainbow':
        color_palette = ["#e37676", "#e8c285", "#dbf595", "#a1cc9f",
                         "#7bdbd2", "#7587bf", "#ad63ba"]
    # set the df index
    if date_column_name is not None:
        df = df.set_index(date_column_name)
    # assign() returns a new frame, so the caller's dataframe never gains
    # the helper 'month_day' column
    df = df.assign(month_day=df.index.strftime('%m-%d'))
    # Join percentiles with data on the month-day key
    df_combined = pd.merge(df, percentiles_by_day, left_on=df['month_day'], right_index=True, how='left')  # noqa: E501
    x_values = df_combined.index.values
    # plot the observed data on top of the percentile bands
    ax.plot(x_values, df[data_column_name], color='k', zorder=10, label=label)  # noqa: E501
    # plot the historic percentiles filling between each pair
    ax.fill_between(
        x_values,
        df_combined['min'].tolist(),
        df_combined[pct_columns[0]].tolist(),
        color=color_palette[0],
        alpha=alpha,
        linewidth=0,
        label="Min. - {}th Percentile".format(pct_list[0]),
        zorder=zorder
    )
    for i in range(1, len(pct_list)):
        ax.fill_between(
            x_values,
            df_combined[pct_columns[i - 1]].tolist(),
            df_combined[pct_columns[i]].tolist(),
            color=color_palette[i],
            alpha=alpha,
            linewidth=0,
            label="{}th - {}th Percentile".format(
                pct_list[i - 1], pct_list[i]),
            zorder=zorder
        )
    ax.fill_between(
        x_values,
        df_combined[pct_columns[-1]].tolist(),
        df_combined['max'].tolist(),
        color=color_palette[-1],
        alpha=alpha,
        linewidth=0,
        label="{}th Percentile - Max.".format(pct_list[-1]),
        zorder=zorder
    )
    # set labels
    ax.set_xlabel(xlab)
    ax.set_xlim(df_combined.index.min(), df_combined.index.max())
    # format the date axis on the axes being drawn (not whichever axes
    # pyplot currently considers active)
    ax.xaxis.set_major_formatter(matplotlib.dates.DateFormatter('%b-%Y'))
    for tick_label in ax.get_xticklabels():
        tick_label.set_horizontalalignment('left')
    # other labels
    ax.set_ylabel(ylab)
    ax.set_yscale("log")
    ax.set_title(title)
    # disclaimer
    ax.text(0, -0.18, txt, color='red', transform=ax.transAxes)
    # get y-axis ticks and convert to comma-separated strings, dropping the
    # first and last automatic ticks
    yticks = ax.get_yticks()
    yticklabels = [f'{float(y):,}' for y in yticks]
    ax.set_yticks(yticks[1:-1], labels=yticklabels[1:-1])
    # two column legend
    ax.legend(loc="best", ncol=2, title='Historical percentiles')
    # return axes
    return ax




[docs]
def plot_cumulative_hydrograph(df,
                               target_years,
                               data_column_name,
                               date_column_name=None,
                               year_type='calendar',
                               unit='acre-feet',
                               envelope_pct=None,
                               max_year=False, min_year=False,
                               ax=None,
                               disclaimer=False,
                               title="Cumulative Streamflow Hydrograph",
                               ylab="Cumulative discharge, acre-feet",
                               xlab="Month",
                               clip_leap_day=False,
                               **kwargs):
    """Plot a cumulative hydrograph.

    The cumulative-streamflow hydrograph is a graphical presentation of
    recent cumulative daily streamflow (discharge) observed at an
    individual USGS streamgage, plotted over the long-term statistics
    of streamflow for each day of the year at that station. Typically,
    the statistics, based on quality assured and approved data, include
    the maximum annual cumulative discharge recorded during the period
    of record; the mean-daily cumulative flow for each day; the minimum
    cumulative discharge recorded for each day.

    Note: For some streams, flow statistics may have been computed from
    mixed regulated and unregulated flows; this can affect depictions
    of flow conditions.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe containing the data to plot.
    target_years : int, or list
        Target year(s) to plot in black as the line. Can provide a single year
        as an integer, or a list of years.
    data_column_name : str
        Name of column containing data to calculate cumulative values for.
        Discharge data assumed to be in unit of ft3/s.
    date_column_name : str, optional
        Name of column containing date information. If None, the index of
        `df` will be used. Defaults to None.
    year_type : str, optional
        Type of year used to order the x-axis and passed on to the
        cumulative value calculation. 'water' starts the axis in October,
        'climate' starts it in April, and any other value (including the
        default 'calendar') starts it in January.
    unit : str, optional
        The unit the user wants to use to report cumulative flow. One of
        'acre-feet', 'cfs', 'cubic-meters', 'cubic-feet'. Assumes input
        data are in cubic feet per second (cfs).
    envelope_pct : list, optional
        List of percentiles to plot as the envelope. Default is [25, 75].
        If an empty list, [], then no envelope is plotted.
    max_year : bool, optional
        If True, plot the cumulative flow for the year with the maximum
        end of the year cumulative value as a dashed line. Default is False.
    min_year : bool, optional
        If True, plot the cumulative flow for the year with the minimum
        end of the year cumulative value as a dotted line. Default is False.
    ax : matplotlib.axes.Axes, optional
        Axes to plot on. If not provided, a new figure and axes will be
        created.
    disclaimer : bool, optional
        If True, displays the disclaimer 'For some streams, flow
        statistics may have been computed from mixed regulated
        and unregulated flows; this can affect depictions of flow
        conditions.' below the x-axis.
    title : str, optional
        Title for the plot. If not provided, the default title will be
        'Cumulative Streamflow Hydrograph'.
    ylab : str, optional
        Label for the y-axis. If not provided, the default label will be
        'Cumulative discharge, acre-feet'.
    xlab : str, optional
        Label for the x-axis. If not provided, the default label will be
        'Month'.
    clip_leap_day : bool, optional
        If True, removes leap day '02-29' from the percentiles dataset
        used to create the plot. Defaults to False.
    **kwargs
        Keyword arguments for the envelope drawn with
        :meth:`matplotlib.axes.Axes.fill_between`. Only 'alpha'
        (default 0.5), 'zorder' (default -20) and 'color'
        (default 'xkcd:bright green') are currently used.

    Returns
    -------
    matplotlib.axes.Axes
        Axes object containing the plot.

    Examples
    --------
    Fetch some data from NWIS and make a cumulative hydrograph plot.

    .. plot::
        :include-source:

        >>> df, _ = dataretrieval.nwis.get_dv(site='06892350',
        ...                                   parameterCd='00060',
        ...                                   start='1900-01-01',
        ...                                   end='2021-12-31')
        >>> fig, ax = plt.subplots(figsize=(8, 5))
        >>> ax = hyswap.plots.plot_cumulative_hydrograph(
        ...     df,
        ...     data_column_name='00060_Mean',
        ...     target_years=2020, ax=ax,
        ...     title='2020 Cumulative Streamflow Hydrograph, site 06892350')
        >>> plt.tight_layout()
        >>> plt.show()
    """
    # avoid a shared mutable default for the envelope percentiles
    if envelope_pct is None:
        envelope_pct = [25, 75]
    # Create axes if not provided
    if ax is None:
        _, ax = plt.subplots()
    # calculate cumulative values
    cumulative_df = calculate_daily_cumulative_values(
        df=df,
        data_column_name=data_column_name,
        date_column_name=date_column_name,
        year_type=year_type,
        unit=unit,
        clip_leap_day=clip_leap_day
        )
    # calculations for percentiles by day
    pdf = calculate_variable_percentile_thresholds_by_day(
        cumulative_df, data_column_name='cumulative',
        clip_leap_day=clip_leap_day,
        percentiles=envelope_pct)
    # pop some kwargs
    alpha = kwargs.pop('alpha', 0.5)
    zorder = kwargs.pop('zorder', -20)
    color = kwargs.pop('color', 'xkcd:bright green')
    # Add disclaimer if True
    if disclaimer is True:
        txt = 'For some streams, flow statistics may have been computed from mixed \nregulated and unregulated flows; this can affect depictions of flow conditions.'  # noqa: E501
    else:
        txt = ''
    # Incorporate leap year decision into x-axis labels: 1904 is a leap
    # year (keeps '02-29'), 1901 is not
    if clip_leap_day:
        year = 1901
    else:
        year = 1904
    # Create x-axis scale and labels
    if year_type == 'water':
        month_day_order = pd.date_range(start=f'{year-1}-10-01', end=f'{year}-09-30').strftime('%m-%d')  # noqa: E501
        month_begin_ticks = [f"{str(month).zfill(2)}-01" for month in range(10, 13)] + [f"{str(month).zfill(2)}-01" for month in range(1, 10)]  # noqa: E501
        month_label_ticks = [f"{str(month).zfill(2)}-15" for month in range(10, 13)] + [f"{str(month).zfill(2)}-15" for month in range(1, 10)]  # noqa: E501
        month_labels = ['Oct', 'Nov', 'Dec', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep']  # noqa: E501
    elif year_type == 'climate':
        month_day_order = pd.date_range(start=f'{year-1}-04-01', end=f'{year}-03-31').strftime('%m-%d')  # noqa: E501
        month_begin_ticks = [f"{str(month).zfill(2)}-01" for month in range(4, 13)] + [f"{str(month).zfill(2)}-01" for month in range(1, 4)]  # noqa: E501
        month_label_ticks = [f"{str(month).zfill(2)}-15" for month in range(4, 13)] + [f"{str(month).zfill(2)}-15" for month in range(1, 4)]  # noqa: E501
        month_labels = ['Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec', 'Jan', 'Feb', 'Mar']  # noqa: E501
    else:
        month_day_order = pd.date_range(start=f'{year}-01-01', end=f'{year}-12-31').strftime('%m-%d')  # noqa: E501
        month_begin_ticks = [f"{str(month).zfill(2)}-01" for month in range(1, 13)]  # noqa: E501
        month_label_ticks = [f"{str(month).zfill(2)}-15" for month in range(1, 13)]  # noqa: E501
        month_labels = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']  # noqa: E501
    # Reorder percentile thresholds by year_type
    pdf_reordered = pdf.reindex(month_day_order)
    # plot percentile envelope
    if len(envelope_pct) == 2:
        ax.fill_between(pdf_reordered.index,
                        list(pdf_reordered["p" + str(envelope_pct[0]).zfill(2)].values),  # noqa: E501
                        list(pdf_reordered["p" + str(envelope_pct[1]).zfill(2)].values),  # noqa: E501
                        color=color, alpha=alpha,
                        label=f"{envelope_pct[0]}th - {envelope_pct[1]}th " +
                        "Percentile Envelope",
                        zorder=zorder)
    # plot min/max if desired; years are ranked by the largest cumulative
    # value each one reaches, so the minimum year is the one with the lowest
    # total rather than the one with the smallest single-day value
    # NOTE(review): a partially observed year would rank lowest here -
    # confirm how calculate_daily_cumulative_values treats incomplete years
    if max_year or min_year:
        year_totals = cumulative_df.groupby('index_year')['cumulative'].max()
    if max_year:
        max_y = year_totals.idxmax()
        max_year_df = cumulative_df[
            cumulative_df['index_year'] == max_y]
        ax.plot(
            max_year_df['index_month_day'],
            max_year_df['cumulative'], color='k',
            alpha=0.5, linestyle='--',
            label=f"Highest observed cumulative flow ({max_y})"
            )
    if min_year:
        min_y = year_totals.idxmin()
        min_year_df = cumulative_df[
            cumulative_df['index_year'] == min_y]
        ax.plot(
            min_year_df['index_month_day'],
            min_year_df['cumulative'], color='k',
            alpha=0.5, linestyle=':',
            label=f"Lowest observed cumulative flow ({min_y})"
            )
    # handle target years: black first, then the 'tab20' colors, cycling
    # if more years are requested than colors are available
    col_targets = ['k'] + list(matplotlib.colormaps['tab20'].colors)
    if isinstance(target_years, (int, np.integer)):
        target_years = [target_years]  # make int a list
    for i, target_year in enumerate(target_years):
        # get data from target year
        target_year_data = cumulative_df.loc[
            cumulative_df['index_year'] == target_year]
        # plot target year
        ax.plot(target_year_data['index_month_day'],
                target_year_data['cumulative'],
                color=col_targets[i % len(col_targets)],
                label=f"Observed cumulative flow ({target_year})")
    # Get axis labels and ticks in order
    ax.set_xlim(0, 365)
    ax.set_xlabel(xlab)
    ax.set_ylabel(ylab)
    ax.set_title(title)
    # unlabeled major ticks at month starts, set on the axes being drawn
    # (not whichever axes pyplot currently considers active)
    ax.set_xticks(month_begin_ticks, labels=[])
    ax.set_xticks(month_label_ticks, labels=month_labels, minor=True)
    # make minor x-ticks invisible
    ax.tick_params(axis='x', which='minor', length=0)
    # get y-axis ticks and convert to comma-separated strings
    yticks = ax.get_yticks()
    yticklabels = [f'{int(y):,}' for y in yticks]
    ax.set_yticks(yticks[1:], labels=yticklabels[1:])
    ax.set_ylim(0, yticks.max())
    # disclaimer
    ax.text(0, -0.18, txt, color='red', transform=ax.transAxes)
    # legend
    ax.legend(loc="best")
    # return
    return ax




[docs]
def plot_hydrograph(df, data_column_name,
                    date_column_name=None,
                    start_date=None,
                    end_date=None,
                    ax=None,
                    title='Streamflow Hydrograph',
                    ylab='Discharge, ft3/s',
                    xlab='Date',
                    yscale='log',
                    **kwargs):
    """Draw a basic hydrograph: one data column plotted against its dates.

    The data are ordered by date, optionally restricted to a date window,
    and drawn as a single line on the given (or a newly created) axes.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data for the line.
    data_column_name : str
        Column of `df` holding the values drawn on the y-axis.
    date_column_name : str, optional
        Column of `df` holding the dates. When None (default), the index
        of `df` supplies the dates.
    start_date : str, optional
        First date to include. When None, the plot starts at the earliest
        date present.
    end_date : str, optional
        Last date to include. When None, the plot ends at the latest date
        present.
    ax : matplotlib.axes.Axes, optional
        Axes to draw on. A new figure and axes are created when None.
    title : str, optional
        Plot title. Default is 'Streamflow Hydrograph'.
    ylab : str, optional
        Label of the y-axis. Default is 'Discharge, ft3/s'.
    xlab : str, optional
        Label of the x-axis. Default is 'Date'.
    yscale : str, optional
        Scale of the y-axis, 'linear' or 'log'. Default is 'log'.
    **kwargs
        Extra keyword arguments forwarded to
        :meth:`matplotlib.axes.Axes.plot`.

    Returns
    -------
    matplotlib.axes.Axes
        The axes holding the hydrograph.

    Examples
    --------
    Fetch data for a USGS gage and plot the hydrograph.

    .. plot::
        :include-source:

        >>> siteno = '06892350'
        >>> df, _ = dataretrieval.nwis.get_dv(site=siteno,
        ...                                   parameterCd='00060',
        ...                                   start='2019-01-01',
        ...                                   end='2020-01-01')
        >>> ax = hyswap.plots.plot_hydrograph(
        ...     df, data_column_name='00060_Mean',
        ...     title=f'2019 Hydrograph for Station {siteno}',
        ...     ylab='Discharge, ft3/s',
        ...     xlab='Date', yscale='log')
        >>> plt.tight_layout()
        >>> plt.show()
    """
    # fall back to a fresh figure when the caller supplies no axes
    if ax is None:
        ax = plt.subplots()[1]
    # move the dates into the index when they live in a regular column
    dated = df if date_column_name is None else df.set_index(date_column_name)
    # chronological order is required for the label-based slicing below
    window = dated.sort_index()
    # trim the lower bound first, then the upper bound, when requested
    if start_date is not None:
        window = window.loc[start_date:]
    if end_date is not None:
        window = window.loc[:end_date]
    # draw the line and decorate the axes
    ax.plot(window.index, window[data_column_name], **kwargs)
    ax.set_title(title)
    ax.set_xlabel(xlab)
    ax.set_ylabel(ylab)
    ax.set_yscale(yscale)
    # relabel the y ticks as comma-separated integers, leaving out the
    # outermost tick at each end
    tick_values = ax.get_yticks()
    tick_text = ['{:,}'.format(int(value)) for value in tick_values]
    ax.set_yticks(tick_values[1:-1], labels=tick_text[1:-1])
    return ax




[docs]
def plot_similarity_heatmap(sim_matrix, n_obs=None, cmap='cividis',
                            show_values=False, ax=None,
                            title='Similarity Matrix'):
    """Plot a similarity matrix heatmap.

    The heatmap shows the results of a similarity (e.g. correlation) matrix
    between measurements at two or more sites. The color scale spans the
    minimum to the maximum value found in the matrix. With the default
    colormap, lighter colors denote larger matrix values (i.e. higher
    correlation when a correlation matrix is plotted), while darker colors
    denote smaller values.

    Parameters
    ----------
    sim_matrix : pandas.DataFrame
        Similarity matrix to plot. Must be square. Can be the output of
        :meth:`hyswap.similarity.calculate_correlations`,
        :meth:`hyswap.similarity.calculate_wasserstein_distance`,
        :meth:`hyswap.similarity.calculate_energy_distance`, or any other
        square matrix represented as a pandas DataFrame. Column labels are
        used as x tick labels and index labels as y tick labels.

    n_obs : int, optional
        Number of observations to report in the title. If provided,
        ``' (n=<n_obs>)'`` is appended to `title`. Default is None, in which
        case the title is used unchanged.

    cmap : str, optional
        Colormap to use. Default is 'cividis'.

    show_values : bool, optional
        Whether to show the values of the matrix on the plot. Default is False.

    ax : matplotlib.axes.Axes, optional
        Axes object to plot on. If not provided, a new figure and axes will be
        created.

    title : str, optional
        Title for the plot. Default is 'Similarity Matrix'.

    Returns
    -------
    matplotlib.axes.Axes
        Axes object containing the plot.

    Examples
    --------
    Calculate the correlation matrix between two sites and plot it as a
    heatmap.

    .. plot::
        :include-source:

        >>> df, _ = dataretrieval.nwis.get_dv(site='06892350',
        ...                                   parameterCd='00060',
        ...                                   start='2010-01-01',
        ...                                   end='2021-12-31')
        >>> df2, _ = dataretrieval.nwis.get_dv(site='06892000',
        ...                                    parameterCd='00060',
        ...                                    start='2010-01-01',
        ...                                    end='2021-12-31')
        >>> corr_matrix, n_obs = hyswap.similarity.calculate_correlations(
        ...     [df, df2], '00060_Mean')
        >>> ax = hyswap.plots.plot_similarity_heatmap(corr_matrix,
        ...                                           show_values=True)
        >>> plt.show()
    """
    # Create axes if not provided
    if ax is None:
        _, ax = plt.subplots()
    # color limits span the full range of the matrix (computed once and
    # reused for both the image and the text-color threshold)
    vmin = sim_matrix.min().min()
    vmax = sim_matrix.max().max()
    im = ax.imshow(sim_matrix, cmap=cmap, vmin=vmin, vmax=vmax)
    # show values if desired
    if show_values:
        # cells below the midpoint of the color range get white text,
        # all other cells get black text
        midpoint = (vmax - vmin) / 2 + vmin
        n_rows, n_cols = sim_matrix.shape
        for i in range(n_rows):
            for j in range(n_cols):
                value = sim_matrix.iloc[i, j]
                text_color = "w" if value < midpoint else "k"
                # imshow puts columns along x and rows along y
                ax.text(j, i, f'{value:.2f}',
                        ha="center", va="center", color=text_color)
    # set labels
    if n_obs is not None:
        title = f'{title} (n={n_obs})'
    ax.set_title(title)
    ax.set_xlabel('Site')
    ax.set_ylabel('Site')
    # set ticks at center of each cell: columns run along the x-axis and
    # rows along the y-axis
    ax.set_xticks(np.arange(sim_matrix.shape[1]))
    ax.set_yticks(np.arange(sim_matrix.shape[0]))
    # set tick labels; rotate the x labels on *this* axes rather than on
    # whichever axes pyplot currently considers active
    ax.set_xticklabels(sim_matrix.columns, rotation=45, ha='right')
    ax.set_yticklabels(sim_matrix.index)
    # add colorbar to the figure that owns `ax`, which may not be the
    # current pyplot figure when the caller supplies their own axes
    ax.figure.colorbar(im, ax=ax)
    # return
    return ax