Source code for hyswap.plots

"""Functions for plotting."""
import calendar
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from hyswap.percentiles import calculate_variable_percentile_thresholds_by_day
from hyswap.cumulative import calculate_daily_cumulative_values


def plot_flow_duration_curve(
        values, exceedance_probabilities, observations=None,
        observation_probabilities=None, ax=None, title='Flow Duration Curve',
        xlab='Exceedance Probability\n' +
        '(Percentage of time indicated value was equaled or exceeded)',
        ylab='Discharge, ft3/s', grid=True, scatter_kwargs={}, **kwargs):
    """
    Plot a flow duration curve.

    Flow duration curves are cumulative frequency curves that show the
    percentage of time measured discharge values are equaled or exceeded by
    all other discharge values in the dataset.

    Parameters
    ----------
    values : array-like
        Values to plot along y-axis.

    exceedance_probabilities : array-like
        Exceedance probabilities (as fractions) for each value, likely
        calculated from a function like
        :obj:`hyswap.exceedance.calculate_exceedance_probability_from_values_multiple`.
        They are multiplied by 100 for display; lists are accepted.

    observations : list, numpy.ndarray, optional
        List, numpy array or list-able set of flow observations.
        Optional, if not provided the observations are not plotted.

    observation_probabilities : list, numpy.ndarray, optional
        Exceedance probabilities corresponding to each observation, likely
        calculated from a function like
        :obj:`hyswap.exceedance.calculate_exceedance_probability_from_values_multiple`.
        Optional, if not provided observations are not plotted.

    ax : matplotlib.axes.Axes, optional
        Axes to plot on. If not provided, a new figure and axes will be
        created.

    title : str, optional
        Title for the plot. If not provided, the default title will be
        'Flow Duration Curve'.

    xlab : str, optional
        Label for the x-axis. If not provided, a default label will be used.

    ylab : str, optional
        Label for the y-axis. If not provided, a default label will be used.

    grid : bool, optional
        Whether to show grid lines on the plot. Default is True.

    scatter_kwargs : dict
        Dictionary containing keyword arguments to pass to the observations
        plotting method, :meth:`matplotlib.axes.Axes.scatter`.

    **kwargs
        Keyword arguments passed to :meth:`matplotlib.axes.Axes.plot`.

    Returns
    -------
    matplotlib.axes.Axes
        Axes object containing the plot.

    Examples
    --------
    Fetch some data from NWIS, calculate the exceedance probabilities and then
    make the flow duration curve.

    .. plot::
        :include-source:

        >>> df, _ = dataretrieval.nwis.get_dv(site='06892350',
        ...                                   parameterCd='00060',
        ...                                   start='1776-07-04',
        ...                                   end='2020-01-01')
        >>> values = np.linspace(df['00060_Mean'].min(),
        ...                      df['00060_Mean'].max(), 10000)
        >>> exceedance_probabilities = hyswap.exceedance.calculate_exceedance_probability_from_values_multiple(  # noqa
        ...     values, df['00060_Mean'])
        >>> ax = hyswap.plots.plot_flow_duration_curve(
        ...     values, exceedance_probabilities,
        ...     title='Flow Duration Curve for USGS Site 06892350')
        >>> plt.tight_layout()
        >>> plt.show()
    """
    # Create axes if not provided
    if ax is None:
        _, ax = plt.subplots()
    # Convert to an array before scaling: `list * 100` would repeat the list
    # rather than convert fractions to percentages.
    exceedance_pct = np.asarray(exceedance_probabilities) * 100
    ax.plot(exceedance_pct, values, **kwargs)
    # observations are only drawn when both they and their probabilities
    # are supplied
    if (observations is not None) and (observation_probabilities is not None):
        ax.scatter(np.asarray(observation_probabilities) * 100,
                   observations, **scatter_kwargs)
    ax.set_xlabel(xlab)
    ax.set_ylabel(ylab)
    ax.set_title(title)
    # discharge (y-axis) is shown on a log scale
    ax.set_yscale('log')
    # x-axis always spans the same range with the same ticks
    ax.set_xlim(0.1, 99.9)
    ax.set_xticks([0.1, 5, 10, 25, 50, 75, 90, 95, 99.9])
    ax.set_xticklabels([
        '0.1', '5', '10', '25', '50', '75', '90', '95', '99.9'])
    # Use the automatically generated log ticks to find the decades that
    # bracket the data: the highest tick at or below the minimum value and
    # the lowest tick at or above the maximum value.
    auto_ticks = ax.get_yticks()
    lower_decades = np.log10(auto_ticks[auto_ticks <= np.min(values)])
    if len(lower_decades) > 0:
        min_tick = lower_decades[-1]
    else:
        # no tick below the data; fall back to 0.1
        min_tick = -1.0
    upper_ticks = auto_ticks[auto_ticks >= np.max(values)]
    if len(upper_ticks) > 0:
        max_tick = np.log10(upper_ticks[0])
    else:
        # no tick above the data; round the maximum up to the next decade
        max_tick = np.ceil(np.log10(np.max(values)))
    # one tick per decade between the bracketing decades
    yticks = list(10**np.arange(min_tick, max_tick+1))
    # Comma-separated integers for whole numbers; sub-unit ticks such as 0.1
    # keep their fractional value instead of being truncated to "0".
    yticklabels = [
        f'{int(y):,}' if float(y).is_integer() else f'{y:,g}'
        for y in yticks]
    ax.set_yticks(yticks, labels=yticklabels)
    ax.set_ylim(np.min(yticks), np.max(yticks))
    # add grid lines
    if grid:
        ax.grid(which='both', axis='both', alpha=0.5)
    # return the axes
    return ax
def plot_raster_hydrograph(df_formatted, ax=None,
                           title='Raster Hydrograph',
                           xlab='Month', ylab='Year',
                           cbarlab='Discharge, ft3/s',
                           **kwargs):
    """Plot a raster hydrograph.

    Raster hydrographs are pixel-based plots for visualizing and identifying
    variations and changes in large multidimensional data sets. Originally
    developed by Keim (2000), they were first applied in hydrology by Koehler
    (2004) as a means of highlighting inter-annual and intra-annual changes in
    streamflow. The raster hydrographs in hyswap, like those developed by
    Koehler, depict years on the y-axis and days along the x-axis. Users can
    choose to plot streamflow (actual values or log values), streamflow
    percentile, or streamflow class (from 1, for low flow, to 7 for high
    flow), for Daily, 7-Day, 14-Day, and 28-Day streamflow. For a more
    comprehensive description of raster hydrographs, see Strandhagen et al.
    (2006).

    References:

    Keim, D.A. 2000. Designing pixel-oriented visualization techniques: theory
    and applications. IEEE Transactions on Visualization and Computer
    Graphics, 6(1), 59-78.

    Koehler, R. 2004. Raster Based Analysis and Visualization of Hydrologic
    Time Series. Ph.D dissertation, University of Arizona. Tucson, AZ, 189 p.

    Strandhagen, E., Marcus, W.A., and Meacham, J.E. 2006. Views of the
    rivers: representing streamflow of the greater Yellowstone ecosystem.
    Cartographic Perspectives, no. 55, 54-29.

    Parameters
    ----------
    df_formatted : pandas.DataFrame
        Formatted dataframe containing the raster hydrograph data. Column
        labels are parsed as 'MM-DD' strings and the index supplies the
        y-axis (year) labels.

    ax : matplotlib.axes.Axes, optional
        Axes to plot on. If not provided, a new figure and axes will be
        created.

    title : str, optional
        Title for the plot. If not provided, the default title will be
        'Raster Hydrograph'.

    xlab : str, optional
        Label for the x-axis. If not provided, the default label will be
        'Month'.

    ylab : str, optional
        Label for the y-axis. If not provided, the default label will be
        'Year'.

    cbarlab : str, optional
        Label for the colorbar. If not provided, the default label will be
        'Discharge, ft3/s'.

    **kwargs
        Keyword arguments passed to :meth:`matplotlib.axes.Axes.imshow`.
        'cmap', 'aspect', 'interpolation', 'vmin', 'vmax' and 'norm' have
        plot-specific defaults ('vmin'/'vmax' only feed the default
        logarithmic norm).

    Returns
    -------
    matplotlib.axes.Axes
        Axes object containing the plot.

    Examples
    --------
    Fetch some data from NWIS, format it for a raster hydrograph plot and then
    make the raster hydrograph plot.

    .. plot::
        :include-source:

        >>> df, _ = dataretrieval.nwis.get_dv(site='09380000',
        ...                                   parameterCd='00060',
        ...                                   start='1960-01-01',
        ...                                   end='1970-12-31')
        >>> df_rh = hyswap.rasterhydrograph.format_data(df, '00060_Mean')
        >>> fig, ax = plt.subplots(figsize=(6, 6))
        >>> ax = hyswap.plots.plot_raster_hydrograph(
        ...     df_rh, ax=ax, title='Raster Hydrograph for USGS Site 09380000')
        >>> plt.tight_layout()
        >>> plt.show()
    """
    # Create axes if not provided
    if ax is None:
        _, ax = plt.subplots()
    # Default color limits: the powers of ten bracketing the data, with the
    # lower exponent clamped at 0 (i.e. vmin is never below 1).
    raster = df_formatted.to_numpy()
    min_10 = np.nanmax([np.floor(np.log10(np.nanmin(raster))), 0])
    max_10 = np.ceil(np.log10(np.nanmax(raster)))
    # pop the kwargs that have plot-specific defaults
    cmap = kwargs.pop('cmap', 'YlGnBu')
    aspect = kwargs.pop('aspect', 'auto')
    interpolation = kwargs.pop('interpolation', 'none')
    vmin = kwargs.pop('vmin', int(10**min_10))
    vmax = kwargs.pop('vmax', int(10**max_10))
    norm = kwargs.pop('norm',
                      matplotlib.colors.LogNorm(vmin=vmin, vmax=vmax))
    # do plotting
    img = ax.imshow(df_formatted, aspect=aspect, cmap=cmap,
                    interpolation=interpolation, norm=norm, **kwargs)
    # set labels
    ax.set_xlabel(xlab)
    ax.set_ylabel(ylab)
    ax.set_title(title)
    # Attach the colorbar to the figure that owns `ax`; pyplot's "current
    # figure" may be a different one when the caller supplies the axes.
    cbar = ax.figure.colorbar(img, ax=ax)
    # label the interior colorbar ticks with comma-separated integers
    cticks = cbar.ax.get_yticks()
    cbar.ax.set_yticks(cticks[1:-1],
                       labels=[f'{int(v):,}' for v in cticks[1:-1]])
    cbar.set_label(cbarlab)
    # cbar height to be same as axes
    cbar.ax.set_aspect('auto')
    # y-axis: unlabeled minor ticks on the row edges, labeled major ticks on
    # the row centers
    n_rows = len(df_formatted.index)
    ax.set_yticks(np.arange(-0.5, n_rows), [], minor=True)
    ax.set_yticks(np.arange(n_rows), df_formatted.index)
    # Thin the y labels so roughly 20 remain visible: with fewer than 20 rows
    # nothing is hidden, otherwise only every k-th label is shown.
    ylabels = ax.get_yaxis().get_ticklabels()
    show_label_multiple = len(ylabels) // 20
    if show_label_multiple > 0:
        for i, label in enumerate(ylabels):
            if i % show_label_multiple != 0:
                label.set_visible(False)
    # x-axis: unlabeled major ticks at the first column and wherever the
    # month changes between adjacent columns
    xvals = df_formatted.columns.values
    months = [int(i.split('-')[0]) for i in xvals]
    month_transitions = np.where(np.diff(months) != 0)[0]
    ax.set_xticks([0] + list(month_transitions), labels=[], minor=False)
    # month names (in order of first appearance) on minor ticks placed at
    # the 15th of each month
    unique_months = list(dict.fromkeys(months))
    month_names = [calendar.month_abbr[m] for m in unique_months]
    days = [int(i.split('-')[1]) for i in xvals]
    midway_pts = np.where(np.array(days) == 15)[0]
    ax.set_xticks(midway_pts, labels=month_names, minor=True)
    # make minor ticks invisible
    ax.tick_params(which='minor', length=0)
    # return axes
    return ax
def plot_duration_hydrograph(percentiles_by_day, df, data_column_name,
                             date_column_name=None,
                             pct_list=[5, 10, 25, 75, 90, 95],
                             data_label=None, ax=None,
                             disclaimer=False,
                             title="Duration Hydrograph",
                             ylab="Discharge, ft3/s",
                             xlab="Month-Year", color_palette=None,
                             **kwargs):
    """Plot a duration hydrograph.

    The duration hydrograph is a graphical presentation of recent daily
    streamflow (discharge) observed at an individual USGS streamgage, plotted
    over the long-term statistics of streamflow for each day of the year at
    that station. Typically, the statistics (based on quality assured and
    approved data) include the maximum discharge recorded during the period
    of record for each day of the year; the 90th percentile flow for each
    day; the interquartile range (75th percentile on top and 25th percentile
    on the bottom); the 10th percentile flow for each day; and the minimum
    discharge recorded for each day. This function, however, allows the user
    to plot a custom list of percentiles.

    Note: For some streams, flow statistics may have been computed from mixed
    regulated and unregulated flows; this can affect depictions of flow
    conditions.

    Parameters
    ----------
    percentiles_by_day : pandas.DataFrame
        Dataframe containing the percentiles by month-day. It must contain
        the columns 'min' and 'max' plus one 'pXX' column (zero-padded to
        two digits, e.g. 'p05') for every entry of `pct_list`. Note that
        this plotting function is incompatible with percentiles calculated
        by day-of-year.

    df : pandas.DataFrame
        Dataframe containing the data to plot. It is not modified.

    data_column_name : str
        Name of column containing data to plot.

    date_column_name : str, optional
        Name of column containing date information. If None, the index of
        `df` will be used. Defaults to None.

    pct_list : list, optional
        List of integers corresponding to the percentile values to be
        plotted. Values of 0 and 100 are ignored as unbiased plotting
        position formulas do not assign values to 0 or 100th percentile.
        The list is not modified. Defaults to 5, 10, 25, 75, 90, 95.

    data_label : str, optional
        Label for the data to plot. If not provided, the name of the data
        column is used.

    ax : matplotlib.axes.Axes, optional
        Axes to plot on. If not provided, a new figure and axes will be
        created.

    disclaimer : bool, optional
        If True, displays the disclaimer 'For some streams, flow statistics
        may have been computed from mixed regulated and unregulated flows;
        this can affect depictions of flow conditions.' below the x-axis.

    title : str, optional
        Title for the plot. If not provided, the default title will be
        'Duration Hydrograph'.

    ylab : str, optional
        Label for the y-axis. If not provided, the default label will be
        'Discharge, ft3/s'.

    xlab : str, optional
        Label for the x-axis. If not provided, the default label will be
        'Month-Year'.

    color_palette : list, optional
        List of colors to use for the bands or a string describing one of
        two built-in palettes: 'BrownBlue' or 'Rainbow'. If not provided,
        the 'BrownBlue' palette will be used. The built-in palettes hold
        seven colors; bands are colored in list order and the band reaching
        the maximum uses the last color.

    **kwargs
        Only 'alpha' (default 0.5) and 'zorder' (default -20) are read; they
        are applied to the percentile bands drawn with
        :meth:`matplotlib.axes.Axes.fill_between`. Other keyword arguments
        are ignored.

    Returns
    -------
    matplotlib.axes.Axes
        Axes object containing the plot.

    Raises
    ------
    ValueError
        If `pct_list` holds no percentile other than 0 and 100, or if
        `percentiles_by_day` lacks a column required for plotting.

    Examples
    --------
    Fetch some data from NWIS and make a streamflow duration hydrograph plot.

    .. plot::
        :include-source:

        >>> df, _ = dataretrieval.nwis.get_dv(site='06892350',
        ...                                   parameterCd='00060',
        ...                                   start='1900-01-01',
        ...                                   end='2022-12-31')
        >>> pct_by_day = hyswap.percentiles.calculate_variable_percentile_thresholds_by_day(  # noqa: E501
        ...     df, '00060_Mean')
        >>> df_2022 = df[df.index.year == 2022]
        >>> fig, ax = plt.subplots(figsize=(12, 6))
        >>> ax = hyswap.plots.plot_duration_hydrograph(
        ...     pct_by_day, df_2022, '00060_Mean',
        ...     data_label='2022 Daily Mean Discharge',
        ...     ax=ax, title='Duration Hydrograph for USGS Site 06892350')
        >>> plt.tight_layout()
        >>> plt.show()
    """
    # Work on a sorted copy so neither the caller's list nor the shared
    # default list is mutated; 0 and 100 percentile levels are ignored.
    pct_list = sorted(pct for pct in pct_list if pct not in (0, 100))
    if not pct_list:
        raise ValueError('pct_list must contain at least one percentile '
                         'value other than 0 and 100')
    # check that every requested percentile (plus min/max) is present in the
    # percentile threshold data before any plotting happens
    pct_columns = ['p' + str(pct).zfill(2) for pct in pct_list]
    band_edges = ['min'] + pct_columns + ['max']
    missing = [col for col in band_edges
               if col not in percentiles_by_day.columns]
    if missing:
        raise ValueError('one or more percent values are not in provided '
                         f'percentile threshold data; missing columns: '
                         f'{missing}')
    # Create axes if not provided
    if ax is None:
        _, ax = plt.subplots()
    # pop some kwargs
    alpha = kwargs.pop('alpha', 0.5)
    zorder = kwargs.pop('zorder', -20)
    if data_label is None:
        label = df[data_column_name].name
    else:
        label = data_label
    # Add disclaimer if True
    if disclaimer is True:
        txt = 'For some streams, flow statistics may have been computed from mixed \nregulated and unregulated flows; this can affect depictions of flow conditions.'  # noqa: E501
    else:
        txt = ''
    # get colors
    if color_palette is None or color_palette == 'BrownBlue':
        color_palette = ['#8f4f1f', '#dcb668', '#ebd6ab', '#e9e9e9',
                         '#aacee0', '#5699c0', '#292f6b']
    if color_palette == 'Rainbow':
        color_palette = ["#e37676", "#e8c285", "#dbf595", "#a1cc9f",
                         "#7bdbd2", "#7587bf", "#ad63ba"]
    # set the df index
    if date_column_name is not None:
        df = df.set_index(date_column_name)
    # assign() returns a new frame, so the caller's dataframe never gains
    # the helper column
    df = df.assign(month_day=df.index.strftime('%m-%d'))
    # Join percentiles with data on the month-day key
    df_combined = pd.merge(df, percentiles_by_day,
                           left_on=df['month_day'],
                           right_index=True, how='left')
    dates = df_combined.index.values
    # plot the observed data as a black line above the percentile bands
    ax.plot(dates, df[data_column_name],
            color='k', zorder=10, label=label)
    # One band per pair of adjacent edges: min-lowest percentile, each pair
    # of neighbouring percentiles, highest percentile-max.
    band_labels = (
        ["Min. - {}th Percentile".format(pct_list[0])] +
        ["{}th - {}th Percentile".format(lo, hi)
         for lo, hi in zip(pct_list[:-1], pct_list[1:])] +
        ["{}th Percentile - Max.".format(pct_list[-1])])
    # bands take palette colors in order; the top band uses the last color
    band_colors = ([color_palette[i] for i in range(len(pct_list))] +
                   [color_palette[-1]])
    for lower, upper, band_color, band_label in zip(
            band_edges[:-1], band_edges[1:], band_colors, band_labels):
        ax.fill_between(
            dates,
            df_combined[lower].tolist(),
            df_combined[upper].tolist(),
            color=band_color, alpha=alpha, linewidth=0,
            label=band_label, zorder=zorder
        )
    # set labels
    ax.set_xlabel(xlab)
    ax.set_xlim(df_combined.index.min(), df_combined.index.max())
    # Format the axes that were drawn on (not pyplot's current axes, which
    # may differ when the caller supplies `ax`).
    ax.xaxis.set_major_formatter(
        plt.matplotlib.dates.DateFormatter('%b-%Y'))
    for tick_label in ax.get_xticklabels():
        tick_label.set_horizontalalignment('left')
    # other labels
    ax.set_ylabel(ylab)
    ax.set_yscale("log")
    ax.set_title(title)
    # disclaimer (empty string when not requested)
    ax.text(0, -0.18, txt, color='red', transform=ax.transAxes)
    # get y-axis ticks and convert to comma-separated strings, dropping the
    # first and last tick
    yticks = ax.get_yticks()
    yticklabels = [f'{float(y):,}' for y in yticks]
    ax.set_yticks(yticks[1:-1], labels=yticklabels[1:-1])
    # two column legend
    ax.legend(loc="best", ncol=2, title='Historical percentiles')
    # return axes
    return ax
def plot_cumulative_hydrograph(df, target_years, data_column_name,
                               date_column_name=None,
                               year_type='calendar',
                               unit='acre-feet',
                               envelope_pct=[25, 75],
                               max_year=False, min_year=False,
                               ax=None,
                               disclaimer=False,
                               title="Cumulative Streamflow Hydrograph",
                               ylab="Cumulative discharge, acre-feet",
                               xlab="Month",
                               clip_leap_day=False,
                               **kwargs):
    """Plot a cumulative hydrograph.

    The cumulative-streamflow hydrograph is a graphical presentation of
    recent cumulative daily streamflow (discharge) observed at an individual
    USGS streamgage, plotted over the long-term statistics of streamflow for
    each day of the year at that station. Typically, the statistics, based
    on quality assured and approved data, include the maximum annual
    cumulative discharge recorded during the period of record; the
    mean-daily cumulative flow for each day; the minimum cumulative
    discharge recorded for each day.

    Note: For some streams, flow statistics may have been computed from mixed
    regulated and unregulated flows; this can affect depictions of flow
    conditions.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe containing the data to plot.

    target_years : int, or list
        Target year(s) to plot as lines; the first is drawn in black. Can
        provide a single year as an integer, or a list of years.

    data_column_name : str
        Name of column containing data to calculate cumulative values for.
        Discharge data assumed to be in unit of ft3/s.

    date_column_name : str, optional
        Name of column containing date information. If None, the index of
        `df` will be used. Defaults to None.

    year_type : str, optional
        Controls the month order of the x-axis and is forwarded to the
        cumulative calculation. 'water' starts the axis in October,
        'climate' in April; any other value (default 'calendar') starts it
        in January.

    unit : str, optional
        The unit the user wants to use to report cumulative flow. One of
        'acre-feet', 'cfs', 'cubic-meters', 'cubic-feet'. Assumes input
        data are in cubic feet per second (cfs).

    envelope_pct : list, optional
        List of percentiles to plot as the envelope. Default is [25, 75].
        The envelope is only drawn when the list has exactly two entries;
        if an empty list, [], then no envelope is plotted.

    max_year : bool, optional
        If True, plot the cumulative flow for the year with the maximum
        end of the year cumulative value as a dashed line. Default is False.

    min_year : bool, optional
        If True, plot the cumulative flow for the year with the minimum
        end of the year cumulative value as a dotted line. Default is False.

    ax : matplotlib.axes.Axes, optional
        Axes to plot on. If not provided, a new figure and axes will be
        created.

    disclaimer : bool, optional
        If True, displays the disclaimer 'For some streams, flow statistics
        may have been computed from mixed regulated and unregulated flows;
        this can affect depictions of flow conditions.' below the x-axis.

    title : str, optional
        Title for the plot. If not provided, the default title will be
        'Cumulative Streamflow Hydrograph'.

    ylab : str, optional
        Label for the y-axis. If not provided, the default label will be
        'Cumulative discharge, acre-feet'.

    xlab : str, optional
        Label for the x-axis. If not provided, the default label will be
        'Month'.

    clip_leap_day : bool, optional
        If True, removes leap day '02-29' from the percentiles dataset used
        to create the plot. Defaults to False.

    **kwargs
        Only 'alpha' (default 0.5), 'zorder' (default -20) and 'color'
        (default 'xkcd:bright green') are read; they style the percentile
        envelope drawn with :meth:`matplotlib.axes.Axes.fill_between`.
        Other keyword arguments are ignored.

    Returns
    -------
    matplotlib.axes.Axes
        Axes object containing the plot.

    Examples
    --------
    Fetch some data from NWIS and make a cumulative hydrograph plot.

    .. plot::
        :include-source:

        >>> df, _ = dataretrieval.nwis.get_dv(site='06892350',
        ...                                   parameterCd='00060',
        ...                                   start='1900-01-01',
        ...                                   end='2021-12-31')
        >>> fig, ax = plt.subplots(figsize=(8, 5))
        >>> ax = hyswap.plots.plot_cumulative_hydrograph(
        ...     df,
        ...     data_column_name='00060_Mean',
        ...     target_years=2020, ax=ax,
        ...     title='2020 Cumulative Streamflow Hydrograph, site 06892350')
        >>> plt.tight_layout()
        >>> plt.show()
    """
    # Create axes if not provided
    if ax is None:
        _, ax = plt.subplots()
    # calculate cumulative values
    cumulative_df = calculate_daily_cumulative_values(
        df=df,
        data_column_name=data_column_name,
        date_column_name=date_column_name,
        year_type=year_type,
        unit=unit,
        clip_leap_day=clip_leap_day
    )
    # calculations for percentiles by day
    pdf = calculate_variable_percentile_thresholds_by_day(
        cumulative_df, data_column_name='cumulative',
        clip_leap_day=clip_leap_day, percentiles=envelope_pct)
    # pop some kwargs
    alpha = kwargs.pop('alpha', 0.5)
    zorder = kwargs.pop('zorder', -20)
    color = kwargs.pop('color', 'xkcd:bright green')
    # Add disclaimer if True
    if disclaimer is True:
        txt = 'For some streams, flow statistics may have been computed from mixed \nregulated and unregulated flows; this can affect depictions of flow conditions.'  # noqa: E501
    else:
        txt = ''
    # Reference year used only to enumerate month-day labels: 1904 is a leap
    # year (includes '02-29'), 1901 is not.
    year = 1901 if clip_leap_day else 1904
    # First month and date span of the x-axis for each year type; anything
    # other than 'water' or 'climate' is treated as a calendar year.
    year_spans = {
        'water': (10, f'{year-1}-10-01', f'{year}-09-30'),
        'climate': (4, f'{year-1}-04-01', f'{year}-03-31'),
    }
    first_month, span_start, span_end = year_spans.get(
        year_type, (1, f'{year}-01-01', f'{year}-12-31'))
    month_day_order = pd.date_range(
        start=span_start, end=span_end).strftime('%m-%d')
    # months in axis order: unlabeled major ticks on the 1st, month names on
    # minor ticks at the 15th
    month_order = list(range(first_month, 13)) + list(range(1, first_month))
    month_abbrs = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                   'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    month_begin_ticks = [f"{str(month).zfill(2)}-01" for month in month_order]
    month_label_ticks = [f"{str(month).zfill(2)}-15" for month in month_order]
    month_labels = [month_abbrs[month - 1] for month in month_order]
    # Reorder percentile thresholds by year_type
    pdf_reordered = pdf.reindex(month_day_order)
    # plot percentile envelope (requires exactly a lower and an upper bound)
    if len(envelope_pct) == 2:
        lower_col = "p" + str(envelope_pct[0]).zfill(2)
        upper_col = "p" + str(envelope_pct[1]).zfill(2)
        ax.fill_between(pdf_reordered.index,
                        list(pdf_reordered[lower_col].values),
                        list(pdf_reordered[upper_col].values),
                        color=color, alpha=alpha,
                        label=f"{envelope_pct[0]}th - {envelope_pct[1]}th " +
                        "Percentile Envelope",
                        zorder=zorder)
    # plot min/max if desired
    if max_year:
        # year containing the largest cumulative value
        max_y = cumulative_df.loc[
            cumulative_df['cumulative'].idxmax()]['index_year']
        max_year_df = cumulative_df[
            cumulative_df['index_year'] == max_y]
        ax.plot(
            max_year_df['index_month_day'],
            max_year_df['cumulative'],
            color='k', alpha=0.5, linestyle='--',
            label=f"Highest observed cumulative flow ({max_y})"
        )
    if min_year:
        # year containing the smallest cumulative value
        min_y = cumulative_df.loc[
            cumulative_df['cumulative'].idxmin()]['index_year']
        min_year_df = cumulative_df[
            cumulative_df['index_year'] == min_y]
        ax.plot(
            min_year_df['index_month_day'],
            min_year_df['cumulative'],
            color='k', alpha=0.5, linestyle=':',
            label=f"Lowest observed cumulative flow ({min_y})"
        )
    # handle target years: black first, then the tab20 colors
    col_targets = ['k'] + list(matplotlib.colormaps['tab20'].colors)
    if isinstance(target_years, (int, np.integer)):
        target_years = [target_years]  # make a single year a list
    for i, target_year in enumerate(target_years):
        # get data from target year
        target_year_data = cumulative_df.loc[
            cumulative_df['index_year'] == target_year]
        # plot target year; colors cycle if there are more years than colors
        ax.plot(target_year_data['index_month_day'],
                target_year_data['cumulative'],
                color=col_targets[i % len(col_targets)],
                label=f"Observed cumulative flow ({target_year})")
    # Get axis labels and ticks in order
    ax.set_xlim(0, 365)
    ax.set_xlabel(xlab)
    ax.set_ylabel(ylab)
    ax.set_title(title)
    # Set ticks on the axes that were drawn on (not pyplot's current axes,
    # which may differ when the caller supplies `ax`).
    ax.set_xticks(month_begin_ticks, labels=[])
    ax.set_xticks(month_label_ticks, labels=month_labels, minor=True)
    # make minor x-ticks invisible
    ax.tick_params(axis='x', which='minor', length=0)
    # get y-axis ticks and convert to comma-separated strings, dropping the
    # first tick
    yticks = ax.get_yticks()
    yticklabels = [f'{int(y):,}' for y in yticks]
    ax.set_yticks(yticks[1:], labels=yticklabels[1:])
    ax.set_ylim(0, yticks.max())
    # disclaimer (empty string when not requested)
    ax.text(0, -0.18, txt, color='red', transform=ax.transAxes)
    # legend
    ax.legend(loc="best")
    # return
    return ax
def plot_hydrograph(df, data_column_name, date_column_name=None,
                    start_date=None, end_date=None, ax=None,
                    title='Streamflow Hydrograph',
                    ylab='Discharge, ft3/s',
                    xlab='Date', yscale='log', **kwargs):
    """Plot a simple hydrograph.

    Hydrographs show the streamflow discharge over time at a single station.

    Parameters
    ----------
    df : pandas.DataFrame
        DataFrame containing the data to plot.

    data_column_name : str
        Name of column containing data to plot.

    date_column_name : str, optional
        Name of column containing date information. If None, the index of
        `df` will be used. Defaults to None.

    start_date : str, optional
        Start date for the plot. If not provided, the minimum date in the
        DataFrame will be used.

    end_date : str, optional
        End date for the plot. If not provided, the maximum date in the
        DataFrame will be used.

    ax : matplotlib.axes.Axes, optional
        Axes object to plot on. If not provided, a new figure and axes will
        be created.

    title : str, optional
        Title of the plot. Default is 'Streamflow Hydrograph'.

    ylab : str, optional
        Y-axis label. Default is 'Discharge, ft3/s'.

    xlab : str, optional
        X-axis label. Default is 'Date'.

    yscale : str, optional
        Y-axis scale. Default is 'log'. Options are 'linear' or 'log'.

    **kwargs
        Additional keyword arguments passed to
        :meth:`matplotlib.axes.Axes.plot`.

    Returns
    -------
    matplotlib.axes.Axes
        Axes object containing the plot.

    Examples
    --------
    Fetch data for a USGS gage and plot the hydrograph.

    .. plot::
        :include-source:

        >>> siteno = '06892350'
        >>> df, _ = dataretrieval.nwis.get_dv(site=siteno,
        ...                                   parameterCd='00060',
        ...                                   start='2019-01-01',
        ...                                   end='2020-01-01')
        >>> ax = hyswap.plots.plot_hydrograph(
        ...     df, data_column_name='00060_Mean',
        ...     title=f'2019 Hydrograph for Station {siteno}',
        ...     ylab='Discharge, ft3/s',
        ...     xlab='Date', yscale='log')
        >>> plt.tight_layout()
        >>> plt.show()
    """
    # check if ax provided
    if ax is None:
        _, ax = plt.subplots()
    # use the date column as the index when one is named
    if date_column_name is not None:
        df = df.set_index(date_column_name)
    # sort by date so label-based slicing below is well defined
    df = df.sort_index()
    # clip to the requested window
    if start_date is not None:
        df = df.loc[start_date:]
    if end_date is not None:
        df = df.loc[:end_date]
    # plot
    ax.plot(df.index, df[data_column_name], **kwargs)
    # set labels
    ax.set_xlabel(xlab)
    ax.set_ylabel(ylab)
    ax.set_title(title)
    # set yscale
    ax.set_yscale(yscale)
    # Relabel the interior y ticks: comma-separated integers for whole
    # numbers, while fractional ticks (e.g. 0.1 on a log axis) keep their
    # value instead of being truncated to "0".
    yticks = ax.get_yticks()
    yticklabels = [
        f'{int(y):,}' if float(y).is_integer() else f'{y:,g}'
        for y in yticks]
    ax.set_yticks(yticks[1:-1], labels=yticklabels[1:-1])
    # return
    return ax
def plot_similarity_heatmap(sim_matrix, n_obs=None, cmap='cividis',
                            show_values=False, ax=None,
                            title='Similarity Matrix'):
    """Plot a similarity matrix heatmap.

    The heatmap shows the results of a correlation matrix between
    measurements at two or more sites. Lighter, warmer colors denote higher
    similarity (correlation), while darker colors denote less similarity
    between two sites.

    Parameters
    ----------
    sim_matrix : pandas.DataFrame
        Similarity matrix to plot. Must be square. Can be the output of
        :meth:`hyswap.similarity.calculate_correlations`,
        :meth:`hyswap.similarity.calculate_wasserstein_distance`,
        :meth:`hyswap.similarity.calculate_energy_distance`, or any other
        square matrix represented as a pandas DataFrame.

    n_obs : optional
        If provided, ' (n=<n_obs>)' is appended to the plot title.

    cmap : str, optional
        Colormap to use. Default is 'cividis'.

    show_values : bool, optional
        Whether to show the values of the matrix on the plot. Default is
        False.

    ax : matplotlib.axes.Axes, optional
        Axes object to plot on. If not provided, a new figure and axes will
        be created.

    title : str, optional
        Title for the plot. Default is 'Similarity Matrix'.

    Returns
    -------
    matplotlib.axes.Axes
        Axes object containing the plot.

    Examples
    --------
    Calculate the correlation matrix between two sites and plot it as a
    heatmap.

    .. plot::
        :include-source:

        >>> df, _ = dataretrieval.nwis.get_dv(site='06892350',
        ...                                   parameterCd='00060',
        ...                                   start='2010-01-01',
        ...                                   end='2021-12-31')
        >>> df2, _ = dataretrieval.nwis.get_dv(site='06892000',
        ...                                    parameterCd='00060',
        ...                                    start='2010-01-01',
        ...                                    end='2021-12-31')
        >>> corr_matrix, n_obs = hyswap.similarity.calculate_correlations(
        ...     [df, df2], '00060_Mean')
        >>> ax = hyswap.plots.plot_similarity_heatmap(corr_matrix,
        ...                                           show_values=True)
        >>> plt.show()
    """
    # Create axes if not provided
    if ax is None:
        _, ax = plt.subplots()
    # color limits span the full range of the matrix
    vmin = sim_matrix.min().min()
    vmax = sim_matrix.max().max()
    im = ax.imshow(sim_matrix, cmap=cmap, vmin=vmin, vmax=vmax)
    n_rows, n_cols = sim_matrix.shape
    # show values if desired
    if show_values:
        # cells below the midpoint of the color range get white text,
        # the rest black
        midpoint = (vmax - vmin) / 2 + vmin
        for i in range(n_rows):
            for j in range(n_cols):
                cell = sim_matrix.iloc[i, j]
                ax.text(j, i, f'{cell:.2f}',
                        ha="center", va="center",
                        color="w" if cell < midpoint else "k")
    # set labels
    if n_obs is not None:
        title = f'{title} (n={n_obs})'
    ax.set_title(title)
    ax.set_xlabel('Site')
    ax.set_ylabel('Site')
    # set ticks at center of each cell: one x tick per column, one y tick
    # per row
    ax.set_xticks(np.arange(n_cols))
    ax.set_yticks(np.arange(n_rows))
    # Label and rotate ticks on the axes that were drawn on (not pyplot's
    # current axes, which may differ when the caller supplies `ax`).
    ax.set_xticklabels(sim_matrix.columns, rotation=45, ha='right')
    ax.set_yticklabels(sim_matrix.index)
    # add colorbar to the figure that owns `ax`
    ax.figure.colorbar(im, ax=ax)
    # return
    return ax