Source code for hyswap.cumulative

"""Cumulative calculation functions."""

import pandas as pd
from hyswap.utils import define_year_doy_columns


[docs] def calculate_daily_cumulative_values(df, data_column_name, date_column_name=None, year_type='calendar', unit='acre-feet', clip_leap_day=False): """Calculate daily cumulative values. Parameters ---------- df : pandas.DataFrame DataFrame containing data to calculate cumulative values. data_column_name : str Name of column containing data to calculate cumulative values for. Discharge data assumed to be in unit of ft3/s. date_column_name : str, optional Name of column containing date information. If None, the index of `df` will be used. year_type : str, optional The type of year to use. Must be one of 'calendar', 'water', or 'climate'. Default is 'calendar' which starts the year on January 1 and ends on December 31. 'water' starts the year on October 1 and ends on September 30 of the following year which is the "water year". For example, October 1, 2010 to September 30, 2011 is "water year 2011". 'climate' years begin on April 1 and end on March 31 of the following year, they are numbered by the ending year. For example, April 1, 2010 to March 31, 2011 is "climate year 2011". unit : str, optional The unit the user wants to use to report cumulative flow. One of 'acre-feet', 'cfs', 'cubic-meters', 'cubic-feet'. Assumes input data are in cubic feet per second (cfs). Returns ------- cumulative_values : pandas.DataFrame DataFrame containing daily cumulative values for each year in the input DataFrame, rows are dates and columns include years, month-days, day-of-year and cumulative values in the units specified. Examples -------- Calculate daily cumulative values from some synthetic data. .. doctest:: >>> df = pd.DataFrame({ ... "date": pd.date_range("2000-01-01", "2000-12-31"), ... "data": np.arange(366)}) >>> results = cumulative.calculate_daily_cumulative_values( ... df, "data", date_column_name="date") >>> results.columns.tolist() ['index_month_day', 'index_year', 'index_doy', 'cumulative'] """ # check that unit is valid if unit not in ['acre-feet', 'cfs', 'cubic-meters', 'cubic-feet']: raise ValueError( 'Unit must be one of "acre-feet", "cfs", "cubic-meters", "cubic-feet"') # noqa: E501 # set date index, add day/year columns with function df = define_year_doy_columns(df, date_column_name=date_column_name, year_type=year_type, clip_leap_day=clip_leap_day) # get unique years in the data years = df['index_year'].unique() # make an empty dataframe to hold cumulative values for each year cdf = pd.DataFrame() selected_columns = [ data_column_name, 'index_month_day', 'index_year', 'index_doy' ] # loop through each year and calculate cumulative values for year in years: # get data for the year year_data = df[df['index_year'] == year][selected_columns] year_data = year_data.sort_index() # calculate cumulative values and assign to cdf if unit == 'acre-feet': # convert cubic feet to acre-feet # multiplied by seconds per day year_data['cumulative'] = year_data[data_column_name].cumsum().values * 0.0000229568 * 86400 # noqa: E501 elif unit == 'cubic-meters': # convert cubic feet to cubic meters # multiplied by seconds per day year_data['cumulative'] = year_data[data_column_name].cumsum().values * 0.02831685 * 86400 # noqa: E501 elif unit == 'cubic-feet': # convert cubic feet per second to cubic feet # multiplied by seconds per day year_data['cumulative'] = year_data[data_column_name].cumsum().values * 86400 # noqa: E501 else: year_data['cumulative'] = year_data[data_column_name].cumsum().values # noqa: E501 cdf = pd.concat([cdf, year_data]) cdf = cdf[['index_month_day', 'index_year', 'index_doy', 'cumulative']] return cdf