Python Equivalents to R Vignette Examples
[1]:
from dataretrieval import nwis
from dataretrieval import wqp
The `dataretrieval` package was created as a Python equivalent to the R `dataRetrieval` package.
The following cells show Python equivalents for the methods outlined in the R dataRetrieval vignette; the equivalent R code is quoted at the top of each cell.
[2]:
'''
library(dataRetrieval)
# Choptank River near Greensboro, MD
siteNumber <- "01491000"
ChoptankInfo <- readNWISsite(siteNumber)
parameterCd <- "00060"
#Raw daily data:
rawDailyData <- readNWISdv(siteNumber,parameterCd,
"1980-01-01","2010-01-01")
# Sample data Nitrate:
parameterCd <- "00618"
qwData <- readNWISqw(siteNumber,parameterCd,
"1980-01-01","2010-01-01")
pCode <- readNWISpCode(parameterCd)
'''
# Choptank River near Greensboro, MD
siteNumber = '01491000'
chop_tank_info, md = nwis.get_info(sites=siteNumber)

# raw daily data
parameterCd = '00060'
rawDailyData, md = nwis.get_dv(sites=siteNumber, parameterCd=parameterCd, start="1980-01-01", end="2010-01-01")

# sample data Nitrate:
parameterCd = "00618"
# nwis.get_qwdata targets the retired NWIS qw endpoint and raises
# KeyError: 'sample_start_time_datum_cd', which aborted the cell before pCode
# was assigned. The sample query therefore goes through the Water Quality
# Portal instead.
# NOTE(review): WQP site ids carry the agency prefix ("USGS-<site number>") and
# WQP dates are written MM-DD-YYYY; the returned columns follow the WQP schema
# rather than the old NWIS qw layout -- confirm downstream use.
qwData, md = wqp.get_results(
    siteid="USGS-" + siteNumber,
    pCode=parameterCd,
    startDateLo="01-01-1980",
    startDateHi="01-01-2010",
)
pCode, md = nwis.get_pmcodes(parameterCd=parameterCd)
/home/runner/.local/lib/python3.12/site-packages/dataretrieval/nwis.py:193: UserWarning: WARNING: Starting in March 2024, the NWIS qw data endpoint is retiring and no longer receives updates. For more information, refer to https://waterdata.usgs.gov.nwis/qwdata and https://doi-usgs.github.io/dataRetrieval/articles/Status.html or email CompTools@usgs.gov.
warnings.warn(
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
File ~/.local/lib/python3.12/site-packages/pandas/core/indexes/base.py:3805, in Index.get_loc(self, key)
3804 try:
-> 3805 return self._engine.get_loc(casted_key)
3806 except KeyError as err:
File index.pyx:167, in pandas._libs.index.IndexEngine.get_loc()
File index.pyx:196, in pandas._libs.index.IndexEngine.get_loc()
File pandas/_libs/hashtable_class_helper.pxi:7081, in pandas._libs.hashtable.PyObjectHashTable.get_item()
File pandas/_libs/hashtable_class_helper.pxi:7089, in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'sample_start_time_datum_cd'
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
Cell In[2], line 30
28 # sample data Nitrate:
29 parameterCd = "00618"
---> 30 qwData, md = nwis.get_qwdata(sites=siteNumber, parameterCd=parameterCd, start="1980-01-01", end="2010-01-01")
32 pCode, md = nwis.get_pmcodes(parameterCd=parameterCd)
File ~/.local/lib/python3.12/site-packages/dataretrieval/nwis.py:254, in get_qwdata(sites, start, end, multi_index, wide_format, datetime_index, ssl_check, **kwargs)
251 df = _read_rdb(response.text)
253 if datetime_index is True:
--> 254 df = format_datetime(df, "sample_dt", "sample_tm", "sample_start_time_datum_cd")
256 return format_response(df, **kwargs), NWIS_Metadata(response, **kwargs)
File ~/.local/lib/python3.12/site-packages/dataretrieval/utils.py:79, in format_datetime(df, date_field, time_field, tz_field)
56 """Creates a datetime field from separate date, time, and
57 time zone fields.
58
(...) 76
77 """
78 # create a datetime index from the columns in qwdata response
---> 79 df[tz_field] = df[tz_field].map(tz)
81 df["datetime"] = pd.to_datetime(
82 df[date_field] + " " + df[time_field] + " " + df[tz_field],
83 format="ISO8601",
84 utc=True,
85 )
87 # if there are any incomplete dates, warn the user
File ~/.local/lib/python3.12/site-packages/pandas/core/frame.py:4102, in DataFrame.__getitem__(self, key)
4100 if self.columns.nlevels > 1:
4101 return self._getitem_multilevel(key)
-> 4102 indexer = self.columns.get_loc(key)
4103 if is_integer(indexer):
4104 indexer = [indexer]
File ~/.local/lib/python3.12/site-packages/pandas/core/indexes/base.py:3812, in Index.get_loc(self, key)
3807 if isinstance(casted_key, slice) or (
3808 isinstance(casted_key, abc.Iterable)
3809 and any(isinstance(x, slice) for x in casted_key)
3810 ):
3811 raise InvalidIndexError(key)
-> 3812 raise KeyError(key) from err
3813 except TypeError:
3814 # If we have a listlike key, _check_indexing_error will raise
3815 # InvalidIndexError. Otherwise we fall through and re-raise
3816 # the TypeError.
3817 self._check_indexing_error(key)
KeyError: 'sample_start_time_datum_cd'
[3]:
'''
{r getSite, echo=TRUE, eval=FALSE}
siteNumbers <- c("01491000","01645000")
siteINFO <- readNWISsite(siteNumbers)
'''
siteNumbers = ["01491000","01645000"]
# readNWISsite is a site-information lookup, so the matching call is
# nwis.get_info (the same pairing used for the single-site example at the top
# of this notebook). The previous nwis.get_iv call fetched values instead of
# site information.
siteINFO, md = nwis.get_info(sites=siteNumbers)
[4]:
'''
# Continuing from the previous example:
# This pulls out just the daily, mean data:
dailyDataAvailable <- whatNWISdata(siteNumbers,
service="dv", statCd="00003")
'''
# Relies on siteNumbers defined in the previous cell; statCd "00003" selects
# the daily mean statistic.
# NOTE(review): the R call (whatNWISdata) asks what data is available, whereas
# nwis.get_dv is the function this notebook uses elsewhere to download daily
# values -- the two may not be equivalent; confirm the intended Python call.
dailyDataAvailable, md = nwis.get_dv(sites=siteNumbers, statCd="00003")
[5]:
'''
# Using defaults:
parameterCd <- "00618"
parameterINFO <- readNWISpCode(parameterCd)
'''
# Parameter-code lookup for "00618", leaving every other argument at its default.
pCode, md = nwis.get_pmcodes(
    parameterCd="00618",
)
[6]:
'''
# Choptank River near Greensboro, MD:
siteNumber <- "01491000"
parameterCd <- "00060" # Discharge
startDate <- "2009-10-01"
endDate <- "2012-09-30"
discharge <- readNWISdv(siteNumber,
parameterCd, startDate, endDate)
'''
# Daily discharge for the Choptank River near Greensboro, MD.
siteNumber, parameterCd = "01491000", "00060"  # site id, discharge code
startDate, endDate = "2009-10-01", "2012-09-30"  # requested date range
discharge, md = nwis.get_dv(
    sites=siteNumber,
    parameterCd=parameterCd,
    start=startDate,
    end=endDate,
)
[7]:
'''
siteNumber <- "01491000"
parameterCd <- c("00010","00060") # Temperature and discharge
statCd <- c("00001","00003") # Mean and maximum
startDate <- "2012-01-01"
endDate <- "2012-05-01"
temperatureAndFlow <- readNWISdv(siteNumber, parameterCd,
startDate, endDate, statCd=statCd)
'''
# Daily values for two parameters and two statistics at a single site.
siteNumber = "01491000"
parameterCd = ["00010","00060"] # Temperature and discharge
statCd = ["00001","00003"] # Maximum (00001) and mean (00003), in list order
startDate = "2012-01-01"
endDate = "2012-05-01"
temperatureAndFlow, md = nwis.get_dv(sites=siteNumber, parameterCd=parameterCd,
start=startDate, end=endDate, statCd=statCd)
[8]:
'''
parameterCd <- "00060" # Discharge
startDate <- "2012-05-12"
endDate <- "2012-05-13"
dischargeUnit <- readNWISuv(siteNumber, parameterCd,
startDate, endDate)
'''
# Discharge from nwis.get_iv for 2012-05-12 through 2012-05-13 at one site.
siteNumber, parameterCd = "01491000", "00060"  # site id, discharge code
startDate, endDate = "2012-05-12", "2012-05-13"
dischargeUnit, md = nwis.get_iv(
    sites=siteNumber,
    parameterCd=parameterCd,
    start=startDate,
    end=endDate,
)
[9]:
'''
# Dissolved Nitrate parameter codes:
parameterCd <- c("00618","71851")
startDate <- "1985-10-01"
endDate <- "2012-09-30"
dfLong <- readNWISqw(siteNumber, parameterCd,
startDate, endDate)
# Or the wide return:
dfWide <- readNWISqw(siteNumber, parameterCd,
startDate, endDate, reshape=TRUE)
'''
from datetime import datetime

# Dissolved nitrate parameter codes; only the long-format request is translated.
siteNumber = "01491000"
parameterCd = ["00618","71851"]
startDate = "1985-10-01"
endDate = "2012-09-30"

# nwis.get_qwdata targets the retired NWIS qw endpoint and raises
# KeyError: 'sample_start_time_datum_cd', so the samples are requested from the
# Water Quality Portal instead.
# NOTE(review): WQP site ids carry the agency prefix ("USGS-<site number>"),
# multiple parameter codes are semicolon-separated, and dates are written
# MM-DD-YYYY; the returned columns follow the WQP schema -- confirm downstream use.
wqpStart = datetime.strptime(startDate, "%Y-%m-%d").strftime("%m-%d-%Y")
wqpEnd = datetime.strptime(endDate, "%Y-%m-%d").strftime("%m-%d-%Y")
dfLong, md = wqp.get_results(
    siteid="USGS-" + siteNumber,
    pCode=";".join(parameterCd),
    startDateLo=wqpStart,
    startDateHi=wqpEnd,
)
/home/runner/.local/lib/python3.12/site-packages/dataretrieval/nwis.py:193: UserWarning: WARNING: Starting in March 2024, the NWIS qw data endpoint is retiring and no longer receives updates. For more information, refer to https://waterdata.usgs.gov.nwis/qwdata and https://doi-usgs.github.io/dataRetrieval/articles/Status.html or email CompTools@usgs.gov.
warnings.warn(
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
File ~/.local/lib/python3.12/site-packages/pandas/core/indexes/base.py:3805, in Index.get_loc(self, key)
3804 try:
-> 3805 return self._engine.get_loc(casted_key)
3806 except KeyError as err:
File index.pyx:167, in pandas._libs.index.IndexEngine.get_loc()
File index.pyx:196, in pandas._libs.index.IndexEngine.get_loc()
File pandas/_libs/hashtable_class_helper.pxi:7081, in pandas._libs.hashtable.PyObjectHashTable.get_item()
File pandas/_libs/hashtable_class_helper.pxi:7089, in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'sample_start_time_datum_cd'
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
Cell In[9], line 16
14 startDate = "1985-10-01"
15 endDate = "2012-09-30"
---> 16 dfLong, md = nwis.get_qwdata(sites=siteNumber, parameterCd=parameterCd,
17 start=startDate, end=endDate)
File ~/.local/lib/python3.12/site-packages/dataretrieval/nwis.py:254, in get_qwdata(sites, start, end, multi_index, wide_format, datetime_index, ssl_check, **kwargs)
251 df = _read_rdb(response.text)
253 if datetime_index is True:
--> 254 df = format_datetime(df, "sample_dt", "sample_tm", "sample_start_time_datum_cd")
256 return format_response(df, **kwargs), NWIS_Metadata(response, **kwargs)
File ~/.local/lib/python3.12/site-packages/dataretrieval/utils.py:79, in format_datetime(df, date_field, time_field, tz_field)
56 """Creates a datetime field from separate date, time, and
57 time zone fields.
58
(...) 76
77 """
78 # create a datetime index from the columns in qwdata response
---> 79 df[tz_field] = df[tz_field].map(tz)
81 df["datetime"] = pd.to_datetime(
82 df[date_field] + " " + df[time_field] + " " + df[tz_field],
83 format="ISO8601",
84 utc=True,
85 )
87 # if there are any incomplete dates, warn the user
File ~/.local/lib/python3.12/site-packages/pandas/core/frame.py:4102, in DataFrame.__getitem__(self, key)
4100 if self.columns.nlevels > 1:
4101 return self._getitem_multilevel(key)
-> 4102 indexer = self.columns.get_loc(key)
4103 if is_integer(indexer):
4104 indexer = [indexer]
File ~/.local/lib/python3.12/site-packages/pandas/core/indexes/base.py:3812, in Index.get_loc(self, key)
3807 if isinstance(casted_key, slice) or (
3808 isinstance(casted_key, abc.Iterable)
3809 and any(isinstance(x, slice) for x in casted_key)
3810 ):
3811 raise InvalidIndexError(key)
-> 3812 raise KeyError(key) from err
3813 except TypeError:
3814 # If we have a listlike key, _check_indexing_error will raise
3815 # InvalidIndexError. Otherwise we fall through and re-raise
3816 # the TypeError.
3817 self._check_indexing_error(key)
KeyError: 'sample_start_time_datum_cd'
[10]:
'''
siteNumber <- "434400121275801"
groundWater <- readNWISgwl(siteNumber)
'''
# Groundwater levels for one site. The library may warn about incomplete
# dates here and suggest passing datetime_index=False.
siteNumber = "434400121275801"
groundWater, md = nwis.get_gwlevels(
    sites=siteNumber,
)
/home/runner/.local/lib/python3.12/site-packages/dataretrieval/utils.py:90: UserWarning: Warning: 567 incomplete dates found, consider setting datetime_index to False.
warnings.warn(
[11]:
'''
siteNumber <- '01594440'
peakData <- readNWISpeak(siteNumber)
'''
# Discharge peaks for a single site.
siteNumber = "01594440"
peakData, md = nwis.get_discharge_peaks(
    sites=siteNumber,
)
[12]:
'''
ratingData <- readNWISrating(siteNumber, "base")
attr(ratingData, "RATING")
'''
# "base" rating file for the site; the site id is written out literally here
# and is passed through the `site` keyword.
ratings_data, md = nwis.get_ratings(
    site="01594440",
    file_type="base",
)
[13]:
'''surfaceData <- readNWISmeas(siteNumber)'''
# Discharge measurements for a single site.
siteNumber = "01594440"
surface_data, md = nwis.get_discharge_measurements(
    sites=siteNumber,
)
[14]:
'''
allegheny <- readNWISuse(stateCd = "Pennsylvania",
countyCd = "Allegheny")
national <- readNWISuse(stateCd = NULL,
countyCd = NULL,
transform = TRUE)
'''
# County-level request: state "PA" with county code "003" stands in for the
# Pennsylvania / Allegheny pair named in the R call.
allegheny, md = nwis.get_water_use(
    state="PA",
    counties="003",
)

# Request with no state or county filter.
national, md = nwis.get_water_use()
[15]:
'''
discharge_stats <- readNWISstat(siteNumbers=c("02319394"),
parameterCd=c("00060"),
statReportType="annual")
'''
# Annual statistics report for parameter "00060" at one site. The Python call
# additionally passes statTypeCd='all', which has no counterpart in the R snippet.
discharge_stats, md = nwis.get_stats(
    sites="02319394",
    parameterCd="00060",
    statReportType="annual",
    statTypeCd="all",
)
[16]:
# '''
# specificCond <- readWQPqw('WIDNR_WQX-10032762',
# 'Specific conductance',
# '2011-05-01','2011-09-30')
# '''
# specific_cond, md = wqp.get_results(siteid='WIDNR_WQX-10032762',
# characteristicName = 'Specific conductance',
# startDateLo='2011-05-01', startDateHi='2011-09-30')
[17]:
# '''
# dischargeWI <- readNWISdata(service="dv",
# stateCd="WI",
# parameterCd="00060",
# drainAreaMin="50",
# statCd="00003")
# '''
# dischargeWI, md = nwis.get_dv(stateCd="WI", parameterCd="00060", drainAreaMin="50", statCd="00003")
[18]:
# '''
# sitesNJ <- whatWQPsites(statecode="US:34",
# characteristicName="Chloride")
# '''
# sitesNJ, md = wqp.what_sites(statecode="US:34", characteristicName="Chloride")
[19]:
# '''
# dataPH <- readWQPdata(statecode="US:55",
# characteristicName="pH")
# '''
# NOTE(review): readWQPdata reads result data, while wqp.what_sites is the call
# this notebook pairs with whatWQPsites; wqp.get_results is probably the
# intended equivalent -- confirm before re-enabling this cell.
# dataPH, md = wqp.what_sites(statecode="US:55", characteristicName="pH")
[20]:
# '''
# type <- "Stream"
# sites <- whatWQPdata(countycode="US:55:025",siteType=type)
# '''
# streamType = "Stream"
# sites, md = wqp.get_results(countycode="US:55:025", siteType=streamType)
[21]:
'''site <- whatWQPsamples(siteid="USGS-01594440")'''
# whatWQPsamples describes sampling activities, so it maps to
# wqp.what_activities; the previous wqp.what_sites call returned site records
# instead of activity records.
site, md = wqp.what_activities(siteid="USGS-01594440")
[22]:
'''
type <- "Stream"
sites <- whatWQPmetrics(countycode="US:55:025",siteType=type)
'''
# whatWQPmetrics asks for activity metrics, so it maps to
# wqp.what_activity_metrics; the previous wqp.what_sites call returned site
# records instead.
streamType = "Stream"
sites, md = wqp.what_activity_metrics(
    countycode="US:55:025",
    siteType=streamType,
)
Embedded Metadata
All service methods return a tuple containing the DataFrame of requested data and a Metadata object. Note that a call made through get_record returns only the DataFrame, to remain compatible with previous usage.
national, md = nwis.get_water_use()
md is an object with the following attributes:
Metadata
url # the resulting url to query usgs
query_time # the time it took to query usgs
site_info # a method to call site_info with the site parameters supplied
header # any headers attached to the response object