"""National Ground-Water Monitoring Network (NGWMN) getters.
The NGWMN exposes its data through a dedicated OGC API
(``https://api.waterdata.usgs.gov/ngwmn/ogcapi``) with five collections:
``sites``, ``waterLevelObs``, ``lithologyObs``, ``constructionObs``, and
``providers``. Each getter below delegates to the shared OGC engine
(:func:`~dataretrieval.ogc.engine.get_ogc_data`) with
``base_url=NGWMN_OGC_API_URL``, so multi-value chunking, pagination,
retry/resume, and result shaping all behave exactly as they do for the main
Water Data getters.
Unlike the main Water Data collections, NGWMN aggregates monitoring locations
from many agencies, so ``monitoring_location_id`` values use other agency
prefixes besides ``USGS-`` (e.g. ``MBMG-702934``, ``AKDNR-535134236016630``).
See https://api.waterdata.usgs.gov/ngwmn/ogcapi for the API reference.
"""
from __future__ import annotations
from collections.abc import Iterable
from typing import Any
import pandas as pd
from dataretrieval.codes.states import apply_state
from dataretrieval.ogc.engine import BASE_URL, OgcDialect, _get_args, get_ogc_data
from dataretrieval.utils import BaseMetadata
# Root of the NGWMN OGC API. The network is served from its own, unversioned
# path underneath the shared ``BASE_URL`` host.
NGWMN_OGC_API_URL: str = f"{BASE_URL}/ngwmn/ogcapi"
# --- state-filter shim -------------------------------------------------------
# The two state-aware NGWMN collections do NOT agree on how a state is named:
#
#   * ``sites``     filters on ``state_name`` -- the full name, e.g. "Wisconsin";
#   * ``providers`` filters on ``state``      -- the uppercase two-letter postal
#                                               code, e.g. "WI".
#
# The getters hide that split behind one ``state`` parameter that accepts any
# US-state encoding (full name, postal code, or FIPS code). ``_get`` looks the
# collection up in the table below and lets the shared
# ``codes.states.apply_state`` rewrite the value into the queryable that
# collection expects.
#
# The table exists purely to paper over this upstream asymmetry. Should the API
# ever unify the two queryables,
# ``tests/ngwmn_test.py::test_state_queryables_still_diverge_upstream`` starts
# failing, which is the cue to delete this shim.
_STATE_QUERYABLE: dict[str, dict[str, str]] = {
    # collection name -> keyword arguments for ``apply_state``:
    #   ``into`` = destination queryable, ``to`` = state format to convert to
    "sites": {"into": "state_name", "to": "name"},
    "providers": {"into": "state", "to": "postal"},
}
# Name of the feature-id column in NGWMN results. Unlike the main collections,
# which each have a service-specific id name, the NGWMN OGC API always uses the
# generic ``id``.
_NGWMN_OUTPUT_ID: str = "id"
# Dialect handed to the shared OGC engine for every NGWMN request.
#
# On the request side NGWMN behaves like the generic OGC default (it has no
# CQL2-only or date-only collections). Only the result side differs, so the
# dialect supplies NGWMN's own coercion and sort vocabulary: water-level
# observations carry their timestamp in ``sample_time`` (rather than the Water
# Data ``time`` column) and report depths/levels in feet.
NGWMN_DIALECT = OgcDialect(
    # Timestamp column(s).
    time_cols=frozenset(("sample_time",)),
    # Numeric measurement columns (all in feet).
    numerical_cols=frozenset(
        (
            "water_depth_below_land_surface_ft",
            "water_level_above_site_datum_ft",
            "water_level_above_navd88_ft",
        )
    ),
    # Sort vocabulary for shaped results.
    sort_cols=("sample_time", "monitoring_location_id"),
)
[docs]
def _get(
    service: str, local_vars: dict[str, Any]
) -> tuple[pd.DataFrame, BaseMetadata]:
    """Forward one getter's captured arguments to the shared OGC engine.

    All NGWMN getters finish with this call, which keeps the NGWMN base URL,
    the output-id column, and the NGWMN dialect configured in a single spot.

    Parameters
    ----------
    service : str
        Name of the NGWMN collection to query (e.g. ``"sites"``).
    local_vars : dict
        The calling getter's ``locals()``, i.e. its arguments by name. For
        collections listed in ``_STATE_QUERYABLE`` the mapping is first handed
        to ``apply_state``; that call's return value is not used here, so the
        mapping is expected to be updated in place.

    Returns
    -------
    pandas.DataFrame
        The engine's result table, passed through unchanged.
    BaseMetadata
        The engine's metadata object, passed through unchanged.
    """
    # Only the state-aware collections have an entry in the shim table.
    state_target = _STATE_QUERYABLE.get(service)
    if state_target is not None:
        apply_state(
            local_vars,
            into=state_target["into"],
            to=state_target["to"],
        )

    return get_ogc_data(
        _get_args(local_vars),
        service,
        output_id=_NGWMN_OUTPUT_ID,
        base_url=NGWMN_OGC_API_URL,
        dialect=NGWMN_DIALECT,
    )
[docs]
def get_sites(
    monitoring_location_id: str | Iterable[str] | None = None,
    agency_code: str | Iterable[str] | None = None,
    monitoring_location_number: str | Iterable[str] | None = None,
    altitude: str | Iterable[str] | None = None,
    national_aquifer_code: str | Iterable[str] | None = None,
    national_aquifer_description: str | Iterable[str] | None = None,
    country_code: str | Iterable[str] | None = None,
    country_name: str | Iterable[str] | None = None,
    state: str | Iterable[str] | None = None,
    county_name: str | Iterable[str] | None = None,
    aquifer_name: str | Iterable[str] | None = None,
    site_type: str | Iterable[str] | None = None,
    aquifer_type_code: str | Iterable[str] | None = None,
    qw_sys_name: str | Iterable[str] | None = None,
    qw_sn_flag: str | Iterable[str] | None = None,
    qw_baseline_flag: str | Iterable[str] | None = None,
    qw_well_chars: str | Iterable[str] | None = None,
    qw_well_type: str | Iterable[str] | None = None,
    qw_well_purpose: str | Iterable[str] | None = None,
    wl_sys_name: str | Iterable[str] | None = None,
    wl_sn_flag: str | Iterable[str] | None = None,
    wl_baseline_flag: str | Iterable[str] | None = None,
    wl_well_chars: str | Iterable[str] | None = None,
    wl_well_type: str | Iterable[str] | None = None,
    wl_well_purpose: str | Iterable[str] | None = None,
    properties: str | Iterable[str] | None = None,
    skip_geometry: bool | None = None,
    bbox: list[float] | None = None,
    limit: int | None = None,
    convert_type: bool = True,
) -> tuple[pd.DataFrame, BaseMetadata]:
    """Retrieve metadata for NGWMN monitoring locations (sites).

    A site record describes a single NGWMN monitoring location: its
    identifier, the agency responsible for it, where it is, the aquifer it
    belongs to, and whether it takes part in the network's water-quality
    (``qw_*``) and water-level (``wl_*``) sub-networks.

    Parameters
    ----------
    monitoring_location_id : str or iterable of str, optional
        Agency-qualified site identifier(s) written as ``AGENCY-ID``, for
        example ``"USGS-423114090161101"`` or ``"MBMG-702934"``.
    agency_code : str or iterable of str, optional
        Code of the agency managing the site.
    monitoring_location_number : str or iterable of str, optional
        Site number as assigned by the agency.
    altitude : str or iterable of str, optional
        Altitude of the land surface at the site.
    national_aquifer_code, national_aquifer_description : str or iterable, optional
        Code / description of the national aquifer.
    country_code, country_name : str or iterable, optional
        Filters on the country.
    state : str or iterable of str, optional
        Filter on state/territory. A full name (``"Wisconsin"``), a
        two-letter postal code (``"WI"``), and a two-digit ANSI/FIPS code
        (``"55"``) are all accepted.
    county_name : str or iterable of str, optional
        Filter on county name.
    aquifer_name, site_type, aquifer_type_code : str or iterable, optional
        Name of the aquifer, type of site, and aquifer-type code.
    qw_sys_name, qw_sn_flag, qw_baseline_flag : str or iterable, optional
        Membership flags for the water-quality sub-network.
    qw_well_chars, qw_well_type, qw_well_purpose : str or iterable, optional
        Characteristics, type, and purpose of the water-quality well.
    wl_sys_name, wl_sn_flag, wl_baseline_flag : str or iterable, optional
        Membership flags for the water-level sub-network.
    wl_well_chars, wl_well_type, wl_well_purpose : str or iterable, optional
        Characteristics, type, and purpose of the water-level well.
    properties : str or iterable of str, optional
        Columns to include in the result. With ``None`` (the default) every
        column is returned.
    skip_geometry : bool, optional
        Pass ``True`` to drop the geometry column. ``None`` (the default)
        keeps the server default, which includes geometry.
    bbox : list of float, optional
        Spatial filter given as ``[minx, miny, maxx, maxy]`` in CRS 4326.
    limit : int, optional
        Size of each page. ``next`` links are still followed until the
        result is complete.
    convert_type : bool, optional
        Coerce column dtypes when ``True`` (the default).

    Returns
    -------
    pandas.DataFrame or geopandas.GeoDataFrame
        One row of site metadata per monitoring location.
    BaseMetadata
        Metadata object holding the request URL and query time.

    Examples
    --------
    .. code::

        >>> # All NGWMN sites in Wisconsin
        >>> # state accepts a full name, postal code ("WI"), or FIPS ("55")
        >>> df, md = dataretrieval.ngwmn.get_sites(state="Wisconsin")

        >>> # Specific sites, geometry omitted
        >>> df, md = dataretrieval.ngwmn.get_sites(
        ...     monitoring_location_id=["USGS-423114090161101", "MBMG-702934"],
        ...     skip_geometry=True,
        ... )
    """
    # ``locals()`` is taken while the parameters are the only names bound in
    # this frame, so exactly the call arguments are handed to ``_get``.
    return _get(service="sites", local_vars=locals())
[docs]
def get_water_level(
    monitoring_location_id: str | Iterable[str] | None = None,
    monitoring_location_obs_number: str | Iterable[str] | None = None,
    sample_time: str | Iterable[str] | None = None,
    data_provided_by: str | Iterable[str] | None = None,
    water_depth_below_land_surface_ft: str | Iterable[str] | None = None,
    water_level_above_site_datum_ft: str | Iterable[str] | None = None,
    monitoring_location_vertical_datum: str | Iterable[str] | None = None,
    water_level_above_navd88_ft: str | Iterable[str] | None = None,
    datetime: str | Iterable[str] | None = None,
    properties: str | Iterable[str] | None = None,
    limit: int | None = None,
    convert_type: bool = True,
) -> tuple[pd.DataFrame, BaseMetadata]:
    """Retrieve NGWMN water-level observations.

    Parameters
    ----------
    monitoring_location_id : str or iterable of str, optional
        Agency-qualified site identifier(s) in ``AGENCY-ID`` form.
    monitoring_location_obs_number : str or iterable of str, optional
        Observation number within a site; narrows the request to part of a
        site's observations.
    sample_time : str or iterable of str, optional
        Sample-time value(s) that must match exactly. To filter on a time
        *range*, pass ``datetime`` instead.
    data_provided_by : str or iterable of str, optional
        Organization the observation originates from.
    water_depth_below_land_surface_ft : str or iterable, optional
        Filter on the depth-to-water value (feet below land surface).
    water_level_above_site_datum_ft : str or iterable, optional
        Filter on the water-level value (feet above the site datum).
    monitoring_location_vertical_datum : str or iterable of str, optional
        Vertical datum the reported water level refers to.
    water_level_above_navd88_ft : str or iterable, optional
        Filter on the water-level value (feet above NAVD 88).
    datetime : str or iterable of str, optional
        Temporal filter: either one instant or a two-element
        ``[start, end]`` range of ISO-8601 dates/datetimes. Use ``".."`` for
        an open end.
    properties : str or iterable of str, optional
        Columns to include in the result. With ``None`` (the default) every
        column is returned.
    limit : int, optional
        Size of each page. ``next`` links are still followed until the
        result is complete.
    convert_type : bool, optional
        Coerce column dtypes when ``True`` (the default).

    Returns
    -------
    pandas.DataFrame
        One row per water-level measurement.
    BaseMetadata
        Metadata object holding the request URL and query time.

    Examples
    --------
    .. code::

        >>> site = "USGS-272838082142201"
        >>> df, md = dataretrieval.ngwmn.get_water_level(
        ...     monitoring_location_id=site
        ... )

        >>> # Restrict to a date range
        >>> df, md = dataretrieval.ngwmn.get_water_level(
        ...     monitoring_location_id=site, datetime=["2022-01-01", "2024-01-01"]
        ... )

        >>> # Multiple sites across agencies
        >>> df, md = dataretrieval.ngwmn.get_water_level(
        ...     monitoring_location_id=["USGS-272838082142201", "MBMG-702934"]
        ... )
    """
    # ``locals()` is taken while the parameters are the only names bound in
    # this frame, so exactly the call arguments are handed to ``_get``.
    return _get(service="waterLevelObs", local_vars=locals())
[docs]
def get_lithology(
    monitoring_location_id: str | Iterable[str] | None = None,
    monitoring_location_obs_number: str | Iterable[str] | None = None,
    properties: str | Iterable[str] | None = None,
    limit: int | None = None,
    convert_type: bool = True,
) -> tuple[pd.DataFrame, BaseMetadata]:
    """Retrieve NGWMN lithology observations.

    A lithology record describes the geologic material logged at a
    monitoring location, together with its depth interval and controlled
    lithology concepts.

    Parameters
    ----------
    monitoring_location_id : str or iterable of str, optional
        Agency-qualified site identifier(s) in ``AGENCY-ID`` form.
    monitoring_location_obs_number : str or iterable of str, optional
        Observation number within a site; narrows the request to part of a
        site's records.
    properties : str or iterable of str, optional
        Columns to include in the result. With ``None`` (the default) every
        column is returned.
    limit : int, optional
        Size of each page. ``next`` links are still followed until the
        result is complete.
    convert_type : bool, optional
        Coerce column dtypes when ``True`` (the default).

    Returns
    -------
    pandas.DataFrame
        One row per logged lithology interval.
    BaseMetadata
        Metadata object holding the request URL and query time.

    Examples
    --------
    .. code::

        >>> df, md = dataretrieval.ngwmn.get_lithology(
        ...     monitoring_location_id="AKDNR-535134236016630"
        ... )
    """
    # ``locals()`` is taken while the parameters are the only names bound in
    # this frame, so exactly the call arguments are handed to ``_get``.
    return _get(service="lithologyObs", local_vars=locals())
[docs]
def get_well_construction(
    monitoring_location_id: str | Iterable[str] | None = None,
    monitoring_location_obs_number: str | Iterable[str] | None = None,
    material: str | Iterable[str] | None = None,
    properties: str | Iterable[str] | None = None,
    limit: int | None = None,
    convert_type: bool = True,
) -> tuple[pd.DataFrame, BaseMetadata]:
    """Retrieve NGWMN well-construction observations.

    A construction record describes part of a well's physical build-out
    (casing, screens, and similar elements) along with its depth interval,
    material, and diameter.

    Parameters
    ----------
    monitoring_location_id : str or iterable of str, optional
        Agency-qualified site identifier(s) in ``AGENCY-ID`` form.
    monitoring_location_obs_number : str or iterable of str, optional
        Observation number within a site; narrows the request to part of a
        site's records.
    material : str or iterable of str, optional
        Filter on construction material.
    properties : str or iterable of str, optional
        Columns to include in the result. With ``None`` (the default) every
        column is returned.
    limit : int, optional
        Size of each page. ``next`` links are still followed until the
        result is complete.
    convert_type : bool, optional
        Coerce column dtypes when ``True`` (the default).

    Returns
    -------
    pandas.DataFrame
        One row per construction element.
    BaseMetadata
        Metadata object holding the request URL and query time.

    Examples
    --------
    .. code::

        >>> df, md = dataretrieval.ngwmn.get_well_construction(
        ...     monitoring_location_id="USGS-272838082142201"
        ... )
    """
    # ``locals()`` is taken while the parameters are the only names bound in
    # this frame, so exactly the call arguments are handed to ``_get``.
    return _get(service="constructionObs", local_vars=locals())
[docs]
def get_providers(
    state: str | Iterable[str] | None = None,
    agency_code: str | Iterable[str] | None = None,
    organization_type: str | Iterable[str] | None = None,
    properties: str | Iterable[str] | None = None,
    limit: int | None = None,
    convert_type: bool = True,
) -> tuple[pd.DataFrame, BaseMetadata]:
    """Retrieve NGWMN data-provider records.

    A provider is an organization that contributes data to the network.

    Parameters
    ----------
    state : str or iterable of str, optional
        Filter on state/territory. A full name (``"Wisconsin"``), a
        two-letter postal code (``"WI"``), and a two-digit ANSI/FIPS code
        (``"55"``) are all accepted. Pass a single state only: for this
        collection a multi-value state filter returns no records.
    agency_code : str or iterable of str, optional
        Agency code of the provider.
    organization_type : str or iterable of str, optional
        Organization type of the provider, e.g. ``"NWIS"``.
    properties : str or iterable of str, optional
        Columns to include in the result. With ``None`` (the default) every
        column is returned.
    limit : int, optional
        Size of each page. ``next`` links are still followed until the
        result is complete.
    convert_type : bool, optional
        Coerce column dtypes when ``True`` (the default).

    Returns
    -------
    pandas.DataFrame
        One row per provider.
    BaseMetadata
        Metadata object holding the request URL and query time.

    Examples
    --------
    .. code::

        >>> df, md = dataretrieval.ngwmn.get_providers(state="WI")

        >>> # a full name (or FIPS code) works too
        >>> df, md = dataretrieval.ngwmn.get_providers(
        ...     organization_type="NWIS", state="Wisconsin"
        ... )
    """
    # ``locals()`` is taken while the parameters are the only names bound in
    # this frame, so exactly the call arguments are handed to ``_get``.
    return _get(service="providers", local_vars=locals())