import xml.etree.ElementTree as ET
import sys
import warnings
import numpy as np
import os
from pathlib import Path
import pandas as pd
import geopandas as gpd
import shutil
import requests
import json
import yaml
from functools import partial
import fiona
from fiona.crs import to_string, from_string
from shapely.geometry import (
Polygon,
LineString,
Point,
shape,
mapping,
MultiPolygon)
from shapely.ops import unary_union, transform
import pyproj
import math
import gisutils
import lsmaker
try:
    import matplotlib.pyplot as plt
except ImportError:
    pass
from lsmaker.diagnostics import Diagnostics
# ## Functions #############################
def add_projection(line, point):
    """Add a vertex to a line at the location of a point,
    if the closest point on the line isn't an end.

    Parameters
    ----------
    line : LineString
    point : Point

    Returns
    -------
    newline : LineString
        Line with the point added, or the original line
        if the point coincides with an end.
    """
    distance = line.project(point)
    if distance <= 0.0 or distance >= line.length:
        return line
    coords = list(line.coords)
    for i, p in enumerate(coords):
        pd_ = line.project(Point(p))  # renamed from 'pd' to avoid shadowing pandas
        if pd_ == distance:
            return line
        elif pd_ > distance:
            return LineString(coords[:i] + [(point.x, point.y)] + coords[i:])
def add_vertices_at_testpoints(lssdf, tpgeoms, tol=200):
    """Add vertices to linesink strings at the locations of testpoints
    (so that modeled flow observations are correct).

    Parameters
    ----------
    lssdf : DataFrame
        DataFrame of linesink strings. Must contain a 'geometry' column
        of shapely LineStrings defining the geometries of the linesink
        strings, in the same coordinate system as the testpoints.
    tpgeoms : list
        List of testpoint geometries.
    tol : float
        Tolerance, in coordinate system units, for considering lines
        near the testpoints.

    Returns
    -------
    geoms : list of geometries
        New geometry column with added vertices.
    """
df = lssdf.copy()
for mp in tpgeoms:
# find all lines within tolerance
nearby = np.array([l.intersects(mp.buffer(tol)) for l in df.geometry])
ldf = df[nearby].copy()
# choose closest if two or more nearby lines
if len(ldf) > 1:
ldf['dist'] = [mp.distance(ll.interpolate(ll.project(mp)))
for ll in ldf.geometry.values]
ldf.sort_values('dist', inplace=True)
# if at least one line is nearby
if len(ldf) > 0:
ind = ldf.index[0]
l = ldf.geometry.values[0]
newline = add_projection(l, mp)
df.loc[ind, 'geometry'] = newline
return df.geometry.tolist()
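# A minimal sketch of add_vertices_at_testpoints (hypothetical data):
#   >>> lss = pd.DataFrame({'geometry': [LineString([(0, 0), (10, 0)])]})
#   >>> newgeoms = add_vertices_at_testpoints(lss, [Point(4, 1)], tol=200)
#   # newgeoms[0] now includes a vertex at (4, 1)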
def get_elevations_from_epqs(points, units='feet'):
    """From a list of shapely Points in geographic coordinates (lon, lat),
    return a list of elevation values from the National Map
    Elevation Point Query Service.
    """
if len(points) > 0:
print('querying National Map Elevation Point Query Service...')
elevations = [get_elevation_from_epqs(p.x, p.y, units=units) for p in points]
else:
elevations = []
return elevations
def get_elevation_from_epqs(lon, lat, units='feet'):
    """Returns an elevation value at a point location.

    Notes
    -----
    Example URL for lon=-91, lat=45 (matching the query below):
    https://epqs.nationalmap.gov/v1/json?x=-91&y=45&wkid=4326&units=feet&includeDate=false

    Examples
    --------
    >>> get_elevation_from_epqs(-91, 45)
    1139.778277
    """
url = f'https://epqs.nationalmap.gov/v1/json?x={lon}&y={lat}&wkid=4326&units={units}&includeDate=false'
    try:
        response = requests.get(url)
        elev = json.loads(response.text)['value']
        print(lat, lon, elev, units)
    except Exception:
        e = sys.exc_info()
        print(e)
        print('Problem accessing Elevation Point Query Service. '
              'Need an internet connection to get seepage lake elevations.'
              '\nIf your internet is working, running the script again may work; '
              'sometimes the EPQS can be temperamental.')
        elev = 0.0
    try:
        elev = float(elev)
    except (TypeError, ValueError):
        print('Warning, invalid elevation of {} returned for {}, {}.\nSetting elevation to 0.'
              .format(elev, lon, lat))
        elev = 0.0
return elev
def _get_random_point_in_polygon(poly):
"""Generates a point within a polygon (for lakes where the centroid is not in the lake)"""
minx, miny, maxx, maxy = poly.bounds
while True:
p = Point(np.random.uniform(minx, maxx), np.random.uniform(miny, maxy))
if poly.contains(p):
return p
def w_parameter(B, lmbda):
    """Compute the w parameter for estimating an effective conductance term
    (i.e., when simulating lakes using linesinks instead of GFLOW's lake package).
    If only larger lakes are simulated (e.g., > 1 km2), the w parameter will equal lambda;
    see Haitjema 2005, "Dealing with Resistance to Flow into Surface Waters."
    """
if lmbda <= 0.1 * B:
w = lmbda
elif 0.1 * B < lmbda < 2 * B:
w = lmbda * np.tanh(B / (2 * lmbda))
else:
w = B / 2
return w
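# A worked sketch of the three w regimes (hypothetical values, with
# lambda = sqrt(k * H * c) = sqrt(10 * 100 * 0.3) ≈ 17.3):
#   >>> w_parameter(500, 17.3)  # lambda <= 0.1 * B, so w = lambda = 17.3
#   >>> w_parameter(50, 17.3)   # 0.1*B < lambda < 2*B, so w = lambda * tanh(B / (2 * lambda)) ≈ 15.5
#   >>> w_parameter(5, 17.3)    # lambda >= 2 * B, so w = B / 2 = 2.5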
def width_from_arboate(arbolate, lmbda):
    """Estimate stream width in feet from arbolate sum in kilometers
    (converted to meters internally), using the relationship
    described by Feinstein et al. (2010), Appendix 2, p. 266.
    """
    estwidth = 0.1193 * math.pow(1000 * arbolate, 0.5032)
    w = 2 * w_parameter(estwidth, lmbda)  # assumes the stream is represented by a single linesink
    return w
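# Example sketch (hypothetical values): a stream with a 100 km arbolate sum
# and lambda ≈ 17.3 gives an estimated width of ~39 ft, and an effective
# conductance width of roughly 28 ft:
#   >>> width_from_arboate(100, 17.3)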
def lake_width(area, total_line_length, lmbda):
    """Estimate a conductance width from lake area (km2) and the total length (km)
    of the flowlines running through it.
    """
    if total_line_length > 0:
        estwidth = 1000 * (area / total_line_length) / 0.3048  # (km2 / km) * (m/km) / (m/ft) -> ft
    else:
        estwidth = np.sqrt(area / np.pi) * 1000 / 0.3048  # km * (m/km) / (m/ft) -> ft
# see Haitjema 2005, "Dealing with Resistance to Flow into Surface Waters"
# basically if only larger lakes are simulated (e.g., > 1 km2), w parameter will be = lambda
# this assumes that GFLOW's lake package will not be used
w = w_parameter(estwidth, lmbda)
return w # feet
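# Example sketch (hypothetical values): a 2 km2 lake traversed by 5 km of
# flowlines has an estimated width of 1000 * (2 / 5) / 0.3048 ≈ 1312 ft;
# lambda ≈ 17.3 is <= 0.1 * 1312, so w_parameter returns lambda:
#   >>> lake_width(2.0, 5.0, 17.3)  # ≈ 17.3 feet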
def name(x):
    """Abbreviate names from NHDPlus for naming linesinks;
    GFLOW requires linesink names to be 32 characters or less.
    """
if x.GNIS_NAME:
# reduce name down with abbreviations
abb = {'Branch': 'Br',
'Creek': 'Crk',
'East': 'E',
'Flowage': 'Fl',
'Lake': 'L',
'North': 'N',
'Pond': 'P',
'Reservoir': 'Res',
'River': 'R',
'South': 'S',
'West': 'W',
"'": ''}
name = '{} {}'.format(x.name, x.GNIS_NAME)
for k, v in abb.items():
name = name.replace(k, v)
else:
name = '{} unnamed'.format(x.name)
return name[:32]
def uniquelist(seq):
    """Return the unique items in seq, preserving their original order."""
    seen = set()
    seen_add = seen.add
    return [x for x in seq if not (x in seen or seen_add(x))]
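# For example:
#   >>> uniquelist([3, 1, 3, 2, 1])
#   [3, 1, 2]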
def closest_vertex_ind(point, shape_coords):
    """Return the index of the closest vertex in a shapely geometry object.
    Ugly but works.
    """
crds = shape_coords
X = np.array([i[0] for i in crds])
Y = np.array([i[1] for i in crds])
dX, dY = X - point[0], Y - point[1]
closest_ind = np.argmin(np.sqrt(dX ** 2 + dY ** 2))
return closest_ind
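# For example (hypothetical coordinates; the vertex at (2, 2) is closest):
#   >>> closest_vertex_ind((2.1, 2.0), [(0, 0), (2, 2), (5, 5)])
#   1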
def move_point_along_line(x1, x2, dist):
    """Move point x2 a distance dist (along each axis) back toward point x1."""
    diff = (x2[0] - x1[0], x2[1] - x1[1])
    return tuple(x2 - dist * np.sign(diff))
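# For example:
#   >>> move_point_along_line((0, 0), (5, 5), 1.0)  # → (4.0, 4.0)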
class LinesinkData:
maxlines = 4000
int_dtype = str # np.int64
dtypes = {'Label': str,
'HeadSpecified': float,
'StartingHead': float,
'EndingHead': float,
'Resistance': float,
'Width': float,
'Depth': float,
'Routing': np.int64,
'EndStream': np.int64,
'OverlandFlow': np.int64,
'EndInflow': np.int64,
'ScenResistance': str,
'Drain': np.int64,
'ScenFluxName': str,
'Gallery': np.int64,
'TotalDischarge': np.int64,
'InletStream': np.int64,
'OutletStream': np.int64,
'OutletTable': str,
'Lake': np.int64,
'Precipitation': float,
'Evapotranspiration': float,
'Farfield': bool,
'chkScenario': bool,
'AutoSWIZC': np.int64,
'DefaultResistance': float}
fcodes = {'Perennial': 46006,
'Intermittent': 46003,
'Uncategorized': 46000}
def __init__(self, infile=None, GFLOW_lss_xml=None):
# attributes
self._lsmaker_config_file_path = None # absolute path to config file
self.preproc = None
self.resistance = None
self.H = None # aquifer thickness in model units
self.k = None # hydraulic conductivity of the aquifer in model units
self.lmbda = None
self.ScenResistance = None
self.chkScenario = None
self.global_streambed_thickness = None # streambed thickness
self.ComputationalUnits = None # 'feet' or 'meters'; for XML output file
self.BasemapUnits = None
# elevation units multiplier (from NHDPlus cm to model units)
self.zmult = None
# model domain
self.farfield = None
self.routed_area = None
self.nearfield = None
self.prj = None
self.crs = None
#self.crs_str = None # self.crs, in proj string format
self.pyproj_crs = None # pyproj.CRS instance based on prj input
self.farfield_buffer = None
self.clip_farfield = None
self.split_by_HUC = None
self.HUC_shp = None
self.HUC_name_field = None
# simplification
self.refinement_areas = [] # list of n areas within routed area with additional refinement
self.nearfield_tolerance = None
self.routed_area_tolerance = None
self.farfield_tolerance = None
self.min_nearfield_order = None
self.min_routed_area_order = None
self.min_farfield_order = None
self.min_nearfield_wb_size = None
self.min_waterbody_size = None
self.min_farfield_wb_size = None
self.farfield_length_threshold = None
self.routed_area_length_threshold = None
self.drop_intermittent = None
self.drop_crossing = None
self.asum_thresh_ra = None
self.asum_thresh_nf = None
self.asum_thresh_ff = None
# NHD files
self.flowlines = None
self.elevslope = None
self.PlusFlowVAA = None
self.waterbodies = None
# columns to retain in NHD files (when joining to GIS lines)
# Note: may need to add method to handle case discrepancies
self.flowlines_cols = ['COMID', 'FCODE', 'FDATE', 'FLOWDIR', 'FTYPE', 'GNIS_ID', 'GNIS_NAME', 'LENGTHKM',
'REACHCODE', 'RESOLUTION', 'WBAREACOMI', 'geometry']
self.flowlines_cols_dtypes = {'COMID': self.int_dtype,
'FCODE': self.int_dtype,
'FDATE': str,
'FLOWDIR': str,
'FTYPE': str,
'GNIS_ID': self.int_dtype,
'GNIS_NAME': str,
'LENGTHKM': float,
'REACHCODE': str,
'RESOLUTION': str,
'WBAREACOMI': self.int_dtype,
'geometry': object}
self.elevslope_cols = ['MINELEVSMO', 'MAXELEVSMO']
self.elevslope_dtypes = {'MINELEVSMO': float,
'MAXELEVSMO': float}
self.pfvaa_cols = ['ArbolateSu', 'Hydroseq', 'DnHydroseq', 'StreamOrde']
self.pfvaa_cols_dtypes = {'ArbolateSu': float,
'Hydroseq': self.int_dtype,
'DnHydroseq': self.int_dtype,
'StreamOrde': np.int64}
self.wb_cols = ['AREASQKM', 'COMID', 'ELEVATION', 'FCODE', 'FDATE', 'FTYPE', 'GNIS_ID', 'GNIS_NAME',
'REACHCODE', 'RESOLUTION', 'geometry']
self.wb_cols_dtypes = {'AREASQKM': float,
'COMID': self.int_dtype,
'ELEVATION': float,
'FCODE': self.int_dtype,
'FDATE': str,
'FTYPE': str,
'GNIS_ID': self.int_dtype,
'GNIS_NAME': str,
'REACHCODE': str,
'RESOLUTION': str,
'geometry': object}
# could do away with above and have one dtypes list
self.dtypes.update(self.flowlines_cols_dtypes)
self.dtypes.update(self.elevslope_dtypes)
self.dtypes.update(self.pfvaa_cols_dtypes)
self.dtypes.update(self.wb_cols_dtypes)
# preprocessed files
self.DEM = None
self.elevs_field = None
self.DEM_zmult = None
self.flowlines_clipped = None
self.waterbodies_clipped = None
self.routed_mp = None
self.farfield_mp = None
self.preprocessed_lines = None
self.preprocdir = None
self.wb_centroids_w_elevations = None # elevations extracted during preprocessing routine
self.elevs_field = None # field in wb_centroids_w_elevations containing elevations
# outputs
self.outfile_basename = None
self.error_reporting = 'error_reporting.txt'
# attributes
self.from_lss_xml = False
self.df = pd.DataFrame() # working dataframe for translating NHDPlus data to linesink strings
self.lss = pd.DataFrame() # dataframe of GFLOW linesink strings (with attributes)
self.outsegs = pd.DataFrame()
self.confluences = pd.DataFrame()
# read in the configuration file
if isinstance(infile, str) or isinstance(infile, Path):
infile = Path(infile)
if infile.name.endswith('.xml'):
self.read_lsmaker_xml(infile)
else:
for extension in 'yml', 'yaml':
if infile.name.endswith(extension):
self.set_configuration_from_yaml(infile)
elif isinstance(infile, dict):
self.set_configuration(infile)
# or create instance from a GFLOW LSS XML file
elif GFLOW_lss_xml is not None:
self.from_lss_xml = True
self.df = self.read_lss(GFLOW_lss_xml)
# nearfield can't be coarser than the routed area
if infile is not None:
self.min_nearfield_order = min((self.min_nearfield_order,
self.min_routed_area_order))
# create a pyproj CRS instance
# set the CRS (basemap) length units
self.set_crs(prjfile=self.prj)
# logging/diagnostics
# todo: more robust/detail logging
        if Path(self.error_reporting).exists():
            try:
                Path(self.error_reporting).unlink()
            except OSError:
                # leave any existing log file in place if it can't be removed
                pass
with open(self.error_reporting, 'w') as efp:
efp.write('Linesink-maker version {}\n'.format(lsmaker.__version__))
self.dg = Diagnostics(lsm_object=self, logfile=self.error_reporting)
def __eq__(self, other):
"""Test for equality to another linesink object."""
if not isinstance(other, self.__class__):
return False
exclude_attrs = ['_lsmaker_config_file_path',
'crs',
'crs_str',
'inpars',
'cfg',
'efp',
]
# LinesinkData instances from lss xml won't have some attributes
# or some df columns that came from NHDPlus
compare_df_columns = slice(None)
if self.from_lss_xml or other.from_lss_xml:
# todo: expose default values that are being set on write of lss_xml
# (many of the columns in LinesinkData instance from lss xml aren't in
# LinesinkData instance that was created from scratch, because the variables
# are only being set in LinesinkData.write_lss method)
compare_df_columns = set(self.df.columns).intersection(other.df.columns)
# the geometries and coordinates won't be exactly the same
# explicitly compare the coordinates separately
compare_df_columns = compare_df_columns.difference({'geometry',
'ls_coords',
'width'
})
for k, v in self.__dict__.items():
# items to skip
# todo: implement pyproj.CRS class to robustly compare CRSs
if k in exclude_attrs:
continue
elif self.from_lss_xml or other.from_lss_xml:
if k not in ('df', 'ComputationalUnits', 'BasemapUnits'):
continue
elif k not in other.__dict__:
return False
elif type(v) == bool:
if not v == other.__dict__[k]:
return False
elif k == 'df':
if len(v) == 0 and len(other.__dict__[k]) == 0:
continue
try:
df1 = v[compare_df_columns]
df2 = other.__dict__[k][compare_df_columns]
#
pd.testing.assert_frame_equal(df1, df2)
# compare the coordinates
for dim in 0, 1: # (x, y)
x1 = [crd[dim] for line in v.ls_coords for crd in line]
x2 = [crd[dim] for line in other.__dict__[k].ls_coords for crd in line]
assert np.allclose(x1, x2)
assert np.allclose(v.width.values, other.__dict__[k].width.values, rtol=0.01)
                except Exception:
                    return False
elif type(v) == pd.DataFrame:
try:
pd.testing.assert_frame_equal(v, other.__dict__[k])
                except Exception:
                    return False
            elif v != other.__dict__[k]:
                try:
                    if not np.allclose(v, other.__dict__[k]):
                        return False
                except (TypeError, ValueError):
                    continue
        return True
    def read_lsmaker_xml(self, infile):
        inpardat = ET.parse(infile)
        # record the config file absolute path
        self._lsmaker_config_file_path = os.path.split(os.path.abspath(infile))[0]
        inpars = inpardat.getroot()
        self.inpars = inpars
        # set up the working directory (default to the config file directory)
        try:
            self.path = inpars.findall('.//working_dir')[0].text
            if not os.path.exists(self.path):
                os.makedirs(self.path)
        except (IndexError, TypeError):
            self.path = self._lsmaker_config_file_path
# global settings
self.preproc = self.tf2flag(self._get_xml_entry('preproc', 'True'))
self.resistance = self._get_xml_entry('resistance', 0.3, float) # (days); c in documentation
self.H = self._get_xml_entry('H', 100, float) # aquifer thickness in model units
self.k = self._get_xml_entry('k', 10, float) # hydraulic conductivity of the aquifer in model units
self.lmbda = np.sqrt(self.k * self.H * self.resistance)
self.ScenResistance = self._get_xml_entry('ScenResistance', 'linesink')
self.chkScenario = self.tf2flag(self._get_xml_entry('chkScenario', 'True'))
self.global_streambed_thickness = self._get_xml_entry('global_streambed_thickness',
3, float) # streambed thickness
self.ComputationalUnits = self._get_xml_entry('ComputationalUnits', 'feet').lower() # 'feet' or 'meters'; for XML output file
self.BasemapUnits = self._get_xml_entry('BasemapUnits', None)
if self.BasemapUnits is not None:
self.BasemapUnits = self.BasemapUnits.lower()
# elevation units multiplier (from NHDPlus cm to model units)
self.zmult = 0.03280839895013123 if self.ComputationalUnits.lower() == 'feet' else 0.01
# model domain
self.farfield = self._get_xml_entry('farfield', None)
self.routed_area = self._get_xml_entry('routed_area', None)
self.nearfield = self._get_xml_entry('nearfield', None)
        try:  # get the projection file and crs from either the nearfield or routed area
            self.prj = self._get_xml_entry('prj', self.nearfield[:-4] + '.prj')
            self.crs = fiona.open(self.nearfield).crs
        except Exception:
            self.prj = self._get_xml_entry('prj', self.routed_area[:-4] + '.prj')
            self.crs = fiona.open(self.routed_area).crs
            self.nearfield = self.routed_area
self.farfield_buffer = self._get_xml_entry('farfield_buffer', 10000, int)
self.clip_farfield = self.tf2flag(self._get_xml_entry('clip_farfield', 'False'))
self.split_by_HUC = self.tf2flag(self._get_xml_entry('split_by_HUC', 'False'))
self.HUC_shp = self._get_xml_entry('HUC_shp', None)
self.HUC_name_field = self.tf2flag(self._get_xml_entry('HUC_name_field', 'False'))
# simplification
self.refinement_areas = [] # list of n areas within routed area with additional refinement
self.nearfield_tolerance = self._get_xml_entry('nearfield_tolerance', 100, int)
self.routed_area_tolerance = self._get_xml_entry('routed_area_tolerance', 100, int)
self.farfield_tolerance = self._get_xml_entry('farfield_tolerance', 300, int)
self.min_farfield_order = self._get_xml_entry('min_farfield_order', 2, int)
self.min_nearfield_order = self._get_xml_entry('min_nearfield_order', 1, int)
self.min_routed_area_order = self._get_xml_entry('min_routed_area_order', 1, int)
self.min_nearfield_wb_size = self._get_xml_entry('min_nearfield_waterbody_size', 1.0, float)
self.min_waterbody_size = self._get_xml_entry('min_waterbody_size', 1.0, float)
self.min_farfield_wb_size = self._get_xml_entry('min_farfield_waterbody_size',
self.min_waterbody_size, float)
self.farfield_length_threshold = self._get_xml_entry('farfield_length_threshold', 0., float)
self.routed_area_length_threshold = self._get_xml_entry('routed_area_length_threshold', 0., float)
self.drop_intermittent = self.tf2flag(self._get_xml_entry('drop_intermittent', 'False'))
self.drop_crossing = self.tf2flag(self._get_xml_entry('drop_crossing', 'False'))
self.asum_thresh_ra = self._get_xml_entry('routed_area_arbolate_sum_threshold', 0., float)
self.asum_thresh_nf = self._get_xml_entry('nearfield_arbolate_sum_threshold', 0., float)
self.asum_thresh_ff = self._get_xml_entry('farfield_arbolate_sum_threshold', 0., float)
# NHD files
self.flowlines = self._get_xml_entry('flowlines', [], str, raise_error=True)
self.elevslope = self._get_xml_entry('elevslope', [], str, raise_error=True)
self.PlusFlowVAA = self._get_xml_entry('PlusFlowVAA', [], str, raise_error=True)
self.waterbodies = self._get_xml_entry('waterbodies', [], str, raise_error=True)
# columns to retain in NHD files (when joining to GIS lines)
# Note: may need to add method to handle case discrepancies
self.flowlines_cols = ['COMID', 'FCODE', 'FDATE', 'FLOWDIR', 'FTYPE', 'GNIS_ID', 'GNIS_NAME', 'LENGTHKM',
'REACHCODE', 'RESOLUTION', 'WBAREACOMI', 'geometry']
self.flowlines_cols_dtypes = {'COMID': self.int_dtype,
'FCODE': self.int_dtype,
'FDATE': str,
'FLOWDIR': str,
'FTYPE': str,
'GNIS_ID': self.int_dtype,
'GNIS_NAME': str,
'LENGTHKM': float,
'REACHCODE': str,
'RESOLUTION': str,
'WBAREACOMI': self.int_dtype,
'geometry': object}
self.elevslope_cols = ['MINELEVSMO', 'MAXELEVSMO']
self.elevslope_dtypes = {'MINELEVSMO': float,
'MAXELEVSMO': float}
self.pfvaa_cols = ['ArbolateSu', 'Hydroseq', 'DnHydroseq', 'StreamOrde']
self.pfvaa_cols_dtypes = {'ArbolateSu': float,
'Hydroseq': self.int_dtype,
'DnHydroseq': self.int_dtype,
'StreamOrde': np.int64}
self.wb_cols = ['AREASQKM', 'COMID', 'ELEVATION', 'FCODE', 'FDATE', 'FTYPE', 'GNIS_ID', 'GNIS_NAME',
'REACHCODE', 'RESOLUTION', 'geometry']
self.wb_cols_dtypes = {'AREASQKM': float,
'COMID': self.int_dtype,
'ELEVATION': float,
'FCODE': self.int_dtype,
'FDATE': str,
'FTYPE': str,
'GNIS_ID': self.int_dtype,
'GNIS_NAME': str,
'REACHCODE': str,
'RESOLUTION': str,
'geometry': object}
# could do away with above and have one dtypes list
self.dtypes.update(self.flowlines_cols_dtypes)
self.dtypes.update(self.elevslope_dtypes)
self.dtypes.update(self.pfvaa_cols_dtypes)
self.dtypes.update(self.wb_cols_dtypes)
# preprocessed files
self.DEM = self._get_xml_entry('DEM', None)
self.elevs_field = self._get_xml_entry('elevs_field', 'elev')
self.DEM_zmult = self._get_xml_entry('DEM_zmult', 1.0, float)
self.flowlines_clipped = self._get_xml_entry('flowlines_clipped',
'preprocessed/flowlines_clipped.shp',
relative_filepath=True)
self.waterbodies_clipped = self._get_xml_entry('waterbodies_clipped',
'preprocessed/waterbodies_clipped.shp',
relative_filepath=True)
self.routed_mp = self._get_xml_entry('routed_area_multipolygon',
'preprocessed/ra_cutout.shp',
relative_filepath=True)
self.farfield_mp = self._get_xml_entry('farfield_multipolygon',
'preprocessed/ff_cutout.shp',
relative_filepath=True)
self.preprocessed_lines = self._get_xml_entry('preprocessed_lines',
'preprocessed/lines.shp',
relative_filepath=True)
self.preprocdir = os.path.split(self.flowlines_clipped)[0]
self.wb_centroids_w_elevations = self.waterbodies_clipped[
:-4] + '_points.shp' # elevations extracted during preprocessing routine
self.elevs_field = 'DEM' # field in wb_centroids_w_elevations containing elevations
# outputs
self.outfile_basename = os.path.join(self._lsmaker_config_file_path,
inpars.findall('.//outfile_basename')[0].text)
self.error_reporting = os.path.join(self._lsmaker_config_file_path,
inpars.findall('.//error_reporting')[0].text)
# attributes
self.df = pd.DataFrame() # working dataframe for translating NHDPlus data to linesink strings
self.lss = pd.DataFrame() # dataframe of GFLOW linesink strings (with attributes)
self.outsegs = pd.DataFrame()
self.confluences = pd.DataFrame()
    @classmethod
    def load_configuration(cls, infile):
"""Load Linesink-maker YAML configuration into a dictionary.
Parameters
----------
infile : str or pathlike
Linesink-maker configuration file in YAML format.
Returns
-------
cfg : dict
Configuration dictionary
"""
        # read in the user-specified configuration
        with open(infile) as src:
            cfg = yaml.load(src, Loader=yaml.Loader)
        # record the directory containing the config file
        # (used later to resolve relative paths)
        cfg['filename'] = os.path.split(os.path.abspath(infile))[0]
        return cfg
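    # A minimal usage sketch (assuming 'config.yml' is a valid, hypothetical
    # Linesink-maker configuration file):
    #   >>> cfg = LinesinkData.load_configuration('config.yml')
    #   >>> lsm = LinesinkData(cfg)  # equivalent to LinesinkData('config.yml')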
    def set_configuration_from_yaml(self, infile):
"""Set the Linesink-maker configuration from a YAML file.
Parameters
----------
infile : str or pathlike
Linesink-maker configuration file in YAML format.
"""
cfg = self.load_configuration(infile)
self.set_configuration(cfg)
    def set_configuration(self, cfg):
"""Set the Linesink-maker configuration from a configuration dictionary.
Parameters
----------
cfg : dict
Configuration dictionary, as produced by the
LinesinkData.set_configuration_from_yaml method.
"""
# read in the default configuration
defaults_file = os.path.join(os.path.split(__file__)[0],
'default_settings.yml')
with open(defaults_file) as src:
defaults = yaml.load(src, Loader=yaml.Loader)
self._lsmaker_config_file_path = Path(cfg['filename'])
# configuration dictionary
self.cfg = cfg
# configuration file blocks
filepaths_to_make_abs = {'outfile_basename'}
for blockname, block in defaults.items():
# entries within blocks
for key, default in defaults[blockname].items():
# try to detect files and make their paths absolute
entry = self.cfg.get(blockname, {}).get(key, default)
if isinstance(entry, str):
file_abspath = self._lsmaker_config_file_path / entry
if file_abspath.exists() or key in filepaths_to_make_abs:
entry = str(file_abspath)
elif isinstance(entry, list):
new_entry = []
for item in entry:
if isinstance(item, str):
file_abspath = self._lsmaker_config_file_path / item
if file_abspath.exists() or item in filepaths_to_make_abs:
item = str(file_abspath)
new_entry.append(item)
entry = new_entry
self.__dict__[key] = entry
# setup the working directory (default to current directory)
self.path = os.path.abspath(self.__dict__.pop('working_dir'))
self.lmbda = np.sqrt(self.k * self.H * self.resistance)
self.zmult = 0.03280839895013123 if self.ComputationalUnits.lower() == 'feet' else 0.01
# get the projection file and crs from either the nearfield(s) or routed area
if self.prj is None:
for filename in self.nearfield, self.routed_area:
# extra logic to handle multiple nearfields
fnames = []
if filename is not None:
if isinstance(filename, dict):
for fname, tol in filename.items():
fnames.append(self._lsmaker_config_file_path / fname)
elif not isinstance(filename, str):
for fname in filename:
fnames.append(fname)
else:
fnames.append(filename)
for fname in fnames:
fname = Path(fname)
                    prjfile = fname.with_suffix('.prj')
if prjfile.exists():
self.prj = prjfile
self.crs = gisutils.get_shapefile_crs(prjfile)
else:
#self.crs_str = gisutils.get_proj_str(self.prj)
self.crs = gisutils.get_shapefile_crs(self.prj)
        if self.crs is None or self.prj is None:
            msg = ("Invalid projection file or projection file not found: {}. "
                   "Specify a valid ESRI projection file under the "
                   "prj: configuration file key".format(self.prj))
            raise ValueError(msg)
# NHDPlus files
for variable in 'flowlines', 'elevslope', 'PlusFlowVAA', 'waterbodies':
filename = self.__dict__[variable]
if filename is None:
raise KeyError('Nothing specified for {} in the configuration file!'.format(variable))
elif not os.path.exists(filename):
raise ValueError('file not found: {}'.format(filename))
# preprocessed files
self.preprocdir = os.path.split(self.flowlines_clipped)[0]
self.wb_centroids_w_elevations = self.waterbodies_clipped[
:-4] + '_points.shp' # elevations extracted during preprocessing routine
self.elevs_field = 'DEM' # field in wb_centroids_w_elevations containing elevations
def _parse_xml_text(self, findall_result, dtype, relative_filepath=False):
# handle either strings or lxml Elements
txt = getattr(findall_result, 'text', findall_result)
if dtype == str:
# check if a file exists relative to config file path
# if so, change the file path to be absolute
file_abspath = os.path.join(self._lsmaker_config_file_path, txt)
if os.path.exists(file_abspath) or relative_filepath:
txt = file_abspath
return dtype(txt) if txt is not None else None
def _get_xml_entry(self, XMLentry, default, dtype=str,
relative_filepath=False,
raise_error=False):
try:
tmp = self.inpars.findall('.//{}'.format(XMLentry))
if len(tmp) == 0:
if not raise_error and not relative_filepath:
return default
elif not raise_error and relative_filepath:
tmp = [default] if not isinstance(default, list) else default
else:
raise ValueError('Nothing specified for {} in input XML file!'.format(XMLentry))
            if len(tmp) == 1:
                return self._parse_xml_text(tmp[0], dtype, relative_filepath=relative_filepath)
            # two or more entries: return a list
            return [self._parse_xml_text(s, dtype, relative_filepath=relative_filepath)
                    for s in tmp]
        except Exception:
            if not raise_error:
                return default
            else:
                raise ValueError('Nothing specified for {} in input XML file!'.format(XMLentry))
def _enforce_dtypes(self, df):
"""Ensure that dataframe column dtypes are correct."""
for c in df.columns:
dtype = self.dtypes.get(c, df[c].dtype)
try:
df[c] = df[c].astype(dtype)
            except (ValueError, TypeError):
                # handle NoneType values in some NHDPlus fields
                continue
def _get_wb_elevations(self, wb_df):
# (the centroids of some lakes might not be in the lake itself)
wb_points = [wb.centroid if wb.centroid.within(wb)
else _get_random_point_in_polygon(wb)
for wb in wb_df.geometry.tolist()]
# reproject the representative points into lat lon
wb_points_ll = gisutils.project(wb_points, self.crs, "epsg:4269")
wb_elevations = get_elevations_from_epqs(wb_points_ll, units=self.ComputationalUnits)
# add in elevations from waterbodies in stream network
return pd.DataFrame({'COMID': wb_df.COMID.values,
self.elevs_field: wb_elevations,
'geometry': wb_points})
    def load_poly(self, shapefile, dest_crs=None):
"""Load a polygon shapefile and return a multipolygon
of multiple features, or single polygon of one feature,
projected in the destination coordinate system."""
if dest_crs is None:
dest_crs = self.crs
else:
dest_crs = gisutils.get_authority_crs(dest_crs)
print('reading {}...'.format(shapefile))
gdf = gpd.read_file(shapefile)
if gdf.crs != dest_crs:
gdf.to_crs(dest_crs, inplace=True)
if len(gdf) > 1:
return MultiPolygon(gdf.geometry.tolist())
return gdf.geometry.values[0]
    def tf2flag(self, intxt):
        """Convert text written in the XML file to a True or False flag."""
if intxt.lower() == 'true':
return True
elif intxt.lower() == 'none':
return
else:
return False
    def set_crs(self, epsg=None, proj_str=None, prjfile=None):
        """Set the projected coordinate reference system, and the BasemapUnits.
        If no arguments are supplied, default to the existing BasemapUnits.

        Parameters
        ----------
        epsg : int, optional
            EPSG code identifying the Coordinate Reference System (CRS)
            for the features in df.geometry.
        proj_str : str, optional
            proj string identifying the CRS for the features in df.geometry.
        prjfile : str, optional
            File path to a projection (.prj) file identifying the CRS
            for the features in df.geometry.

        Returns
        -------
        None; sets the LinesinkData.pyproj_crs and BasemapUnits attributes.
        """
        args = any(arg is not None for arg in (epsg, proj_str, prjfile))
pyproj_crs = None
if self.BasemapUnits is None or args:
if epsg is not None:
pyproj_crs = pyproj.CRS.from_epsg(epsg)
elif proj_str is not None:
pyproj_crs = pyproj.CRS.from_string(proj_str)
elif prjfile is not None:
with open(prjfile) as src:
wkt = src.read()
pyproj_crs = pyproj.CRS.from_wkt(wkt)
# if possible, have pyproj try to find the closest
# authority name and code matching the crs
# so that input from epsg codes, proj strings, and prjfiles
# results in equal pyproj_crs instances
if pyproj_crs is not None:
            try:
                authority = pyproj_crs.to_authority()
                if authority is not None:
                    self.pyproj_crs = pyproj.CRS.from_user_input(authority)
                else:
                    self.pyproj_crs = pyproj_crs
            except Exception:
                # fall back to the un-normalized CRS if the authority lookup fails
                self.pyproj_crs = pyproj_crs
units = pyproj_crs.axis_info[0].unit_name
translate_units = {'metre': 'meters'}
self.BasemapUnits = translate_units.get(units, units)
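    # A usage sketch (EPSG 26915, UTM zone 15N, is a hypothetical choice):
    #   >>> lsm.set_crs(epsg=26915)
    #   >>> lsm.BasemapUnits
    #   'meters'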
    def preprocess(self, save=True):
        """
        Associate the attribute information in the NHDPlus PlusFlowVAA and Elevslope tables, and
        the model domain configuration (nearfield, farfield, and any other polygon areas), with the NHDPlus
        Flowlines and Waterbodies datasets. The following edits are made to the flowlines and waterbodies:
        * remove farfield streams lower than <min_farfield_order>
        * remove waterbodies that aren't lakes, and lakes smaller than <min_waterbody_size>
        * convert lakes from polygons to lines; merge the lakes with the flowlines

        Parameters
        ----------
        save : True/False
            Save the preprocessed dataset to a shapefile specified by <preprocessed_lines> in the XML input file.
        """
if self.routed_area is None:
self.routed_area = self.nearfield
            if not isinstance(self.nearfield, (str, Path)):
                raise ValueError("Multiple nearfields option requires specification of an enclosing routed area.")
self.routed_area_tolerance = self.nearfield_tolerance
if self.nearfield is None and self.routed_area is None:
raise InputFileMissing('Need to supply shapefile of routed area or nearfield.')
if self.farfield is None:
print(('\nNo farfield shapefile supplied.\n'
'Creating farfield using buffer of {:.1f} {} around routed area.\n'
.format(self.farfield_buffer, self.BasemapUnits)))
modelareafile = fiona.open(self.routed_area)
nfarea = shape(modelareafile[0]['geometry'])
modelarea_farfield = nfarea.buffer(self.farfield_buffer)
self.farfield = self.routed_area[:-4] + '_ff.shp'
output = fiona.open(self.farfield, 'w',
crs=modelareafile.crs,
schema=modelareafile.schema,
driver=modelareafile.driver)
output.write({'properties': modelareafile[0]['properties'],
'geometry': mapping(modelarea_farfield)})
output.close()
# make the output directory if it doesn't exist yet
if len(self.preprocdir) > 0 and not os.path.isdir(self.preprocdir):
os.makedirs(self.preprocdir)
# make projection file that is independent of any shapefile
if os.path.exists('GFLOW.prj'):
os.remove('GFLOW.prj')
if self.prj is not None and os.path.exists(self.prj):
shutil.copy(self.prj, 'GFLOW.prj')
self.prj = 'GFLOW.prj'
print('clipping and reprojecting input datasets...')
# (zero buffers can clean up self-intersections in hand drawn polygons)
if isinstance(self.nearfield, dict):
gdfs = []
for fname, tol in self.nearfield.items():
gdf = gpd.read_file(self._lsmaker_config_file_path / fname)
gdf = gpd.GeoDataFrame(gdf['geometry']).to_crs(self.crs)
gdf['geometry'] = gdf['geometry'].buffer(0)
gdf['filename'] = fname
gdf['tol'] = tol
if not gdf['geometry'].is_valid.all():
raise ValueError(f"Invalid nearfield polygon(s): {fname}")
gdfs.append(gdf)
self.nf = pd.concat(gdfs)
        elif not isinstance(self.nearfield, (str, Path)):
gdfs = []
for fname in self.nearfield:
gdf = gpd.read_file(self._lsmaker_config_file_path / fname)
gdf = gpd.GeoDataFrame(gdf['geometry']).to_crs(self.crs)
gdf['geometry'] = gdf['geometry'].buffer(0)
gdf['filename'] = fname
if not gdf['geometry'].is_valid.all():
raise ValueError(f"Invalid nearfield polygon(s): {fname}")
gdfs.append(gdf)
self.nf = pd.concat(gdfs)
if self.nearfield_tolerance is None:
raise ValueError(
"Specification of multiple nearfields in a "
"list requires a separate 'nearfield_tolerance' entry. ")
self.nf['tol'] = self.nearfield_tolerance
else:
self.nf = gpd.read_file(self.nearfield)
self.nf['geometry'] = self.nf['geometry'].buffer(0)
self.nf.to_crs(self.crs, inplace=True)
if not self.nf.geometry.is_valid.all():
raise ValueError(f"Invalid nearfield polygon(s): {self.nearfield}")
if self.nearfield_tolerance is None:
raise ValueError(
"No 'nearfield_tolerance' entry")
self.nf['tol'] = self.nearfield_tolerance
self.ra = self.load_poly(self.routed_area).buffer(0)
self.ff = self.load_poly(self.farfield).buffer(0)
for p in [self.ra, self.ff]:
assert p.is_valid, 'Invalid polygon'
for attr, shapefiles in list({'fl': self.flowlines,
'wb': self.waterbodies}.items()):
# all flowlines and waterbodies must be in same coordinate system
# sfrmaker preproc also uses crs from first file in list
if isinstance(shapefiles, list):
shapefile = shapefiles[0]
else:
shapefile = shapefiles
shp_crs = gisutils.get_shapefile_crs(shapefile)
            # get the bounding box of the model area in the NHD crs, to speed reading in data via a filter
bounds = gisutils.project([self.ff], self.crs, shp_crs)
bounds = bounds[0].bounds
self.__dict__[attr] = gisutils.shp2df(shapefiles, filter=bounds)
            # if the NHD features are not in the model area coordinate system, reproject
if shp_crs != self.crs:
self.__dict__[attr]['geometry'] = gisutils.project(self.__dict__[attr].geometry.tolist(), shp_crs,
self.crs)
# for now, take intersection (don't truncate flowlines at farfield boundary)
print('clipping to {}...'.format(self.farfield))
intersects = np.array([l.intersects(self.ff) for l in self.__dict__[attr].geometry])
self.__dict__[attr] = self.__dict__[attr][intersects].copy()
if self.clip_farfield:
print('truncating waterbodies at farfield boundary...')
            # get rid of any islands in the process
            wbgeoms = [Polygon(g.exterior).buffer(0)
                       for g in self.wb.geometry]
wbgeoms = [p.intersection(self.ff).buffer(0) for p in wbgeoms]
self.wb['geometry'] = wbgeoms
# for multipolygons, retain largest part
geoms = self.wb.geometry.values
            for i, g in enumerate(geoms):
                if g.geom_type == 'MultiPolygon':
                    geoms[i] = max(g.geoms, key=lambda p: p.area)
self.wb['geometry'] = geoms
# for now, write out preprocessed data to maintain structure with arcpy
for df in ['fl', 'wb']:
if len(self.__dict__[df]) == 0:
raise EmptyDataFrame()
gisutils.df2shp(self.fl, self.flowlines_clipped, crs=self.crs)
gisutils.df2shp(self.wb, self.waterbodies_clipped, crs=self.crs)
print('\nmaking donut polygon of farfield (with routed area removed)...')
ff_donut_poly = self.ff.difference(self.ra)
gdf = gpd.GeoDataFrame({'id': [0], 'geometry': ff_donut_poly}, crs=self.crs)
gdf.to_file(self.farfield_mp, index=False)
print(('wrote {}'.format(self.farfield_mp)))
if self.routed_area is not None and self.routed_area != self.nearfield:
print('\nmaking donut polygon of routed area (with nearfield area removed)...')
if not self.nf.within(self.ra).all():
raise ValueError('Nearfield area must be within routed area!')
# first convert nearfield area(s) to a single MultiPolygon
# (because we only want one 'routed area' Polygon
# with the nearfield hole(s) cut out)
nearfield_mp = MultiPolygon(self.nf.geometry.tolist())
donut = gpd.GeoDataFrame({
'geometry': [self.ra.difference(nearfield_mp)]},
crs=self.crs)
donut.to_file(self.routed_mp, index=False)
routed_area_poly = donut['geometry'].values[0]
print(('wrote {}'.format(self.routed_mp)))
else:
routed_area_poly = self.nf.geometry[0]
# drop waterbodies that aren't lakes bigger than min size
min_size = np.min([self.min_nearfield_wb_size, self.min_waterbody_size, self.min_farfield_wb_size])
self.wb = self.wb.loc[(self.wb.FTYPE == 'LakePond') & (self.wb.AREASQKM > min_size)].copy()
print('\ngetting elevations for waterbodies not in the stream network')
isolated_wb = self.wb
isolated_wb_comids = isolated_wb.COMID.tolist()
if not os.path.exists(self.wb_centroids_w_elevations):
            # get elevations for all waterbodies for the time being;
            # otherwise, waterbodies associated with first-order streams may be dropped from the farfield
            # but remain in the waterbodies list, causing a key error in the lakes setup.
            # these lakes probably should be left in the farfield unless they are explicitly not wanted
wb_points_df = self._get_wb_elevations(isolated_wb)
gisutils.df2shp(wb_points_df, self.wb_centroids_w_elevations, crs=self.crs)
else:
print('\nreading elevations from {}...'.format(self.wb_centroids_w_elevations))
wb_points_df = gisutils.shp2df(self.wb_centroids_w_elevations)
toget = set(isolated_wb_comids).difference(set(wb_points_df.COMID.tolist()))
if len(toget) > 0:
isolated_wb = self.wb.loc[self.wb.COMID.isin(toget)].copy()
wb_points_df2 = self._get_wb_elevations(isolated_wb)
wb_points_df = pd.concat([wb_points_df, wb_points_df2])
gisutils.df2shp(wb_points_df, self.wb_centroids_w_elevations, crs=self.crs)
# open error reporting file
with open(self.error_reporting, 'a') as efp:
efp.write('\nPreprocessing...\n')
print('\nAssembling input...')
# read linework shapefile into pandas dataframe
df = gisutils.shp2df(self.flowlines_clipped, index='COMID').drop_duplicates('COMID')
df.index = df.index.astype(float).astype(int).astype(self.int_dtype)
df.drop([c for c in df.columns if c.lower() not in [cc.lower() for cc in self.flowlines_cols]],
axis=1, inplace=True)
# might want to consider enforcing integer index here if strings cause problems
elevs = gisutils.shp2df(self.elevslope, index='COMID')
# cast index to str or int (int_dtype), from float, int
# or string representation of float or int
elevs.index = elevs.index.astype(float).astype(int).astype(self.int_dtype)
        if not any(elevs.index.isin(df.index)):
            raise ValueError(f"No COMIDs in the {self.elevslope} attribute table match any flowlines!\n"
                             "Check that the datatypes in the two files are consistent, "
                             "and that the files are for the same area.")
elevs = elevs.loc[elevs.index.isin(df.index)]
pfvaa = gisutils.shp2df(self.PlusFlowVAA, index='COMID') #, clipto=clipto)
pfvaa.index = pfvaa.index.astype(float).astype(int).astype(self.int_dtype)
        if not any(pfvaa.index.isin(df.index)):
            raise ValueError(f"No COMIDs in the {self.PlusFlowVAA} attribute table match any flowlines!\n"
                             "Check that the datatypes in the two files are consistent, "
                             "and that the files are for the same area.")
pfvaa = pfvaa.loc[pfvaa.index.isin(df.index)]
wbs = gisutils.shp2df(self.waterbodies_clipped, index='COMID').drop_duplicates('COMID')
wbs.index = wbs.index.astype(float).astype(int).astype(self.int_dtype)
wbs.drop([c for c in wbs.columns if c.lower() not in [cc.lower() for cc in self.wb_cols]],
axis=1, inplace=True)
self._enforce_dtypes(wbs)
# check for MultiLineStrings / MultiPolygons and drop them (these are features that were fragmented by the boundaries)
        mls = [i for i in df.index if 'Multi' in df.loc[i, 'geometry'].geom_type]
        df = df.drop(mls, axis=0)
        mps = [i for i in wbs.index if 'Multi' in wbs.loc[i, 'geometry'].geom_type]
        wbs = wbs.drop(mps, axis=0)
wbs = gpd.GeoDataFrame(wbs, crs=self.crs)
# join NHD tables to lines
df = df.join(elevs[self.elevslope_cols], how='inner', lsuffix='1')
df = df.join(pfvaa[self.pfvaa_cols], how='inner', lsuffix='1')
self._enforce_dtypes(df)
df = gpd.GeoDataFrame(df, crs=self.crs)
        # routed_area_poly is the routed area, excluding any hole(s)
        # for one or more nearfield areas
print('\nidentifying farfield and nearfield LinesinkData...')
df['farfield'] = [line.intersects(ff_donut_poly) and not line.intersects(routed_area_poly) for line in df.geometry]
wbs['farfield'] = [poly.intersects(ff_donut_poly) and not poly.intersects(routed_area_poly) for poly in wbs.geometry]
df['routed'] = [line.intersects(routed_area_poly) for line in df.geometry]
wbs['routed'] = [poly.intersects(routed_area_poly) for poly in wbs.geometry]
df = df.sjoin(self.nf[['tol', 'geometry']], how='left', predicate='intersects')
# non NaN entries in the 'index_right' column indicate intersection
# with those nearfield polygons (denoted by their index numbers)
df['nearfield'] = ~df['index_right'].isna()
wbs = wbs.sjoin(self.nf[['tol', 'geometry']], how='left', predicate='intersects')
# non NaN entries in the 'index_right' column indicate intersection
# with those nearfield polygons (denoted by their index numbers)
wbs['nearfield'] = ~wbs['index_right'].isna()
# drop any duplicates caused by lines intersecting more than one nearfield
df = df.loc[~df.index.duplicated(keep='last')].copy()
wbs = wbs.loc[~wbs.index.duplicated(keep='last')].copy()
# assign farfield and routed area tolerances
df.loc[df['farfield'], 'tol'] = self.farfield_tolerance
df.loc[df['routed'], 'tol'] = self.routed_area_tolerance
wbs.loc[wbs['farfield'], 'tol'] = self.farfield_tolerance
wbs.loc[wbs['routed'], 'tol'] = self.routed_area_tolerance
        assert not df['tol'].isna().any(), \
            "Something went wrong; some lines don't have a simplification tolerance"
        assert not wbs['tol'].isna().any(), \
            "Something went wrong; some waterbodies don't have a simplification tolerance"
if self.asum_thresh_ra > 0.:
print('\nremoving streams in routed area with arbolate sums < {:.2f}'.format(self.asum_thresh_ra))
# retain all streams in the farfield or in the routed area with arbolate sum > threshold
criteria = ~df.routed.values | df.routed.values & (df.ArbolateSu.values > self.asum_thresh_ra)
df = df.loc[criteria].copy()
if self.asum_thresh_nf > 0.:
print('\nremoving streams in nearfield with arbolate sums < {:.2f}'.format(self.asum_thresh_nf))
criteria = ~df.nearfield.values | df.nearfield.values & (df.ArbolateSu.values > self.asum_thresh_nf)
df = df.loc[criteria].copy()
if self.asum_thresh_ff > 0.:
print('\nremoving streams in farfield with arbolate sums < {:.2f}'.format(self.asum_thresh_ff))
criteria = ~df.farfield.values | df.farfield.values & (df.ArbolateSu.values > self.asum_thresh_ff)
df = df.loc[criteria].copy()
if self.min_nearfield_order > 1.:
print('removing nearfield streams lower than {} order...'.format(self.min_nearfield_order))
            # retain all streams in the nearfield of order >= min_nearfield_order
criteria = ~df.nearfield.values | (df.nearfield.values & (df.StreamOrde.values >= self.min_nearfield_order))
df = df[criteria].copy()
if self.min_routed_area_order > 1.:
            print('removing streams in the routed area lower than {} order...'.format(self.min_routed_area_order))
            # retain all streams in the routed area (outside the nearfield) of order >= min_routed_area_order
routed = df.routed.values & ~df.nearfield.values
criteria = ~routed | (routed & (df.StreamOrde.values >= self.min_routed_area_order))
df = df[criteria].copy()
if self.min_farfield_order > 1.:
print('removing farfield streams lower than {} order...'.format(self.min_farfield_order))
            # retain all streams outside the farfield, or farfield streams of order >= min_farfield_order
criteria = ~df.farfield.values | (df.farfield.values & (df.StreamOrde.values >= self.min_farfield_order))
df = df[criteria].copy()
farfield_retain = ~(df.farfield.values & (df.FCODE == self.fcodes['Intermittent']).values) # drop intermittent streams from farfield
if self.drop_intermittent:
print('removing intermittent streams from routed area outside of nearfield...')
            # also drop intermittent streams from the routed area outside of the nearfield
farfield_retain = farfield_retain & ~(df.routed.values & ~df.nearfield.values & (df.FCODE == self.fcodes['Intermittent']).values)
df = df[farfield_retain].copy()
print('dropping waterbodies from routed area that are not lakes larger than {}...'.format(self.min_waterbody_size))
nearfield_wbs = wbs.nearfield.values & (wbs.AREASQKM > self.min_nearfield_wb_size) & (wbs.FTYPE == 'LakePond')
routedarea_wbs = wbs.routed.values & (wbs.AREASQKM > self.min_waterbody_size) & (wbs.FTYPE == 'LakePond')
farfield_wbs = wbs.farfield.values & (wbs.AREASQKM > self.min_farfield_wb_size) & (wbs.FTYPE == 'LakePond')
        print('dropping waterbodies from the nearfield that are not lakes larger than {}...\n'
              'dropping waterbodies from the farfield that are not lakes larger than {}...'
              .format(self.min_nearfield_wb_size, self.min_farfield_wb_size))
wbs = wbs[nearfield_wbs | routedarea_wbs | farfield_wbs]
print('merging waterbodies with coincident boundaries...')
dropped = []
for wb_comid in wbs.index:
            # skip waterbodies that have already been merged
if wb_comid in dropped:
continue
wb_geometry = wbs.geometry[wb_comid]
overlapping = wbs.loc[[wb_geometry.intersects(r) for r in wbs.geometry]]
basering_comid = overlapping.sort_values('FTYPE').index[0] # sort to prioritize features with names
# two or more shapes in overlapping signifies a coincident boundary
if len(overlapping) > 1:
merged = unary_union([r for r in overlapping.geometry])
# multipolygons will result if the two polygons only have a single point in common
                if merged.geom_type == 'MultiPolygon':
continue
wbs.loc[basering_comid, 'geometry'] = merged
todrop = [wbc for wbc in overlapping.index if wbc != basering_comid]
dropped += todrop
wbs = wbs.drop(todrop, axis=0) # only keep merged feature; drop others from index
# replace references to dropped waterbody in lines
df.loc[df.WBAREACOMI.isin(todrop), 'WBAREACOMI'] = basering_comid
# swap out polygons in lake geometry column with the linear rings that make up their exteriors
print('converting lake exterior polygons to lines...')
wbs['geometry'] = [LineString(g.exterior) for g in wbs.geometry]
        wbs['waterbody'] = np.array([True] * len(wbs), dtype=bool)  # flag indicating whether the feature is a waterbody
print('merging flowline and waterbody datasets...')
df['waterbody'] = np.array([False] * len(df), dtype=bool)
df = pd.concat([df, wbs])
df.COMID = df.index
print('\nDone with preprocessing.')
if save:
gisutils.df2shp(df, self.preprocessed_lines, crs=self.crs)
self.df = df
    def simplify_lines(self, nearfield_tolerance=None, routed_area_tolerance=None, farfield_tolerance=None,
                       nearfield_refinement={}):
        """Reduce the number of vertices in the GIS linework representing streams and lakes,
        to within specified tolerances. The tolerance values represent the maximum distance,
        in coordinate system units, that the simplified feature can deviate from the original feature.

        Parameters
        ----------
        nearfield_tolerance : float
            Tolerance for the area representing the model nearfield.
        routed_area_tolerance : float
            Tolerance for the routed area surrounding the model nearfield.
        farfield_tolerance : float
            Tolerance for the area representing the model farfield.

        Returns
        -------
        df : DataFrame
            A copy of the df attribute with a 'ls_geom' column of simplified geometries, and
            a 'ls_coords' column containing lists of coordinate tuples defining each simplified line.
        """
if not hasattr(self, 'df'):
print('No dataframe attribute for LinesinkData instance. Run preprocess first.')
return
if nearfield_tolerance is None:
nearfield_tolerance = self.nearfield_tolerance
routed_area_tolerance = self.routed_area_tolerance
farfield_tolerance = self.farfield_tolerance
print('simplifying NHD linework geometries...')
# simplify line and waterbody geometries
#(see http://toblerity.org/shapely/manual.html)
df = self.df[['farfield', 'routed', 'nearfield', 'tol', 'geometry']].copy()
ls_geom = np.array([LineString()] * len(df), dtype=object)
tols = {'nf': nearfield_tolerance,
'ra': routed_area_tolerance,
'ff': farfield_tolerance}
simplification = {'nf': df.nearfield.values,
'ra': df.routed.values & ~df.nearfield.values & ~df.farfield.values,
'ff': df.farfield.values}
        for k, v in tols.items():
            if v is None:
                simplification.pop(k)
for k, within in simplification.items():
# simplify the LinesinkData in the domain; add simplified geometries to global geometry column
# assign geometries to numpy array first and then to df (had trouble assigning with pandas)
ls_geom[within] = [g.simplify(tols[k]) for g in df.loc[within, 'geometry'].tolist()]
df['ls_geom'] = ls_geom
# add columns for additional nearfield refinement areas
for shp in list(nearfield_refinement.keys()):
if shp not in self.refinement_areas:
self.refinement_areas.append(shp)
area_name = os.path.split(shp)[-1][:-4]
with fiona.open(shp) as src:
poly = shape(next(iter(src))['geometry'])
df[area_name] = [g.intersects(poly) for g in df.geometry]
# add column of lists, containing linesink coordinates
df['ls_coords'] = [list(g.coords) for g in df.ls_geom]
        assert np.all(np.array([len(c) for c in df.ls_coords]) > 0)  # there shouldn't be any empty coordinate lists
return df
    def prototype(self, nftol=[10, 50, 100, 200, 500], fftol=500):
        """Compare multiple simplification distance tolerance values for the model nearfield.

        Parameters
        ----------
        nftol : list
            Nearfield tolerance values to be compared.
        fftol : numeric
            Single tolerance value to be used for the farfield in all comparisons.

        Returns
        -------
        Writes a shapefile of the simplified linework for each tolerance value to a new
        directory called "prototypes", along with a plot of the number of vertices vs. tolerance.
        """
if not os.path.isdir('prototypes'):
os.makedirs('prototypes')
        if isinstance(fftol, (float, int)):
            fftol = [fftol] * len(nftol)
        nvertices = []
        for i, tol in enumerate(nftol):
            df = self.simplify_lines(nearfield_tolerance=tol, farfield_tolerance=fftol[i])
            # count the total number of vertices retained at this tolerance
            nvertices.append(np.sum([len(c) for c in df.ls_coords]))
            # make a shapefile of the simplified lines with nearfield_tol=tol
            df.drop(['ls_coords', 'geometry'], axis=1, inplace=True)
            outshp = 'prototypes/' + self.outfile_basename + '_dis_tol_{}.shp'.format(tol)
            gisutils.df2shp(df, outshp, geo_column='ls_geom', crs=self.crs)
        plt.figure()
        plt.plot(nftol, nvertices)
        plt.xlabel('Distance tolerance')
        plt.ylabel('Number of vertices')
        plt.savefig(self.outfile_basename + 'tol_vs_nlines.pdf')
    def adjust_zero_gradient(self, df, increment=0.01):
dg = self.dg
dg.df = df
comids0 = dg.check4zero_gradient(log=False)
if len(comids0) > 0:
if len(self.confluences) == 0:
self.df = df
self.map_confluences()
if len(self.outsegs) == 0:
self.df = df
self.map_outsegs()
with open(self.error_reporting, 'a') as efp:
efp.write('\nzero-gradient adjustments:\n')
efp.write('comid, old_elevmax, old_elevmin, new_elevmax, new_elevmin, downcomid\n')
print("adjusting elevations for comids with zero-gradient...")
for comid in comids0:
outsegs = [o for o in self.outsegs.loc[comid].values if int(o) > 0]
for i, o in enumerate(outsegs):
if i == len(outsegs) - 1:
oo = 0
else:
oo = outsegs[i + 1]
minElev, maxElev = df.loc[o, 'minElev'], df.loc[o, 'maxElev']
# test if current segment has flat or negative gradient
if minElev >= maxElev:
minElev = maxElev - increment
df.loc[o, 'minElev'] = minElev
                        efp.write('{}, {:.2f}, {:.2f}, {:.2f}, {:.2f}, {}\n'.format(o, maxElev,
                                                                                    minElev + increment,
                                                                                    maxElev,
                                                                                    minElev, oo))
# test if next segment is now higher
                        if int(oo) > 0 and df.loc[oo, 'maxElev'] > minElev:
                            efp.write('{}, {:.2f}, {:.2f}, {:.2f}, {:.2f}, {}\n'.format(outsegs[i + 1],
                                                                                        df.loc[outsegs[i + 1], 'maxElev'],
                                                                                        df.loc[outsegs[i + 1], 'minElev'],
                                                                                        minElev,
                                                                                        df.loc[outsegs[i + 1], 'minElev'],
                                                                                        oo))
                            df.loc[oo, 'maxElev'] = minElev
else:
break
# check again for zero-gradient lines
dg.df = df
comids0 = dg.check4zero_gradient(log=False)
            if len(comids0) > 0:
                for c in comids0:
                    efp.write('{}\n'.format(c))
                print("\nWarning! The following comids had zero gradients:\n{}".format(comids0))
                print("Routing for these was turned off. Elevations must be fixed manually.\n"
                      "See {}".format(self.error_reporting))
return df
    def drop_crossing_lines(self, df):
        # note: this currently only identifies crossing lines; the result is unused
        dg = self.dg
        crosses = dg.check4crossing_lines()
    def drop_duplicates(self, df):
# loops or braids in NHD linework can result in duplicate lines after simplification
# create column of line coordinates converted to strings
df['ls_coords_str'] = [''.join(map(str, coords)) for coords in df.ls_coords]
# identify duplicates; make common set of up and down comids for duplicates
duplicates = np.unique(df.loc[df.duplicated('ls_coords_str'), 'ls_coords_str'])
for dup in duplicates:
alld = df[df.ls_coords_str == dup]
upcomids = []
dncomid = []
for i, r in alld.iterrows():
upcomids += r.upcomids
dncomid += r.dncomid
upcomids = list(set(upcomids).difference({0, '0'}))
dncomid = list(set(dncomid).difference({0, '0'}))
keep_comid = alld.index[0]
df.at[keep_comid, 'upcomids'] = upcomids
df.at[keep_comid, 'dncomid'] = dncomid
for u in upcomids:
df.at[u, 'dncomid'] = [keep_comid]
for d in dncomid:
upids = set(df.loc[d, 'upcomids']).difference(set(alld.index[1:]))
upids.add(alld.index[0])
df.at[d, 'upcomids'] = list(upids)
df.drop(alld.index[1:], axis=0, inplace=True)
        # note: dropping the duplicate lines above may cause problems if multiple braids are routed to
df = df.drop('ls_coords_str', axis=1)
return df
    def setup_linesink_lakes(self, df):
# read in elevations for NHD waterbodies (from preprocessing routine; needed for isolated lakes)
wb_elevs = gisutils.shp2df(self.wb_centroids_w_elevations,
index='COMID', index_dtype=self.int_dtype).drop_duplicates('COMID')
self._enforce_dtypes(wb_elevs)
wb_elevs = wb_elevs[self.elevs_field] * self.DEM_zmult
# identify lines that represent lakes
# get elevations, up/downcomids, and total lengths for those lines
# assign attributes to lakes, then drop the lines
df['total_line_length'] = 0 # field to store total shoreline length of lakes
for wb_comid in self.wblist:
lines = df[df['WBAREACOMI'] == wb_comid]
upcomids = []
dncomids = []
# isolated lakes have no overlapping lines and no routing
if len(lines) == 0:
df.loc[wb_comid, 'maxElev'] = wb_elevs[wb_comid]
df.loc[wb_comid, 'minElev'] = wb_elevs[wb_comid] - 0.01
df.loc[wb_comid, 'routing'] = 0
else:
df.loc[wb_comid, 'minElev'] = np.min(lines.minElev)
df.loc[wb_comid, 'maxElev'] = np.min(lines.maxElev)
# get upcomids and downcomid for lake,
# by differencing all up/down comids for lines in lake, and comids in the lake
upcomids = list(set([c for l in lines.upcomids for c in l]) - set(lines.index))
dncomids = list(set([c for l in lines.dncomid for c in l]) - set(lines.index))
# .at sets an entry for a single row/column location in a DataFrame
# (to avoid confusion over specifying a single row but supplying a sequence)
df.at[wb_comid, 'upcomids'] = upcomids
df.at[wb_comid, 'dncomid'] = dncomids
# up/down comid entries should all be lists at this point
assert all(isinstance(comid, list) for comid in df.upcomids)
assert all(isinstance(comid, list) for comid in df.dncomid)
# make the lake the down-comid for the upcomids of the lake
# (instead of the lines that represented the lake in the flowlines dataset)
# do the same for the down-comid of the lake
for u in [u for u in upcomids if int(u) > 0]: # exclude outlets
df.at[u, 'dncomid'] = [wb_comid]
for d in [d for d in dncomids if int(d) > 0]:
df.at[d, 'upcomids'] = [wb_comid]
assert all(isinstance(comid, list) for comid in df.upcomids)
assert all(isinstance(comid, list) for comid in df.dncomid)
# get total length of lines representing lake (used later to estimate width)
df.loc[wb_comid, 'total_line_length'] = np.sum(lines.LENGTHKM)
# enforce a nonzero gradient in lakes (routed or not);
# update elevations in downstream comids
if df.loc[wb_comid, 'minElev'] == df.loc[wb_comid, 'maxElev']:
df.loc[wb_comid, 'minElev'] -= 0.01
dnids = df.loc[wb_comid, 'dncomid']
for dnid in [d for d in dnids if int(d) > 0]:
df.loc[dnid, 'maxElev'] -= 0.01
# move beginning/end coordinate of linear ring representing lake to outlet location (to ensure correct routing)
# some routed lakes may not have an outlet
# do this for both routed and unrouted (farfield) lakes, so that the outlet line won't cross the lake
# (only tributaries are tested for crossing in step below)
lake_coords = uniquelist(df.loc[wb_comid, 'ls_coords'])
if len(df.loc[wb_comid, 'dncomid']) > 0 and int(dncomids[0]) != 0:
outlet_coords = df.loc[df.loc[wb_comid, 'dncomid'][0], 'ls_coords'][0]
closest_ind = closest_vertex_ind(outlet_coords, lake_coords)
lake_coords[closest_ind] = outlet_coords
next_ind = closest_ind + 1 if closest_ind < (len(lake_coords) - 1) else 0
else:
# for lakes without outlets, make the last coordinate the outlet
# so that it can be moved below
outlet_coords = lake_coords[-1]
next_ind = 0
inlet_coords = move_point_along_line(lake_coords[next_ind], outlet_coords, 1)
new_coords = [inlet_coords] + lake_coords[next_ind:] + lake_coords[:next_ind]
df.at[wb_comid, 'ls_coords'] = new_coords
# make sure inlets/outlets don't cross lines representing lake
wb_geom = LineString(new_coords)
crosses = [c for c in upcomids if int(c) != 0 and LineString(df.loc[c, 'ls_coords']).crosses(wb_geom)]
if len(crosses) > 0:
for comid_crosses in crosses:
ls_coords = list(df.loc[comid_crosses, 'ls_coords']) # want to copy, to avoid modifying df
# find the first intersection point with the lake
# (for some reason, two very similar coordinates will occasionally be returned by intersection)
intersection = LineString(ls_coords).intersection(wb_geom)
if intersection.geom_type == 'MultiPoint':
intersection_point = np.array([intersection.geoms[0].xy[0][0],
intersection.geoms[0].xy[1][0]])
else:
intersection_point = np.array([intersection.xy[0][0], intersection.xy[1][0]])
# sequentially drop last vertex from line until it no longer crosses the lake
crossing = True
while crossing:
ls_coords.pop(-1)
if len(ls_coords) < 2:
break
# need to test for intersection separately,
# in case len(ls_coords) == 1 (can't make a LineString)
elif not LineString(ls_coords).crosses(wb_geom):
break
# append new end vertex on line that is close to, but not coincident with lake
diff = np.array(ls_coords[-1]) - intersection_point
# make a new endpoint that is between the intersection and next to last
new_endvert = tuple(intersection_point + np.sign(diff) * np.sqrt(self.nearfield_tolerance))
ls_coords.append(new_endvert)
# Use df.at here to avoid error about setting a list
# with a different length than existing list
df.at[comid_crosses, 'ls_coords'] = ls_coords
# drop the lines representing the lake from the lines dataframe
df.drop(lines.index, axis=0, inplace=True)
return df
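# Sketch of the ring rotation used in setup_linesink_lakes to start a lake's
# coordinate ring at its outlet. closest_vertex_ind (defined elsewhere in this
# module) supplies the outlet index there; here it is just an argument.
def _rotate_ring_sketch(lake_coords, outlet_ind):
    """Reorder lake_coords so the vertex after outlet_ind comes first."""
    next_ind = outlet_ind + 1 if outlet_ind < len(lake_coords) - 1 else 0
    return lake_coords[next_ind:] + lake_coords[:next_ind]

# e.g. _rotate_ring_sketch([(0, 0), (1, 0), (1, 1), (0, 1)], 1)
#      -> [(1, 1), (0, 1), (0, 0), (1, 0)]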
[docs] def list_updown_comids(self, df):
farfield = df.COMID[df.farfield].tolist()
# record up and downstream comids for lines
lines = [l for l in df.index if l not in self.wblist and l not in farfield]
# use a distinct empty list for each row
# ([[]] * len(df) would give every row the same mutable list object)
df['upcomids'] = [[] for _ in range(len(df))]
df['dncomid'] = [[] for _ in range(len(df))]
dncomid, upcomids = [], []
for l in lines:
# set up/down comids that are not in the model domain to zero
dncomid.append([d if d in lines else 0 for d in
df[df['Hydroseq'] == df.loc[l, 'DnHydroseq']].index])
upcomids.append([u if u in lines else 0 for u in
df[df['DnHydroseq'] == df.loc[l, 'Hydroseq']].index])
df.loc[lines, 'upcomids'] = np.array(upcomids, dtype=object)
df.loc[lines, 'dncomid'] = np.array(dncomid, dtype=object)
# enforce list datatype (pandas apparently flattens length-1 lists of lists on assignment)
df['dncomid'] = [[d] if not isinstance(d, list) else d for d in df.dncomid]
return df
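# Sketch of the Hydroseq-based routing lookup in list_updown_comids, on a
# made-up three-segment network. In NHDPlus, each comid has a Hydroseq, and
# DnHydroseq points at the downstream segment's Hydroseq.
def _updown_from_hydroseq_sketch():
    import pandas as pd
    # comids 10 -> 20 -> 30 downstream
    df = pd.DataFrame({'Hydroseq': [3, 2, 1],
                       'DnHydroseq': [2, 1, 0]},
                      index=[10, 20, 30])
    dncomid = {i: list(df[df.Hydroseq == df.loc[i, 'DnHydroseq']].index)
               for i in df.index}
    upcomids = {i: list(df[df.DnHydroseq == df.loc[i, 'Hydroseq']].index)
                for i in df.index}
    return dncomid, upcomids  # dncomid[10] == [20]; upcomids[30] == [20]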
[docs] def make_linesinks(self, reuse_preprocessed_lines=False,
shp=None):
with open(self.error_reporting, 'a') as efp:
efp.write('\nMaking the lines...\n')
if shp is None:
shp = self.preprocessed_lines
if reuse_preprocessed_lines:
try:
self.df = gpd.read_file(shp, dtypes={'COMID': self.int_dtype})
self.df.index = self.df['COMID']
self._enforce_dtypes(self.df)
except Exception:
# fall back to re-running the preprocessing if the file can't be read
self.preprocess(save=True)
else:
self.preprocess(save=True)
# enforce integer columns
self.df.index = self.df.index.astype(self.int_dtype)
self.df['COMID'] = self.df.COMID.astype(self.int_dtype)
# simplify the lines in the df (dataframe) attribute
df = self.df.copy()
df['ls_geom'] = df['geometry'].simplify(df['tol'].values)
# add column of lists, containing linesink coordinates
df['ls_coords'] = [list(g.coords) for g in df['ls_geom']]
# there shouldn't be any empty coordinate lists
assert np.all(np.array([len(c) for c in df.ls_coords]) > 0)
self.lines_df = df
self.wblist = set(df.loc[df.waterbody].index.values.astype(self.int_dtype)).difference({0})
print('Assigning attributes for GFLOW input...')
# routing
# need to reformulate simplification so that "routed" means all routed LinesinkData,
# not just those between the nearfield and farfield
df['routing'] = (df.routed.values | df.nearfield.values).astype(int)
# linesink elevations (lakes won't be populated yet)
min_elev_col = [c for c in df.columns if 'minelev' in c.lower()][0]
max_elev_col = [c for c in df.columns if 'maxelev' in c.lower()][0]
df['minElev'] = df[min_elev_col] * self.zmult
df['maxElev'] = df[max_elev_col] * self.zmult
df['dStage'] = df['maxElev'] - df['minElev']
# list upstream and downstream comids
df = self.list_updown_comids(df)
# discard duplicate LinesinkData that result from braids in NHD and line simplification
df = self.drop_duplicates(df)
# method to represent lakes with LinesinkData
df = self.setup_linesink_lakes(df)
print('\nmerging or splitting lines with only two vertices...')
# find all routed comids with only 1 line; merge with neighboring comids
# (GFLOW requires two lines for routed streams)
def bisect(coords):
# add vertex to middle of single line segment
coords = np.array(coords)
mid = 0.5 * (coords[0] + coords[-1])
new_coords = list(map(tuple, [coords[0], mid, coords[-1]]))
return new_coords
df['nlines'] = [len(coords) - 1 for coords in df.ls_coords]
# bisect lines that have only one segment, and are routed
ls_coords = df.ls_coords.tolist()
singlesegment = ((df['nlines'] < 2) & (df['routing'] == 1)).values
df['ls_coords'] = [bisect(line) if singlesegment[i]
else line for i, line in enumerate(ls_coords)]
# fix LinesinkData where max and min elevations are the same
df = self.adjust_zero_gradient(df)
# end streams
# evaluate whether downstream segment is in farfield
downstream_ff = []
for i in range(len(df)):
try:
dff = df.loc[df.iloc[i].dncomid[0], 'farfield'].item()
except (IndexError, KeyError, ValueError):
# no (single) downstream comid in the dataset; treat as farfield
dff = True
downstream_ff.append(dff)
# set segments with downstream segment in farfield as End Segments
df['end_stream'] = len(df) * [0]
df.loc[downstream_ff, 'end_stream'] = 1
# widths for lines
arbolate_sum_col = [c for c in df.columns if 'arbolate' in c.lower()][0]
df['width'] = df[arbolate_sum_col].map(lambda x: width_from_arboate(x, self.lmbda))
# widths for lakes
if np.any(df['FTYPE'] == 'LakePond'):
df.loc[df['FTYPE'] == 'LakePond', 'width'] = \
np.vectorize(lake_width)(df.loc[df['FTYPE'] == 'LakePond', 'AREASQKM'],
df.loc[df['FTYPE'] == 'LakePond', 'total_line_length'], self.lmbda)
# resistance
df['resistance'] = self.resistance
df.loc[df['farfield'], 'resistance'] = 0
# depth
df['depth'] = self.global_streambed_thickness
# resistance parameter (scenario)
df['ScenResistance'] = self.ScenResistance
df.loc[df['farfield'], 'ScenResistance'] = '__NONE__'
# check box for "include in Scenario" in the GUI
df['chkScenario'] = self.chkScenario
# linesink location
df.loc[df['FTYPE'] != 'LakePond', 'AutoSWIZC'] = 1 # Along stream centerline
df.loc[df['FTYPE'] == 'LakePond', 'AutoSWIZC'] = 2 # Along surface water boundary
# additional check for isolated lines
# (identification only; the drop below is currently disabled)
isolated = [c for c in df.index if len(df.loc[c].dncomid) == 0 and len(df.loc[c].upcomids) == 0
and c not in self.wblist]
#df = df.drop(isolated, axis=0)
# drop lines below minimum length that are not Lakes
if self.farfield_length_threshold > 0.:
criteria = ~df.farfield.values | (df.FTYPE == 'LakePond') |\
(df.farfield.values & (df.LENGTHKM >= self.farfield_length_threshold))
df = df.loc[criteria].copy()
if self.routed_area_length_threshold > 0.:
criteria = ~df.routed.values | (df.FTYPE == 'LakePond') |\
(df.routed.values & (df.LENGTHKM >= self.routed_area_length_threshold))
df = df.loc[criteria].copy()
# names
df['ls_name'] = df.apply(name, axis=1)
# compare number of line segments before and after simplification
nsegments_orig = sum([len(p) - 1 for p in df['geometry'].map(lambda x: x.xy[0])])
nsegments_simp = sum([len(p) - 1 for p in df.ls_coords])
print('\nnumber of line segments in original NHD linework: {}'.format(nsegments_orig))
print('number of simplified line segments: {}\n'.format(nsegments_simp))
if nsegments_simp > self.maxlines:
print("Warning, the number of lines exceeds GFLOW's limit of {}!".format(self.maxlines))
if self.split_by_HUC:
self.write_lss_by_huc(df)
else:
self.write_lss(df, '{}.lss.xml'.format(self.outfile_basename))
# run diagnostics on lines and report errors
self.run_diagnostics()
# write shapefile of results
# convert lists in dn and upcomid columns to strings (for writing to shp)
# clean up any dtypes that were altered from sloppy use of pandas
self._enforce_dtypes(df)
self.df = df
self.write_shapefile()
print('Done!')
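# Typical usage of the workflow above (a hedged sketch; 'input.yml' is a
# hypothetical configuration file in the lsmaker input format, and the
# constructor signature is assumed):
#
#   from lsmaker import LinesinkData
#   lsm = LinesinkData('input.yml')
#   lsm.make_linesinks()  # writes <outfile_basename>.lss.xml and a shapefile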
[docs] def map_confluences(self):
upsegs = self.df.upcomids.tolist()
maxsegs = np.array([np.max([int(uu) for uu in u]) if len(u) > 0
else 0 for u in upsegs])
seglengths = np.array([len(u) for u in upsegs])
# setup dataframe of confluences
# confluences are where segments have upsegs (no upsegs means the segment is a headwater)
confluences = self.df.loc[(seglengths > 0) & (maxsegs > 0), ['COMID', 'upcomids']].copy()
confluences['elev'] = [0] * len(confluences)
nconfluences = len(confluences)
print('Mapping {} confluences and updating segment min/max elevations...'.format(nconfluences))
for i, r in confluences.iterrows():
# confluence elevation is the minimum of the ending segments' minimums and the starting segment's maximum
endsmin = np.min(self.df.loc[self.df.COMID.isin(r.upcomids), 'minElev'].values)
startmax = np.max(self.df.loc[self.df.COMID == i, 'maxElev'].values)
cfelev = np.min([endsmin, startmax])
confluences.loc[i, 'elev'] = cfelev
upcomids = [u for u in r.upcomids if int(u) > 0]
if len(upcomids) > 0:
self.df.loc[upcomids, 'minElev'] = cfelev
self.df.loc[i, 'maxElev'] = cfelev
self.confluences = confluences
self.df['dStage'] = self.df['maxElev'] - self.df['minElev']
print('Done, see confluences attribute.')
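# Worked example of the confluence elevation rule in map_confluences
# (values are made up): the confluence elevation is the minimum of the
# ending segments' minimums and the starting segment's maximum.
def _confluence_elev_sketch():
    import numpy as np
    upseg_min_elevs = [101.2, 100.4]  # minElev of segments ending at the confluence
    downseg_max_elev = 100.9          # maxElev of the segment starting there
    return min(np.min(upseg_min_elevs), downseg_max_elev)  # -> 100.4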
[docs] def run_diagnostics(self):
"""Run the diagnostic suite on the LinesinkData instance."""
dg = self.dg
dg.check_vertices()
dg.check4crossing_lines()
dg.check4zero_gradient()
[docs] def map_outsegs(self):
"""Traverse the dncomid routing to build a DataFrame of all downstream
segments for each comid (will not work with circular routing!).
"""
outsegsmap = pd.DataFrame(self.df.COMID)
outsegs = pd.Series([d[0] if len(d) > 0 else 0 for d in self.df.dncomid], index=self.df.index)
max_outseg = outsegsmap[outsegsmap.columns[-1]].astype(int).max()
knt = 2
while max_outseg > 0:
outsegsmap['outseg{}'.format(knt)] = [outsegs[s] if int(s) > 0 else 0
for s in outsegsmap[outsegsmap.columns[-1]]]
max_outseg = outsegsmap[outsegsmap.columns[-1]].astype(int).max()
if max_outseg == 0:
break
knt += 1
if knt > 1000:
print('Circular routing encountered in segment {}'.format(max_outseg))
break
self.outsegs = outsegsmap
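# Sketch of the downstream tracing in map_outsegs, on a toy routing mapping
# (comid -> downstream comid, with 0 terminating a path). The real method
# builds a DataFrame with one column per traversal step instead.
def _trace_outsegs_sketch():
    outsegs = {10: 20, 20: 30, 30: 0}
    path, s = [], 10
    while outsegs.get(s, 0) > 0:
        s = outsegs[s]
        path.append(s)
    return path  # -> [20, 30]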
[docs] def read_lss(self, lss_xml):
"""read a linesink string (lss) XML file exported by GFLOW"""
xml = ET.parse(lss_xml)
root = xml.getroot()
for attr in ['ComputationalUnits', 'BasemapUnits']:
self.__dict__[attr] = root.findall(attr)[0].text
# read fields into dictionary, then DataFrame
d = {}
for field in ['Label',
'HeadSpecified',
'StartingHead',
'EndingHead',
'Resistance',
'Width',
'Depth',
'Routing',
'EndStream',
'OverlandFlow',
'EndInflow',
'ScenResistance',
'Drain',
'ScenFluxName',
'Gallery',
'TotalDischarge',
'InletStream',
'OutletStream',
'OutletTable',
'Lake',
'Precipitation',
'Evapotranspiration',
'Farfield',
'chkScenario',
'AutoSWIZC',
'DefaultResistance']:
if self.dtypes[field] == bool:
try:
v = np.array([i.text for i in root.iter(field)],
dtype=self.int_dtype)
d[field] = v.astype(bool)
except ValueError:
d[field] = np.array([self.tf2flag(i.text) for i in root.iter(field)])
else:
d[field] = np.array([i.text for i in root.iter(field)],
dtype=self.dtypes[field])
# read in the vertices
def tolss(ls):
X = np.array([v.find('X').text for v in ls.find('Vertices').findall('Vertex')], dtype=float)
Y = np.array([v.find('Y').text for v in ls.find('Vertices').findall('Vertex')], dtype=float)
return LineString(zip(X, Y))
d['geometry'] = [tolss(ls) for ls in root.iter('LinesinkString')]
df = pd.DataFrame(d)
# convert labels back to COMIDs for the index; assign unique values otherwise
comids = []
for i, r in df.iterrows():
try:
comids.append(int(r.Label.split()[0].strip()))
except ValueError:
comids.append(i)
df.index = comids
# rename fields to be consistent with those used in Linesinkmaker
# (should eventually just change lsmaker names to conform)
# note: Resistance and DefaultResistance are written with the same value by
# write_lss, so only Resistance is mapped back to 'resistance'
# (mapping both would create duplicate column names)
df.rename(columns={'Depth': 'depth',
'EndStream': 'end_stream',
'EndingHead': 'minElev',
'Farfield': 'farfield',
'Label': 'ls_name',
'Resistance': 'resistance',
'Routing': 'routing',
'StartingHead': 'maxElev',
'Width': 'width'}, inplace=True)
df['ls_coords'] = [list(g.coords) for g in df.geometry]
return df
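# Hedged usage sketch for read_lss ('existing.lss.xml' is a hypothetical
# GFLOW export; instantiation details are assumed):
#
#   lsm = LinesinkData(...)
#   df = lsm.read_lss('existing.lss.xml')
#   df[['ls_name', 'maxElev', 'minElev', 'width']].head()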
[docs] def write_lss_by_huc(self, df):
print('\nGrouping segments by hydrologic unit...')
# intersect lines with HUCs; then group dataframe by HUCs
HUCs_df = gisutils.shp2df(self.HUC_shp, index=self.HUC_name_field)
df[self.HUC_name_field] = len(df) * [None]
for HUC in HUCs_df.index:
intersects = [line.intersects(HUCs_df.loc[HUC, 'geometry']) for line in df['geometry']]
df.loc[intersects, self.HUC_name_field] = HUC
dfg = df.groupby(self.HUC_name_field)
# write lines for each HUC to a separate lss file
# (iterate over the groupby directly; the grouping column is named by
# self.HUC_name_field, so hard-coding df.HUC would fail for other field names)
for HUC, dfh in dfg:
outfile = '{}_{}.lss.xml'.format(self.outfile_basename, HUC)
self.write_lss(dfh, outfile)
[docs] def write_lss(self, df, outfile):
"""write GFLOW linesink XML (lss) file from dataframe df
"""
df = df.copy()
df['chkScenario'] = [s.lower() for s in df.chkScenario.astype(str)]
nlines = sum([len(p) - 1 for p in df.ls_coords])
print('writing {} lines to {}'.format(nlines, outfile))
ofp = open(outfile, 'w')
ofp.write('<?xml version="1.0"?>\n')
ofp.write('<LinesinkStringFile version="1">\n')
ofp.write('\t<ComputationalUnits>{}</ComputationalUnits>\n'
'\t<BasemapUnits>{}</BasemapUnits>\n\n'.format(self.ComputationalUnits.capitalize(),
self.BasemapUnits.capitalize()))
for comid in df.index:
ofp.write('\t<LinesinkString>\n')
ofp.write('\t\t<Label>{}</Label>\n'.format(df.loc[comid, 'ls_name']))
ofp.write('\t\t<HeadSpecified>1</HeadSpecified>\n')
ofp.write('\t\t<StartingHead>{:.2f}</StartingHead>\n'.format(df.loc[comid, 'maxElev']))
ofp.write('\t\t<EndingHead>{:.2f}</EndingHead>\n'.format(df.loc[comid, 'minElev']))
ofp.write('\t\t<Resistance>{}</Resistance>\n'.format(df.loc[comid, 'resistance']))
ofp.write('\t\t<Width>{:.2f}</Width>\n'.format(df.loc[comid, 'width']))
ofp.write('\t\t<Depth>{:.2f}</Depth>\n'.format(df.loc[comid, 'depth']))
ofp.write('\t\t<Routing>{}</Routing>\n'.format(df.loc[comid, 'routing']))
ofp.write('\t\t<EndStream>{}</EndStream>\n'.format(df.loc[comid, 'end_stream']))
ofp.write('\t\t<OverlandFlow>0</OverlandFlow>\n')
ofp.write('\t\t<EndInflow>0</EndInflow>\n')
ofp.write('\t\t<ScenResistance>{}</ScenResistance>\n'.format(df.loc[comid, 'ScenResistance']))
ofp.write('\t\t<Drain>0</Drain>\n')
ofp.write('\t\t<ScenFluxName>__NONE__</ScenFluxName>\n')
ofp.write('\t\t<Gallery>0</Gallery>\n')
ofp.write('\t\t<TotalDischarge>0</TotalDischarge>\n')
ofp.write('\t\t<InletStream>0</InletStream>\n')
ofp.write('\t\t<OutletStream>0</OutletStream>\n')
ofp.write('\t\t<OutletTable>__NONE__</OutletTable>\n')
ofp.write('\t\t<Lake>0</Lake>\n')
ofp.write('\t\t<Precipitation>0</Precipitation>\n')
ofp.write('\t\t<Evapotranspiration>0</Evapotranspiration>\n')
ofp.write('\t\t<Farfield>{:.0f}</Farfield>\n'.format(df.loc[comid, 'farfield']))
ofp.write('\t\t<chkScenario>{}</chkScenario>\n'.format(df.loc[comid, 'chkScenario'])) # include linesink in PEST 'scenarios'
ofp.write('\t\t<AutoSWIZC>{:.0f}</AutoSWIZC>\n'.format(df.loc[comid, 'AutoSWIZC']))
ofp.write('\t\t<DefaultResistance>{:.2f}</DefaultResistance>\n'.format(df.loc[comid, 'resistance']))
ofp.write('\t\t<Vertices>\n')
# now write out linesink vertices
for x, y in df.loc[comid, 'ls_coords']:
ofp.write('\t\t\t<Vertex>\n')
ofp.write('\t\t\t\t<X> {:.2f}</X>\n'.format(x))
ofp.write('\t\t\t\t<Y> {:.2f}</Y>\n'.format(y))
ofp.write('\t\t\t</Vertex>\n')
ofp.write('\t\t</Vertices>\n')
ofp.write('\t</LinesinkString>\n\n')
ofp.write('</LinesinkStringFile>')
ofp.close()
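# Quick, self-contained sanity check for a file written by write_lss: parse
# it back with ElementTree and count the linesink strings (file name is
# hypothetical).
def _count_linesinks(lss_xml='output.lss.xml'):
    import xml.etree.ElementTree as ET
    root = ET.parse(lss_xml).getroot()
    return len(root.findall('LinesinkString'))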
[docs] def write_shapefile(self, outfile=None, use_ls_coords=True):
if outfile is None:
outfile = self.outfile_basename.split('.')[0] + '.shp'
df = self.df.copy()
for routid in {'dncomid', 'upcomids'}.intersection(set(df.columns)):
df[routid] = df[routid].map(lambda x: ' '.join([str(c) for c in x])) # handles empties
# recreate shapely geometries from coordinates column; drop all other coords/geometries
if use_ls_coords and 'ls_coords' in df.columns:
df['geometry'] = [LineString(g) for g in df.ls_coords]
df.drop(['ls_coords'], axis=1, inplace=True)
# drop original simplified geometry column
# (geopandas doesn't allow multiple geometry columns; with some refactoring,
# ls_geom could perhaps be used directly instead of rebuilding the
# geometry from the linesink coordinates above)
df.drop('ls_geom', axis=1, errors='ignore', inplace=True)
df.to_file(outfile, index=False)
print(f"wrote {outfile}")
[docs]class linesinks(LinesinkData):
def __init__(self, *args, **kwargs):
warnings.warn("The 'linesinks' class was renamed to LinesinkData to better follow pep 8.",
DeprecationWarning)
LinesinkData.__init__(self, *args, **kwargs)
[docs]class EmptyDataFrame(Exception):
def __str__(self):
return '\n\nEmpty DataFrame!'