Source code for lsmaker.lsmaker

import xml.etree.ElementTree as ET
import sys
import warnings
import numpy as np
import os
from pathlib import Path
import pandas as pd
import geopandas as gpd
import shutil
import requests
import json
import yaml
from functools import partial
import fiona
from fiona.crs import to_string, from_string
from shapely.geometry import (
    Polygon, 
    LineString, 
    Point, 
    shape, 
    mapping, 
    MultiPolygon)
from shapely.ops import unary_union, transform
import pyproj
import math
import gisutils
import lsmaker
try:
    import matplotlib.pyplot as plt
except ImportError:
    pass
from lsmaker.diagnostics import Diagnostics


# ## Functions #############################
def add_projection(line, point):
    """Add vertex to line at point, if the closest point on the line isn't an end.

    Parameters
    ----------
    line : LineString
    point : Point

    Returns
    -------
    newline : LineString
        Line with point added, or original line, if point coincides with end.
    """
    l = line
    mp = point
    distance = l.project(mp)
    if distance <= 0.0 or distance >= line.length:
        return line
    coords = list(l.coords)
    for i, p in enumerate(l.coords):
        pd = l.project(Point(p))
        if pd == distance:
            return line
        elif pd > distance:
            return LineString(coords[:i] + [(mp.x, mp.y)] + coords[i:])


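# Illustrative sketch (not part of the original module): add_projection inserts the
# point itself as a new vertex, positioned by its projected distance along the line.
#
# >>> line = LineString([(0, 0), (10, 0)])
# >>> pt = Point(4, 1)
# >>> list(add_projection(line, pt).coords)
# [(0.0, 0.0), (4.0, 1.0), (10.0, 0.0)]

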
def add_vertices_at_testpoints(lssdf, tpgeoms, tol=200):
    """Add vertices to LinesinkData at locations of testpoints
    (so that modeled flow observations are correct).

    Parameters
    ----------
    lssdf : DataFrame
        DataFrame of linesink strings. Must contain 'geometry' column
        of shapely LineStrings defining geometries of linesink strings,
        in same coordinate system as testpoints.
    tpgeoms : list
        List of testpoint geometries.
    tol : float
        Tolerance, in coordinate system units, for considering lines
        near the testpoints.

    Returns
    -------
    geoms : list of geometries
        New geometry column with added vertices.
    """
    df = lssdf.copy()
    for mp in tpgeoms:
        # find all lines within tolerance
        nearby = np.array([l.intersects(mp.buffer(tol)) for l in df.geometry])
        ldf = df[nearby].copy()
        # choose closest if two or more nearby lines
        if len(ldf) > 1:
            ldf['dist'] = [mp.distance(ll.interpolate(ll.project(mp)))
                           for ll in ldf.geometry.values]
            ldf.sort_values('dist', inplace=True)
        # if at least one line is nearby
        if len(ldf) > 0:
            ind = ldf.index[0]
            l = ldf.geometry.values[0]
            newline = add_projection(l, mp)
            df.loc[ind, 'geometry'] = newline
    return df.geometry.tolist()


def get_elevations_from_epqs(points, units='feet'):
    """From a list of shapely points in geographic coordinates (lon, lat),
    returns a list of elevation values."""
    if len(points) > 0:
        print('querying National Map Elevation Point Query Service...')
        elevations = [get_elevation_from_epqs(p.x, p.y, units=units) for p in points]
    else:
        elevations = []
    return elevations


def get_elevation_from_epqs(lon, lat, units='feet'):
    """Returns an elevation value at a point location.

    Notes
    -----
    example url for -91, 45:
    https://epqs.nationalmap.gov/v1/json?x=-91&y=45&wkid=4326&units=Feet&includeDate=false

    Examples
    --------
    >>> get_elevation_from_epqs(-91, 45)
    1139.778277
    """
    url = f'https://epqs.nationalmap.gov/v1/json?x={lon}&y={lat}&wkid=4326&units={units}&includeDate=false'
    try:
        response = requests.get(url)
        elev = json.loads(response.text)['value']
        print(lat, lon, elev, units)
    except Exception:
        e = sys.exc_info()
        print(e)
        print('Problem accessing Elevation Point Query Service. '
              'Need an internet connection to get seepage lake elevations.'
              '\nIf your internet is working, running the script again may work; '
              'sometimes the EPQS can be temperamental.')
        elev = 0.0
    try:
        elev = float(elev)
    except (TypeError, ValueError):
        print('Warning, invalid elevation of {} returned for {}, {}.'
              '\nSetting elevation to 0.'.format(elev, lon, lat))
        elev = 0.0
    return elev


def _get_random_point_in_polygon(poly):
    """Generates a point within a polygon
    (for lakes where the centroid is not in the lake)."""
    minx, miny, maxx, maxy = poly.bounds
    while True:
        p = Point(np.random.uniform(minx, maxx), np.random.uniform(miny, maxy))
        if poly.contains(p):
            return p


#def reproject(geoms, pr1, pr2):
#    """Reprojects a list of geometries from coordinate system pr1 to pr2
#    (given as proj strings)."""
#    print('reprojecting from {} to {}...'.format(pr1, pr2))
#    if not isinstance(geoms, list):
#        geoms = [geoms]
#    pr1 = pyproj.Proj(pr1, errcheck=True, preserve_units=True)
#    pr2 = pyproj.Proj(pr2, errcheck=True, preserve_units=True)
#    project = partial(pyproj.transform, pr1, pr2)
#    return [transform(project, g) for g in geoms]


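# Illustrative sketch (not part of the original module): for a C-shaped lake polygon
# whose centroid falls in the notch (outside the polygon), _get_random_point_in_polygon
# rejection-samples until it finds a point that is actually inside.
#
# >>> crescent = Polygon([(0, 0), (4, 0), (4, 4), (0, 4), (0, 3), (3, 3), (3, 1), (0, 1)])
# >>> crescent.contains(crescent.centroid)
# False
# >>> crescent.contains(_get_random_point_in_polygon(crescent))
# True

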
def w_parameter(B, lmbda):
    """Compute w parameter for estimating an effective conductance term
    (i.e., when simulating lakes using linesinks instead of GFLOW's lake package).
    If only larger lakes are simulated (e.g., > 1 km2), the w parameter will be = lambda.

    see Haitjema 2005, "Dealing with Resistance to Flow into Surface Waters"
    """
    if lmbda <= 0.1 * B:
        w = lmbda
    elif 0.1 * B < lmbda < 2 * B:
        w = lmbda * np.tanh(B / (2 * lmbda))
    else:
        w = B / 2
    return w


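# Illustrative sketch (not part of the original module) of the piecewise behavior,
# for a stream width B = 50 (same length units as lmbda):
#   w_parameter(50, 2)     -> 2       (lmbda <= 0.1 * B, so w = lmbda)
#   w_parameter(50, 60)    -> ~23.6   (0.1*B < lmbda < 2*B, so w = lmbda * tanh(B / (2*lmbda)))
#   w_parameter(50, 1000)  -> 25.0    (lmbda >= 2*B, so w = B / 2)

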
def width_from_arboate(arbolate, lmbda):
    """Estimate stream width in feet from arbolate sum in kilometers, using the
    relationship between width and arbolate sum (in meters) described by
    Feinstein et al (2010), Appendix 2, p 266.
    """
    estwidth = 0.1193 * math.pow(1000 * arbolate, 0.5032)
    w = 2 * w_parameter(estwidth, lmbda)  # assumes stream is rep. by single linesink
    return w


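# Illustrative sketch (not part of the original module): for an arbolate sum of 50 km,
# the Feinstein et al. (2010) relationship gives an estimated width of about
# 0.1193 * (50 * 1000) ** 0.5032 ~= 27.6 ft, which is then converted to an effective
# conductance width via w_parameter and doubled (the code assumes the stream is
# represented by a single linesink). With lmbda = sqrt(k * H * c) ~= 17.3
# (the default k=10, H=100, resistance c=0.3 used elsewhere in this module):
#   width_from_arboate(50, lmbda=17.3)  -> ~23 ft

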
def lake_width(area, total_line_length, lmbda):
    """Estimate conductance width from lake area and length of flowlines running through it."""
    if total_line_length > 0:
        estwidth = 1000 * (area / total_line_length) / 0.3048  # (km2/km)*(ft/km)
    else:
        estwidth = np.sqrt(area / np.pi) * 1000 / 0.3048  # (km)*(ft/km)

    # see Haitjema 2005, "Dealing with Resistance to Flow into Surface Waters"
    # basically if only larger lakes are simulated (e.g., > 1 km2), w parameter will be = lambda
    # this assumes that GFLOW's lake package will not be used
    w = w_parameter(estwidth, lmbda)
    return w  # feet


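# Illustrative sketch (not part of the original module): a 1 km2 lake with 2 km of
# flowlines running through it gets an estimated width of 1000 * (1 / 2) / 0.3048
# ~= 1640 ft; an isolated lake (total_line_length = 0) falls back to sqrt(area / pi)
# (the radius of a circle of equal area, in km) converted to feet.
#   lake_width(1.0, 2.0, lmbda=17.3)  -> 17.3  (since lmbda <= 0.1 * estwidth, w = lmbda)

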
def name(x):
    """Abbreviations for naming LinesinkData from names in NHDPlus.

    GFLOW requires linesink names to be 32 characters or less.
    """
    if x.GNIS_NAME:
        # reduce name down with abbreviations
        abb = {'Branch': 'Br',
               'Creek': 'Crk',
               'East': 'E',
               'Flowage': 'Fl',
               'Lake': 'L',
               'North': 'N',
               'Pond': 'P',
               'Reservoir': 'Res',
               'River': 'R',
               'South': 'S',
               'West': 'W',
               "'": ''}
        name = '{} {}'.format(x.name, x.GNIS_NAME)
        for k, v in abb.items():
            name = name.replace(k, v)
    else:
        name = '{} unnamed'.format(x.name)
    return name[:32]


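# Illustrative sketch (not part of the original module): x is expected to be a pandas
# row (Series) whose .name is the COMID index and which has a GNIS_NAME attribute.
# For example, a row with index 1234567 and GNIS_NAME 'East Branch Creek' would be
# abbreviated to '1234567 E Br Crk'; unnamed features become '<COMID> unnamed'.
# Results are truncated to GFLOW's 32-character limit.

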
def uniquelist(seq):
    seen = set()
    seen_add = seen.add
    return [x for x in seq if not (x in seen or seen_add(x))]


def closest_vertex_ind(point, shape_coords):
    """Returns index of the closest vertex in a shapely geometry's coordinate sequence.
    Ugly but works.
    """
    crds = shape_coords
    X = np.array([i[0] for i in crds])
    Y = np.array([i[1] for i in crds])
    dX, dY = X - point[0], Y - point[1]
    closest_ind = np.argmin(np.sqrt(dX ** 2 + dY ** 2))
    return closest_ind


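# Illustrative sketch (not part of the original module):
#   line = LineString([(0, 0), (5, 0), (10, 0)])
#   closest_vertex_ind((6, 1), list(line.coords))  -> 1  (vertex (5, 0) is closest)

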
def move_point_along_line(x1, x2, dist):
    diff = (x2[0] - x1[0], x2[1] - x1[1])
    return tuple(x2 - dist * np.sign(diff))


[docs]class LinesinkData: maxlines = 4000 int_dtype = str # np.int64 dtypes = {'Label': str, 'HeadSpecified': float, 'StartingHead': float, 'EndingHead': float, 'Resistance': float, 'Width': float, 'Depth': float, 'Routing': np.int64, 'EndStream': np.int64, 'OverlandFlow': np.int64, 'EndInflow': np.int64, 'ScenResistance': str, 'Drain': np.int64, 'ScenFluxName': str, 'Gallery': np.int64, 'TotalDischarge': np.int64, 'InletStream': np.int64, 'OutletStream': np.int64, 'OutletTable': str, 'Lake': np.int64, 'Precipitation': float, 'Evapotranspiration': float, 'Farfield': bool, 'chkScenario': bool, 'AutoSWIZC': np.int64, 'DefaultResistance': float} fcodes = {'Perennial': 46006, 'Intermittent': 46003, 'Uncategorized': 46000} def __init__(self, infile=None, GFLOW_lss_xml=None): # attributes self._lsmaker_config_file_path = None # absolute path to config file self.preproc = None self.resistance = None self.H = None # aquifer thickness in model units self.k = None # hydraulic conductivity of the aquifer in model units self.lmbda = None self.ScenResistance = None self.chkScenario = None self.global_streambed_thickness = None # streambed thickness self.ComputationalUnits = None # 'feet' or 'meters'; for XML output file self.BasemapUnits = None # elevation units multiplier (from NHDPlus cm to model units) self.zmult = None # model domain self.farfield = None self.routed_area = None self.nearfield = None self.prj = None self.crs = None #self.crs_str = None # self.crs, in proj string format self.pyproj_crs = None # pyproj.CRS instance based on prj input self.farfield_buffer = None self.clip_farfield = None self.split_by_HUC = None self.HUC_shp = None self.HUC_name_field = None # simplification self.refinement_areas = [] # list of n areas within routed area with additional refinement self.nearfield_tolerance = None self.routed_area_tolerance = None self.farfield_tolerance = None self.min_nearfield_order = None self.min_routed_area_order = None self.min_farfield_order = None self.min_nearfield_wb_size = None self.min_waterbody_size = None self.min_farfield_wb_size = None self.farfield_length_threshold = None self.routed_area_length_threshold = None self.drop_intermittent = None self.drop_crossing = None self.asum_thresh_ra = None self.asum_thresh_nf = None self.asum_thresh_ff = None # NHD files self.flowlines = None self.elevslope = None self.PlusFlowVAA = None self.waterbodies = None # columns to retain in NHD files (when joining to GIS lines) # Note: may need to add method to handle case discrepancies self.flowlines_cols = ['COMID', 'FCODE', 'FDATE', 'FLOWDIR', 'FTYPE', 'GNIS_ID', 'GNIS_NAME', 'LENGTHKM', 'REACHCODE', 'RESOLUTION', 'WBAREACOMI', 'geometry'] self.flowlines_cols_dtypes = {'COMID': self.int_dtype, 'FCODE': self.int_dtype, 'FDATE': str, 'FLOWDIR': str, 'FTYPE': str, 'GNIS_ID': self.int_dtype, 'GNIS_NAME': str, 'LENGTHKM': float, 'REACHCODE': str, 'RESOLUTION': str, 'WBAREACOMI': self.int_dtype, 'geometry': object} self.elevslope_cols = ['MINELEVSMO', 'MAXELEVSMO'] self.elevslope_dtypes = {'MINELEVSMO': float, 'MAXELEVSMO': float} self.pfvaa_cols = ['ArbolateSu', 'Hydroseq', 'DnHydroseq', 'StreamOrde'] self.pfvaa_cols_dtypes = {'ArbolateSu': float, 'Hydroseq': self.int_dtype, 'DnHydroseq': self.int_dtype, 'StreamOrde': np.int64} self.wb_cols = ['AREASQKM', 'COMID', 'ELEVATION', 'FCODE', 'FDATE', 'FTYPE', 'GNIS_ID', 'GNIS_NAME', 'REACHCODE', 'RESOLUTION', 'geometry'] self.wb_cols_dtypes = {'AREASQKM': float, 'COMID': self.int_dtype, 'ELEVATION': float, 'FCODE': self.int_dtype, 'FDATE': str, 
'FTYPE': str, 'GNIS_ID': self.int_dtype, 'GNIS_NAME': str, 'REACHCODE': str, 'RESOLUTION': str, 'geometry': object} # could do away with above and have one dtypes list self.dtypes.update(self.flowlines_cols_dtypes) self.dtypes.update(self.elevslope_dtypes) self.dtypes.update(self.pfvaa_cols_dtypes) self.dtypes.update(self.wb_cols_dtypes) # preprocessed files self.DEM = None self.elevs_field = None self.DEM_zmult = None self.flowlines_clipped = None self.waterbodies_clipped = None self.routed_mp = None self.farfield_mp = None self.preprocessed_lines = None self.preprocdir = None self.wb_centroids_w_elevations = None # elevations extracted during preprocessing routine self.elevs_field = None # field in wb_centroids_w_elevations containing elevations # outputs self.outfile_basename = None self.error_reporting = 'error_reporting.txt' # attributes self.from_lss_xml = False self.df = pd.DataFrame() # working dataframe for translating NHDPlus data to linesink strings self.lss = pd.DataFrame() # dataframe of GFLOW linesink strings (with attributes) self.outsegs = pd.DataFrame() self.confluences = pd.DataFrame() # read in the configuration file if isinstance(infile, str) or isinstance(infile, Path): infile = Path(infile) if infile.name.endswith('.xml'): self.read_lsmaker_xml(infile) else: for extension in 'yml', 'yaml': if infile.name.endswith(extension): self.set_configuration_from_yaml(infile) elif isinstance(infile, dict): self.set_configuration(infile) # or create instance from a GFLOW LSS XML file elif GFLOW_lss_xml is not None: self.from_lss_xml = True self.df = self.read_lss(GFLOW_lss_xml) # nearfield can't be coarser than the routed area if infile is not None: self.min_nearfield_order = min((self.min_nearfield_order, self.min_routed_area_order)) # create a pyproj CRS instance # set the CRS (basemap) length units self.set_crs(prjfile=self.prj) # logging/diagnostics # todo: more robust/detail logging if Path(self.error_reporting).exists(): try: Path(self.error_reporting).unlink() except: j=2 with open(self.error_reporting, 'w') as efp: efp.write('Linesink-maker version {}\n'.format(lsmaker.__version__)) self.dg = Diagnostics(lsm_object=self, logfile=self.error_reporting) def __eq__(self, other): """Test for equality to another linesink object.""" if not isinstance(other, self.__class__): return False exclude_attrs = ['_lsmaker_config_file_path', 'crs', 'crs_str', 'inpars', 'cfg', 'efp', ] # LinesinkData instances from lss xml won't have some attributes # or some df columns that came from NHDPlus compare_df_columns = slice(None) if self.from_lss_xml or other.from_lss_xml: # todo: expose default values that are being set on write of lss_xml # (many of the columns in LinesinkData instance from lss xml aren't in # LinesinkData instance that was created from scratch, because the variables # are only being set in LinesinkData.write_lss method) compare_df_columns = set(self.df.columns).intersection(other.df.columns) # the geometries and coordinates won't be exactly the same # explicitly compare the coordinates separately compare_df_columns = compare_df_columns.difference({'geometry', 'ls_coords', 'width' }) for k, v in self.__dict__.items(): # items to skip # todo: implement pyproj.CRS class to robustly compare CRSs if k in exclude_attrs: continue elif self.from_lss_xml or other.from_lss_xml: if k not in ('df', 'ComputationalUnits', 'BasemapUnits'): continue elif k not in other.__dict__: return False elif type(v) == bool: if not v == other.__dict__[k]: return False elif k == 'df': if len(v) == 0 and 
len(other.__dict__[k]) == 0: continue try: df1 = v[compare_df_columns] df2 = other.__dict__[k][compare_df_columns] # pd.testing.assert_frame_equal(df1, df2) # compare the coordinates for dim in 0, 1: # (x, y) x1 = [crd[dim] for line in v.ls_coords for crd in line] x2 = [crd[dim] for line in other.__dict__[k].ls_coords for crd in line] assert np.allclose(x1, x2) assert np.allclose(v.width.values, other.__dict__[k].width.values, rtol=0.01) except: return False elif type(v) == pd.DataFrame: try: pd.testing.assert_frame_equal(v, other.__dict__[k]) except: return False elif v != other.__dict__[k]: try: if not np.allclose(v, other.__dict__[k]): return False except: continue #return False #elif type(v) in [str, int, float, dict, list]: # if v != other.__dict__[k]: # pass # continue
[docs] def read_lsmaker_xml(self, infile): #try: inpardat = ET.parse(infile) #except: # raise InputFileMissing # record the config file absolute path self._lsmaker_config_file_path = os.path.split(os.path.abspath(infile))[0] inpars = inpardat.getroot() self.inpars = inpars # setup the working directory (default to current directory) try: self.path = inpars.findall('.//working_dir')[0].text if not os.path.exists(self.path): os.makedirs(self.path) except: self.path = self._lsmaker_config_file_path # global settings self.preproc = self.tf2flag(self._get_xml_entry('preproc', 'True')) self.resistance = self._get_xml_entry('resistance', 0.3, float) # (days); c in documentation self.H = self._get_xml_entry('H', 100, float) # aquifer thickness in model units self.k = self._get_xml_entry('k', 10, float) # hydraulic conductivity of the aquifer in model units self.lmbda = np.sqrt(self.k * self.H * self.resistance) self.ScenResistance = self._get_xml_entry('ScenResistance', 'linesink') self.chkScenario = self.tf2flag(self._get_xml_entry('chkScenario', 'True')) self.global_streambed_thickness = self._get_xml_entry('global_streambed_thickness', 3, float) # streambed thickness self.ComputationalUnits = self._get_xml_entry('ComputationalUnits', 'feet').lower() # 'feet' or 'meters'; for XML output file self.BasemapUnits = self._get_xml_entry('BasemapUnits', None) if self.BasemapUnits is not None: self.BasemapUnits = self.BasemapUnits.lower() # elevation units multiplier (from NHDPlus cm to model units) self.zmult = 0.03280839895013123 if self.ComputationalUnits.lower() == 'feet' else 0.01 # model domain self.farfield = self._get_xml_entry('farfield', None) self.routed_area = self._get_xml_entry('routed_area', None) self.nearfield = self._get_xml_entry('nearfield', None) try: # get the projection file and crs from either the nearfield or routed area self.prj = self._get_xml_entry('prj', self.nearfield[:-4] + '.prj') self.crs = fiona.open(self.nearfield).crs except: self.prj = self._get_xml_entry('prj', self.routed_area[:-4] + '.prj') self.crs = fiona.open(self.routed_area).crs self.nearfield = self.routed_area #self.crs_str = str(self.crs) # self.crs, in proj string format self.farfield_buffer = self._get_xml_entry('farfield_buffer', 10000, int) self.clip_farfield = self.tf2flag(self._get_xml_entry('clip_farfield', 'False')) self.split_by_HUC = self.tf2flag(self._get_xml_entry('split_by_HUC', 'False')) self.HUC_shp = self._get_xml_entry('HUC_shp', None) self.HUC_name_field = self.tf2flag(self._get_xml_entry('HUC_name_field', 'False')) # simplification self.refinement_areas = [] # list of n areas within routed area with additional refinement self.nearfield_tolerance = self._get_xml_entry('nearfield_tolerance', 100, int) self.routed_area_tolerance = self._get_xml_entry('routed_area_tolerance', 100, int) self.farfield_tolerance = self._get_xml_entry('farfield_tolerance', 300, int) self.min_farfield_order = self._get_xml_entry('min_farfield_order', 2, int) self.min_nearfield_order = self._get_xml_entry('min_nearfield_order', 1, int) self.min_routed_area_order = self._get_xml_entry('min_routed_area_order', 1, int) self.min_nearfield_wb_size = self._get_xml_entry('min_nearfield_waterbody_size', 1.0, float) self.min_waterbody_size = self._get_xml_entry('min_waterbody_size', 1.0, float) self.min_farfield_wb_size = self._get_xml_entry('min_farfield_waterbody_size', self.min_waterbody_size, float) self.farfield_length_threshold = self._get_xml_entry('farfield_length_threshold', 0., float) 
self.routed_area_length_threshold = self._get_xml_entry('routed_area_length_threshold', 0., float) self.drop_intermittent = self.tf2flag(self._get_xml_entry('drop_intermittent', 'False')) self.drop_crossing = self.tf2flag(self._get_xml_entry('drop_crossing', 'False')) self.asum_thresh_ra = self._get_xml_entry('routed_area_arbolate_sum_threshold', 0., float) self.asum_thresh_nf = self._get_xml_entry('nearfield_arbolate_sum_threshold', 0., float) self.asum_thresh_ff = self._get_xml_entry('farfield_arbolate_sum_threshold', 0., float) # NHD files self.flowlines = self._get_xml_entry('flowlines', [], str, raise_error=True) self.elevslope = self._get_xml_entry('elevslope', [], str, raise_error=True) self.PlusFlowVAA = self._get_xml_entry('PlusFlowVAA', [], str, raise_error=True) self.waterbodies = self._get_xml_entry('waterbodies', [], str, raise_error=True) # columns to retain in NHD files (when joining to GIS lines) # Note: may need to add method to handle case discrepancies self.flowlines_cols = ['COMID', 'FCODE', 'FDATE', 'FLOWDIR', 'FTYPE', 'GNIS_ID', 'GNIS_NAME', 'LENGTHKM', 'REACHCODE', 'RESOLUTION', 'WBAREACOMI', 'geometry'] self.flowlines_cols_dtypes = {'COMID': self.int_dtype, 'FCODE': self.int_dtype, 'FDATE': str, 'FLOWDIR': str, 'FTYPE': str, 'GNIS_ID': self.int_dtype, 'GNIS_NAME': str, 'LENGTHKM': float, 'REACHCODE': str, 'RESOLUTION': str, 'WBAREACOMI': self.int_dtype, 'geometry': object} self.elevslope_cols = ['MINELEVSMO', 'MAXELEVSMO'] self.elevslope_dtypes = {'MINELEVSMO': float, 'MAXELEVSMO': float} self.pfvaa_cols = ['ArbolateSu', 'Hydroseq', 'DnHydroseq', 'StreamOrde'] self.pfvaa_cols_dtypes = {'ArbolateSu': float, 'Hydroseq': self.int_dtype, 'DnHydroseq': self.int_dtype, 'StreamOrde': np.int64} self.wb_cols = ['AREASQKM', 'COMID', 'ELEVATION', 'FCODE', 'FDATE', 'FTYPE', 'GNIS_ID', 'GNIS_NAME', 'REACHCODE', 'RESOLUTION', 'geometry'] self.wb_cols_dtypes = {'AREASQKM': float, 'COMID': self.int_dtype, 'ELEVATION': float, 'FCODE': self.int_dtype, 'FDATE': str, 'FTYPE': str, 'GNIS_ID': self.int_dtype, 'GNIS_NAME': str, 'REACHCODE': str, 'RESOLUTION': str, 'geometry': object} # could do away with above and have one dtypes list self.dtypes.update(self.flowlines_cols_dtypes) self.dtypes.update(self.elevslope_dtypes) self.dtypes.update(self.pfvaa_cols_dtypes) self.dtypes.update(self.wb_cols_dtypes) # preprocessed files self.DEM = self._get_xml_entry('DEM', None) self.elevs_field = self._get_xml_entry('elevs_field', 'elev') self.DEM_zmult = self._get_xml_entry('DEM_zmult', 1.0, float) self.flowlines_clipped = self._get_xml_entry('flowlines_clipped', 'preprocessed/flowlines_clipped.shp', relative_filepath=True) self.waterbodies_clipped = self._get_xml_entry('waterbodies_clipped', 'preprocessed/waterbodies_clipped.shp', relative_filepath=True) self.routed_mp = self._get_xml_entry('routed_area_multipolygon', 'preprocessed/ra_cutout.shp', relative_filepath=True) self.farfield_mp = self._get_xml_entry('farfield_multipolygon', 'preprocessed/ff_cutout.shp', relative_filepath=True) self.preprocessed_lines = self._get_xml_entry('preprocessed_lines', 'preprocessed/lines.shp', relative_filepath=True) self.preprocdir = os.path.split(self.flowlines_clipped)[0] self.wb_centroids_w_elevations = self.waterbodies_clipped[ :-4] + '_points.shp' # elevations extracted during preprocessing routine self.elevs_field = 'DEM' # field in wb_centroids_w_elevations containing elevations # outputs self.outfile_basename = os.path.join(self._lsmaker_config_file_path, inpars.findall('.//outfile_basename')[0].text) 
self.error_reporting = os.path.join(self._lsmaker_config_file_path, inpars.findall('.//error_reporting')[0].text) # attributes self.df = pd.DataFrame() # working dataframe for translating NHDPlus data to linesink strings self.lss = pd.DataFrame() # dataframe of GFLOW linesink strings (with attributes) self.outsegs = pd.DataFrame() self.confluences = pd.DataFrame()
    @classmethod
    def load_configuration(self, infile):
        """Load Linesink-maker YAML configuration into a dictionary.

        Parameters
        ----------
        infile : str or pathlike
            Linesink-maker configuration file in YAML format.

        Returns
        -------
        cfg : dict
            Configuration dictionary
        """
        # read in the user-specified configuration
        with open(infile) as src:
            cfg = yaml.load(src, Loader=yaml.Loader)
        cfg['filename'] = os.path.split(os.path.abspath(infile))[0]
        return cfg

    def set_configuration_from_yaml(self, infile):
        """Set the Linesink-maker configuration from a YAML file.

        Parameters
        ----------
        infile : str or pathlike
            Linesink-maker configuration file in YAML format.
        """
        cfg = self.load_configuration(infile)
        self.set_configuration(cfg)

[docs] def set_configuration(self, cfg): """Set the Linesink-maker configuration from a configuration dictionary. Parameters ---------- cfg : dict Configuration dictionary, as produced by the LinesinkData.set_configuration_from_yaml method. """ # read in the default configuration defaults_file = os.path.join(os.path.split(__file__)[0], 'default_settings.yml') with open(defaults_file) as src: defaults = yaml.load(src, Loader=yaml.Loader) self._lsmaker_config_file_path = Path(cfg['filename']) # configuration dictionary self.cfg = cfg # configuration file blocks filepaths_to_make_abs = {'outfile_basename'} for blockname, block in defaults.items(): # entries within blocks for key, default in defaults[blockname].items(): # try to detect files and make their paths absolute entry = self.cfg.get(blockname, {}).get(key, default) if isinstance(entry, str): file_abspath = self._lsmaker_config_file_path / entry if file_abspath.exists() or key in filepaths_to_make_abs: entry = str(file_abspath) elif isinstance(entry, list): new_entry = [] for item in entry: if isinstance(item, str): file_abspath = self._lsmaker_config_file_path / item if file_abspath.exists() or item in filepaths_to_make_abs: item = str(file_abspath) new_entry.append(item) entry = new_entry self.__dict__[key] = entry # setup the working directory (default to current directory) self.path = os.path.abspath(self.__dict__.pop('working_dir')) self.lmbda = np.sqrt(self.k * self.H * self.resistance) self.zmult = 0.03280839895013123 if self.ComputationalUnits.lower() == 'feet' else 0.01 # get the projection file and crs from either the nearfield(s) or routed area if self.prj is None: for filename in self.nearfield, self.routed_area: # extra logic to handle multiple nearfields fnames = [] if filename is not None: if isinstance(filename, dict): for fname, tol in filename.items(): fnames.append(self._lsmaker_config_file_path / fname) elif not isinstance(filename, str): for fname in filename: fnames.append(fname) else: fnames.append(filename) for fname in fnames: fname = Path(fname) prjfile = fname.with_suffix('.pfj') if prjfile.exists(): self.prj = prjfile self.crs = gisutils.get_shapefile_crs(prjfile) else: #self.crs_str = gisutils.get_proj_str(self.prj) self.crs = gisutils.get_shapefile_crs(self.prj) if self.crs is None or self.prj is None: msg = ("Invalid projection file or projection file not found: {}. \ Specify a valid ESRI projection file under the \ prj: configuration file key".format(self.prj)) raise ValueError(msg) # NHDPlus files for variable in 'flowlines', 'elevslope', 'PlusFlowVAA', 'waterbodies': filename = self.__dict__[variable] if filename is None: raise KeyError('Nothing specified for {} in the configuration file!'.format(variable)) elif not os.path.exists(filename): raise ValueError('file not found: {}'.format(filename)) # preprocessed files self.preprocdir = os.path.split(self.flowlines_clipped)[0] self.wb_centroids_w_elevations = self.waterbodies_clipped[ :-4] + '_points.shp' # elevations extracted during preprocessing routine self.elevs_field = 'DEM' # field in wb_centroids_w_elevations containing elevations
def _parse_xml_text(self, findall_result, dtype, relative_filepath=False): # handle either strings or lxml Elements txt = getattr(findall_result, 'text', findall_result) if dtype == str: # check if a file exists relative to config file path # if so, change the file path to be absolute file_abspath = os.path.join(self._lsmaker_config_file_path, txt) if os.path.exists(file_abspath) or relative_filepath: txt = file_abspath return dtype(txt) if txt is not None else None def _get_xml_entry(self, XMLentry, default, dtype=str, relative_filepath=False, raise_error=False): try: tmp = self.inpars.findall('.//{}'.format(XMLentry)) if len(tmp) == 0: if not raise_error and not relative_filepath: return default elif not raise_error and relative_filepath: tmp = [default] if not isinstance(default, list) else default else: raise ValueError('Nothing specified for {} in input XML file!'.format(XMLentry)) if len(tmp) == 1: # and not isinstance(default, list): return self._parse_xml_text(tmp[0], dtype, relative_filepath=relative_filepath) elif len(tmp) >= 1: return [self._parse_xml_text(s, dtype, relative_filepath=relative_filepath) for s in tmp] #return [dtype(s.text) for s in tmp] else: raise Exception() except: if not raise_error: return default else: raise ValueError('Nothing specified for {} in input XML file!'.format(XMLentry)) def _enforce_dtypes(self, df): """Ensure that dataframe column dtypes are correct.""" for c in df.columns: dtype = self.dtypes.get(c, df[c].dtype) try: df[c] = df[c].astype(dtype) except ValueError: # handle Nonetype values in some NHDPlus fields j=2 continue def _get_wb_elevations(self, wb_df): # (the centroids of some lakes might not be in the lake itself) wb_points = [wb.centroid if wb.centroid.within(wb) else _get_random_point_in_polygon(wb) for wb in wb_df.geometry.tolist()] # reproject the representative points into lat lon wb_points_ll = gisutils.project(wb_points, self.crs, "epsg:4269") wb_elevations = get_elevations_from_epqs(wb_points_ll, units=self.ComputationalUnits) # add in elevations from waterbodies in stream network return pd.DataFrame({'COMID': wb_df.COMID.values, self.elevs_field: wb_elevations, 'geometry': wb_points})
    def load_poly(self, shapefile, dest_crs=None):
        """Load a polygon shapefile and return a multipolygon of multiple features,
        or single polygon of one feature, projected in the destination coordinate system."""
        if dest_crs is None:
            dest_crs = self.crs
        else:
            dest_crs = gisutils.get_authority_crs(dest_crs)
        print('reading {}...'.format(shapefile))
        gdf = gpd.read_file(shapefile)
        if gdf.crs != dest_crs:
            gdf.to_crs(dest_crs, inplace=True)
        if len(gdf) > 1:
            return MultiPolygon(gdf.geometry.tolist())
        return gdf.geometry.values[0]

    def tf2flag(self, intxt):
        # converts text written in XML file to True or False flag
        if intxt.lower() == 'true':
            return True
        elif intxt.lower() == 'none':
            return
        else:
            return False

    def set_crs(self, epsg=None, proj_str=None, prjfile=None):
        """Set the projected coordinate reference system, and the BasemapUnits.
        If no arguments are supplied, default to existing BasemapUnits.

        Parameters
        ----------
        epsg: int
            EPSG code identifying Coordinate Reference System (CRS)
            for features in df.geometry (optional)
        proj_str: str
            proj_str string identifying CRS for features in df.geometry (optional)
        prjfile: str
            File path to projection (.prj) file identifying CRS
            for features in df.geometry (optional)

        Returns
        -------
        sets the LinesinkData.pyproj_crs attribute.
        """
        args = any(arg for arg in (epsg, proj_str, prjfile))
        pyproj_crs = None
        if self.BasemapUnits is None or args:
            if epsg is not None:
                pyproj_crs = pyproj.CRS.from_epsg(epsg)
            elif proj_str is not None:
                pyproj_crs = pyproj.CRS.from_string(proj_str)
            elif prjfile is not None:
                with open(prjfile) as src:
                    wkt = src.read()
                pyproj_crs = pyproj.CRS.from_wkt(wkt)
            # if possible, have pyproj try to find the closest
            # authority name and code matching the crs
            # so that input from epsg codes, proj strings, and prjfiles
            # results in equal pyproj_crs instances
            if pyproj_crs is not None:
                try:
                    authority = pyproj_crs.to_authority()
                    if authority is not None:
                        self.pyproj_crs = pyproj.CRS.from_user_input(pyproj_crs.to_authority())
                    else:
                        self.pyproj_crs = pyproj_crs
                except:
                    pass
            units = pyproj_crs.axis_info[0].unit_name
            translate_units = {'metre': 'meters'}
            self.BasemapUnits = translate_units.get(units, units)

[docs] def preprocess(self, save=True): """ This method associates attribute information in the NHDPlus PlusFlowVAA and Elevslope tables, and the model domain configuration (nearfield, farfield, and any other polygon areas) with the NHDPlus Flowlines and Waterbodies datasets. The following edits are made to the Flowlines and waterbodies: * remove farfield streams lower than <min_farfield_order> * remove waterbodies that aren't lakes, and lakes smaller than <min_waterbody_size> * convert lakes from polygons to lines; merge the lakes with the with flowlines Parameters: ----------- save: True/False Saves the preprocessed dataset to a shapefile specified by <preprocessed_lines> in the XML input file """ if self.routed_area is None: self.routed_area = self.nearfield if not isinstance(self.nearfield, str) or isinstance(self.nearfield, Path): raise ValueError("Multiple nearfields option requires specification of an enclosing routed area.") self.routed_area_tolerance = self.nearfield_tolerance if self.nearfield is None and self.routed_area is None: raise InputFileMissing('Need to supply shapefile of routed area or nearfield.') if self.farfield is None: print(('\nNo farfield shapefile supplied.\n' 'Creating farfield using buffer of {:.1f} {} around routed area.\n' .format(self.farfield_buffer, self.BasemapUnits))) modelareafile = fiona.open(self.routed_area) nfarea = shape(modelareafile[0]['geometry']) modelarea_farfield = nfarea.buffer(self.farfield_buffer) self.farfield = self.routed_area[:-4] + '_ff.shp' output = fiona.open(self.farfield, 'w', crs=modelareafile.crs, schema=modelareafile.schema, driver=modelareafile.driver) output.write({'properties': modelareafile[0]['properties'], 'geometry': mapping(modelarea_farfield)}) output.close() # make the output directory if it doesn't exist yet if len(self.preprocdir) > 0 and not os.path.isdir(self.preprocdir): os.makedirs(self.preprocdir) # make projection file that is independent of any shapefile if os.path.exists('GFLOW.prj'): os.remove('GFLOW.prj') if self.prj is not None and os.path.exists(self.prj): shutil.copy(self.prj, 'GFLOW.prj') self.prj = 'GFLOW.prj' print('clipping and reprojecting input datasets...') # (zero buffers can clean up self-intersections in hand drawn polygons) if isinstance(self.nearfield, dict): gdfs = [] for fname, tol in self.nearfield.items(): gdf = gpd.read_file(self._lsmaker_config_file_path / fname) gdf = gpd.GeoDataFrame(gdf['geometry']).to_crs(self.crs) gdf['geometry'] = gdf['geometry'].buffer(0) gdf['filename'] = fname gdf['tol'] = tol if not gdf['geometry'].is_valid.all(): raise ValueError(f"Invalid nearfield polygon(s): {fname}") gdfs.append(gdf) self.nf = pd.concat(gdfs) elif not isinstance(self.nearfield, str) or isinstance(self.nearfield, Path): gdfs = [] for fname in self.nearfield: gdf = gpd.read_file(self._lsmaker_config_file_path / fname) gdf = gpd.GeoDataFrame(gdf['geometry']).to_crs(self.crs) gdf['geometry'] = gdf['geometry'].buffer(0) gdf['filename'] = fname if not gdf['geometry'].is_valid.all(): raise ValueError(f"Invalid nearfield polygon(s): {fname}") gdfs.append(gdf) self.nf = pd.concat(gdfs) if self.nearfield_tolerance is None: raise ValueError( "Specification of multiple nearfields in a " "list requires a separate 'nearfield_tolerance' entry. 
") self.nf['tol'] = self.nearfield_tolerance else: self.nf = gpd.read_file(self.nearfield) self.nf['geometry'] = self.nf['geometry'].buffer(0) self.nf.to_crs(self.crs, inplace=True) if not self.nf.geometry.is_valid.all(): raise ValueError(f"Invalid nearfield polygon(s): {self.nearfield}") if self.nearfield_tolerance is None: raise ValueError( "No 'nearfield_tolerance' entry") self.nf['tol'] = self.nearfield_tolerance self.ra = self.load_poly(self.routed_area).buffer(0) self.ff = self.load_poly(self.farfield).buffer(0) for p in [self.ra, self.ff]: assert p.is_valid, 'Invalid polygon' for attr, shapefiles in list({'fl': self.flowlines, 'wb': self.waterbodies}.items()): # all flowlines and waterbodies must be in same coordinate system # sfrmaker preproc also uses crs from first file in list if isinstance(shapefiles, list): shapefile = shapefiles[0] else: shapefile = shapefiles shp_crs = gisutils.get_shapefile_crs(shapefile) # get bounding box of model area in nhd crs to speeding reading in data via filter bounds = gisutils.project([self.ff], self.crs, shp_crs) bounds = bounds[0].bounds self.__dict__[attr] = gisutils.shp2df(shapefiles, filter=bounds) # if NHD features not in model area coodinate system, reproject if shp_crs != self.crs: self.__dict__[attr]['geometry'] = gisutils.project(self.__dict__[attr].geometry.tolist(), shp_crs, self.crs) # 1816107 # for now, take intersection (don't truncate flowlines at farfield boundary) print('clipping to {}...'.format(self.farfield)) intersects = np.array([l.intersects(self.ff) for l in self.__dict__[attr].geometry]) self.__dict__[attr] = self.__dict__[attr][intersects].copy() if self.clip_farfield: print('truncating waterbodies at farfield boundary...') # get rid of any islands in the process wbgeoms = self.wb.geometry # valid = np.array([p.is_valid for p in wbgeoms]) # if np.any(~valid): # comids = self.wb.loc[~valid, 'COMID'] # print('The following waterbodies result in invalid polygons when intersected with the domain:') # for c in comids: # print(c) wbgeoms = [Polygon(g.exterior).buffer(0) for g in self.wb.geometry] wbgeoms = [p.intersection(self.ff).buffer(0) for p in wbgeoms] self.wb['geometry'] = wbgeoms # for multipolygons, retain largest part geoms = self.wb.geometry.values for i, g in enumerate(geoms): if g.type == 'MultiPolygon': geoms[i] = np.array(g.geoms)[np.argsort([p.area for p in g.geoms])][-1] self.wb['geometry'] = geoms # for now, write out preprocessed data to maintain structure with arcpy for df in ['fl', 'wb']: if len(self.__dict__[df]) == 0: raise EmptyDataFrame() gisutils.df2shp(self.fl, self.flowlines_clipped, crs=self.crs) gisutils.df2shp(self.wb, self.waterbodies_clipped, crs=self.crs) print('\nmaking donut polygon of farfield (with routed area removed)...') ff_donut_poly = self.ff.difference(self.ra) gdf = gpd.GeoDataFrame({'id': [0], 'geometry': ff_donut_poly}, crs=self.crs) gdf.to_file(self.farfield_mp, index=False) print(('wrote {}'.format(self.farfield_mp))) if self.routed_area is not None and self.routed_area != self.nearfield: print('\nmaking donut polygon of routed area (with nearfield area removed)...') if not self.nf.within(self.ra).all(): raise ValueError('Nearfield area must be within routed area!') # first convert nearfield area(s) to a single MultiPolygon # (because we only want one 'routed area' Polygon # with the nearfield hole(s) cut out) nearfield_mp = MultiPolygon(self.nf.geometry.tolist()) donut = gpd.GeoDataFrame({ 'geometry': [self.ra.difference(nearfield_mp)]}, crs=self.crs) 
donut.to_file(self.routed_mp, index=False) routed_area_poly = donut['geometry'].values[0] print(('wrote {}'.format(self.routed_mp))) else: routed_area_poly = self.nf.geometry[0] # drop waterbodies that aren't lakes bigger than min size min_size = np.min([self.min_nearfield_wb_size, self.min_waterbody_size, self.min_farfield_wb_size]) self.wb = self.wb.loc[(self.wb.FTYPE == 'LakePond') & (self.wb.AREASQKM > min_size)].copy() print('\ngetting elevations for waterbodies not in the stream network') isolated_wb = self.wb isolated_wb_comids = isolated_wb.COMID.tolist() if not os.path.exists(self.wb_centroids_w_elevations): # get elevations for all waterbodies for the time being. # otherwise waterbodies associated with first-order streams may be dropped from farfield, # but still waterbodies list, causing a key error in the lakes setup # these lakes probably should be left in the farfield unless they are explicitly not wanted ''' # (the centroids of some lakes might not be in the lake itself) wb_points = [wb.centroid if wb.centroid.within(wb) else _get_random_point_in_polygon(wb) for wb in isolated_wb.geometry.tolist()] # reproject the representative points into lat lon wb_points_ll = reproject(wb_points, self.crs_str, "+init=epsg:4269") wb_elevations = get_elevations_from_epqs(wb_points_ll, units=self.ComputationalUnits) # add in elevations from waterbodies in stream network wb_points_df = pd.DataFrame({'COMID': isolated_wb_comids, self.elevs_field: wb_elevations, 'geometry': wb_points}) ''' wb_points_df = self._get_wb_elevations(isolated_wb) gisutils.df2shp(wb_points_df, self.wb_centroids_w_elevations, crs=self.crs) else: print('\nreading elevations from {}...'.format(self.wb_centroids_w_elevations)) wb_points_df = gisutils.shp2df(self.wb_centroids_w_elevations) toget = set(isolated_wb_comids).difference(set(wb_points_df.COMID.tolist())) if len(toget) > 0: isolated_wb = self.wb.loc[self.wb.COMID.isin(toget)].copy() wb_points_df2 = self._get_wb_elevations(isolated_wb) wb_points_df = pd.concat([wb_points_df, wb_points_df2]) gisutils.df2shp(wb_points_df, self.wb_centroids_w_elevations, crs=self.crs) # open error reporting file with open(self.error_reporting, 'a') as efp: efp.write('\nPreprocessing...\n') print('\nAssembling input...') # read linework shapefile into pandas dataframe df = gisutils.shp2df(self.flowlines_clipped, index='COMID').drop_duplicates('COMID') df.index = df.index.astype(float).astype(int).astype(self.int_dtype) df.drop([c for c in df.columns if c.lower() not in [cc.lower() for cc in self.flowlines_cols]], axis=1, inplace=True) # might want to consider enforcing integer index here if strings cause problems #clipto = df.index.tolist() elevs = gisutils.shp2df(self.elevslope, index='COMID') # cast index to str or int (int_dtype), from float, int # or string representation of float or int elevs.index = elevs.index.astype(float).astype(int).astype(self.int_dtype) if not any(elevs.index.isin(df.index)): raise ValueError(f"No attributes COMIDs in {self.elevslope} match any flowlines!\n" "Check that datatypes in the two files are consistent, " "and that the files are for the same area.") elevs = elevs.loc[elevs.index.isin(df.index)] pfvaa = gisutils.shp2df(self.PlusFlowVAA, index='COMID') #, clipto=clipto) pfvaa.index = pfvaa.index.astype(float).astype(int).astype(self.int_dtype) if not any(pfvaa.index.isin(df.index)): raise ValueError(f"No attributes COMIDs in {self.PlusFlowVAA} match any flowlines!\n" "Check that datatypes in the two files are consistent, " "and that the files are 
for the same area.") pfvaa = pfvaa.loc[pfvaa.index.isin(df.index)] wbs = gisutils.shp2df(self.waterbodies_clipped, index='COMID').drop_duplicates('COMID') wbs.index = wbs.index.astype(float).astype(int).astype(self.int_dtype) wbs.drop([c for c in wbs.columns if c.lower() not in [cc.lower() for cc in self.wb_cols]], axis=1, inplace=True) self._enforce_dtypes(wbs) # check for MultiLineStrings / MultiPolygons and drop them (these are features that were fragmented by the boundaries) mls = [i for i in df.index if 'Multi' in df.loc[i, 'geometry'].type] df = df.drop(mls, axis=0) mps = [i for i in wbs.index if 'Multi' in wbs.loc[i, 'geometry'].type] wbs = wbs.drop(mps, axis=0) wbs = gpd.GeoDataFrame(wbs, crs=self.crs) # join NHD tables to lines df = df.join(elevs[self.elevslope_cols], how='inner', lsuffix='1') df = df.join(pfvaa[self.pfvaa_cols], how='inner', lsuffix='1') self._enforce_dtypes(df) df = gpd.GeoDataFrame(df, crs=self.crs) # routed_area_poly is the routed area, excluding any hole(s) # for one of more nearfield areas print('\nidentifying farfield and nearfield LinesinkData...') df['farfield'] = [line.intersects(ff_donut_poly) and not line.intersects(routed_area_poly) for line in df.geometry] wbs['farfield'] = [poly.intersects(ff_donut_poly) and not poly.intersects(routed_area_poly) for poly in wbs.geometry] df['routed'] = [line.intersects(routed_area_poly) for line in df.geometry] wbs['routed'] = [poly.intersects(routed_area_poly) for poly in wbs.geometry] #df['nearfield'] = [line.intersects(routed_area_poly) for line in df.geometry] df = df.sjoin(self.nf[['tol', 'geometry']], how='left', predicate='intersects') # non NaN entries in the 'index_right' column indicate intersection # with those nearfield polygons (denoted by their index numbers) df['nearfield'] = ~df['index_right'].isna() #wbs['nearfield'] = [poly.intersects(routed_area_poly) for poly in wbs.geometry] wbs = wbs.sjoin(self.nf[['tol', 'geometry']], how='left', predicate='intersects') # non NaN entries in the 'index_right' column indicate intersection # with those nearfield polygons (denoted by their index numbers) wbs['nearfield'] = ~wbs['index_right'].isna() # drop any duplicates caused by lines intersecting more than one nearfield df = df.loc[~df.index.duplicated(keep='last')].copy() wbs = wbs.loc[~wbs.index.duplicated(keep='last')].copy() # assign farfield and routed area tolerances df.loc[df['farfield'], 'tol'] = self.farfield_tolerance df.loc[df['routed'], 'tol'] = self.routed_area_tolerance wbs.loc[wbs['farfield'], 'tol'] = self.farfield_tolerance wbs.loc[wbs['routed'], 'tol'] = self.routed_area_tolerance assert not df['tol'].isna().any(),\ "Something went wrong; some lines don't have a simplification tolerance" assert not wbs['tol'].isna().any(),\ "Something went wrong; some lines don't have a simplification tolerance" if self.asum_thresh_ra > 0.: print('\nremoving streams in routed area with arbolate sums < {:.2f}'.format(self.asum_thresh_ra)) # retain all streams in the farfield or in the routed area with arbolate sum > threshold criteria = ~df.routed.values | df.routed.values & (df.ArbolateSu.values > self.asum_thresh_ra) df = df.loc[criteria].copy() if self.asum_thresh_nf > 0.: print('\nremoving streams in nearfield with arbolate sums < {:.2f}'.format(self.asum_thresh_nf)) criteria = ~df.nearfield.values | df.nearfield.values & (df.ArbolateSu.values > self.asum_thresh_nf) df = df.loc[criteria].copy() if self.asum_thresh_ff > 0.: print('\nremoving streams in farfield with arbolate sums < 
{:.2f}'.format(self.asum_thresh_ff)) criteria = ~df.farfield.values | df.farfield.values & (df.ArbolateSu.values > self.asum_thresh_ff) df = df.loc[criteria].copy() if self.min_nearfield_order > 1.: print('removing nearfield streams lower than {} order...'.format(self.min_nearfield_order)) # retain all streams in the nearfield of order > min_farfield_order criteria = ~df.nearfield.values | (df.nearfield.values & (df.StreamOrde.values >= self.min_nearfield_order)) df = df[criteria].copy() if self.min_routed_area_order > 1.: print('removing streams in the routed area of lower than {} order...'.format(self.min_routed_area_order)) # retain all streams in the nearfield of order > min_farfield_order routed = df.routed.values & ~df.nearfield.values criteria = ~routed | (routed & (df.StreamOrde.values >= self.min_routed_area_order)) df = df[criteria].copy() if self.min_farfield_order > 1.: print('removing farfield streams lower than {} order...'.format(self.min_farfield_order)) # retain all streams in the nearfield or in the farfield and of order > min_farfield_order criteria = ~df.farfield.values | (df.farfield.values & (df.StreamOrde.values >= self.min_farfield_order)) df = df[criteria].copy() farfield_retain = ~(df.farfield.values & (df.FCODE == self.fcodes['Intermittent']).values) # drop intermittent streams from farfield if self.drop_intermittent: print('removing intermittent streams from routed area outside of nearfield...') # retain all streams in the nearfield or in the farfield and of order > min_farfield_order farfield_retain = farfield_retain & ~(df.routed.values & ~df.nearfield.values & (df.FCODE == self.fcodes['Intermittent']).values) df = df[farfield_retain].copy() print('dropping waterbodies from routed area that are not lakes larger than {}...'.format(self.min_waterbody_size)) nearfield_wbs = wbs.nearfield.values & (wbs.AREASQKM > self.min_nearfield_wb_size) & (wbs.FTYPE == 'LakePond') routedarea_wbs = wbs.routed.values & (wbs.AREASQKM > self.min_waterbody_size) & (wbs.FTYPE == 'LakePond') farfield_wbs = wbs.farfield.values & (wbs.AREASQKM > self.min_farfield_wb_size) & (wbs.FTYPE == 'LakePond') print('dropping waterbodies from nearfield that are not lakes larger than {}...\n' 'dropping waterbodies from farfield that are not lakes larger than {}...'.format(self.min_waterbody_size, self.min_farfield_wb_size)) wbs = wbs[nearfield_wbs | routedarea_wbs | farfield_wbs] print('merging waterbodies with coincident boundaries...') dropped = [] for wb_comid in wbs.index: # skipped waterbodies that have already been merged if wb_comid in dropped: continue wb_geometry = wbs.geometry[wb_comid] overlapping = wbs.loc[[wb_geometry.intersects(r) for r in wbs.geometry]] basering_comid = overlapping.sort_values('FTYPE').index[0] # sort to prioritize features with names # two or more shapes in overlapping signifies a coincident boundary if len(overlapping) > 1: merged = unary_union([r for r in overlapping.geometry]) # multipolygons will result if the two polygons only have a single point in common if merged.type == 'MultiPolygon': continue wbs.loc[basering_comid, 'geometry'] = merged todrop = [wbc for wbc in overlapping.index if wbc != basering_comid] dropped += todrop wbs = wbs.drop(todrop, axis=0) # only keep merged feature; drop others from index # replace references to dropped waterbody in lines df.loc[df.WBAREACOMI.isin(todrop), 'WBAREACOMI'] = basering_comid # swap out polygons in lake geometry column with the linear rings that make up their exteriors print('converting lake exterior polygons to 
lines...') wbs['geometry'] = [LineString(g.exterior) for g in wbs.geometry] wbs['waterbody'] = np.array([True] * len(wbs), dtype=bool) # boolean variable indicate whether feature is waterbody print('merging flowline and waterbody datasets...') df['waterbody'] = np.array([False] * len(df), dtype=bool) df = pd.concat([df, wbs]) df.COMID = df.index ''' if self.clip_farfield: print('clipping farfield features...') df.loc[df.farfield.values, 'geometry'] = [g.intersection(ffg) for g in df.geometry[df.farfield.values]] ''' print('\nDone with preprocessing.') if save: gisutils.df2shp(df, self.preprocessed_lines, crs=self.crs) self.df = df
[docs] def simplify_lines(self, nearfield_tolerance=None, routed_area_tolerance=None, farfield_tolerance=None, nearfield_refinement={}): """Reduces the number of vertices in the GIS linework representing streams and lakes, to within specified tolerances. The tolerance values represent the maximum distance in the coordinate system units that the simplified feature can deviate from the original feature. Parameters ---------- nearfield_tolerance : float Tolerance for the area representing the model nearfield farfield_tolerance : float Tolerance for the area representing the model farfield Returns ------- df : DataFrame A copy of the df attribute with a 'ls_geom' column of simplified geometries, and a 'ls_coords' column containing lists of coordinate tuples defining each simplified line. """ if not hasattr(self, 'df'): print('No dataframe attribute for LinesinkData instance. Run preprocess first.') return if nearfield_tolerance is None: nearfield_tolerance = self.nearfield_tolerance routed_area_tolerance = self.routed_area_tolerance farfield_tolerance = self.farfield_tolerance #if isinstance(self.df.farfield.iloc[0], str): # self.df.loc[:, 'farfield'] = [True if f.lower() == 'true' else False for f in self.df.farfield] print('simplifying NHD linework geometries...') # simplify line and waterbody geometries #(see http://toblerity.org/shapely/manual.html) df = self.df[['farfield', 'routed', 'nearfield', 'tol', 'geometry']].copy() ls_geom = np.array([LineString()] * len(df), dtype=object) tols = {'nf': nearfield_tolerance, 'ra': routed_area_tolerance, 'ff': farfield_tolerance} simplification = {'nf': df.nearfield.values, 'ra': df.routed.values & ~df.nearfield.values & ~df.farfield.values, 'ff': df.farfield.values} [simplification.pop(k) for k, v in tols.items() if v is None] for k, within in simplification.items(): # simplify the LinesinkData in the domain; add simplified geometries to global geometry column # assign geometries to numpy array first and then to df (had trouble assigning with pandas) ls_geom[within] = [g.simplify(tols[k]) for g in df.loc[within, 'geometry'].tolist()] df['ls_geom'] = ls_geom # add columns for additional nearfield refinement areas for shp in list(nearfield_refinement.keys()): if shp not in self.refinement_areas: self.refinement_areas.append(shp) area_name = os.path.split(shp)[-1][:-4] with fiona.open(shp) as src: poly = shape(next(iter(src))['geometry']) #poly = shape(fiona.open(shp).next()['geometry']) df[area_name] = [g.intersects(poly) for g in df.geometry] # add column of lists, containing linesink coordinates df['ls_coords'] = [list(g.coords) for g in df.ls_geom] assert np.all(np.array([len(c) for c in df.ls_coords]) > 0) # shouldn't be an empty coordinates return df
[docs] def prototype(self, nftol=[10, 50, 100, 200, 500], fftol=500): """Function to compare multiple simplification distance tolerance values for the model nearfield. Parameters: ----------- nftol : list Contains the tolerance values to be compared. fftol : numeric Single tolerance value to be used for the farfield in all comparisons. Returns: -------- A new directory called "prototypes" is made """ if not os.path.isdir('prototypes'): os.makedirs('prototypes') if isinstance(fftol, float) or isinstance(fftol, int): fftol = [fftol] * len(nftol) nlines = [] for i, tol in enumerate(nftol): df = self.simplify_lines(nearfield_tolerance=tol, farfield_tolerance=fftol[i]) # count the number of lines with distance tolerance nlines.append(np.sum([len(l) for l in df.ls_coords])) # make a shapefile of the simplified lines with nearfield_tol=tol df.drop(['ls_coords', 'geometry'], axis=1, inplace=True) outshp = 'prototypes/' + self.outfile_basename + '_dis_tol_{}.shp'.format(tol) gisutils.df2shp(df, outshp, geo_column='ls_geom', crs=self.crs) plt.figure() plt.plot(nftol, nlines) plt.xlabel('Distance tolerance') plt.ylabel('Number of lines') plt.savefig(self.outfile_basename + 'tol_vs_nlines.pdf')
[docs] def adjust_zero_gradient(self, df, increment=0.01): dg = self.dg dg.df = df comids0 = dg.check4zero_gradient(log=False) if len(comids0) > 0: if len(self.confluences) == 0: self.df = df self.map_confluences() if len(self.outsegs) == 0: self.df = df self.map_outsegs() with open(self.error_reporting, 'a') as efp: efp.write('\nzero-gradient adjustments:\n') efp.write('comid, old_elevmax, old_elevmin, new_elevmax, new_elevmin, downcomid\n') print("adjusting elevations for comids with zero-gradient...") for comid in comids0: outsegs = [o for o in self.outsegs.loc[comid].values if int(o) > 0] for i, o in enumerate(outsegs): if i == len(outsegs) - 1: oo = 0 else: oo = outsegs[i + 1] minElev, maxElev = df.loc[o, 'minElev'], df.loc[o, 'maxElev'] # test if current segment has flat or negative gradient if minElev >= maxElev: minElev = maxElev - increment df.loc[o, 'minElev'] = minElev efp.write('{}, {:.2f}, {:.2f}, {:.2f}, {}\n'.format(o, maxElev, minElev + increment, maxElev, minElev, oo)) # test if next segment is now higher if int(oo) > 0 and df.loc[oo, 'maxElev'] > minElev: efp.write('{}, {:.2f}, {:.2f}, {:.2f}, {}\n'.format(outsegs[i + 1], df.loc[outsegs[i + 1], 'maxElev'], df.loc[outsegs[i + 1], 'minElev'], minElev, df.loc[outsegs[i + 1], 'minElev'], oo)) df.loc[oo, 'maxElev'] = minElev else: break # check again for zero-gradient lines dg.df = df comids0 = dg.check4zero_gradient(log=False) if len(comids0) > 0: for c in comids0: efp.write('{}\n'.format(c)) print("\nWarning!, the following comids had zero gradients:\n{}".format(comids0)) print("routing for these was turned off. Elevations must be fixed manually.\n" \ "See {}".format(self.error_reporting)) return df
    def drop_crossing_lines(self, df):
        dg = self.dg
        crosses = dg.check4crossing_lines()

[docs] def drop_duplicates(self, df): # loops or braids in NHD linework can result in duplicate lines after simplification # create column of line coordinates converted to strings df['ls_coords_str'] = [''.join(map(str, coords)) for coords in df.ls_coords] # identify duplicates; make common set of up and down comids for duplicates duplicates = np.unique(df.loc[df.duplicated('ls_coords_str'), 'ls_coords_str']) for dup in duplicates: alld = df[df.ls_coords_str == dup] upcomids = [] dncomid = [] for i, r in alld.iterrows(): upcomids += r.upcomids dncomid += r.dncomid upcomids = list(set(upcomids).difference({0, '0'})) dncomid = list(set(dncomid).difference({0, '0'})) keep_comid = alld.index[0] df.at[keep_comid, 'upcomids'] = upcomids df.at[keep_comid, 'dncomid'] = dncomid #df.set_value(keep_comid, 'upcomids', upcomids) #df.set_value(keep_comid, 'dncomid', dncomid) for u in upcomids: df.at[u, 'dncomid'] = [keep_comid] #df.set_value(u, 'dncomid', [keep_comid]) for d in dncomid: upids = set(df.loc[d, 'upcomids']).difference(set(alld.index[1:])) upids.add(alld.index[0]) df.at[d, 'upcomids'] = list(upids) #df.set_value(d, 'upcomids', list(upids)) df.drop(alld.index[1:], axis=0, inplace=True) # drop the duplicates (this may cause problems if multiple braids are routed to) #df = df.drop_duplicates('ls_coords_str') # drop rows from dataframe containing duplicates df = df.drop('ls_coords_str', axis=1) return df
[docs]    def setup_linesink_lakes(self, df):

        # read in elevations for NHD waterbodies (from preprocessing routine; needed for isolated lakes)
        wb_elevs = gisutils.shp2df(self.wb_centroids_w_elevations,
                                   index='COMID', index_dtype=self.int_dtype).drop_duplicates('COMID')
        self._enforce_dtypes(wb_elevs)
        wb_elevs = wb_elevs[self.elevs_field] * self.DEM_zmult

        # identify lines that represent lakes
        # get elevations, up/downcomids, and total lengths for those lines
        # assign attributes to lakes, then drop the lines
        df['total_line_length'] = 0  # field to store total shoreline length of lakes
        for wb_comid in self.wblist:

            lines = df[df['WBAREACOMI'] == wb_comid]
            upcomids = []
            dncomids = []

            # isolated lakes have no overlapping lines and no routing
            if len(lines) == 0:
                df.loc[wb_comid, 'maxElev'] = wb_elevs[wb_comid]
                df.loc[wb_comid, 'minElev'] = wb_elevs[wb_comid] - 0.01
                df.loc[wb_comid, 'routing'] = 0
            else:
                df.loc[wb_comid, 'minElev'] = np.min(lines.minElev)
                df.loc[wb_comid, 'maxElev'] = np.min(lines.maxElev)

                # get upcomids and downcomid for the lake,
                # by differencing all up/down comids for lines in the lake against the comids in the lake
                upcomids = list(set([c for l in lines.upcomids for c in l]) - set(lines.index))
                dncomids = list(set([c for l in lines.dncomid for c in l]) - set(lines.index))

                # .at sets an entry for a single row/column location in a DataFrame
                # (to avoid confusion over specifying a single row but supplying a sequence)
                df.at[wb_comid, 'upcomids'] = upcomids
                df.at[wb_comid, 'dncomid'] = dncomids

                # make the lake the down-comid for the upcomids of the lake
                # (instead of the lines that represented the lake in the flowlines dataset);
                # do the same for the down-comid of the lake
                for u in [u for u in upcomids if int(u) > 0]:  # exclude outlets
                    df.at[u, 'dncomid'] = [wb_comid]
                for d in [d for d in dncomids if int(d) > 0]:
                    df.at[d, 'upcomids'] = [wb_comid]

                # get total length of lines representing lake (used later to estimate width)
                df.loc[wb_comid, 'total_line_length'] = np.sum(lines.LENGTHKM)

                # enforce gradient in routed lakes; update elevations in downstream comids
                if df.loc[wb_comid, 'minElev'] == df.loc[wb_comid, 'maxElev']:
                    df.loc[wb_comid, 'minElev'] -= 0.01
                    dnids = df.loc[wb_comid, 'dncomid']
                    for dnid in [d for d in dnids if int(d) > 0]:
                        df.loc[dnid, 'maxElev'] -= 0.01

            # move beginning/end coordinate of the linear ring representing the lake to the outlet
            # location (to ensure correct routing); some routed lakes may not have an outlet.
            # do this for both routed and unrouted (farfield) lakes, so that the outlet line won't cross the lake
            # (only tributaries are tested for crossing in the step below)
            lake_coords = uniquelist(df.loc[wb_comid, 'ls_coords'])

            if len(df.loc[wb_comid, 'dncomid']) > 0 and int(dncomids[0]) != 0:
                outlet_coords = df.loc[df.loc[wb_comid, 'dncomid'][0], 'ls_coords'][0]
                closest_ind = closest_vertex_ind(outlet_coords, lake_coords)
                lake_coords[closest_ind] = outlet_coords
                next_ind = closest_ind + 1 if closest_ind < (len(lake_coords) - 1) else 0
            # for lakes without outlets, make the last coordinate the outlet so that it can be moved below
            else:
                outlet_coords = lake_coords[-1]
                next_ind = 0

            inlet_coords = move_point_along_line(lake_coords[next_ind], outlet_coords, 1)

            new_coords = [inlet_coords] + lake_coords[next_ind:] + lake_coords[:next_ind]
            df.at[wb_comid, 'ls_coords'] = new_coords

            # make sure inlets/outlets don't cross the lines representing the lake
            wb_geom = LineString(new_coords)
            crosses = [c for c in upcomids
                       if int(c) != 0 and LineString(df.loc[c, 'ls_coords']).crosses(wb_geom)]
            for comid_crosses in crosses:
                ls_coords = list(df.loc[comid_crosses, 'ls_coords'])  # copy, to avoid modifying df
                # find the first intersection point with the lake
                # (two very similar coordinates will occasionally be returned by intersection)
                intersection = LineString(ls_coords).intersection(wb_geom)
                if intersection.type == 'MultiPoint':
                    intersection_point = np.array([intersection.geoms[0].xy[0][0],
                                                   intersection.geoms[0].xy[1][0]])
                else:
                    intersection_point = np.array([intersection.xy[0][0],
                                                   intersection.xy[1][0]])
                # sequentially drop the last vertex from the line until it no longer crosses the lake
                while True:
                    ls_coords.pop(-1)
                    if len(ls_coords) < 2:
                        break
                    # need to test for intersection separately,
                    # in case len(ls_coords) == 1 (can't make a LineString)
                    elif not LineString(ls_coords).crosses(wb_geom):
                        break
                # append a new end vertex on the line that is close to, but not coincident with, the lake
                diff = np.array(ls_coords[-1]) - intersection_point
                # make a new endpoint that is between the intersection and the next-to-last vertex
                new_endvert = tuple(intersection_point + np.sign(diff) * np.sqrt(self.nearfield_tolerance))
                ls_coords.append(new_endvert)
                # use df.at here to avoid an error about setting a list
                # with a different length than the existing list
                df.at[comid_crosses, 'ls_coords'] = ls_coords

            # drop the lines representing the lake from the lines dataframe
            df.drop(lines.index, axis=0, inplace=True)

        return df
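    # How the lake's neighbors are derived above, shown with hypothetical comids:
    # the up/down comids referenced by the lake's own lines, minus the comids of
    # those lines, leaves only the segments that flow into or out of the lake.
    # >>> lake_lines = {101, 102, 103}                 # flowlines tracing the lake
    # >>> upcomids_by_line = [[99, 102], [101], [102]] # 'upcomids' of those lines
    # >>> set(c for l in upcomids_by_line for c in l) - lake_lines
    # {99}
    # segment 99 is then treated as an inflow, and its 'dncomid' is rewired to the lake's comid.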
[docs]    def list_updown_comids(self, df):

        farfield = df.COMID[df.farfield].tolist()
        # record up and downstream comids for lines
        lines = [l for l in df.index if l not in self.wblist and l not in farfield]
        df['upcomids'] = [[]] * len(df)
        df['dncomid'] = [[]] * len(df)
        dncomid, upcomids = [], []
        for l in lines:
            # set up/down comids that are not in the model domain to zero
            dncomid.append([d if d in lines else 0
                            for d in list(df[df['Hydroseq'] == df.loc[l, 'DnHydroseq']].index)])
            upcomids.append([u if u in lines else 0
                             for u in list(df[df['DnHydroseq'] == df.loc[l, 'Hydroseq']].index)])

        df.loc[lines, 'upcomids'] = np.array(upcomids, dtype=object)
        df.loc[lines, 'dncomid'] = np.array(dncomid, dtype=object)
        # enforce list datatype (pandas flattens lists of lists of length 1 on assignment)
        df['dncomid'] = [[d] if not isinstance(d, list) else d for d in df.dncomid]
        return df
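    # Hedged usage sketch (values are hypothetical): each routed line ends up with
    # list-valued 'upcomids'/'dncomid' columns, with comids outside the model
    # domain replaced by 0.
    # >>> df = ls.list_updown_comids(df)   # assuming ls is a LinesinkData instance
    # >>> df.loc[4867713, ['upcomids', 'dncomid']].tolist()
    # [[4867711, 4867709], [4867715]]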
[docs]    def make_linesinks(self, reuse_preprocessed_lines=False, shp=None):
        with open(self.error_reporting, 'a') as efp:
            efp.write('\nMaking the lines...\n')

        if shp is None:
            shp = self.preprocessed_lines

        if reuse_preprocessed_lines:
            try:
                self.df = gpd.read_file(shp, dtypes={'COMID': self.int_dtype})
                self.df.index = self.df['COMID']
                self._enforce_dtypes(self.df)
            except:
                self.preprocess(save=True)
        else:
            self.preprocess(save=True)

        # enforce integer columns
        self.df.index = self.df.index.astype(self.int_dtype)
        self.df['COMID'] = self.df.COMID.astype(self.int_dtype)

        # simplify the lines in the df (dataframe) attribute
        df = self.df.copy()
        df['ls_geom'] = df['geometry'].simplify(df['tol'].values)
        # add column of lists, containing linesink coordinates
        df['ls_coords'] = [list(g.coords) for g in df['ls_geom']]
        # there shouldn't be any empty coordinate sequences
        assert np.all(np.array([len(c) for c in df.ls_coords]) > 0)
        self.lines_df = df

        self.wblist = set(df.loc[df.waterbody].index.values.astype(self.int_dtype)).difference({0})

        print('Assigning attributes for GFLOW input...')

        # routing
        # need to reformulate simplification so that "routed" means all routed LinesinkData,
        # not just those between the nearfield and farfield
        df['routing'] = (df.routed.values | df.nearfield.values).astype(int)

        # linesink elevations (lakes won't be populated yet)
        min_elev_col = [c for c in df.columns if 'minelev' in c.lower()][0]
        max_elev_col = [c for c in df.columns if 'maxelev' in c.lower()][0]
        df['minElev'] = df[min_elev_col] * self.zmult
        df['maxElev'] = df[max_elev_col] * self.zmult
        df['dStage'] = df['maxElev'] - df['minElev']

        # list upstream and downstream comids
        df = self.list_updown_comids(df)

        # discard duplicate LinesinkData that result from braids in NHD and line simplification
        df = self.drop_duplicates(df)

        # represent lakes with LinesinkData
        df = self.setup_linesink_lakes(df)

        print('\nmerging or splitting lines with only two vertices...')
        # find all routed comids with only 1 line; merge with neighboring comids
        # (GFLOW requires two lines for routed streams)
        def bisect(coords):
            # add a vertex to the middle of a single line segment
            coords = np.array(coords)
            mid = 0.5 * (coords[0] + coords[-1])
            new_coords = list(map(tuple, [coords[0], mid, coords[-1]]))
            return new_coords

        df['nlines'] = [len(coords) - 1 for i, coords in enumerate(df.ls_coords)]

        # bisect lines that have only one segment, and are routed
        ls_coords = df.ls_coords.tolist()
        singlesegment = ((df['nlines'] < 2) & (df['routing'] == 1)).values
        df['ls_coords'] = [bisect(line) if singlesegment[i]
                           else line for i, line in enumerate(ls_coords)]

        # fix LinesinkData where max and min elevations are the same
        df = self.adjust_zero_gradient(df)

        # end streams
        # evaluate whether the downstream segment is in the farfield
        downstream_ff = []
        for i in range(len(df)):
            try:
                dff = df.loc[df.iloc[i].dncomid[0], 'farfield'].item()
            except:
                dff = True
            downstream_ff.append(dff)

        # flag segments whose downstream segment is in the farfield as End Segments
        df['end_stream'] = len(df) * [0]
        df.loc[downstream_ff, 'end_stream'] = 1

        # widths for lines
        arbolate_sum_col = [c for c in df.columns if 'arbolate' in c.lower()][0]
        df['width'] = df[arbolate_sum_col].map(lambda x: width_from_arboate(x, self.lmbda))

        # widths for lakes
        if np.any(df['FTYPE'] == 'LakePond'):
            df.loc[df['FTYPE'] == 'LakePond', 'width'] = \
                np.vectorize(lake_width)(df.loc[df['FTYPE'] == 'LakePond', 'AREASQKM'],
                                         df.loc[df['FTYPE'] == 'LakePond', 'total_line_length'],
                                         self.lmbda)

        # resistance
        df['resistance'] = self.resistance
        df.loc[df['farfield'], 'resistance'] = 0

        # depth
        df['depth'] = self.global_streambed_thickness

        # resistance parameter (scenario)
        df['ScenResistance'] = self.ScenResistance
        df.loc[df['farfield'], 'ScenResistance'] = '__NONE__'

        # check box for "include in Scenario" in the GUI
        df['chkScenario'] = self.chkScenario

        # linesink location
        df.loc[df['FTYPE'] != 'LakePond', 'AutoSWIZC'] = 1  # along stream centerline
        df.loc[df['FTYPE'] == 'LakePond', 'AutoSWIZC'] = 2  # along surface water boundary

        # additional check to drop isolated lines
        isolated = [c for c in df.index
                    if len(df.loc[c].dncomid) == 0 and len(df.loc[c].upcomids) == 0
                    and c not in self.wblist]
        #df = df.drop(isolated, axis=0)

        # drop lines below the minimum length that are not lakes
        if self.farfield_length_threshold > 0.:
            criteria = ~df.farfield.values | (df.FTYPE == 'LakePond') | \
                       (df.farfield.values & (df.LENGTHKM >= self.farfield_length_threshold))
            df = df.loc[criteria].copy()
        if self.routed_area_length_threshold > 0.:
            criteria = ~df.routed.values | (df.FTYPE == 'LakePond') | \
                       (df.routed.values & (df.LENGTHKM >= self.routed_area_length_threshold))
            df = df.loc[criteria].copy()

        # names
        df['ls_name'] = len(df) * [None]
        df['ls_name'] = df.apply(name, axis=1)

        # compare number of line segments before and after simplification
        npoints_orig = sum([len(p) - 1 for p in df['geometry'].map(lambda x: x.xy[0])])
        npoints_simp = sum([len(p) - 1 for p in df.ls_coords])

        print('\nnumber of lines in original NHD linework: {}'.format(npoints_orig))
        print('number of simplified lines: {}\n'.format(npoints_simp))
        if npoints_simp > self.maxlines:
            print("Warning, the number of lines exceeds GFLOW's limit of {}!".format(self.maxlines))

        if self.split_by_HUC:
            self.write_lss_by_huc(df)
        else:
            self.write_lss(df, '{}.lss.xml'.format(self.outfile_basename))

        # run diagnostics on lines and report errors
        self.run_diagnostics()

        # write shapefile of results
        # convert lists in dn and upcomid columns to strings (for writing to shp)
        # clean up any dtypes that were altered from sloppy use of pandas
        self._enforce_dtypes(df)
        self.df = df
        self.write_shapefile()
        print('Done!')
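    # Hedged usage sketch: a typical workflow builds a LinesinkData instance from a
    # configuration file and then calls make_linesinks(). The import path, the
    # configuration file name, and the constructor argument shown here are
    # assumptions for illustration only.
    # >>> from lsmaker import LinesinkData
    # >>> ls = LinesinkData('Medford_lines.xml')
    # >>> ls.make_linesinks()                               # writes <outfile_basename>.lss.xml
    # >>> ls.make_linesinks(reuse_preprocessed_lines=True)  # skip preprocessing on re-runs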
[docs]    def map_confluences(self):

        upsegs = self.df.upcomids.tolist()
        maxsegs = np.array([np.max([int(uu) for uu in u]) if len(u) > 0
                            else 0 for u in upsegs])
        seglengths = np.array([len(u) for u in upsegs])
        # set up dataframe of confluences
        # confluences are where segments have upsegs (no upsegs means the segment is a headwater)
        confluences = self.df.loc[(seglengths > 0) & (maxsegs > 0), ['COMID', 'upcomids']].copy()
        confluences['elev'] = [0] * len(confluences)
        nconfluences = len(confluences)
        print('Mapping {} confluences and updating segment min/max elevations...'.format(nconfluences))
        for i, r in confluences.iterrows():
            # confluence elevation is the minimum of the ending segments' minimums
            # and the starting segment's maximum
            endsmin = np.min(self.df.loc[self.df.COMID.isin(r.upcomids), 'minElev'].values)
            startmax = np.max(self.df.loc[self.df.COMID == i, 'maxElev'].values)
            cfelev = np.min([endsmin, startmax])
            confluences.loc[i, 'elev'] = cfelev

            upcomids = [u for u in r.upcomids if int(u) > 0]
            if len(upcomids) > 0:
                self.df.loc[upcomids, 'minElev'] = cfelev
            self.df.loc[i, 'maxElev'] = cfelev

        self.confluences = confluences
        self.df['dStage'] = self.df['maxElev'] - self.df['minElev']

        print('Done, see confluences attribute.')
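    # The confluence elevation rule above, as a hypothetical worked example:
    # take the minimum of the upstream (ending) segments' minimum elevations and
    # the downstream (starting) segment's maximum elevation.
    # >>> import numpy as np
    # >>> endsmin = np.min([297.3, 298.1])    # minElev of the two upstream segments
    # >>> startmax = 297.8                    # maxElev of the downstream segment
    # >>> float(np.min([endsmin, startmax]))  # becomes minElev upstream / maxElev downstream
    # 297.3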
[docs]    def run_diagnostics(self):
        """Run the diagnostic suite on the LinesinkData instance."""
        dg = self.dg
        dg.check_vertices()
        dg.check4crossing_lines()
        dg.check4zero_gradient()
[docs]    def map_outsegs(self):
        '''
        from Mat2, returns dataframe of all downstream segments
        (will not work with circular routing!)
        '''
        outsegsmap = pd.DataFrame(self.df.COMID)
        outsegs = pd.Series([d[0] if len(d) > 0 else 0 for d in self.df.dncomid],
                            index=self.df.index)
        max_outseg = outsegsmap[outsegsmap.columns[-1]].astype(int).max()
        knt = 2
        while max_outseg > 0:
            outsegsmap['outseg{}'.format(knt)] = [outsegs[s] if int(s) > 0 else 0
                                                  for s in outsegsmap[outsegsmap.columns[-1]]]
            max_outseg = outsegsmap[outsegsmap.columns[-1]].astype(int).max()
            if max_outseg == 0:
                break
            knt += 1
            if knt > 1000:
                print('Circular routing encountered in segment {}'.format(max_outseg))
                break
        self.outsegs = outsegsmap
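    # Hedged sketch of what map_outsegs builds (comids are hypothetical): each
    # column steps one segment further downstream, ending in 0 at outlets.
    # >>> ls.map_outsegs()                  # assuming ls is a LinesinkData instance
    # >>> ls.outsegs.head(1).values.tolist()
    # [[4867713, 4867715, 4867717, 0]]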
[docs]    def read_lss(self, lss_xml):
        """Read a linesink string (lss) XML file exported by GFLOW."""
        xml = ET.parse(lss_xml)
        root = xml.getroot()
        for attr in ['ComputationalUnits', 'BasemapUnits']:
            self.__dict__[attr] = root.findall(attr)[0].text

        # read fields into a dictionary, then a DataFrame
        d = {}
        for field in ['Label', 'HeadSpecified', 'StartingHead', 'EndingHead',
                      'Resistance', 'Width', 'Depth', 'Routing', 'EndStream',
                      'OverlandFlow', 'EndInflow', 'ScenResistance', 'Drain',
                      'ScenFluxName', 'Gallery', 'TotalDischarge', 'InletStream',
                      'OutletStream', 'OutletTable', 'Lake', 'Precipitation',
                      'Evapotranspiration', 'Farfield', 'chkScenario', 'AutoSWIZC',
                      'DefaultResistance']:
            if self.dtypes[field] == bool:
                try:
                    v = np.array([i.text for i in root.iter(field)], dtype=self.int_dtype)
                    d[field] = v.astype(bool)
                except:
                    d[field] = np.array([self.tf2flag(i.text) for i in root.iter(field)])
            else:
                d[field] = np.array([i.text for i in root.iter(field)], dtype=self.dtypes[field])

        # read in the vertices
        def tolss(ls):
            X = np.array([v.find('X').text for v in ls.find('Vertices').findall('Vertex')], dtype=float)
            Y = np.array([v.find('Y').text for v in ls.find('Vertices').findall('Vertex')], dtype=float)
            return LineString(zip(X, Y))
        d['geometry'] = [tolss(ls) for ls in root.iter('LinesinkString')]
        df = pd.DataFrame(d)

        # convert labels back to COMIDs for the index; assign unique values otherwise
        comids = []
        for i, r in df.iterrows():
            try:
                comids.append(r.Label.split()[0].strip())
            except ValueError:
                comids.append(i)
        df.index = comids

        # rename fields to be consistent with those used in Linesinkmaker
        # (should eventually just change lsmaker names to conform)
        df.rename(columns={'DefaultResistance': 'resistance',
                           'Depth': 'depth',
                           'EndStream': 'end_stream',
                           'EndingHead': 'minElev',
                           'Farfield': 'farfield',
                           'Label': 'ls_name',
                           'Resistance': 'reistance',
                           'Routing': 'routing',
                           'StartingHead': 'maxElev',
                           'Width': 'width'},
                  inplace=True)

        df['ls_coords'] = [list(g.coords) for g in df.geometry]

        return df
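    # Hedged usage sketch for read_lss: round-trip a linesink string file exported
    # by GFLOW back into a DataFrame for inspection. The file name is hypothetical.
    # >>> df = ls.read_lss('Medford.lss.xml')   # assuming ls is a LinesinkData instance
    # >>> df[['ls_name', 'maxElev', 'minElev', 'width']].head()
    # >>> df.geometry[0]                        # shapely LineString of the vertices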
[docs]    def write_lss_by_huc(self, df):

        print('\nGrouping segments by hydrologic unit...')
        # intersect lines with HUCs; then group the dataframe by HUC
        HUCs_df = gisutils.shp2df(self.HUC_shp, index=self.HUC_name_field)
        df[self.HUC_name_field] = len(df) * [None]
        for HUC in HUCs_df.index:
            lines = [line.intersects(HUCs_df.loc[HUC, 'geometry']) for line in df['geometry']]
            df.loc[lines, self.HUC_name_field] = HUC
        dfg = df.groupby(self.HUC_name_field)

        # write lines for each HUC to a separate lss file
        HUCs = np.unique(df.HUC)
        for HUC in HUCs:
            dfh = dfg.get_group(HUC)
            outfile = '{}_{}.lss.xml'.format(self.outfile_basename, HUC)
            self.write_lss(dfh, outfile)
[docs]    def write_lss(self, df, outfile):
        """Write a GFLOW linesink XML (lss) file from dataframe df."""
        df = df.copy()

        df['chkScenario'] = [s.lower() for s in df.chkScenario.astype(str)]

        nlines = sum([len(p) - 1 for p in df.ls_coords])

        print('writing {} lines to {}'.format(nlines, outfile))
        ofp = open(outfile, 'w')
        ofp.write('<?xml version="1.0"?>\n')
        ofp.write('<LinesinkStringFile version="1">\n')
        ofp.write('\t<ComputationalUnits>{}</ComputationalUnits>\n'
                  '\t<BasemapUnits>{}</BasemapUnits>\n\n'.format(self.ComputationalUnits.capitalize(),
                                                                 self.BasemapUnits.capitalize()))

        for comid in df.index:
            ofp.write('\t<LinesinkString>\n')
            ofp.write('\t\t<Label>{}</Label>\n'.format(df.loc[comid, 'ls_name']))
            ofp.write('\t\t<HeadSpecified>1</HeadSpecified>\n')
            ofp.write('\t\t<StartingHead>{:.2f}</StartingHead>\n'.format(df.loc[comid, 'maxElev']))
            ofp.write('\t\t<EndingHead>{:.2f}</EndingHead>\n'.format(df.loc[comid, 'minElev']))
            ofp.write('\t\t<Resistance>{}</Resistance>\n'.format(df.loc[comid, 'resistance']))
            ofp.write('\t\t<Width>{:.2f}</Width>\n'.format(df.loc[comid, 'width']))
            ofp.write('\t\t<Depth>{:.2f}</Depth>\n'.format(df.loc[comid, 'depth']))
            ofp.write('\t\t<Routing>{}</Routing>\n'.format(df.loc[comid, 'routing']))
            ofp.write('\t\t<EndStream>{}</EndStream>\n'.format(df.loc[comid, 'end_stream']))
            ofp.write('\t\t<OverlandFlow>0</OverlandFlow>\n')
            ofp.write('\t\t<EndInflow>0</EndInflow>\n')
            ofp.write('\t\t<ScenResistance>{}</ScenResistance>\n'.format(df.loc[comid, 'ScenResistance']))
            ofp.write('\t\t<Drain>0</Drain>\n')
            ofp.write('\t\t<ScenFluxName>__NONE__</ScenFluxName>\n')
            ofp.write('\t\t<Gallery>0</Gallery>\n')
            ofp.write('\t\t<TotalDischarge>0</TotalDischarge>\n')
            ofp.write('\t\t<InletStream>0</InletStream>\n')
            ofp.write('\t\t<OutletStream>0</OutletStream>\n')
            ofp.write('\t\t<OutletTable>__NONE__</OutletTable>\n')
            ofp.write('\t\t<Lake>0</Lake>\n')
            ofp.write('\t\t<Precipitation>0</Precipitation>\n')
            ofp.write('\t\t<Evapotranspiration>0</Evapotranspiration>\n')
            ofp.write('\t\t<Farfield>{:.0f}</Farfield>\n'.format(df.loc[comid, 'farfield']))
            # include linesink in PEST 'scenarios'
            ofp.write('\t\t<chkScenario>{}</chkScenario>\n'.format(df.loc[comid, 'chkScenario']))
            ofp.write('\t\t<AutoSWIZC>{:.0f}</AutoSWIZC>\n'.format(df.loc[comid, 'AutoSWIZC']))
            ofp.write('\t\t<DefaultResistance>{:.2f}</DefaultResistance>\n'.format(df.loc[comid, 'resistance']))

            ofp.write('\t\t<Vertices>\n')
            # now write out the linesink vertices
            for x, y in df.loc[comid, 'ls_coords']:
                ofp.write('\t\t\t<Vertex>\n')
                ofp.write('\t\t\t\t<X> {:.2f}</X>\n'.format(x))
                ofp.write('\t\t\t\t<Y> {:.2f}</Y>\n'.format(y))
                ofp.write('\t\t\t</Vertex>\n')
            ofp.write('\t\t</Vertices>\n')

            ofp.write('\t</LinesinkString>\n\n')
        ofp.write('</LinesinkStringFile>')
        ofp.close()
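    # The file written above has roughly this shape (abbreviated; labels and
    # values are hypothetical):
    #
    #   <?xml version="1.0"?>
    #   <LinesinkStringFile version="1">
    #       <ComputationalUnits>Feet</ComputationalUnits>
    #       <BasemapUnits>Meters</BasemapUnits>
    #       <LinesinkString>
    #           <Label>4867713 Prairie River</Label>
    #           <HeadSpecified>1</HeadSpecified>
    #           <StartingHead>1439.00</StartingHead>
    #           <EndingHead>1438.50</EndingHead>
    #           ...
    #           <Vertices>
    #               <Vertex><X> 697839.05</X><Y> 558246.10</Y></Vertex>
    #               ...
    #           </Vertices>
    #       </LinesinkString>
    #   </LinesinkStringFile>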
[docs]    def write_shapefile(self, outfile=None, use_ls_coords=True):
        if outfile is None:
            outfile = self.outfile_basename.split('.')[0] + '.shp'
        df = self.df.copy()

        for routid in {'dncomid', 'upcomids'}.intersection(set(df.columns)):
            df[routid] = df[routid].map(lambda x: ' '.join([str(c) for c in x]))  # handles empties

        # recreate shapely geometries from the coordinates column; drop all other coords/geometries
        if use_ls_coords and 'ls_coords' in df.columns:
            df['geometry'] = [LineString(g) for g in df.ls_coords]
            df.drop(['ls_coords'], axis=1, inplace=True)

        # drop the original linesink geometry column
        # (geopandas doesn't allow multiple geometry columns)
        # note: additional refactoring or code inspection might allow this column
        # to be used directly, eliminating the step above that rebuilds the
        # geometry column from the linesink coordinates
        df.drop('ls_geom', axis=1, errors='ignore', inplace=True)

        df.to_file(outfile, index=False)
        print(f"wrote {outfile}")
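    # Hedged usage sketch: export the simplified linesinks for inspection in a GIS.
    # The output path is hypothetical; by default the file is named after
    # outfile_basename.
    # >>> ls.write_shapefile('preprocessed/Medford_linesinks.shp')
    # >>> ls.write_shapefile(use_ls_coords=False)   # keep the original NHD geometries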
[docs]class linesinks(LinesinkData):
    """Deprecated alias for LinesinkData, retained for backward compatibility."""
    def __init__(self, *args, **kwargs):
        warnings.warn("The 'linesinks' class was renamed to LinesinkData "
                      "to better follow pep 8.", DeprecationWarning)
        LinesinkData.__init__(self, *args, **kwargs)
[docs]class InputFileMissing(Exception):
    """Raised when an input (configuration) file cannot be opened or parsed."""
    def __init__(self, infile):
        self.infile = infile

    def __str__(self):
        return ('\n\nCould not open or parse input file {0}.\n'
                'Check for errors in XML formatting.'.format(self.infile))
[docs]class EmptyDataFrame(Exception):
    """Raised when an operation receives or produces an empty DataFrame."""
    def __init__(self):
        pass

    def __str__(self):
        return ('\n\nEmpty DataFrame!')