# Prepare for download Part 1 of 1
## Import packages that will help with...

# Reproducible file paths
import os # Reproducible file paths
import pathlib # Find the home folder
from glob import glob  # returns list of paths
import zipfile # Work with zip files

# Find files by pattern
import matplotlib.pyplot as plt # Overlay pandas and xarry plots,Overlay raster and vector data
import rioxarray as rxr # Work with geospatial raster data


# Work with tabular, vector, and raster data
import cartopy.crs as ccrs # CRSs (Coordinate Reference Systems)
import geopandas as gpd # work with vector data
import hvplot.pandas # Interactive tabular and vector data
import hvplot.xarray # Interactive raster
from math import floor, ceil # working with bounds, floor rounds down ciel rounds up
import numpy as np # numerical computing
import pandas as pd # Group and aggregate
from rioxarray.merge import merge_arrays # Merge rasters
import xarray as xr # Adjust images
import xrspatial # calculate slope

# Access NASA data
import earthaccess # Access NASA data from the cloud

# Set Up Analysis Part 2 of 2

# Assemble the project data directory path beneath the user's home folder
habitat_suitability_data_dir = os.path.join(
    pathlib.Path.home(), 'earth-analytics', 'data', 'habitat_suitability')

# Create the folder (and any missing parents); an existing folder is fine
os.makedirs(habitat_suitability_data_dir, exist_ok=True)

# Display the path to confirm where the data will be stored
habitat_suitability_data_dir

'/Users/briannagleason/earth-analytics/data/habitat_suitability'

# Download USFS National Grasslands Units Data Part 1 of 1

# Remote archive and local cache location for the USFS grasslands layer
usfs_grasslands_url = (
    "https://data.fs.usda.gov/geodata/edw/edw_resources/shp/"
    "S_USA.NationalGrassland.zip"
)
usfs_grasslands_dir = os.path.join(
    habitat_suitability_data_dir, 'usfs_grasslands')
usfs_grasslands_path = os.path.join(
    usfs_grasslands_dir, 'usfs_grasslands.shp')
os.makedirs(usfs_grasslands_dir, exist_ok=True)

# Fetch from the URL and cache as a shapefile only when no local copy exists
if not os.path.exists(usfs_grasslands_path):
    gpd.read_file(usfs_grasslands_url).to_file(usfs_grasslands_path)

# Always work from the cached local copy
usfs_grasslands_gdf = gpd.read_file(usfs_grasslands_path)

# Create plots of each study area, Part 1 of 2

# Select Comanche National Grassland and draw its interactive site map
comanche_grassland_gdf = usfs_grasslands_gdf.loc[
    usfs_grasslands_gdf['GRASSLANDN'] == 'Comanche National Grassland'
]
comanche_grassland_gdf.hvplot(
    title='Comanche National Grassland - Site Map',
    geo=True,
    tiles='EsriImagery',
    frame_width=600,
    fill_color=None,
    line_color='blue',
    line_width=2.5,
)

# Create plots of each study area, Part 2 of 2

# Select Pawnee National Grassland and draw its interactive site map
pawnee_grassland_gdf = usfs_grasslands_gdf.loc[
    usfs_grasslands_gdf['GRASSLANDN'] == 'Pawnee National Grassland'
]
pawnee_grassland_gdf.hvplot(
    title='Pawnee National Grassland - Site Map',
    geo=True,
    tiles='EsriImagery',
    frame_width=600,
    fill_color=None,
    line_color='blue',
    line_width=2,
)

# Process POLARIS Raster Image Part 1 of 2

# Create function with description to process raster images
def process_image(url, soil_prop, soil_stat, soil_depth, bounds_gdf):
    """
    Open, crop, and merge the 1x1 degree raster tiles covering a study area.

    Parameters
    ----------
    url: str
      URL (or path) template. It is filled in with ``str.format`` using the
      fields ``soil_prop``, ``soil_stat``, ``soil_depth``, ``min_lat``,
      ``max_lat``, ``min_lon`` and ``max_lon``.
    soil_prop: str
      Soil property (e.g., "sand", "clay", etc.)
    soil_stat: str
      Soil statistic (e.g., "mean", "median", etc.)
    soil_depth: str
      Soil depth interval, formatted the way the URL template expects it.
    bounds_gdf: gpd.GeoDataFrame
      Area of interest to crop to.

    Returns
    -------
    merged_da: xarray.DataArray
      The cropped tiles merged into a single raster.
    """
    # Extent of the study area in EPSG:4326 (longitude/latitude)
    west, south, east, north = bounds_gdf.to_crs(4326).total_bounds

    # Lower-left corner of every whole-degree tile touched by the extent;
    # longitude varies slowest, latitude fastest
    tile_corners = [
        (tile_lon, tile_lat)
        for tile_lon in range(floor(west), ceil(east))
        for tile_lat in range(floor(south), ceil(north))
    ]

    cropped_tiles = []
    for tile_lon, tile_lat in tile_corners:
        # Fill the template for this tile; each tile spans one degree
        tile_url = url.format(
            soil_prop=soil_prop,
            soil_stat=soil_stat,
            soil_depth=soil_depth,
            min_lat=tile_lat,
            max_lat=tile_lat + 1,
            min_lon=tile_lon,
            max_lon=tile_lon + 1,
        )

        # Open the tile with nodata masked, dropping length-1 dimensions
        tile_da = rxr.open_rasterio(tile_url, mask_and_scale=True).squeeze()

        # Keep only the part of the tile inside the study-area extent
        cropped_tiles.append(tile_da.rio.clip_box(west, south, east, north))

    # Merge the cropped tiles into one raster for the site
    return merge_arrays(cropped_tiles)

# Process POLARIS raster image part 2 of 2
# Test the function by defining variables and plotting

# Soil layer to request: property, statistic, and depth interval
soil_prop, soil_stat, soil_depth = 'ph', 'mean', '60_100'

# URL template; the placeholders are filled in by process_image
soil_url_template = "/".join([
    "http://hydrology.cee.duke.edu",
    "POLARIS/PROPERTIES/v1.0",
    "{soil_prop}",
    "{soil_stat}",
    "{soil_depth}",
    "lat{min_lat}{max_lat}_lon{min_lon}{max_lon}.tif",
])

# Study areas, in the order they are processed throughout the analysis
chosen_grasslands_bounds_gdfs = [comanche_grassland_gdf, pawnee_grassland_gdf]

# Folder for POLARIS data
polaris_dir = os.path.join(habitat_suitability_data_dir, 'polaris')
os.makedirs(polaris_dir, exist_ok=True)

# Run process_image once per study area (Comanche first, then Pawnee)
polaris_processed_da_list = [
    process_image(
        soil_url_template, soil_prop, soil_stat, soil_depth, site_gdf)
    for site_gdf in chosen_grasslands_bounds_gdfs
]

# Keep a named handle on each site's raster for later steps
polaris_comanche_processed, polaris_pawnee_processed = (
    polaris_processed_da_list)

# Display the list to check the results
polaris_processed_da_list

[<xarray.DataArray (y: 3310, x: 6286)> Size: 83MB
 array([[8.002184 , 8.002184 , 8.008743 , ..., 8.155816 , 8.063853 ,
               nan],
        [8.069297 , 8.034306 , 8.023516 , ..., 8.307451 , 8.283122 ,
               nan],
        [8.051508 , 7.7751083, 7.990466 , ..., 8.31568  , 8.339434 ,
               nan],
        ...,
        [6.3944798, 7.7612224, 7.757359 , ..., 8.048992 , 8.057934 ,
               nan],
        [7.1970034, 7.7459726, 8.146693 , ..., 8.048004 , 7.993289 ,
               nan],
        [7.4051204, 7.7196107, 7.906928 , ..., 8.051418 , 8.04357  ,
               nan]], dtype=float32)
 Coordinates:
   * x            (x) float64 50kB -104.1 -104.1 -104.1 ... -102.3 -102.3 -102.3
   * y            (y) float64 26kB 37.91 37.91 37.91 37.91 ... 37.0 36.99 36.99
     band         int64 8B 1
     spatial_ref  int64 8B 0
 Attributes:
     AREA_OR_POINT:  Area
     _FillValue:     nan,
 <xarray.DataArray (y: 1413, x: 4387)> Size: 25MB
 array([[7.9047933, 7.9026217, 7.733551 , ..., 7.970495 , 7.995625 ,
         8.038868 ],
        [7.904716 , 6.283721 , 6.283721 , ..., 7.8886786, 7.98419  ,
         7.850977 ],
        [6.722    , 6.640232 , 6.640232 , ..., 7.8389506, 8.113432 ,
         7.6897535],
        ...,
        [8.009729 , 7.986484 , 7.973793 , ..., 8.204172 , 8.208988 ,
         8.207926 ],
        [8.027447 , 7.963191 , 7.5485535, ..., 8.292488 , 8.255722 ,
         8.054027 ],
        [7.9777775, 7.982121 , 7.82591  , ..., 8.331369 , 8.068407 ,
         8.054653 ]], dtype=float32)
 Coordinates:
   * x            (x) float64 35kB -104.8 -104.8 -104.8 ... -103.6 -103.6 -103.6
   * y            (y) float64 11kB 41.0 41.0 41.0 41.0 ... 40.61 40.61 40.61
     band         int64 8B 1
     spatial_ref  int64 8B 0
 Attributes:
     AREA_OR_POINT:  Area
     _FillValue:     nan]

# Plot each site's soil pH raster (Comanche, then Pawnee) with the grassland
# boundary drawn on top, to check that the processing worked
for _soil_da, _site_gdf, _plot_title in [
    (polaris_comanche_processed, comanche_grassland_gdf,
     'Comanche Grassland - pH'),
    (polaris_pawnee_processed, pawnee_grassland_gdf,
     'Pawnee Grassland - pH'),
]:
    _soil_da.plot(robust=True, cbar_kwargs={"label": "pH"})

    # Reproject the boundary to the raster's CRS before overlaying it
    _boundary_ax = _site_gdf.to_crs(_soil_da.rio.crs).boundary.plot(
        ax=plt.gca(), color='white')
    _boundary_ax.set(
        title=_plot_title, xlabel='Longitude', ylabel='Latitude')
    plt.show()

# Prep for downloading SRTM 

# Folder that will hold the downloaded SRTM elevation tiles
elevation_dir = os.path.join(
    habitat_suitability_data_dir, 'srtm')
os.makedirs(elevation_dir, exist_ok=True)

# Display the path to confirm its location
elevation_dir

'/Users/briannagleason/earth-analytics/data/habitat_suitability/srtm'

# Download Raster data through earthaccess Part 1 of 1
# Login and search earthaccess, download results

# Log in to NASA Earthdata (prompts interactively; persist=True stores the
# credentials for later runs)
earthaccess.login(strategy="interactive", persist=True)

# Search for and download the SRTM tiles covering each study area.
# NOTE: a "skip when any *hgt.zip already exists" guard is deliberately not
# used inside this loop: once the first site's tiles are on disk, such a guard
# would skip every remaining site.
for bounds_gdf in chosen_grasslands_bounds_gdfs:

    # Bounding box as (min_lon, min_lat, max_lon, max_lat); reproject first so
    # the values are longitude/latitude degrees whatever CRS the layer is in
    bounds = tuple(bounds_gdf.to_crs(4326).total_bounds)

    # Search earthaccess for SRTM granules intersecting the bounding box
    elevation_results = earthaccess.search_data(
        short_name="SRTMGL1",
        bounding_box=bounds,
    )

    # Download the matching granules into the SRTM folder
    earthaccess.download(elevation_results, elevation_dir)

# List every downloaded tile once, after all sites are done. Sorting makes the
# order reproducible instead of depending on how the filesystem lists files.
srtm_files = sorted(glob(os.path.join(elevation_dir, '*hgt.zip')))

# Display the file list
srtm_files

QUEUEING TASKS | :   0%|          | 0/6 [00:00<?, ?it/s]

PROCESSING TASKS | :   0%|          | 0/6 [00:00<?, ?it/s]

COLLECTING RESULTS | :   0%|          | 0/6 [00:00<?, ?it/s]

QUEUEING TASKS | :   0%|          | 0/4 [00:00<?, ?it/s]

PROCESSING TASKS | :   0%|          | 0/4 [00:00<?, ?it/s]

COLLECTING RESULTS | :   0%|          | 0/4 [00:00<?, ?it/s]

['/Users/briannagleason/earth-analytics/data/habitat_suitability/srtm/N36W105.SRTMGL1.hgt.zip',
 '/Users/briannagleason/earth-analytics/data/habitat_suitability/srtm/N37W105.SRTMGL1.hgt.zip',
 '/Users/briannagleason/earth-analytics/data/habitat_suitability/srtm/N36W104.SRTMGL1.hgt.zip',
 '/Users/briannagleason/earth-analytics/data/habitat_suitability/srtm/N37W104.SRTMGL1.hgt.zip',
 '/Users/briannagleason/earth-analytics/data/habitat_suitability/srtm/N37W103.SRTMGL1.hgt.zip',
 '/Users/briannagleason/earth-analytics/data/habitat_suitability/srtm/N36W103.SRTMGL1.hgt.zip',
 '/Users/briannagleason/earth-analytics/data/habitat_suitability/srtm/N41W105.SRTMGL1.hgt.zip',
 '/Users/briannagleason/earth-analytics/data/habitat_suitability/srtm/N40W105.SRTMGL1.hgt.zip',
 '/Users/briannagleason/earth-analytics/data/habitat_suitability/srtm/N41W104.SRTMGL1.hgt.zip',
 '/Users/briannagleason/earth-analytics/data/habitat_suitability/srtm/N40W104.SRTMGL1.hgt.zip']

# Create list of files for each study area
def _srtm_tiles_for_site(site_gdf, tile_paths):
    """
    Return the entries of ``tile_paths`` whose SRTM tile overlaps a site.

    SRTM tiles are named after their south-west corner (e.g. ``N36W105``), so
    the tiles needed for a site are the whole-degree cells touched by its
    longitude/latitude bounding box. Selecting by tile name avoids relying on
    the position of each file in the glob result.

    Parameters
    ----------
    site_gdf: gpd.GeoDataFrame
      Area of interest.
    tile_paths: list of str
      Paths to downloaded tiles named like ``N36W105.SRTMGL1.hgt.zip``.

    Returns
    -------
    list of str
      Matching paths, in the same relative order as ``tile_paths``.
    """
    min_lon, min_lat, max_lon, max_lat = site_gdf.to_crs(4326).total_bounds

    wanted_tiles = set()
    for lat in range(floor(min_lat), ceil(max_lat)):
        for lon in range(floor(min_lon), ceil(max_lon)):
            lat_prefix = 'N' if lat >= 0 else 'S'
            lon_prefix = 'E' if lon >= 0 else 'W'
            wanted_tiles.add(
                f"{lat_prefix}{abs(lat):02d}{lon_prefix}{abs(lon):03d}")

    # The tile id is the part of the file name before the first dot
    return [
        path for path in tile_paths
        if os.path.basename(path).split('.')[0] in wanted_tiles
    ]


comanche_srtm_files = _srtm_tiles_for_site(comanche_grassland_gdf, srtm_files)
pawnee_srtm_files = _srtm_tiles_for_site(pawnee_grassland_gdf, srtm_files)

# Create list of each site's files
srtm_files_list = [comanche_srtm_files, pawnee_srtm_files]

# Call list to make sure it's right
srtm_files_list

[['/Users/briannagleason/earth-analytics/data/habitat_suitability/srtm/N36W105.SRTMGL1.hgt.zip',
  '/Users/briannagleason/earth-analytics/data/habitat_suitability/srtm/N37W105.SRTMGL1.hgt.zip',
  '/Users/briannagleason/earth-analytics/data/habitat_suitability/srtm/N36W104.SRTMGL1.hgt.zip',
  '/Users/briannagleason/earth-analytics/data/habitat_suitability/srtm/N37W104.SRTMGL1.hgt.zip',
  '/Users/briannagleason/earth-analytics/data/habitat_suitability/srtm/N37W103.SRTMGL1.hgt.zip',
  '/Users/briannagleason/earth-analytics/data/habitat_suitability/srtm/N36W103.SRTMGL1.hgt.zip'],
 ['/Users/briannagleason/earth-analytics/data/habitat_suitability/srtm/N41W105.SRTMGL1.hgt.zip',
  '/Users/briannagleason/earth-analytics/data/habitat_suitability/srtm/N40W105.SRTMGL1.hgt.zip',
  '/Users/briannagleason/earth-analytics/data/habitat_suitability/srtm/N41W104.SRTMGL1.hgt.zip',
  '/Users/briannagleason/earth-analytics/data/habitat_suitability/srtm/N40W104.SRTMGL1.hgt.zip']]

# Create function with description to process srtm raster images
# Part 1 of 1
def process_image_list(url_list, chosen_buffer, bounds_gdf):
    """
    Open, crop, and merge a list of rasters around a study area.

    Parameters
    ----------
    url_list: list of file-like or path-like
      Rasters to open, e.g. downloaded tile files.
    chosen_buffer: float
      Distance to extend the crop window past the bounds of bounds_gdf on
      every side, in the units of each raster's CRS (degrees for a
      geographic CRS).
    bounds_gdf: gpd.GeoDataFrame
      Area of interest to crop to.

    Returns
    -------
    merged_da: xarray.DataArray
      The cropped rasters merged into a single raster.
    """
    cropped_rasters = []

    for raster_source in url_list:
        # Open the raster with nodata masked, dropping length-1 dimensions
        raster_da = rxr.open_rasterio(
            raster_source, mask_and_scale=True).squeeze()

        # Study-area extent expressed in this raster's own CRS
        west, south, east, north = (
            bounds_gdf.to_crs(raster_da.rio.crs).total_bounds)

        # Crop to the extent, widened by the buffer on every side
        cropped_rasters.append(
            raster_da.rio.clip_box(
                west - chosen_buffer,
                south - chosen_buffer,
                east + chosen_buffer,
                north + chosen_buffer,
            )
        )

    # Merge the cropped rasters into one raster for the site
    return merge_arrays(cropped_rasters)

# Run process_image_list on each site's SRTM files and keep the results
# under their own names for later steps

# Comanche National Grassland elevation
srtm_comanche_result_da = process_image_list(
    url_list=comanche_srtm_files,
    chosen_buffer=.025,
    bounds_gdf=comanche_grassland_gdf,
)

# Pawnee National Grassland elevation
srtm_pawnee_result_da = process_image_list(
    url_list=pawnee_srtm_files,
    chosen_buffer=.025,
    bounds_gdf=pawnee_grassland_gdf,
)

# Collect both sites' elevation rasters and display them as a check
srtm_da_results = [srtm_comanche_result_da, srtm_pawnee_result_da]
srtm_da_results

[<xarray.DataArray (y: 3491, x: 6467)> Size: 90MB
 array([[1400., 1400., 1399., ..., 1146., 1146.,   nan],
        [1401., 1401., 1400., ..., 1147., 1146.,   nan],
        [1402., 1402., 1400., ..., 1148., 1148.,   nan],
        ...,
        [1893., 1894., 1893., ..., 1133., 1132.,   nan],
        [1892., 1894., 1895., ..., 1134., 1133.,   nan],
        [  nan,   nan,   nan, ...,   nan,   nan,   nan]], dtype=float32)
 Coordinates:
   * x            (x) float64 52kB -104.1 -104.1 -104.1 ... -102.3 -102.3 -102.3
   * y            (y) float64 28kB 37.94 37.94 37.94 37.94 ... 36.97 36.97 36.97
     band         int64 8B 1
     spatial_ref  int64 8B 0
 Attributes:
     AREA_OR_POINT:  Point
     units:          m
     _FillValue:     nan,
 <xarray.DataArray (y: 1594, x: 4566)> Size: 29MB
 array([[1899., 1899., 1899., ..., 1487., 1487., 1487.],
        [1899., 1900., 1901., ..., 1488., 1488., 1487.],
        [1901., 1901., 1902., ..., 1487., 1487., 1487.],
        ...,
        [1546., 1545., 1545., ..., 1368., 1369., 1368.],
        [1546., 1545., 1544., ..., 1368., 1367., 1366.],
        [1545., 1544., 1543., ..., 1368., 1366., 1366.]], dtype=float32)
 Coordinates:
   * x            (x) float64 37kB -104.8 -104.8 -104.8 ... -103.5 -103.5 -103.5
   * y            (y) float64 13kB 41.03 41.03 41.03 41.03 ... 40.58 40.58 40.58
     band         int64 8B 1
     spatial_ref  int64 8B 0
 Attributes:
     AREA_OR_POINT:  Point
     units:          m
     _FillValue:     nan]

# Plot each site's elevation raster (Comanche, then Pawnee) and overlay the
# boundary of the same study area
for _elev_da, _site_gdf, _plot_title in [
    (srtm_comanche_result_da, comanche_grassland_gdf,
     'Comanche Grassland - Elevation '),
    (srtm_pawnee_result_da, pawnee_grassland_gdf,
     'Pawnee Grassland - Elevation '),
]:
    _elev_da.plot(
        cmap='terrain',
        robust=True,
        cbar_kwargs={"label": "Elevation (meters)"},
    )
    _boundary_ax = _site_gdf.boundary.plot(ax=plt.gca(), color='black')
    _boundary_ax.set(
        title=_plot_title,
        xlabel='Longitude',
        ylabel='Latitude',
        xticks=[],
        yticks=[],
    )
    plt.show()

# Calculate slope for every site using a for loop

# Target CRS for the slope calculation: EPSG:32613 (WGS 84 / UTM zone 13N), so
# horizontal units are meters. Defined once, outside the loop, because the
# plotting cells further down also use it.
utm13_epsg = 32613

# List that collects one slope raster per site
slope_da_list = []

# Iterate through the elevation rasters of the sites
for srtm_result in srtm_da_results:

    # Reproject to UTM so the x/y units are meters
    srtm_proj_da = srtm_result.rio.reproject(utm13_epsg)

    # Calculate slope from the projected elevation raster
    slope_da = xrspatial.slope(srtm_proj_da)

    # Append the slope raster to the list
    slope_da_list.append(slope_da)

# Display the list to check the results
slope_da_list

[<xarray.DataArray 'slope' (y: 4237, x: 6163)> Size: 104MB
 array([[nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        ...,
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan]], dtype=float32)
 Coordinates:
   * x            (x) float64 49kB 5.805e+05 5.805e+05 ... 7.414e+05 7.414e+05
   * y            (y) float64 34kB 4.203e+06 4.202e+06 ... 4.092e+06 4.092e+06
     band         int64 8B 1
     spatial_ref  int64 8B 0
 Attributes:
     AREA_OR_POINT:  Point
     units:          m
     _FillValue:     nan,
 <xarray.DataArray 'slope' (y: 2060, x: 4413)> Size: 36MB
 array([[nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        ...,
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan]], dtype=float32)
 Coordinates:
   * x            (x) float64 35kB 5.154e+05 5.155e+05 ... 6.228e+05 6.228e+05
   * y            (y) float64 16kB 4.543e+06 4.543e+06 ... 4.493e+06 4.493e+06
     band         int64 8B 1
     spatial_ref  int64 8B 0
 Attributes:
     AREA_OR_POINT:  Point
     units:          m
     _FillValue:     nan]

# Name each of the slope rasters in slope_da_list (same order as the sites)
slope_comanche = slope_da_list[0]
slope_pawnee = slope_da_list[1]

# Check the slope calculation by plotting each site with its boundary on top.
# The slope rasters are in the UTM CRS (utm13_epsg), so the axes are
# easting/northing in meters rather than longitude/latitude.
for _slope_da, _site_gdf, _plot_title in [
    (slope_comanche, comanche_grassland_gdf,
     'Comanche Grassland - Calculated Slope '),
    (slope_pawnee, pawnee_grassland_gdf,
     'Pawnee Grassland - Calculated Slope '),
]:
    _slope_da.plot(
        cbar_kwargs={"label": "Slope (degrees)"},
        cmap='terrain',
    )
    # Reproject the boundary to the same UTM CRS as the slope raster
    _site_gdf.to_crs(utm13_epsg).boundary.plot(
        ax=plt.gca(),
        color='white').set(
            title=_plot_title,
            xlabel='Easting (m)',
            ylabel='Northing (m)',
            xticks=[],
            yticks=[],
        )
    plt.show()

# Helper that maps a longitude from the 0-360 range onto the
# -180 to 180 range
def convert_longitude(longitude):
    """Convert a longitude from the 0-360 range to the -180 to 180 range.

    Values greater than 180 are shifted down by 360; all other values are
    returned unchanged.
    """
    if longitude > 180:
        return longitude - 360
    return longitude

# Collect one record (a dict with site, variable, scenario, start year and the
# cropped DataArray) per site/variable/scenario combination
maca_da_list = []

# Iterate through multiple sites or study areas
for site_name, site_gdf in {
    'comanche':comanche_grassland_gdf,'pawnee':pawnee_grassland_gdf}.items():
    # Iterate through multiple variables, e.g. precipitation
    # NOTE(review): the data variable read below is hard-coded to
    # `.precipitation`, so adding other entries here will not work as-is
    for variable in ['pr']:
        # Iterate through the scenarios and the start year used for each
        for scenario, start_year in {
            'historical': 2000, 'rcp85': 2091}.items():
            # Each file name covers start_year through start_year + 4
            end_year = start_year + 4
            # Build the MACA v2 URL for this variable, scenario and period
            maca_url = (
                'http://thredds.northwestknowledge.net:8080/thredds/dodsC/MACAV2'
                f'/CCSM4/macav2metdata_{variable}_CCSM4_r6i1p1_'
                f'{scenario}_{start_year}_{end_year}_CONUS_monthly.nc')
            # Open the remote dataset and pull out the precipitation variable
            maca_da = xr.open_dataset(maca_url).squeeze().precipitation
            # Get the study bounds in the dataset's CRS; this is done before
            # the lon coordinate is replaced below
            bounds = site_gdf.to_crs(maca_da.rio.crs).total_bounds
            # Replace the lon coordinate with values converted by
            # convert_longitude (0-360 range -> -180 to 180 range)
            maca_da = maca_da.assign_coords(
                lon = ('lon', 
                [convert_longitude(l) for l in maca_da.lon.values]))
            # Set spatial dimensions - need lon = x-axis and lat = y-axis.
            maca_da = maca_da.rio.set_spatial_dims(x_dim='lon', y_dim='lat')
            # Crop the raster image to the bounds of the study area(s)
            maca_da = maca_da.rio.clip_box(*bounds)
            # Append a record describing this data array to the list
            maca_da_list.append(dict(
                site_name = site_name,
                variable = variable,
                scenario = scenario,
                start_year = start_year,
                da = maca_da
                ))      
                     
# Convert maca_da_list to a DataFrame (one row per record) and display the
# DataArrays stored in its `da` column
maca_df = pd.DataFrame(maca_da_list)
maca_df.da.values

array([<xarray.DataArray 'precipitation' (time: 60, lat: 23, lon: 43)> Size: 237kB
       [59340 values with dtype=float32]
       Coordinates:
         * lat      (lat) float64 184B 36.98 37.02 37.06 37.1 ... 37.81 37.85 37.9
         * time     (time) object 480B 2000-01-15 00:00:00 ... 2004-12-15 00:00:00
         * lon      (lon) float64 344B -104.1 -104.0 -104.0 ... -102.4 -102.4 -102.3
           crs      int64 8B 0
       Attributes:
           long_name:      Monthly Precipitation Amount
           units:          mm
           standard_name:  precipitation
           cell_methods:   time: sum(interval: 24 hours): sum over days
           comments:       Total monthly precipitation at surface: includes both liq...
           _ChunkSizes:    [ 10  44 107]                                               ,
       <xarray.DataArray 'precipitation' (time: 60, lat: 23, lon: 43)> Size: 237kB
       [59340 values with dtype=float32]
       Coordinates:
         * lat      (lat) float64 184B 36.98 37.02 37.06 37.1 ... 37.81 37.85 37.9
         * time     (time) object 480B 2091-01-15 00:00:00 ... 2095-12-15 00:00:00
         * lon      (lon) float64 344B -104.1 -104.0 -104.0 ... -102.4 -102.4 -102.3
           crs      int64 8B 0
       Attributes:
           long_name:      Monthly Precipitation Amount
           units:          mm
           standard_name:  precipitation
           cell_methods:   time: sum(interval: 24 hours): sum over days
           comments:       Total monthly precipitation at surface: includes both liq...
           _ChunkSizes:    [ 10  44 107]                                               ,
       <xarray.DataArray 'precipitation' (time: 60, lat: 11, lon: 30)> Size: 79kB
       [19800 values with dtype=float32]
       Coordinates:
         * lat      (lat) float64 88B 40.6 40.65 40.69 40.73 ... 40.9 40.94 40.98 41.02
         * time     (time) object 480B 2000-01-15 00:00:00 ... 2004-12-15 00:00:00
         * lon      (lon) float64 240B -104.8 -104.7 -104.7 ... -103.6 -103.6 -103.6
           crs      int64 8B 0
       Attributes:
           long_name:      Monthly Precipitation Amount
           units:          mm
           standard_name:  precipitation
           cell_methods:   time: sum(interval: 24 hours): sum over days
           comments:       Total monthly precipitation at surface: includes both liq...
           _ChunkSizes:    [ 10  44 107]                                               ,
       <xarray.DataArray 'precipitation' (time: 60, lat: 11, lon: 30)> Size: 79kB
       [19800 values with dtype=float32]
       Coordinates:
         * lat      (lat) float64 88B 40.6 40.65 40.69 40.73 ... 40.9 40.94 40.98 41.02
         * time     (time) object 480B 2091-01-15 00:00:00 ... 2095-12-15 00:00:00
         * lon      (lon) float64 240B -104.8 -104.7 -104.7 ... -103.6 -103.6 -103.6
           crs      int64 8B 0
       Attributes:
           long_name:      Monthly Precipitation Amount
           units:          mm
           standard_name:  precipitation
           cell_methods:   time: sum(interval: 24 hours): sum over days
           comments:       Total monthly precipitation at surface: includes both liq...
           _ChunkSizes:    [ 10  44 107]                                               ],
      dtype=object)

# Pull one DataArray per site and scenario out of the dataframe and reduce it
# to a single 2-D layer that can be plotted and harmonized
def _annual_min_precip(site_name, start_year):
    """
    Return the lowest annual precipitation total for one site and period.

    Selects the single row of ``maca_df`` matching ``site_name`` and
    ``start_year``, sums the monthly values within each calendar year, and
    takes the per-cell minimum across years (i.e. the driest year's total).

    Parameters
    ----------
    site_name: str
      Value of the ``site_name`` column, e.g. 'comanche' or 'pawnee'.
    start_year: int
      Value of the ``start_year`` column, e.g. 2000 or 2091.

    Returns
    -------
    xarray.DataArray
      A (lat, lon) raster of minimum annual precipitation totals.
    """
    selected_rows = maca_df[
        (maca_df.start_year == start_year)
        & (maca_df.site_name == site_name)
    ]
    return (
        # .item() requires exactly one matching row
        selected_rows.da.values.item()
        # Coordinates are longitude/latitude
        .rio.write_crs(4326)
        # lon is the x-axis and lat is the y-axis
        .rio.set_spatial_dims(x_dim='lon', y_dim='lat')
        # Sum the monthly values within each year to get annual totals
        .groupby('time.year')
        .sum()
        # Take the minimum across years - because that is the info that was
        # found for the plant species
        .min('year')
    )


# Pull out Comanche maca scenarios
maca_comanche_2000_da = _annual_min_precip('comanche', 2000)
maca_comanche_2091_da = _annual_min_precip('comanche', 2091)

# Pull out Pawnee maca scenarios
maca_pawnee_2000_da = _annual_min_precip('pawnee', 2000)
maca_pawnee_2091_da = _annual_min_precip('pawnee', 2091)

# Test by plotting 1 climate scenario on 1 study area
maca_comanche_2091_da.plot(
    cbar_kwargs={"label": "precipitation (mm)"},
    robust=True
)
# Overlay the boundary of the same study area, reprojected to the
# longitude/latitude CRS (EPSG:4326) that was written to the raster.
# The rcp85 file that was downloaded covers 2091 through 2095.
comanche_grassland_gdf.to_crs(4326).boundary.plot(ax=plt.gca(),
    color='black').set(
        title='Comanche Grassland-CCSM4 Climate Model 2091-2095 ',
        xlabel='Longitude', 
        ylabel='Latitude',
        xticks=[],
        yticks=[] 
    )
plt.show()

# Prep for harmonizing Part 1 of 1
# Folder that will receive the harmonized rasters
harmonized_raster_dir = os.path.join(
    habitat_suitability_data_dir, 'harmonized_rasters')
os.makedirs(harmonized_raster_dir, exist_ok=True)

# Prep for harmonizing Part 2 of 2

# Comanche: the slope raster is the reference grid; the soil and the two
# climate rasters are the layers to be matched to it
comanche_reference_raster = slope_comanche
comanche_input_rasters = [
    polaris_comanche_processed, maca_comanche_2091_da, maca_comanche_2000_da]

# Pawnee: same layout as Comanche
pawnee_reference_raster = slope_pawnee
pawnee_input_rasters = [
    polaris_pawnee_processed, maca_pawnee_2091_da, maca_pawnee_2000_da]

# Harmonize raster layers by creating function
def harmonize_raster_layers(reference_raster, input_rasters, output_dir):
    """
    Harmonize raster layers to ensure consistent spatial resolution and projection.

    Every input raster is reprojected and aligned to the grid of the reference
    raster with ``rio.reproject_match`` and written to ``output_dir``.

    Args:
        reference_raster (str, os.PathLike or xarray.DataArray): The raster
            the other rasters will match, given as a file path or as an
            already-loaded DataArray.
        input_rasters (list): Rasters to harmonize; each item is a file path
            or an already-loaded DataArray.
        output_dir (str): Directory to save the harmonized raster files. It is
            created if it does not exist.

    Returns:
        list: List of file paths to the harmonized raster files, in the same
        order as ``input_rasters``. Inputs given as paths keep their file
        name; DataArray inputs are saved as ``<name>_<position>.tif``, where
        ``<name>`` is the array's ``name`` (or ``raster`` when it has none)
        and ``<position>`` is its index in ``input_rasters``.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Load the reference raster
    if isinstance(reference_raster, (str, os.PathLike)):  # If it's a path
        ref_raster = rxr.open_rasterio(reference_raster, masked=True)
    else:  # If it's a DataArray
        ref_raster = reference_raster

    # List of output paths, one per input raster
    harmonized_rasters = []

    for position, raster in enumerate(input_rasters):

        # Load the input raster and decide on the output file name
        if isinstance(raster, (str, os.PathLike)):  # If it's a path
            input_raster = rxr.open_rasterio(raster, masked=True)
            output_name = os.path.basename(raster)
        else:  # If it's a DataArray
            input_raster = raster
            # A DataArray has no file name, so build one from its name; the
            # position keeps same-named layers from overwriting each other
            layer_name = str(getattr(raster, 'name', None) or 'raster')
            layer_name = layer_name.replace(os.sep, '_')
            output_name = f"{layer_name}_{position}.tif"

        # Reproject and align the input raster to match the reference raster
        harmonized_raster = input_raster.rio.reproject_match(ref_raster)

        # Save the harmonized raster to the output directory
        harmonized_output_file = os.path.join(output_dir, output_name)
        harmonized_raster.rio.to_raster(harmonized_output_file)

        # Print and append
        print(f"Harmonized raster saved to: {harmonized_output_file}")
        harmonized_rasters.append(harmonized_output_file)

    # Return the list of harmonized raster paths
    return harmonized_rasters

# Harmonize each site's layers into its own sub-folder, so output files from
# one site can never collide with files from the other site

# Use harmonize_raster_layers function to get Comanche harmonized rasters
comanche_harmonized_dir = os.path.join(harmonized_raster_dir, 'comanche')
os.makedirs(comanche_harmonized_dir, exist_ok=True)
comanche_harmonized_rasters = harmonize_raster_layers(
    comanche_reference_raster, comanche_input_rasters, comanche_harmonized_dir)

# Use harmonize_raster_layers function to get Pawnee harmonized rasters
pawnee_harmonized_dir = os.path.join(harmonized_raster_dir, 'pawnee')
os.makedirs(pawnee_harmonized_dir, exist_ok=True)
pawnee_harmonized_rasters = harmonize_raster_layers(
    pawnee_reference_raster, pawnee_input_rasters, pawnee_harmonized_dir)

# Fuzzy logic model
"""    
   - The `calculate_suitability_score` function uses 
   a **fuzzy Gaussian function** to assign suitability 
   scores between 0 and 1 for each raster cell.
   - Cells with values closer to the `optimal_value` 
   receive scores closer to 1, while cells farther away receive 
   lower scores.
   - The `tolerance_range` determines how quickly the score drops 
   off as values deviate from the optimal value.
"""

def calculate_suitability_score(raster, optimal_value, tolerance_range):
    """
    Calculate a fuzzy Gaussian suitability score (0–1) for each raster cell.

    The score is ``exp(-(raster - optimal_value)**2 / (2 * tolerance_range**2))``:
    it is 1 where a cell equals ``optimal_value`` and decays toward 0 as the
    cell value moves away from it. A larger ``tolerance_range`` gives a
    slower drop-off.

    Args:
        raster (xarray.DataArray): Input raster layer.
        optimal_value (float): The optimal value for the variable.
        tolerance_range (float): Width of the Gaussian. Must be non-zero;
            only its magnitude matters because it is squared.

    Returns:
        xarray.DataArray: A raster of suitability scores (0–1).

    Raises:
        ValueError: If ``tolerance_range`` is zero.
    """
    # A zero tolerance would divide by zero in the exponent below, so reject
    # it up front with a clear message.
    if np.any(np.asarray(tolerance_range) == 0):
        raise ValueError("tolerance_range must be non-zero")

    # Squared distance of every cell from the optimum
    squared_distance = (raster - optimal_value) ** 2

    # Fuzzy Gaussian membership function
    return np.exp(-squared_distance / (2 * tolerance_range ** 2))

def build_habitat_suitability_model(
        input_rasters, optimal_values, tolerance_ranges, output_dir, threshold=None):
    """
    Build a habitat suitability model by combining fuzzy
    suitability scores for each variable.

    Each input raster is opened, scored with
    ``calculate_suitability_score`` against its own optimal value and
    tolerance, and the per-variable scores are multiplied cell by cell.
    The product is written to ``combined_suitability.tif`` in ``output_dir``.

    Args:
        input_rasters (list): List of paths to input raster files
            representing environmental variables.

        optimal_values (list): List of optimal values, one per raster.

        tolerance_ranges (list): List of tolerance ranges, one per raster.

        output_dir (str): Directory to save the combined suitability raster.
            Created if it does not exist.

        threshold (float, optional): If given, scores below this value are
            set to 0 so only highly suitable areas keep their score
            (default is None, meaning no thresholding).

    Returns:
        str: Path to the final combined suitability raster.

    Raises:
        ValueError: If ``input_rasters`` is empty, or if the three lists
            do not have the same length.
    """
    # Materialize the inputs so their lengths can be compared
    input_rasters = list(input_rasters)
    optimal_values = list(optimal_values)
    tolerance_ranges = list(tolerance_ranges)

    # Validate before touching the filesystem: zip() would otherwise silently
    # drop the unmatched variables, and an empty list would fail with an
    # unhelpful IndexError further down.
    if not input_rasters:
        raise ValueError("input_rasters must contain at least one raster path")
    if not len(input_rasters) == len(optimal_values) == len(tolerance_ranges):
        raise ValueError(
            "input_rasters, optimal_values and tolerance_ranges must have the "
            f"same length (got {len(input_rasters)}, {len(optimal_values)} "
            f"and {len(tolerance_ranges)})"
        )

    os.makedirs(output_dir, exist_ok=True)

    # Load each raster and calculate its suitability scores
    suitability_layers = []
    for raster_path, optimal_value, tolerance_range in zip(
            input_rasters, optimal_values, tolerance_ranges):
        # masked=True turns nodata cells into NaN; squeeze() drops
        # length-1 dimensions
        raster = rxr.open_rasterio(raster_path, masked=True).squeeze()
        suitability_layers.append(
            calculate_suitability_score(raster, optimal_value, tolerance_range)
        )

    # Combine suitability scores by multiplying across all layers.
    # Assumes all layers share the same grid (e.g. were harmonized
    # beforehand) — this is not checked here.
    combined_suitability = suitability_layers[0]
    for layer in suitability_layers[1:]:
        combined_suitability *= layer

    # Apply a threshold if provided
    if threshold is not None:
        # NaN (masked nodata) cells compare False against the threshold in
        # either direction. Testing "< threshold" and negating keeps them as
        # NaN instead of reporting them as a 0 (unsuitable) score.
        below_threshold = combined_suitability < threshold
        combined_suitability = combined_suitability.where(~below_threshold, 0)

    # Save the combined suitability raster
    output_file = os.path.join(output_dir, "combined_suitability.tif")
    combined_suitability.rio.to_raster(output_file)
    print(f"Combined suitability raster saved to: {output_file}")

    return output_file

# Example 

for site_name, site_gdf in {
        'comanche': comanche_grassland_gdf,
        'pawnee': pawnee_grassland_gdf,
}.items():
    # Only run the example when this file is executed directly
    if __name__ != "__main__":
        continue

    # Placeholder paths to the input rasters: slope, precipitation, soil pH
    input_rasters = [
        "path_to_slope_raster/slope.tif",
        "path_to_precipitation_raster/precipitation.tif",
        "path_to_soil_ph_raster/soil_ph.tif"
    ]

    # Optimal values for Rocky Mountain Juniper, in the same order as the
    # rasters: slope in degrees, precipitation in mm (228.6 - 660.4 mm),
    # soil pH (5-8.5)
    optimal_values = [30, 440, 6.5]

    # Acceptable deviation from the optimum for each variable
    tolerance_ranges = [10, 220, 0.5]

    # Placeholder directory the combined suitability raster is written to
    output_dir = "path_to_output_directory"

    # Scores below this value are zeroed to highlight highly suitable areas
    threshold = 0.8

    # Build the habitat suitability model
    combined_suitability_file = build_habitat_suitability_model(
        input_rasters=input_rasters,
        optimal_values=optimal_values,
        tolerance_ranges=tolerance_ranges,
        output_dir=output_dir,
        threshold=threshold,
    )

    print("Habitat suitability model created:", combined_suitability_file)

def _plot_site_suitability(suitability, boundary_gdf, title):
    """
    Plot one suitability raster with its study-area boundary on top.

    Args:
        suitability: Object whose ``.plot`` accepts ``cbar_kwargs``,
            ``robust`` and ``cmap`` (presumably an xarray.DataArray of
            suitability scores — confirm).
        boundary_gdf (geopandas.GeoDataFrame): Study-area geometry; only
            its boundary is drawn, in black.
        title (str): Title for the axes.
    """
    suitability.plot(
        cbar_kwargs={"label": "suitability score"},
        robust=True,
        cmap='terrain',
    )

    # Overlay the boundary of the same study area on the current axes
    boundary_gdf.boundary.plot(ax=plt.gca(), color='black').set(
        title=title,
        xlabel='Longitude',
        ylabel='Latitude',
        xticks=[],
        yticks=[],
    )
    plt.show()


# NOTE(review): `comanche_combined_suitability_file` and
# `pawnee_combined_suitability_file` are not assigned in the visible part of
# this file, and build_habitat_suitability_model returns a path string rather
# than a plottable raster — confirm what should be passed here.
# NOTE(review): the historical and rcp85 plots for a site pass the same
# variable, so they would draw identical rasters — presumably each scenario
# needs its own suitability raster; confirm.

# Comanche 2000-2004 scenario (historical)
_plot_site_suitability(
    comanche_combined_suitability_file,
    comanche_grassland_gdf,
    'Comanche Grassland-Historical Scenario 2000-2004 ',
)

# Comanche 2091-2095 scenario (rcp85)
_plot_site_suitability(
    comanche_combined_suitability_file,
    comanche_grassland_gdf,
    'Comanche Grassland- rcp85 Scenario 2091-2096  ',
)

# Pawnee 2000-2004 scenario (historical)
_plot_site_suitability(
    pawnee_combined_suitability_file,
    pawnee_grassland_gdf,
    'Pawnee Grassland- Historical Scenario 2000-2004 ',
)

# Pawnee 2091-2095 scenario (rcp85)
_plot_site_suitability(
    pawnee_combined_suitability_file,
    pawnee_grassland_gdf,
    'Pawnee Grassland- rcp85 Scenario 2091-2096 ',
)

Habitat Suitability - Project on the Rocky Mountain Juniper¶

(Juniperus scopulorum Sargent) in Colorado¶

Project Description¶

Citations¶

Plant Species Description¶

Citations¶

Site Descriptions¶

Comanche National Grassland¶

Pawnee National Grassland¶

Citations¶

Data Descriptions¶

Administrative Boundaries: USFS National Grassland Units (used for study sites)¶

Soil Data: POLARIS soil properties database (variables related to soil)¶

Elevation Data: earthaccess API (elevation from the SRTM - used to calculate slope)¶

Climate Data: MACAv2 via THREDDS data server (climate scenarios)¶

Citations¶

Methods Description¶

Citations¶

1. Define Study Areas - USFS National Grassland Units¶

(Comanche and Pawnee National Grasslands)¶

Comanche National Grassland has discontinuous boundaries - this reflects¶

how the grasslands were purchased¶

Pawnee National Grassland has discontinuous boundaries - this reflects¶

how the grasslands were purchased¶

2. Wrangle the Raster Data (3 layers)¶

Part 1: POLARIS dataset - download 1 soil variable¶

Comanche Grassland - pH - plotted correctly - the slightly acidic soil¶

areas appear to be outside the grassland visually. Full pH scale plotted¶

would work for the Rocky Mountain Juniper¶

Pawnee Grassland - pH - plotted correctly - left area of grassland¶

appears to have slightly lower pH areas¶

2. Wrangle the Raster Data (3 layers)¶

Part 2: Elevation Data using SRTM¶

Comanche Grassland Elevation - plotted correctly, wide range in¶

elevation is seen, the lower half of this range is not within the¶

preferred range for Rocky Mountain Juniper¶

Pawnee Grassland Elevation - plotted correctly, smaller range in¶

elevation is seen compared to Comanche, most of this¶

range is within the preferred range for Rocky Mountain Juniper¶

Comanche Calculated Slope - plotted correctly, visually¶

there are some areas of 10-30 degrees slope which¶

would potentially be areas where the Rocky Mountain Juniper is¶

commonly found¶

Pawnee Calculated Slope - plotted correctly, visually¶

there are few areas of 10-30 degrees slope which¶

would potentially be areas where the Rocky Mountain Juniper is¶

commonly found¶

2. Wrangle the Raster Data (3 layers)¶

Part 3: MACA v2 THREDDS - download 2 climate scenarios¶

For what I'm about to do I know it's not DRY¶

Comanche CCSM4 Climate model, rcp85, 2091-2096 is lower¶

resolution than the other raster data downloaded, but did plot¶

correctly. Stark difference between upper and lower unit.¶

3. Harmonize Raster Data and Create Model¶

The below code does not work¶

4. Build a Model¶

The following is pseudocode and has not been tested¶

5. Plotting¶

The code below is pseudocode for how I would plot¶

each site and climate scenario combo¶

Model Description - what I would want to go over if this¶

ended up working¶

Conclusion:¶