Bobcat Migration
Species Description
Bobcats belong to the genus Lynx, which has four extant species; the bobcat is one of them.
I chose to look at the migration of the bobcat, Lynx rufus, because my sister works at the Sonoma County Wildlife Rescue in California. She does the rescue-and-release part of the program, and they frequently rescue injured bobcats or take in abandoned kittens that they care for until the animals are old enough to be released. I, however, am in Colorado, where I have seen bobcats in the wilderness. This made me curious about their overall migration.
The species' range extends between Mexico and Canada, with the majority of bobcats found in the U.S. They are not a threatened species, but they do suffer from habitat loss. They "live in a wide variety of habitats, including boreal coniferous and mixed forests in the north, bottomland hardwood forests and coastal swamps in the southeast, and desert and scrublands in the southwest" (The Smithsonian National Zoo & Conservation Biology Institute). Another note that will inform the later interpretation: bobcats do not hibernate and are most active during the winter, particularly January and February, which is their mating season (The Wildlife Rescue League).
If you are interested in learning more about bobcats, please visit The Smithsonian's National Zoo & Conservation Biology Institute and The Wildlife Rescue League.
Data Description
There were two sources of data used for this project: Ecoregions and GBIF data. The Ecoregions data is available as a shapefile, which can be downloaded here. The GBIF data records species occurrences. Together, the two can convey a species' migration over space and time (if that species' data is available from GBIF).
Ecoregion Data
Ecoregions "are areas that are geographically and ecologically similar and experience similar levels of solar radiation and precipitation". There are 846 Ecoregions globally, but most species would be in a small portion of those edoregions depending on their habitats and migration patterns. The Ecoregion shapefile used is an updated version from 2017 that was developed by experts.
Ecoregion Citation:
Terauds, Aleks, et al. “Announcing the Release of Ecoregion Snapshots.” One Earth, One Earth, 31 May 2024, www.oneearth.org/announcing-the-release-of-ecoregion-snapshots/.
GBIF Occurrences Data
GBIF stands for the Global Biodiversity Information Facility. It "is an international network and data infrastructure funded by the world's governments and aimed at providing anyone, anywhere, open access to data about all types of life on Earth" (Data Info citation below). This network draws many data sources together, including museum specimens, DNA barcodes, and crowdsourced smartphone photos from experts and non-experts alike. GBIF uses data standards to index all of these species records.
The data can vary:

- There may be more occurrences recorded in national parks than in the Arctic, even if the species is similarly present in both regions, because there are fewer people in the Arctic to observe it.
- There may be greater or fewer occurrences depending on the time of year people go outside, how accessible a region is during different times of year, etc.
- There may be variation in how many people choose to provide/upload data, which also depends on what each person knows or likes; one may be more likely to upload species they like or know.
Because this data largely comes from crowdsourcing, it needs to be normalized, which is further explained in the Methods Description section.
GBIF Citation:
Data Info:
Data:
GBIF.org (12 October 2024) GBIF Occurrence Download https://doi.org/10.15468/dl.sye4x3
Methods Description
Because the GBIF data can vary over space and time, it needs to be normalized to account for issues that would otherwise skew the plot (e.g., making it look like there are more observations in a certain area or time of year than is an accurate reflection of reality). The data was normalized by ecoregion samples, by month samples, and by area, so the normalization covers both space and time. (This should help control for the number of active observers in each location and time of year.)
Before normalization could happen, the GBIF data was converted into a GeoDataFrame using latitude and longitude point geometry.
Next, a spatial join was performed (with how='inner' and predicate='contains') to identify the ecoregion that contains each observation.
Next, observations were grouped by ecoregion and month, keeping only month/ecoregion combinations with more than one recorded occurrence (since a single occurrence could be an error). The .groupby() and .mean() methods were then used to compute the mean occurrences by ecoregion and by month.
Lastly, the occurrence counts were divided by both the mean occurrences by month and the mean occurrences by ecoregion.
This normalizes the data so that the resulting plot is more accurate, as sketched below.
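To make the arithmetic concrete, here is a minimal sketch of the normalization using a small, made-up table (the counts below are hypothetical, not the real GBIF data). It relies on pandas aligning the division on the shared ecoregion and month index levels, the same pattern used in the normalization code later in this notebook.
# Toy sketch of the normalization (hypothetical counts)
import pandas as pd
toy_counts = pd.DataFrame(
    {'occurrences': [3, 2, 8, 2]},
    index=pd.MultiIndex.from_tuples(
        [(16, 5), (16, 9), (32, 2), (32, 9)],
        names=['ecoregion', 'month']),
)
# Mean occurrences over each index level
toy_mean_by_ecoregion = toy_counts.groupby('ecoregion').mean()
toy_mean_by_month = toy_counts.groupby('month').mean()
# Pandas aligns the flat indexes with the matching MultiIndex levels,
# so e.g. (ecoregion 16, month 5) becomes 3 / (3.0 * 2.5) = 0.4
toy_counts['norm_occurrences'] = (
    toy_counts / toy_mean_by_month / toy_mean_by_ecoregion
)
print(toy_counts)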
%%bash
# Download GBIF Data Part 1
# Install pygbif in order to access the GBIF data
pip install pygbif
# Download GBIF Data Part 2
# Import Packages that will help with reproducible file paths, tabular data, and geospatial data
import os
import pathlib
import time
import zipfile
from getpass import getpass
from glob import glob
import pandas as pd
import geopandas as gpd
import pygbif.occurrences as occ
import pygbif.species as species
# Download GBIF Data Part 3
# Create data directory in the home folder
data_dir_bobcat = os.path.join(
    # Home directory
    pathlib.Path.home(),
    # Earth analytics data directory
    'earth-analytics',
    'data',
    # Project directory
    'species_distribution_bobcat',
)
os.makedirs(data_dir_bobcat, exist_ok=True)
# Define the directory name for GBIF data
gbif_dir_bobcat = os.path.join(data_dir_bobcat, 'gbif_bobcat')
# Check the location for the data_dir_bobcat
data_dir_bobcat
'/home/jovyan/earth-analytics/data/species_distribution_bobcat'
# Download GBIF Data Part 4
# Prompt for GBIF credentials; the username, password, and email must be
# entered correctly, or the download request will fail
reset_credentials = False
# GBIF needs a username, password, and email
credentials = dict(
    GBIF_USER=(input, 'GBIF username:'),
    GBIF_PWD=(getpass, 'GBIF password'),
    GBIF_EMAIL=(input, 'GBIF email')
)
for env_variable, (prompt_func, prompt_text) in credentials.items():
    # Delete credential from environment if requested
    if reset_credentials and (env_variable in os.environ):
        os.environ.pop(env_variable)
    # Ask for credential and save to environment
    if env_variable not in os.environ:
        os.environ[env_variable] = prompt_func(prompt_text)
# Check and make sure my username is correct
os.environ['GBIF_USER']
'brglea'
# Also double check that the password has been saved
'GBIF_PWD' in os.environ
True
# Download GBIF Data Part 5
# Get the species key, will need the number related to the species
# Query species
species_info = species.name_lookup('lynx rufus', rank='SPECIES')
# Get the first result
first_result = species_info['results'][0]
# Get the species key (nubKey)
species_key = first_result['nubKey']
# Check the result
first_result['species'], species_key
('Lynx rufus', 2435246)
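As an optional cross-check (an aside, not part of the original workflow), pygbif's name_backbone function matches a name against the GBIF backbone taxonomy, and its usageKey should agree with the nubKey found above.
# Optional: cross-check the species key against the GBIF backbone taxonomy
backbone = species.name_backbone(name='Lynx rufus')
# usageKey should match the nubKey above (2435246)
backbone['usageKey'], backbone['scientificName']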
# Download GBIF Data Part 6
# Download data from GBIF
# Only download once
gbif_pattern = os.path.join(gbif_dir_bobcat, '*.csv')
if not glob(gbif_pattern):
    # Only submit one request
    if 'GBIF_DOWNLOAD_KEY' not in os.environ:
        # Submit query to GBIF
        gbif_query = occ.download([
            "speciesKey = 2435246",
            "hasCoordinate = True",
            "year = 2023",
        ])
        os.environ['GBIF_DOWNLOAD_KEY'] = gbif_query[0]

    # Wait for the download to build
    download_key = os.environ['GBIF_DOWNLOAD_KEY']
    wait = occ.download_meta(download_key)['status']
    while wait != 'SUCCEEDED':
        wait = occ.download_meta(download_key)['status']
        time.sleep(5)

    # Download GBIF data
    download_info = occ.download_get(
        os.environ['GBIF_DOWNLOAD_KEY'],
        path=data_dir_bobcat)

    # Unzip GBIF data
    with zipfile.ZipFile(download_info['path']) as download_zip:
        download_zip.extractall(path=gbif_dir_bobcat)

# Find the extracted .csv file path (take the first result)
gbif_path = glob(gbif_pattern)[0]
# Check the gbif_path
gbif_path
'/home/jovyan/earth-analytics/data/species_distribution_bobcat/gbif_bobcat/0010842-241007104925546.csv'
# Run the following code to look at the beginning of my file:
!head -n 2 $gbif_path
gbifID datasetKey occurrenceID kingdom phylum class order family genus species infraspecificEpithet taxonRank scientificName verbatimScientificName verbatimScientificNameAuthorship countryCode locality stateProvince occurrenceStatus individualCount publishingOrgKey decimalLatitude decimalLongitude coordinateUncertaintyInMeters coordinatePrecision elevation elevationAccuracy depth depthAccuracy eventDate day month year taxonKey speciesKey basisOfRecord institutionCode collectionCode catalogNumber recordNumber identifiedBy dateIdentified license rightsHolder recordedBy typeStatus establishmentMeans lastInterpreted mediaType issue 4953158569 50c9509d-22c7-4a22-a47d-8c48425ef4a7 https://www.inaturalist.org/observations/151699352 Animalia Chordata Mammalia Carnivora Felidae Lynx Lynx rufus SPECIES Lynx rufus (Schreber, 1777) Lynx rufus US California PRESENT 28eb1a3f-1c15-4a95-931a-4af90ecb574d 34.205588 -118.36292 28846.0 2023-03-16T03:21 16 3 2023 2435246 2435246 HUMAN_OBSERVATION iNaturalist Observations 151699352 Devon 2023-03-20T04:33:39 CC_BY_NC_4_0 Devon Devon 2024-10-12T11:10:13.765Z StillImage COORDINATE_ROUNDED;CONTINENT_DERIVED_FROM_COORDINATES;TAXON_MATCH_TAXON_ID_IGNORED
# Load the GBIF data
bobcat_gbif_df = pd.read_csv(
    gbif_path,
    delimiter='\t',
    index_col='gbifID',
    usecols=['gbifID', 'month', 'decimalLatitude', 'decimalLongitude']
)
# Call this variable to see the beginning of the table
bobcat_gbif_df.head()
| gbifID | decimalLatitude | decimalLongitude | month |
|---|---|---|---|
| 4953158569 | 34.205588 | -118.362920 | 3 |
| 4953055247 | 36.537634 | -121.890603 | 10 |
| 4953008628 | 44.122360 | -119.848505 | 11 |
| 4952902566 | 34.270494 | -118.320036 | 9 |
| 4952869276 | 41.546645 | -72.608720 | 1 |
# Download and save ecoregion boundaries
# Set up the ecoregion boundary URL
ecoregions_url = (
    "https://storage.googleapis.com/teow2016"
    "/Ecoregions2017.zip")
# Set up a path to save the data on your machine
ecoregions_dir = os.path.join(data_dir_bobcat, 'resolve_ecoregions')
# Make the ecoregions directory
os.makedirs(ecoregions_dir, exist_ok=True)
# Join ecoregions shapefile path
ecoregions_path = os.path.join(ecoregions_dir, 'ecoregions.shp')
# Only download once
if not os.path.exists(ecoregions_path):
    ecoregions_gdf = gpd.read_file(ecoregions_url)
    ecoregions_gdf.to_file(ecoregions_path)
%%bash
# Look for all the files in my project directory with the `.shp` extension
# to make sure the download worked
find ~/earth-analytics/data/species_distribution_bobcat -name '*.shp'
/home/jovyan/earth-analytics/data/species_distribution_bobcat/resolve_ecoregions/ecoregions.shp
# Load Ecoregions into Python
# Open up the ecoregions boundaries
ecoregions_gdf = gpd.read_file(ecoregions_path)
# Name the index so it will match the other data later on
ecoregions_gdf.index.name = 'ecoregion'
# Plot the ecoregions to check download
ecoregions_gdf.plot(edgecolor='black', color='lightgreen')
<Axes: >
# Check the ecoregions_gdf - need to check which columns and the specific names of columns to use next
ecoregions_gdf.head()
| ecoregion | OBJECTID | ECO_NAME | BIOME_NUM | BIOME_NAME | REALM | ECO_BIOME_ | NNH | ECO_ID | SHAPE_LENG | SHAPE_AREA | NNH_NAME | COLOR | COLOR_BIO | COLOR_NNH | LICENSE | geometry |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1.0 | Adelie Land tundra | 11.0 | Tundra | Antarctica | AN11 | 1 | 117 | 9.749780 | 0.038948 | Half Protected | #63CFAB | #9ED7C2 | #257339 | CC-BY 4.0 | MULTIPOLYGON (((158.7141 -69.60657, 158.71264 ... |
| 1 | 2.0 | Admiralty Islands lowland rain forests | 1.0 | Tropical & Subtropical Moist Broadleaf Forests | Australasia | AU01 | 2 | 135 | 4.800349 | 0.170599 | Nature Could Reach Half Protected | #70A800 | #38A700 | #7BC141 | CC-BY 4.0 | MULTIPOLYGON (((147.28819 -2.57589, 147.2715 -... |
| 2 | 3.0 | Aegean and Western Turkey sclerophyllous and m... | 12.0 | Mediterranean Forests, Woodlands & Scrub | Palearctic | PA12 | 4 | 785 | 162.523044 | 13.844952 | Nature Imperiled | #FF7F7C | #FE0000 | #EE1E23 | CC-BY 4.0 | MULTIPOLYGON (((26.88659 35.32161, 26.88297 35... |
| 3 | 4.0 | Afghan Mountains semi-desert | 13.0 | Deserts & Xeric Shrublands | Palearctic | PA13 | 4 | 807 | 15.084037 | 1.355536 | Nature Imperiled | #FA774D | #CC6767 | #EE1E23 | CC-BY 4.0 | MULTIPOLYGON (((65.48655 34.71401, 65.52872 34... |
| 4 | 5.0 | Ahklun and Kilbuck Upland Tundra | 11.0 | Tundra | Nearctic | NE11 | 1 | 404 | 22.590087 | 8.196573 | Half Protected | #4C82B6 | #9ED7C2 | #257339 | CC-BY 4.0 | MULTIPOLYGON (((-160.26404 58.64097, -160.2673... |
# Convert the dataframe (df) into a geodataframe (gdf)
bobcat_gbif_gdf = (
    gpd.GeoDataFrame(
        bobcat_gbif_df,
        geometry=gpd.points_from_xy(
            bobcat_gbif_df.decimalLongitude,
            bobcat_gbif_df.decimalLatitude),
        crs="EPSG:4326")
    # Select the desired columns
    [['month', 'geometry']]
)
# Call the variable to see the table
bobcat_gbif_gdf
| gbifID | month | geometry |
|---|---|---|
| 4953158569 | 3 | POINT (-118.36292 34.20559) |
| 4953055247 | 10 | POINT (-121.8906 36.53763) |
| 4953008628 | 11 | POINT (-119.8485 44.12236) |
| 4952902566 | 9 | POINT (-118.32004 34.27049) |
| 4952869276 | 1 | POINT (-72.60872 41.54664) |
| ... | ... | ... |
| 4011868162 | 1 | POINT (-121.81632 37.43427) |
| 4011836346 | 1 | POINT (-122.44225 42.11348) |
| 4011733344 | 1 | POINT (-97.10298 32.58653) |
| 4011611239 | 1 | POINT (-121.7627 36.66162) |
| 4011547228 | 1 | POINT (-96.57154 32.53426) |

3657 rows × 2 columns
# Store the new version of your dataframe for other notebooks as needed
%store ecoregions_gdf bobcat_gbif_gdf
Stored 'ecoregions_gdf' (GeoDataFrame) Stored 'bobcat_gbif_gdf' (GeoDataFrame)
# Normalize Data Part 1
## Perform a Spatial Join
# Define new variable for this new geodataframe
gbif_ecoregion_gdf = (
    ecoregions_gdf
    # Match the CRS of the GBIF data and the ecoregions
    .to_crs(bobcat_gbif_gdf.crs)
    # Find the ecoregion for each observation
    .sjoin(
        bobcat_gbif_gdf,
        how='inner',
        predicate='contains')
    # Select the required columns
    [['month', 'ECO_NAME', 'gbifID', 'OBJECTID']]
    # Rename columns as needed
    .reset_index()
    .rename(columns={
        'ECO_NAME': 'name',
        'gbifID': 'observation_id',
        'OBJECTID': 'object_id'})
)
# Call this variable to see the table
gbif_ecoregion_gdf
| | ecoregion | month | name | observation_id | object_id |
|---|---|---|---|---|---|
| 0 | 16 | 10 | Allegheny Highlands forests | 4420901180 | 17.0 |
| 1 | 16 | 5 | Allegheny Highlands forests | 4116293969 | 17.0 |
| 2 | 16 | 7 | Allegheny Highlands forests | 4165965742 | 17.0 |
| 3 | 16 | 3 | Allegheny Highlands forests | 4535584198 | 17.0 |
| 4 | 16 | 2 | Allegheny Highlands forests | 4055045625 | 17.0 |
| ... | ... | ... | ... | ... | ... |
| 3592 | 833 | 6 | Northern Rockies conifer forests | 4438948603 | 839.0 |
| 3593 | 833 | 11 | Northern Rockies conifer forests | 4458403810 | 839.0 |
| 3594 | 833 | 2 | Northern Rockies conifer forests | 4067548544 | 839.0 |
| 3595 | 833 | 11 | Northern Rockies conifer forests | 4454006926 | 839.0 |
| 3596 | 833 | 9 | Northern Rockies conifer forests | 4414403437 | 839.0 |

3597 rows × 5 columns
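Note that the joined table has 3597 rows, while the GBIF GeoDataFrame above had 3657, so 60 points did not fall inside any ecoregion polygon (for example, observations recorded just offshore). A quick, optional sanity check along these lines:
# Optional: count observations that matched no ecoregion polygon
n_unmatched = len(bobcat_gbif_gdf) - len(gbif_ecoregion_gdf)
print(f'{n_unmatched} of {len(bobcat_gbif_gdf)} observations matched no ecoregion')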
# Normalize Data Part 2
# Define a new dataframe variable to count the occurrences
bobcat_occurrence_df = (
    gbif_ecoregion_gdf
    # For each ecoregion, for each month...
    .groupby(['ecoregion', 'month'])
    # ...count the number of occurrences
    .agg(occurrences=('observation_id', 'count'))
)
# Get rid of rare observations (possible misidentification?)
bobcat_occurrence_df = bobcat_occurrence_df[bobcat_occurrence_df.occurrences > 1]
bobcat_occurrence_df
# Take the mean by ecoregion
mean_occurrences_by_ecoregion = (
    bobcat_occurrence_df
    .groupby(['ecoregion'])
    .mean()
)
# Take the mean by month
mean_occurrences_by_month = (
    bobcat_occurrence_df
    .groupby(['month'])
    .mean()
)
# Call this new variable to see the table
bobcat_occurrence_df
| ecoregion | month | occurrences |
|---|---|---|
| 16 | 5 | 3 |
| 16 | 9 | 2 |
| 16 | 10 | 2 |
| 32 | 1 | 3 |
| 32 | 2 | 8 |
| ... | ... | ... |
| 832 | 3 | 2 |
| 832 | 9 | 2 |
| 833 | 2 | 2 |
| 833 | 10 | 2 |
| 833 | 11 | 2 |

407 rows × 1 columns
# Call the mean occurrences by ecoregion to see this table
mean_occurrences_by_ecoregion
| ecoregion | occurrences |
|---|---|
| 16 | 2.333333 |
| 32 | 6.000000 |
| 33 | 3.300000 |
| 34 | 2.750000 |
| 43 | 4.200000 |
| ... | ... |
| 783 | 8.416667 |
| 790 | 19.666667 |
| 793 | 3.000000 |
| 832 | 2.000000 |
| 833 | 2.000000 |

66 rows × 1 columns
# Call the mean occurrences by month to see this table
mean_occurrences_by_month
| month | occurrences |
|---|---|
| 1 | 10.129032 |
| 2 | 9.062500 |
| 3 | 9.303030 |
| 4 | 8.382353 |
| 5 | 8.540541 |
| 6 | 8.800000 |
| 7 | 7.517241 |
| 8 | 7.000000 |
| 9 | 7.800000 |
| 10 | 7.550000 |
| 11 | 6.952381 |
| 12 | 10.483871 |
# Normalize the Data Part 3
# Normalize the occurrences (bobcat_occurrence_df) by space and time
# to account for sampling effort
bobcat_occurrence_df['norm_occurrences'] = (
    bobcat_occurrence_df
    / mean_occurrences_by_month
    / mean_occurrences_by_ecoregion
)
# Call this variable to see the new table
bobcat_occurrence_df
| ecoregion | month | occurrences | norm_occurrences |
|---|---|---|---|
| 16 | 5 | 3 | 0.150542 |
| 16 | 9 | 2 | 0.109890 |
| 16 | 10 | 2 | 0.113529 |
| 32 | 1 | 3 | 0.049363 |
| 32 | 2 | 8 | 0.147126 |
| ... | ... | ... | ... |
| 832 | 3 | 2 | 0.107492 |
| 832 | 9 | 2 | 0.128205 |
| 833 | 2 | 2 | 0.110345 |
| 833 | 10 | 2 | 0.132450 |
| 833 | 11 | 2 | 0.143836 |

407 rows × 2 columns
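One quick way to validate the normalization is to spot-check a value by hand. For ecoregion 16 in month 5, the tables above show 3 occurrences, a May mean of 8.540541, and an ecoregion mean of 2.333333:
# Spot-check ecoregion 16, month 5 against the norm_occurrences column
3 / (8.540541 * 2.333333)  # ~0.150542, matching the table above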
# Store the new version of your dataframe for other notebooks as needed
%store bobcat_occurrence_df
Stored 'bobcat_occurrence_df' (DataFrame)
# Plot Data Part 1
# Import packages needed for making interactive maps with vector data, as well as
# calendar, in order to get month names that will be used when creating the plot.
# Get month names
import calendar
# Libraries for Dynamic mapping
import cartopy
import cartopy.feature as cf
import cartopy.crs as ccrs
import geopandas as gpd
import geoviews as gv
import geoviews.feature as gf
import holoviews as hv
import hvplot.pandas
import panel as pn
# Plot Data Part 2
# Simplify the geometry to speed up processing
ecoregions_gdf.geometry = ecoregions_gdf.simplify(
    0.01, preserve_topology=False
)
# Change the CRS to Mercator for mapping
ecoregions_gdf = ecoregions_gdf.to_crs(ccrs.Mercator())
# Check that the plot runs in a reasonable amount of time
ecoregions_gdf.hvplot(
    x='Longitude',
    y='Latitude',
    geo=True,
    crs=ccrs.Mercator()
)