What’s in the Database?

Below is a simple count of what is in the database.

[6]:
# Import all the python db table reflections
from snowexsql.data import SiteData, PointData, LayerData, ImageData

# Import the function to get access
from snowexsql.db import get_db


def get_all_distinct(attribute):
    """
    Function to count all the unique & valid entries of a column
    in a table.
    """
    final = {}
    final['Total'] = 0

    # Loop over all the tables
    tables = [PointData, LayerData, ImageData]
    for tbl_cls in tables:

        # Grab the column from the table class
        obj_att = getattr(tbl_cls, attribute)

        # Count all the distinct and non-None values
        result = session.query(obj_att).filter(obj_att != None).distinct().count()

        # Save the result and keep track of the total
        final[tbl_cls.__name__] = result
        final['Total'] += result

    return final


def print_result(count_dict):
    """
    Simple function to print out the counts nicely
    """

    print('\nDB Table Breakdown:')
    for k in ['PointData', 'LayerData', 'ImageData']:
        print(f'\t* {k} = {count_dict[k]}')
[7]:
# Get connection to the DB
db_name = 'db.snowexdata.org/snowex'
engine, session = get_db(db_name, credentials='./credentials.json')

DOIS

[8]:
dois = get_all_distinct('doi')
print(f'Published Datasets: {dois["Total"]}')
print_result(dois)
Published Datasets: 6

DB Table Breakdown:
        * PointData = 2
        * LayerData = 2
        * ImageData = 2

Datasets

[9]:
data_types = get_all_distinct('type')
print(f'Unique Data types: {data_types["Total"]}')
print_result(data_types)
Unique Data types: 19

DB Table Breakdown:
        * PointData = 3
        * LayerData = 9
        * ImageData = 7

Instruments

[10]:
instruments = get_all_distinct('instrument')
print(f'Unique Instruments: {instruments["Total"]}')
print_result(instruments)
Unique Instruments: 8

DB Table Breakdown:
        * PointData = 5
        * LayerData = 1
        * ImageData = 2

Pits

[13]:
pits = session.query(SiteData.site_id, SiteData.date).distinct().count()
print(f'Unique Pits: {pits}')
Unique Pits: 155
[5]:
session.close()
[ ]: