Module `snowpat.icsv.icsv_file`

Expand source code

import pandas as pd
import xarray as xr
from typing import Optional

from .header import MetaDataSection, FieldsSection, Geometry


VERSIONS = ["1.0"]
FIRSTLINES = [f"# iCSV {version} UTF-8" for version in VERSIONS]

class iCSVFile:
    """
    Class to represent an iCSV file.
    
    Attributes:
        metadata (MetadataSection): Metadata section of the iCSV file.
        fields (FieldsSection): Fields section of the iCSV file.
        geometry (Representation class): Geometry section of the iCSV file.
        data (pd.Dataframe): Data section of the iCSV file.
        filename: The name of the iCSV file.
        skip_lines: The number of lines to skip when reading the file.
        
    Methods:
        load_file(filename: str = None): Load an iCSV file.
        parse_geometry(): Parse the geometry section of the iCSV file.
        info(): Print a summary of the iCSV file.
        to_xarray(): Convert the iCSV file to an xarray dataset.
        setData(data: pd.DataFrame, colnames: Optional[list] = None): Set the data of the iCSV file.
        write(filename: str = None): Write the iCSV file to a file.
    """
    def __init__(self, filename:str = None):
        self.metadata = MetaDataSection()
        self.fields = FieldsSection()
        self.geometry = Geometry()
        self.data = None
        self.filename = filename
        self.skip_lines = 0
        
        if self.filename:
            self.load_file()
            
    
    def __str__(self) -> str:
        return f"File: {self.filename}\n{self.metadata}\n{self.fields}\n{self.geometry}"
    
    def __eq__(self, value: object) -> bool:
        try:
            for attr in ['metadata', 'fields', 'geometry']:
                self_value = getattr(self, attr)
                value_value = getattr(value, attr)
                
                if self_value != value_value:
                    return False
            return True
        except AttributeError:
            return False
    
    def _parse_comment_line(self, line, section):
        if line == "[METADATA]":
            return "metadata"
        elif line == "[FIELDS]":
            self.metadata.check_validity()  # to parse fields we need valid metadata
            return "fields"
        elif line == "[DATA]":
            return "data"
        else:
            return self._parse_section_line(line, section)

    def _parse_section_line(self, line, section):
        if not section:
            raise ValueError("No section specified")
        line_vals = line.split("=")
        if len(line_vals) != 2:
            raise ValueError(f"Invalid {section} line: {line}, got 2 assignment operators \"=\"")

        if section == "metadata":
            self.metadata.set_attribute(line_vals[0].strip(), line_vals[1].strip())
        elif section == "fields":
            fields_vec = [field.strip() for field in line_vals[1].split(self.metadata.get_attribute("field_delimiter"))]
            self.fields.set_attribute(line_vals[0].strip(), fields_vec)
        elif section == "data":
            raise TypeError("Data section should not contain any comments")

        return section

    def _update_columns(self):
        self.data.columns = self.fields.fields
        for field in ["time", "timestamp"]:
            if field in self.fields.fields:
                self.data[field] = pd.to_datetime(self.data[field])          
    
    def load_file(self, filename: str = None):
        """Loads an iCSV file and parses its contents.

        Args:
            filename (str, optional): The path to the iCSV file. If not provided, the previously set filename will be used.

        Raises:
            ValueError: If the file is not a valid iCSV file or if the data section is not specified.

        Returns:
            None
        """
        if filename:
            self.filename = filename
            
        section = ""
        with open(self.filename, 'r') as file:
            first_line = file.readline().rstrip()  # rstrip() is used to remove the trailing newline
            if first_line not in FIRSTLINES:
                raise ValueError("Not an iCSV file")
        
            line_number = 1 # need to find the line number where the data starts
            for line in file:
                if line.startswith("#"):
                    line_number += 1
                    line = line[1:].strip()
                    section = self._parse_comment_line(line.strip(), section)
                else:
                    if section != "data":
                        raise ValueError("Data section was not specified")
                    self.skip_lines = line_number
                    break
        
        self.data = pd.read_csv(self.filename, skiprows=self.skip_lines, header=None, sep=self.metadata.get_attribute("field_delimiter"))
        self.fields.check_validity(self.data.shape[1]) # check if the number of fields match the number of columns
        self._update_columns()           
        self.parse_geometry()
        
    def parse_geometry(self):
        if self.metadata.get_attribute("geometry") in self.fields.get_attribute("fields"):
            self.geometry.geometry = self.metadata.get_attribute("geometry")
            self.geometry.srid = self.metadata.get_attribute("srid")
            self.geometry.column_name = self.metadata.get_attribute("column_name")
        else:
            self.geometry.geometry = self.metadata.get_attribute("geometry")
            self.geometry.srid = self.metadata.get_attribute("srid")
            self.geometry.set_location()    
            
    def info(self):
        """
        Prints information about the object and its data.

        This method prints the object itself and the head of its data.

        Args:
            None

        Returns:
            None
        """
        print(self)
        print("\nData:")
        print(self.data.head())
    
    def to_xarray(self) -> xr.Dataset:
        """
        Converts the data to an xarray dataset.

        Returns:
            xarray.Dataset: The converted xarray dataset.
        """
        arr = self.data.to_xarray()
        arr.attrs = self.metadata.metadata
        for i,var in enumerate(arr.data_vars):
            for _, vec in self.fields.miscalleneous_fields.items():
                arr[var].attrs = vec[i]
                
    def setData(self, data: pd.DataFrame, colnames: Optional[list] = None):
        """
        Sets the data of the iCSV file.

        Args:
            data (pd.DataFrame): The data to set.
            colnames (list): The names of the columns in the data.

        Returns:
            None
        """
        self.data = data
        if colnames:
            if len(colnames) != self.data.shape[1]:
                raise ValueError("Number of columns in data does not match the number of column names")
            self.fields.set_attribute("fields", colnames)
        else:
            colnames = self.data.columns.to_list()
            if colnames[0] == "0" or colnames[0] == 0:
                raise ValueError("Column names are not provided")
            self.fields.set_attribute("fields", colnames)
                # Ensure 'timestamp' is the first column if it exists
        if 'timestamp' in self.data.columns:
            cols = self.data.columns.tolist()
            if cols[0] != 'timestamp':
                cols.insert(0, cols.pop(cols.index('timestamp')))
                self.data = self.data[cols]
            self.fields.set_attribute("fields", self.data.columns)

            
        
                
    def write(self, filename: str = None):
        """
        Writes the metadata, fields, and data to a CSV file.

        Args:
            filename (str, optional): The name of the file to write. If not provided, the current filename will be used.

        Returns:
            None
        """
        
        if filename:
            self.filename = filename
            
        self.metadata.check_validity()
        self.fields.check_validity(self.data.shape[1])
        
            

        
        with open(self.filename, 'w') as file:
            file.write(f"{FIRSTLINES[-1]}\n")
            file.write("# [METADATA]\n")
            for key, val in self.metadata.metadata.items():
                file.write(f"# {key} = {val}\n")
            file.write("# [FIELDS]\n")
            for key, val in self.fields.all_fields.items():
                fields_string = self.metadata.get_attribute("field_delimiter").join(str(value) for value in val)
                file.write(f"# {key} = {fields_string}\n")
            file.write("# [DATA]\n")
            
        self.data.to_csv(self.filename, mode='a', index=False, header=False, sep=self.metadata.get_attribute("field_delimiter"))

Classes

class iCSVFile (filename: str = None)

Class to represent an iCSV file.

Attributes

metadata : MetadataSection: Metadata section of the iCSV file.
fields : FieldsSection: Fields section of the iCSV file.
geometry : Representation class: Geometry section of the iCSV file.
data : pd.Dataframe: Data section of the iCSV file.
filename: The name of the iCSV file.
skip_lines: The number of lines to skip when reading the file.

Methods

load_file(filename: str = None): Load an iCSV file. parse_geometry(): Parse the geometry section of the iCSV file. info(): Print a summary of the iCSV file. to_xarray(): Convert the iCSV file to an xarray dataset. setData(data: pd.DataFrame, colnames: Optional[list] = None): Set the data of the iCSV file. write(filename: str = None): Write the iCSV file to a file.

Expand source code

class iCSVFile:
    """
    Class to represent an iCSV file.
    
    Attributes:
        metadata (MetadataSection): Metadata section of the iCSV file.
        fields (FieldsSection): Fields section of the iCSV file.
        geometry (Representation class): Geometry section of the iCSV file.
        data (pd.Dataframe): Data section of the iCSV file.
        filename: The name of the iCSV file.
        skip_lines: The number of lines to skip when reading the file.
        
    Methods:
        load_file(filename: str = None): Load an iCSV file.
        parse_geometry(): Parse the geometry section of the iCSV file.
        info(): Print a summary of the iCSV file.
        to_xarray(): Convert the iCSV file to an xarray dataset.
        setData(data: pd.DataFrame, colnames: Optional[list] = None): Set the data of the iCSV file.
        write(filename: str = None): Write the iCSV file to a file.
    """
    def __init__(self, filename:str = None):
        self.metadata = MetaDataSection()
        self.fields = FieldsSection()
        self.geometry = Geometry()
        self.data = None
        self.filename = filename
        self.skip_lines = 0
        
        if self.filename:
            self.load_file()
            
    
    def __str__(self) -> str:
        return f"File: {self.filename}\n{self.metadata}\n{self.fields}\n{self.geometry}"
    
    def __eq__(self, value: object) -> bool:
        try:
            for attr in ['metadata', 'fields', 'geometry']:
                self_value = getattr(self, attr)
                value_value = getattr(value, attr)
                
                if self_value != value_value:
                    return False
            return True
        except AttributeError:
            return False
    
    def _parse_comment_line(self, line, section):
        if line == "[METADATA]":
            return "metadata"
        elif line == "[FIELDS]":
            self.metadata.check_validity()  # to parse fields we need valid metadata
            return "fields"
        elif line == "[DATA]":
            return "data"
        else:
            return self._parse_section_line(line, section)

    def _parse_section_line(self, line, section):
        if not section:
            raise ValueError("No section specified")
        line_vals = line.split("=")
        if len(line_vals) != 2:
            raise ValueError(f"Invalid {section} line: {line}, got 2 assignment operators \"=\"")

        if section == "metadata":
            self.metadata.set_attribute(line_vals[0].strip(), line_vals[1].strip())
        elif section == "fields":
            fields_vec = [field.strip() for field in line_vals[1].split(self.metadata.get_attribute("field_delimiter"))]
            self.fields.set_attribute(line_vals[0].strip(), fields_vec)
        elif section == "data":
            raise TypeError("Data section should not contain any comments")

        return section

    def _update_columns(self):
        self.data.columns = self.fields.fields
        for field in ["time", "timestamp"]:
            if field in self.fields.fields:
                self.data[field] = pd.to_datetime(self.data[field])          
    
    def load_file(self, filename: str = None):
        """Loads an iCSV file and parses its contents.

        Args:
            filename (str, optional): The path to the iCSV file. If not provided, the previously set filename will be used.

        Raises:
            ValueError: If the file is not a valid iCSV file or if the data section is not specified.

        Returns:
            None
        """
        if filename:
            self.filename = filename
            
        section = ""
        with open(self.filename, 'r') as file:
            first_line = file.readline().rstrip()  # rstrip() is used to remove the trailing newline
            if first_line not in FIRSTLINES:
                raise ValueError("Not an iCSV file")
        
            line_number = 1 # need to find the line number where the data starts
            for line in file:
                if line.startswith("#"):
                    line_number += 1
                    line = line[1:].strip()
                    section = self._parse_comment_line(line.strip(), section)
                else:
                    if section != "data":
                        raise ValueError("Data section was not specified")
                    self.skip_lines = line_number
                    break
        
        self.data = pd.read_csv(self.filename, skiprows=self.skip_lines, header=None, sep=self.metadata.get_attribute("field_delimiter"))
        self.fields.check_validity(self.data.shape[1]) # check if the number of fields match the number of columns
        self._update_columns()           
        self.parse_geometry()
        
    def parse_geometry(self):
        if self.metadata.get_attribute("geometry") in self.fields.get_attribute("fields"):
            self.geometry.geometry = self.metadata.get_attribute("geometry")
            self.geometry.srid = self.metadata.get_attribute("srid")
            self.geometry.column_name = self.metadata.get_attribute("column_name")
        else:
            self.geometry.geometry = self.metadata.get_attribute("geometry")
            self.geometry.srid = self.metadata.get_attribute("srid")
            self.geometry.set_location()    
            
    def info(self):
        """
        Prints information about the object and its data.

        This method prints the object itself and the head of its data.

        Args:
            None

        Returns:
            None
        """
        print(self)
        print("\nData:")
        print(self.data.head())
    
    def to_xarray(self) -> xr.Dataset:
        """
        Converts the data to an xarray dataset.

        Returns:
            xarray.Dataset: The converted xarray dataset.
        """
        arr = self.data.to_xarray()
        arr.attrs = self.metadata.metadata
        for i,var in enumerate(arr.data_vars):
            for _, vec in self.fields.miscalleneous_fields.items():
                arr[var].attrs = vec[i]
                
    def setData(self, data: pd.DataFrame, colnames: Optional[list] = None):
        """
        Sets the data of the iCSV file.

        Args:
            data (pd.DataFrame): The data to set.
            colnames (list): The names of the columns in the data.

        Returns:
            None
        """
        self.data = data
        if colnames:
            if len(colnames) != self.data.shape[1]:
                raise ValueError("Number of columns in data does not match the number of column names")
            self.fields.set_attribute("fields", colnames)
        else:
            colnames = self.data.columns.to_list()
            if colnames[0] == "0" or colnames[0] == 0:
                raise ValueError("Column names are not provided")
            self.fields.set_attribute("fields", colnames)
                # Ensure 'timestamp' is the first column if it exists
        if 'timestamp' in self.data.columns:
            cols = self.data.columns.tolist()
            if cols[0] != 'timestamp':
                cols.insert(0, cols.pop(cols.index('timestamp')))
                self.data = self.data[cols]
            self.fields.set_attribute("fields", self.data.columns)

            
        
                
    def write(self, filename: str = None):
        """
        Writes the metadata, fields, and data to a CSV file.

        Args:
            filename (str, optional): The name of the file to write. If not provided, the current filename will be used.

        Returns:
            None
        """
        
        if filename:
            self.filename = filename
            
        self.metadata.check_validity()
        self.fields.check_validity(self.data.shape[1])
        
            

        
        with open(self.filename, 'w') as file:
            file.write(f"{FIRSTLINES[-1]}\n")
            file.write("# [METADATA]\n")
            for key, val in self.metadata.metadata.items():
                file.write(f"# {key} = {val}\n")
            file.write("# [FIELDS]\n")
            for key, val in self.fields.all_fields.items():
                fields_string = self.metadata.get_attribute("field_delimiter").join(str(value) for value in val)
                file.write(f"# {key} = {fields_string}\n")
            file.write("# [DATA]\n")
            
        self.data.to_csv(self.filename, mode='a', index=False, header=False, sep=self.metadata.get_attribute("field_delimiter"))

Subclasses

iCSVProfiles

Methods

def info(self)

Prints information about the object and its data.

This method prints the object itself and the head of its data.

Args

None

Returns

None

Expand source code

def info(self):
    """
    Prints information about the object and its data.

    This method prints the object itself and the head of its data.

    Args:
        None

    Returns:
        None
    """
    print(self)
    print("\nData:")
    print(self.data.head())

def load_file(self, filename: str = None)

Loads an iCSV file and parses its contents.

Args

filename : str, optional: The path to the iCSV file. If not provided, the previously set filename will be used.

Raises

ValueError: If the file is not a valid iCSV file or if the data section is not specified.

Returns

None

Expand source code

def load_file(self, filename: str = None):
    """Loads an iCSV file and parses its contents.

    Args:
        filename (str, optional): The path to the iCSV file. If not provided, the previously set filename will be used.

    Raises:
        ValueError: If the file is not a valid iCSV file or if the data section is not specified.

    Returns:
        None
    """
    if filename:
        self.filename = filename
        
    section = ""
    with open(self.filename, 'r') as file:
        first_line = file.readline().rstrip()  # rstrip() is used to remove the trailing newline
        if first_line not in FIRSTLINES:
            raise ValueError("Not an iCSV file")
    
        line_number = 1 # need to find the line number where the data starts
        for line in file:
            if line.startswith("#"):
                line_number += 1
                line = line[1:].strip()
                section = self._parse_comment_line(line.strip(), section)
            else:
                if section != "data":
                    raise ValueError("Data section was not specified")
                self.skip_lines = line_number
                break
    
    self.data = pd.read_csv(self.filename, skiprows=self.skip_lines, header=None, sep=self.metadata.get_attribute("field_delimiter"))
    self.fields.check_validity(self.data.shape[1]) # check if the number of fields match the number of columns
    self._update_columns()           
    self.parse_geometry()

def parse_geometry(self)

Expand source code

def parse_geometry(self):
    if self.metadata.get_attribute("geometry") in self.fields.get_attribute("fields"):
        self.geometry.geometry = self.metadata.get_attribute("geometry")
        self.geometry.srid = self.metadata.get_attribute("srid")
        self.geometry.column_name = self.metadata.get_attribute("column_name")
    else:
        self.geometry.geometry = self.metadata.get_attribute("geometry")
        self.geometry.srid = self.metadata.get_attribute("srid")
        self.geometry.set_location()

def setData(self, data: pandas.core.frame.DataFrame, colnames: Optional[list] = None)

Sets the data of the iCSV file.

Args

data : pd.DataFrame: The data to set.
colnames : list: The names of the columns in the data.

Returns

None

Expand source code

def setData(self, data: pd.DataFrame, colnames: Optional[list] = None):
    """
    Sets the data of the iCSV file.

    Args:
        data (pd.DataFrame): The data to set.
        colnames (list): The names of the columns in the data.

    Returns:
        None
    """
    self.data = data
    if colnames:
        if len(colnames) != self.data.shape[1]:
            raise ValueError("Number of columns in data does not match the number of column names")
        self.fields.set_attribute("fields", colnames)
    else:
        colnames = self.data.columns.to_list()
        if colnames[0] == "0" or colnames[0] == 0:
            raise ValueError("Column names are not provided")
        self.fields.set_attribute("fields", colnames)
            # Ensure 'timestamp' is the first column if it exists
    if 'timestamp' in self.data.columns:
        cols = self.data.columns.tolist()
        if cols[0] != 'timestamp':
            cols.insert(0, cols.pop(cols.index('timestamp')))
            self.data = self.data[cols]
        self.fields.set_attribute("fields", self.data.columns)

def to_xarray(self) ‑> xarray.core.dataset.Dataset

Converts the data to an xarray dataset.

Returns

xarray.Dataset: The converted xarray dataset.

Expand source code

def to_xarray(self) -> xr.Dataset:
    """
    Converts the data to an xarray dataset.

    Returns:
        xarray.Dataset: The converted xarray dataset.
    """
    arr = self.data.to_xarray()
    arr.attrs = self.metadata.metadata
    for i,var in enumerate(arr.data_vars):
        for _, vec in self.fields.miscalleneous_fields.items():
            arr[var].attrs = vec[i]

def write(self, filename: str = None)

Writes the metadata, fields, and data to a CSV file.

Args

filename : str, optional: The name of the file to write. If not provided, the current filename will be used.

Returns

None

Expand source code

def write(self, filename: str = None):
    """
    Writes the metadata, fields, and data to a CSV file.

    Args:
        filename (str, optional): The name of the file to write. If not provided, the current filename will be used.

    Returns:
        None
    """
    
    if filename:
        self.filename = filename
        
    self.metadata.check_validity()
    self.fields.check_validity(self.data.shape[1])
    
        

    
    with open(self.filename, 'w') as file:
        file.write(f"{FIRSTLINES[-1]}\n")
        file.write("# [METADATA]\n")
        for key, val in self.metadata.metadata.items():
            file.write(f"# {key} = {val}\n")
        file.write("# [FIELDS]\n")
        for key, val in self.fields.all_fields.items():
            fields_string = self.metadata.get_attribute("field_delimiter").join(str(value) for value in val)
            file.write(f"# {key} = {fields_string}\n")
        file.write("# [DATA]\n")
        
    self.data.to_csv(self.filename, mode='a', index=False, header=False, sep=self.metadata.get_attribute("field_delimiter"))