Module snowpat.pysmet
Expand source code
from .pymset import read, locFromEPSG, locFromLatLon
from .SMET import SMETFile
__all__ = ['read', 'locFromEPSG', 'locFromLatLon', 'SMETFile']
Sub-modules
snowpat.pysmet.MetaData
snowpat.pysmet.SMET
snowpat.pysmet.pymset
Functions
def locFromEPSG(epsg: int, x: float, y: float, z: Optional[float] = None) ‑> Location-
Create a Location object from EPSG code, x, y, and optional z coordinates.
Args
epsg:int- The EPSG code of the location.
x:float- The x-coordinate of the location.
y:float- The y-coordinate of the location.
z:float, optional- The z-coordinate of the location. Defaults to None.
Returns
Location- The created Location object.
Expand source code
def locFromEPSG(epsg: int, x: float, y: float, z: Optional[float] = None) -> Location:
    """Build a ``Location`` from projected coordinates.

    Args:
        epsg (int): EPSG code of the location; stored as ``epsg``.
        x (float): x-coordinate; stored as ``easting``.
        y (float): y-coordinate; stored as ``northing``.
        z (float, optional): z-coordinate; stored as ``altitude``.
            Defaults to None.

    Returns:
        Location: A new Location carrying the four values above.
    """
    location = Location()
    # Same assignment order as setting the attributes one by one.
    for attribute, value in (
        ("epsg", epsg),
        ("easting", x),
        ("northing", y),
        ("altitude", z),
    ):
        setattr(location, attribute, value)
    return location


# --- next entry in the API listing (was fused onto the end of this source line) ---
# def locFromLatLon(lat: float, lon: float, alt: Optional[float] = None) ‑> Location-
Create a Location object from latitude, longitude, and altitude.
Args
lat:float- The latitude value.
lon:float- The longitude value.
alt:float, optional- The altitude value. Defaults to None.
Returns
Location- The created Location object.
Expand source code
def locFromLatLon(lat: float, lon: float, alt: Optional[float] = None) -> Location:
    """Build a ``Location`` from geographic coordinates.

    Args:
        lat (float): Latitude; stored as ``latitude``.
        lon (float): Longitude; stored as ``longitude``.
        alt (float, optional): Altitude; stored as ``altitude``.
            Defaults to None.

    Returns:
        Location: A new Location carrying the three values above.
    """
    location = Location()
    # Same assignment order as setting the attributes one by one.
    for attribute, value in (
        ("latitude", lat),
        ("longitude", lon),
        ("altitude", alt),
    ):
        setattr(location, attribute, value)
    return location


# --- next entry in the API listing (was fused onto the end of this source line) ---
# def read(filename: str) ‑> SMETFile-
Reads a file in SMET format and returns a SMETFile object representing the file.
This function reads a file in SMET format and creates a SMET object that provides access to the data and metadata in the file. The data can be accessed via the `data` attribute of the returned object. The metadata is divided into three categories: standard metadata, optional metadata, and ACDD metadata, which can be accessed via the `meta_data.key`, `optional_meta_data.key`, and `acdd_meta_data.key` attributes, respectively.
Args
filename:str- The path to the SMET file.
Returns
SMETFile- A SMET object representing the file. The following attributes are available:
- data: The data from the SMET file, as a pandas DataFrame.
- meta_data.key: The standard metadata from the SMET file.
- optional_meta_data.key: The optional metadata from the SMET file.
- acdd_meta_data.key: The ACDD (Attribute Convention for Data Discovery) metadata from the SMET file.
Examples
>>> import pysmet as smet
>>> file = smet.read("path/to/smet/file")
>>> data = file.data
>>> data_numpy = file.toNumpy()
>>> location_lat = file.meta_data.location.latitude
>>> location_lon = file.meta_data.location.longitude
>>> acdd_creator_name = file.acdd_meta_data.get_attribute("creator_name")
>>> file.info() # Print a summary of the SMET file
Expand source code
def read(filename: str) -> SMETFile:
    """Read a file in SMET format and return a SMETFile object for it.

    The header length is determined first by scanning the file up to and
    including the line that contains ``[DATA]``; that count is handed to
    ``SMETFile`` so the data section can be parsed from the right offset.
    If no ``[DATA]`` line exists, the count equals the total number of lines.

    The data can be accessed via the `data` attribute of the returned object.
    The metadata is divided into three categories: standard metadata,
    optional metadata, and ACDD metadata, which can be accessed via the
    `meta_data.key`, `optional_meta_data.key`, and `acdd_meta_data.key`
    attributes, respectively.

    Args:
        filename (str): The path to the SMET file.

    Returns:
        SMETFile: A SMET object representing the file. The following
        attributes are available:
            - `data`: The data from the SMET file, as a pandas DataFrame.
            - `meta_data.key`: The standard metadata from the SMET file.
            - `optional_meta_data.key`: The optional metadata from the SMET file.
            - `acdd_meta_data.key`: The ACDD (Attribute Convention for Data
              Discovery) metadata from the SMET file.

    Examples:
        >>> import pysmet as smet
        >>> file = smet.read("path/to/smet/file")
        >>> data = file.data
        >>> data_numpy = file.toNumpy()
        >>> location_lat = file.meta_data.location.latitude
        >>> location_lon = file.meta_data.location.longitude
        >>> acdd_creator_name = file.acdd_meta_data.get_attribute("creator_name")
        >>> file.info()  # Print a summary of the SMET file
    """
    num_header_lines = 0
    with open(filename, "r") as f:
        # enumerate(start=1) leaves the 1-based number of the [DATA] line
        # (or the total line count) in num_header_lines.
        for num_header_lines, line in enumerate(f, start=1):
            if "[DATA]" in line:
                break
    return SMETFile(filename, read=True, num_header_lines=num_header_lines)
Classes
class SMETFile (filename, read: bool, num_header_lines: int = None, fun: bool = False)-
A class used to represent a SMET file.
Attributes
identifier:str- A string that represents the identifier of the SMET data.
meta_data:snowpat.pysmet.MetaData- An instance of the MetaData class that stores the mandatory metadata of the SMET data.
data:pd.DataFrame- A pandas DataFrame that stores the SMET data.
optional_meta_data:OptionalMetaData- An instance of the OptionalMetaData class that stores the optional metadata of the SMET data.
acdd_meta_data:ACDDMetadata- An instance of the ACDDMetadata class that stores the ACDD metadata of the SMET data.
other_meta_data:dict- A dictionary that stores the other metadata of the SMET data.
Expand source code
class SMETFile:
    """A class used to represent a SMET file.

    Attributes:
        identifier (str): The identifier (first line) of the SMET data.
        meta_data (MetaData): The mandatory metadata of the SMET data.
        data (pd.DataFrame): The SMET data.
        optional_meta_data (OptionalMetaData): The optional metadata of the SMET data.
        acdd_meta_data (ACDDMetadata): The ACDD metadata of the SMET data.
        other_meta_data (dict): Header entries that match none of the above.
    """

    def __init__(self, filename, read: bool, num_header_lines: int = None, fun: bool = False) -> None:
        """Create a SMETFile, either by parsing ``filename`` or empty.

        Args:
            filename: Path of the SMET file (read from, or default write target).
            read (bool): If True the file is parsed; otherwise an empty object
                is created for building a file from scratch.
            num_header_lines (int, optional): Number of lines up to and
                including ``[DATA]``; only used when ``read`` is True.
            fun (bool): Enables extra printed output from helper functions.

        Raises:
            FileNotFoundError: If ``read`` is True and the file does not exist.
        """
        if not os.path.isfile(filename) and read:
            raise FileNotFoundError(f"The file {filename} does not exist.")
        self.fun = fun
        self.num_header_lines = num_header_lines if read else 1
        # getIdentifier() also bumps num_header_lines, so it must run after it is set.
        self.identifier = None if read else self.getIdentifier()
        self.optional_meta_data = None if read else OptionalMetaData()
        self.filename = filename
        self.data_header = None if read else ""
        self.acdd_meta_data = ACDDMetadata()
        self.other_meta_data = dict()
        # read_meta_data() fills identifier/optional/acdd/other metadata as a side effect.
        self.meta_data = self.read_meta_data() if read else MetaData()
        self.data = self.read_data() if read else pd.DataFrame()

    @property
    def all_meta_data(self):
        """All metadata merged into one dict; later groups override earlier ones."""
        return {
            **self.meta_data.combined_dict,
            **self.optional_meta_data.adjusted_dict,
            **self.acdd_meta_data.adjusted_dict,
            **self.other_meta_data,
        }

    def read_data(self) -> pd.DataFrame:
        """Parse the data section of ``self.filename`` into a DataFrame.

        Rows after the first ``self.num_header_lines`` lines are read as
        whitespace-separated values. The nodata value (if set) is replaced by
        NaN, the columns are named after the metadata fields, and a
        ``timestamp`` column, if present, is converted to datetimes.
        """
        if not self.meta_data:
            print(
                "Something went wrong reading MetaData, in the worst case please contact patrick.leibersperger@slf.ch"
            )
        if self.meta_data.fields:
            self.data_header = self.meta_data.fields
        else:
            print("Unable to read data header from SMET file. Using no header.")
        # Raw string: "\s" in a normal literal is an invalid escape sequence.
        data = pd.read_csv(
            self.filename, delimiter=r"\s+", skiprows=self.num_header_lines, header=None
        )
        if self.meta_data.nodata:
            data = data.replace(self.meta_data.nodata, np.nan)
        if self.data_header:
            data.columns = self.data_header
            # Files without a timestamp field are legal for write(); do not crash here.
            if "timestamp" in data.columns:
                data["timestamp"] = pd.to_datetime(data["timestamp"], format="ISO8601")
        return data

    def read_meta_data(self) -> MetaData:
        """Parse the header of ``self.filename``.

        Sets ``self.identifier`` from the first line and fills
        ``self.optional_meta_data``, ``self.acdd_meta_data`` and
        ``self.other_meta_data`` while reading. Each ``key = value`` line is
        routed by its key; lines without ``=`` are skipped.

        Returns:
            MetaData: The mandatory metadata found in the header.
        """
        meta_data = MetaData()
        optional_meta_data = OptionalMetaData()
        with open(self.filename, "r") as f:
            self.identifier = next(f).strip()
            for line in f:
                if "[DATA]" in line:
                    break
                if "[HEADER]" in line:
                    continue
                # Split on the first "=" only, so values may contain "=".
                key, separator, val = line.partition("=")
                if not separator:
                    continue  # blank or malformed line
                key = key.strip()
                val = val.strip()
                # Dispatch on the key itself; matching against the whole line
                # would misroute e.g. "acdd_source" or values containing a keyword.
                if key == "station_id":
                    meta_data.station_id = val
                elif key in ("latitude", "longitude", "altitude", "easting", "northing"):
                    setattr(meta_data.location, key, float(val))
                elif key == "epsg":
                    meta_data.location.epsg = int(val)
                elif key == "nodata":
                    meta_data.nodata = float(val)
                elif key == "fields":
                    meta_data.fields = self.__parseFields(val.split())
                elif key == "station_name":
                    optional_meta_data.station_name = val
                elif key == "tz":
                    optional_meta_data.tz = int(val)
                elif key in ("slope_angle", "slope_azi"):
                    setattr(optional_meta_data, key, float(val))
                elif key in ("creation", "source", "comment"):
                    setattr(optional_meta_data, key, val)
                elif key in ("units_offset", "units_multiplier"):
                    setattr(optional_meta_data, key, list(map(float, val.split())))
                elif "acdd" in key:
                    self.acdd_meta_data.set_attribute(key, val)
                else:
                    self.other_meta_data[key] = val
        unknown_metadata = list(self.other_meta_data)
        if unknown_metadata:
            if self.fun:
                print(bad())
                print(empire())
            print(f"Unknown metadata: {unknown_metadata}")
        meta_data.checkValidity(self.fun)
        self.optional_meta_data = optional_meta_data
        return meta_data

    def __parseFields(self, fields: List[str]):
        """Report field names that are not in the built-in list; return ``fields`` unchanged."""
        possible_fields = [
            "P", "TA", "TSS", "TSG", "RH", "VW", "DW", "ISWR", "RSWR",
            "ILWR", "OLWR", "PINT", "PSUM", "HS", "timestamp", "julian",
        ]
        non_conforming_fields = [
            field for field in fields if field not in possible_fields
        ]
        if non_conforming_fields:
            if self.fun:
                print(not_way())
                print(yoda())
            print(
                "The following fields do not conform to the SMET standard:",
                non_conforming_fields,
            )
        elif self.fun and not non_conforming_fields:
            print(way())
            print(mando())
        return fields

    def __str__(self):
        return (
            f"SMETFile:\n"
            f"{self.meta_data}\n"
            f"Optional MetaData: {self.optional_meta_data}\n"
            f"{self.acdd_meta_data}\n"
            f"Other MetaData: {self.other_meta_data}\n"
            f"Data:\n{self.data.head()}"
        )

    def toNumpy(self) -> np.ndarray:
        """Return a Numpy array of the data.

        This method uses the pandas DataFrame's to_numpy() method to convert
        the DataFrame to a Numpy array.

        Returns:
            np.ndarray: The data as a Numpy array.

        Examples:
            >>> instance = pysmet.read("path/to/file.smet")
            >>> array = instance.toNumpy()
        """
        return self.data.to_numpy()

    def toDf(self) -> pd.DataFrame:
        """Return the data as a pandas DataFrame.

        Returns:
            pd.DataFrame: The DataFrame stored in the instance (not a copy).

        Examples:
            >>> instance = pysmet.read("path/to/file.smet")
            >>> df = instance.toDf()
        """
        return self.data

    def toXarray(self, time_name: str = "time") -> xr.DataArray:
        """Return the data as a two-dimensional xarray DataArray.

        Args:
            time_name (str): Name of the first dimension, whose coordinate is
                the ``timestamp`` column. Defaults to "time".

        Returns:
            xr.DataArray: All non-timestamp columns, with dimensions
            ``(time_name, "variables")``.

        Examples:
            >>> instance = pysmet.read("path/to/file.smet")
            >>> da = instance.toXarray()
        """
        df = self.data.copy()
        values = df.drop(columns="timestamp")
        return xr.DataArray(
            values.values,
            dims=[time_name, "variables"],
            coords={time_name: df["timestamp"].values, "variables": values.columns},
        )

    def info(self):
        """Print a summary of the SMET file."""
        print(self)

    def write(self, output_filename: str = None):
        """Write the SMET file to disk.

        Writes identifier, ``[HEADER]``, all metadata groups, ``[DATA]`` and
        the data rows. If a ``timestamp`` column exists it is formatted as an
        ISO 8601 string; if the metadata fields differ from the data columns,
        the data columns win.

        Args:
            output_filename (str, optional): The path to the output file. If
                not provided, the original filename is used.

        Returns:
            None
        """
        output_filename = output_filename if output_filename else self.filename

        # Check if fields in MetaData match columns in data
        if self.meta_data.fields != self.data.columns.to_list():
            print("Fields in MetaData do not match columns in data. Using data columns.")
            print("MetaData fields: ", self.meta_data.fields)
            print("Data columns: ", self.data.columns.to_list())
            self.meta_data.fields = self.data.columns.to_list()
        self.meta_data.checkValidity()

        out_data = self.data.copy()
        if self.meta_data.nodata:
            out_data = out_data.fillna(self.meta_data.nodata)

        if "timestamp" in out_data.columns:
            if pd.api.types.is_datetime64_any_dtype(out_data["timestamp"].dtype):
                out_data["timestamp"] = out_data["timestamp"].apply(lambda x: x.isoformat())
            elif pd.api.types.is_string_dtype(out_data["timestamp"].dtype):
                out_data["timestamp"] = pd.to_datetime(out_data["timestamp"], errors='coerce').apply(
                    lambda x: x.isoformat() if pd.notnull(x) else 'Invalid timestamp'
                )
                if out_data["timestamp"].str.contains('Invalid timestamp').any():
                    print("Some timestamps could not be converted to datetime.")
            else:
                print("The 'timestamp' column is neither in string nor datetime format.")
        elif pd.api.types.is_datetime64_any_dtype(out_data.index):
            # No 'timestamp' column can exist on this branch, so the index is used.
            # NOTE(review): the column is appended last and is not part of
            # meta_data.fields written to the header - confirm this is intended.
            print("Using datetime index as 'timestamp' column.")
            out_data["timestamp"] = out_data.index.to_series().apply(lambda x: x.isoformat())
        else:
            print("Info: You are not using any timestamp information.")

        with open(output_filename, "w") as f:
            # Write identifier
            f.write(self.identifier + "\n")
            # Write [HEADER]
            f.write("[HEADER]\n")
            # Write metadata
            for key, value in self.meta_data.combined_dict.items():
                f.write(f"{key} = {value}\n")
            # Write optional metadata if it exists
            if self.optional_meta_data:
                for key, value in self.optional_meta_data.adjusted_dict.items():
                    f.write(f"{key} = {value}\n")
            # Write ACDD metadata if it exists
            if self.acdd_meta_data:
                for key, value in self.acdd_meta_data.adjusted_dict.items():
                    f.write(f"{key} = {value}\n")
            # Write any other metadata
            if self.other_meta_data:
                for key, value in self.other_meta_data.items():
                    f.write(f"{key} = {value}\n")
            # Write [DATA]
            f.write("[DATA]\n")
            # Check if "timestamp" is in columns
            timestamp_exists = "timestamp" in self.data.columns
            # Write data: timestamps padded to 20 characters, other values to 10
            for row in out_data.itertuples(index=False):
                if timestamp_exists:
                    f.write(
                        "\t".join(
                            (
                                f"{str(item):<20}"
                                if field == "timestamp"
                                else f"{str(item):<10}"
                            )
                            for field, item in zip(self.data.columns, row)
                        )
                        + "\n"
                    )
                else:
                    f.write("\t".join(f"{str(item):<20}" for item in row) + "\n")

    # for creating a smet file from scratch
    def getIdentifier(self, version=1.1) -> str:
        """Get the identifier for the SMET file.

        The identifier is the first line in a SMET file. Calling this also
        increments ``num_header_lines`` by one.

        Args:
            version (float, optional): The version number of the SMET file.
                Default is 1.1.

        Returns:
            str: The identifier for the SMET file.
        """
        self.num_header_lines += 1
        return f"SMET {version} ASCII"

    def setIdentifier(self, version=1.1) -> None:
        """Set the identifier for the SMET file.

        Stores ``"SMET <version> ASCII"`` in ``identifier`` and increments
        ``num_header_lines`` by one.

        Args:
            version (float, optional): The version number of the SMET file.
                Default is 1.1.

        Returns:
            None
        """
        self.num_header_lines += 1
        self.identifier = f"SMET {version} ASCII"

    def setData(self, data: pd.DataFrame, colnames: Optional[List[str]] = None) -> None:
        """Set the data for the SMET object.

        Args:
            data (pd.DataFrame): The data to be set.
            colnames (Optional[List[str]], optional): Column names to be used
                as fields in the MetaData. Defaults to None.

        Raises:
            ValueError: If `colnames` is not provided when the first field in
                the data is "0" or 0.

        Returns:
            None
        """
        self.data = data
        fields = data.columns.to_list()
        if fields[0] == "0" or fields[0] == 0:
            if not colnames:
                raise ValueError("Please provide a meaningful list of fields in the MetaData")
            else:
                print("Using fields: ", colnames)
                self.meta_data.fields = colnames
        else:
            print("Using fields: ", fields)
            self.meta_data.fields = fields

    def fromNumpy(
        self, data: np.ndarray, header: List[str], timestamp: Optional[List[str]] = None
    ) -> None:
        """Load data from a numpy array into the SMET object.

        Args:
            data (np.ndarray): The numpy array containing the data.
            header (List[str]): The list of column names for the data.
            timestamp (Optional[List[str]], optional): The list of timestamps
                for the data. Defaults to None.

        Returns:
            None
        """
        self.data = pd.DataFrame(data, columns=header)
        self.meta_data.fields = header
        self.num_header_lines += 1
        self.data_header = header
        if timestamp:
            self.data["timestamp"] = pd.to_datetime(timestamp)

    def setMetaData(self, key: str, value):
        """Set the metadata attribute ``key`` to ``value``.

        The key is looked up on the mandatory metadata first, then treated as
        ACDD metadata, then looked up on the optional metadata; otherwise it is
        stored in ``other_meta_data``. ``num_header_lines`` is incremented.

        Args:
            key (str): The key of the metadata attribute.
            value: The value to set for the metadata attribute.

        Necessary Metadata:
            station_id: str
            location: Location
            nodata: float
            fields: List[str]

        Supported ACDD Keys:
            title summary keywords conventions id naming_authority source
            history comment date_created creator_name creator_url creator_email
            institution processing_level project geospatial_bounds
            geospatial_lat_min geospatial_lat_max geospatial_lon_min
            geospatial_lon_max geospatial_vertical_min geospatial_vertical_max
            time_coverage_start time_coverage_end Wigos ID

        Returns:
            None
        """
        if hasattr(self.meta_data, key):
            setattr(self.meta_data, key, value)
        elif "acdd" in key or (key in self.acdd_meta_data.attributes.keys()):
            print(f"Adding {key} to acdd_meta_data.")
            self.acdd_meta_data.set_attribute(key, value)
        elif hasattr(self.optional_meta_data, key):
            setattr(self.optional_meta_data, key, value)
        else:
            print(
                f"MetaData does not have attribute {key}. Adding to other_meta_data."
            )
            self.other_meta_data[key] = value
        self.num_header_lines += 1

    def mergeFromFile(self, other_filename: str, override: bool = False):
        """Merge the data of another SMET file on disk into this one.

        Args:
            other_filename (str): The path to the other SMET file.
            override (bool, optional): If True, duplicates in the data of the
                current SMET file are overridden by the data of the other SMET
                file. If False, only missing values are filled. Defaults to False.

        Returns:
            None
        """
        n_header = 0
        with open(other_filename, 'r') as f:
            for line in f:
                n_header += 1
                if "[DATA]" in line:
                    break
        other = SMETFile(other_filename, read=True, num_header_lines=n_header)
        # merge() is a bound method: passing self again would raise TypeError.
        self.merge(other, override)

    def merge(self, other: 'SMETFile', override=False):
        """Merge the data of another SMETFile into this one.

        Args:
            other (SMETFile): The other SMET file. Its data is not modified.
            override (bool, optional): If True, duplicates in the data of the
                current SMET file are overridden by the data of the other SMET
                file. If False, only missing values are filled. Defaults to False.

        Raises:
            ValueError: If the mandatory metadata of the two files differ.

        Returns:
            None
        """
        if self.meta_data != other.meta_data:
            raise ValueError("The MetaData of the two SMET files are not equal.")
        self.optional_meta_data.join(other.optional_meta_data)
        self.acdd_meta_data.join(other.acdd_meta_data)

        # Index copies by timestamp; the caller's `other.data` stays untouched.
        own = self.data.set_index("timestamp")
        theirs = other.data.set_index("timestamp")
        if override:
            own.update(theirs)
        else:
            own = own.combine_first(theirs)

        # Append the other rows so timestamps missing from `own` are included.
        merged = pd.concat([own, theirs]).reset_index()
        # A stable sort keeps the already-merged rows ahead of the appended
        # ones, so keep="first" reliably retains the merged values.
        merged = merged.sort_values("timestamp", kind="mergesort")
        merged = merged.drop_duplicates(subset="timestamp", keep="first")
        self.data = merged.reset_index(drop=True)

        self.meta_data.checkValidity()
        print("Merged data from", other.filename)


# --- next section heading of the API listing (was fused onto the end of this source line) ---
# Instance variables
var all_meta_data-
Expand source code
@property
def all_meta_data(self):
    """Return every metadata entry of the file in a single new dictionary.

    The groups are applied in the order mandatory, optional, ACDD, other, so
    a key that occurs in several groups takes its value from the last one.
    """
    merged = {}
    for group in (
        self.meta_data.combined_dict,
        self.optional_meta_data.adjusted_dict,
        self.acdd_meta_data.adjusted_dict,
        self.other_meta_data,
    ):
        merged.update(group)
    return merged
Methods
def fromNumpy(self, data: numpy.ndarray, header: List[str], timestamp: Optional[List[str]] = None) ‑> None-
Load data from a numpy array into the SMET object.
Args
data:np.ndarray- The numpy array containing the data.
header:List[str]- The list of column names for the data.
timestamp:Optional[List[str]], optional- The list of timestamps for the data. Defaults to None.
Returns
None
Expand source code
def fromNumpy(
    self, data: np.ndarray, header: List[str], timestamp: Optional[List[str]] = None
) -> None:
    """Replace the object's data with the contents of a numpy array.

    Args:
        data (np.ndarray): The numpy array containing the data.
        header (List[str]): Column names for the data; also stored as the
            metadata fields and as ``data_header``.
        timestamp (Optional[List[str]], optional): Timestamps for the data;
            when given (and non-empty) they are parsed and stored in a
            ``timestamp`` column. Defaults to None.

    Returns:
        None
    """
    frame = pd.DataFrame(data, columns=header)
    self.data = frame
    self.meta_data.fields = header
    self.num_header_lines = self.num_header_lines + 1
    self.data_header = header
    if not timestamp:
        return
    self.data["timestamp"] = pd.to_datetime(timestamp)


# --- next entry in the API listing (was fused onto the end of this source line) ---
# def getIdentifier(self, version=1.1) ‑> str-
Get the identifier for the SMET file.
The identifier is the first line in a SMET file and is used to identify the file format. It is generated based on the provided version.
Args
version:float, optional- The version number of the SMET file. Default is 1.1.
Returns
str- The identifier for the SMET file.
Expand source code
def getIdentifier(self, version=1.1) -> str:
    """Build the identifier line of a SMET file for the given version.

    The identifier is the first line in a SMET file. As a side effect the
    header-line counter ``num_header_lines`` is increased by one on every call.

    Args:
        version (float, optional): The version number of the SMET file.
            Default is 1.1.

    Returns:
        str: The identifier for the SMET file.
    """
    self.num_header_lines = self.num_header_lines + 1
    identifier = f"SMET {version} ASCII"
    return identifier


# --- next entry in the API listing (was fused onto the end of this source line) ---
# def info(self)-
Print a summary of the SMET file.
Expand source code
def info(self):
    """Print the string representation of the object as a summary of the SMET file."""
    summary = str(self)
    print(summary)


# --- next entry in the API listing (was fused onto the end of this source line) ---
# def merge(self, other: SMETFile, override=False)-
Merge the data of the current SMET file with the data of another SMET file.
Args
other:SMETFile- The other SMET file.
override:bool, optional- If True, duplicates in the data of the current SMET file are overridden by the data of the other SMET file. If False, only missing values are filled. Defaults to False.
Returns
None
Expand source code
def merge(self, other: 'SMETFile', override=False):
    """Merge the data of another SMETFile into this one.

    Rows are matched on the ``timestamp`` column. Timestamps that exist only
    in ``other`` are added; for timestamps present in both files the values
    are combined according to ``override``. The result is sorted by timestamp
    and contains each timestamp once.

    Args:
        other (SMETFile): The other SMET file. Its data is not modified.
        override (bool, optional): If True, duplicates in the data of the
            current SMET file are overridden by the data of the other SMET
            file. If False, only missing values are filled. Defaults to False.

    Raises:
        ValueError: If the mandatory metadata of the two files differ.

    Returns:
        None
    """
    if self.meta_data != other.meta_data:
        raise ValueError("The MetaData of the two SMET files are not equal.")
    self.optional_meta_data.join(other.optional_meta_data)
    self.acdd_meta_data.join(other.acdd_meta_data)

    # Index copies by timestamp; the caller's `other.data` stays untouched.
    own = self.data.set_index("timestamp")
    theirs = other.data.set_index("timestamp")
    if override:
        own.update(theirs)
    else:
        own = own.combine_first(theirs)

    # Append the other rows so timestamps missing from `own` are included.
    merged = pd.concat([own, theirs]).reset_index()
    # A stable sort keeps the already-merged rows ahead of the appended ones,
    # so keep="first" reliably retains the merged values.
    merged = merged.sort_values("timestamp", kind="mergesort")
    merged = merged.drop_duplicates(subset="timestamp", keep="first")
    self.data = merged.reset_index(drop=True)

    self.meta_data.checkValidity()
    print("Merged data from", other.filename)


# --- next entry in the API listing (was fused onto the end of this source line) ---
# def mergeFromFile(self, other_filename: str, override: bool = False)-
Merge the data of the current SMET file with the data of another SMET file.
Args
other_filename:str- The path to the other SMET file.
override:bool, optional- If True, duplicates in the data of the current SMET file are overridden by the data of the other SMET file. If False, only missing values are filled. Defaults to False.
Returns
None
Expand source code
def mergeFromFile(self, other_filename: str, override: bool = False):
    """Merge the data of another SMET file on disk into this one.

    The other file is read into a ``SMETFile`` (its header length is counted
    up to and including the ``[DATA]`` line) and then passed to ``merge``.

    Args:
        other_filename (str): The path to the other SMET file.
        override (bool, optional): If True, duplicates in the data of the
            current SMET file are overridden by the data of the other SMET
            file. If False, only missing values are filled. Defaults to False.

    Returns:
        None
    """
    n_header = 0
    with open(other_filename, "r") as f:
        for n_header, line in enumerate(f, start=1):
            if "[DATA]" in line:
                break
    other = SMETFile(other_filename, read=True, num_header_lines=n_header)
    # merge() is a bound method: passing self again would raise TypeError.
    self.merge(other, override)


# --- next entry in the API listing (was fused onto the end of this source line) ---
# def read_data(self) ‑> pandas.core.frame.DataFrame-
Expand source code
def read_data(self) -> pd.DataFrame:
    """Parse the data section of ``self.filename`` into a DataFrame.

    The first ``self.num_header_lines`` lines are skipped and the rest is
    read as whitespace-separated values. If the metadata defines a nodata
    value, it is replaced by NaN. If the metadata defines fields, they become
    the column names (and are stored in ``self.data_header``), and a
    ``timestamp`` column, if present, is converted to datetimes.

    Returns:
        pd.DataFrame: The parsed data.
    """
    if not self.meta_data:
        print(
            "Something went wrong reading MetaData, in the worst case please contact patrick.leibersperger@slf.ch"
        )
    if self.meta_data.fields:
        self.data_header = self.meta_data.fields
    else:
        print("Unable to read data header from SMET file. Using no header.")
    # Raw string: "\s" in a normal literal is an invalid escape sequence.
    data = pd.read_csv(
        self.filename, delimiter=r"\s+", skiprows=self.num_header_lines, header=None
    )
    if self.meta_data.nodata:
        data = data.replace(self.meta_data.nodata, np.nan)
    if self.data_header:
        data.columns = self.data_header
        # Data without a timestamp field must not fail with a KeyError here.
        if "timestamp" in data.columns:
            data["timestamp"] = pd.to_datetime(data["timestamp"], format="ISO8601")
    return data


# --- next entry in the API listing (was fused onto the end of this source line) ---
# def read_meta_data(self) ‑> MetaData-
Expand source code
def read_meta_data(self) -> MetaData:
    """Parse the header of ``self.filename``.

    The first line is stored as ``self.identifier``. Every following line up
    to the one containing ``[DATA]`` is treated as ``key = value`` and routed
    by its key to the mandatory metadata, the optional metadata, the ACDD
    metadata (keys containing "acdd") or ``self.other_meta_data``. Lines
    without ``=`` are skipped. Unknown keys are reported, the mandatory
    metadata is validated, and ``self.optional_meta_data`` is replaced by the
    parsed optional metadata.

    Returns:
        MetaData: The mandatory metadata found in the header.

    Raises:
        ValueError: If a numeric header value cannot be converted.
    """
    meta_data = MetaData()
    optional_meta_data = OptionalMetaData()
    with open(self.filename, "r") as f:
        self.identifier = next(f).strip()
        for line in f:
            if "[DATA]" in line:
                break
            if "[HEADER]" in line:
                continue
            # Split on the first "=" only, so values may contain "=".
            key, separator, val = line.partition("=")
            if not separator:
                continue  # blank or malformed line
            key = key.strip()
            val = val.strip()
            # Dispatch on the key itself; matching against the whole line
            # would misroute e.g. "acdd_source" or values containing a keyword.
            if key == "station_id":
                meta_data.station_id = val
            elif key in ("latitude", "longitude", "altitude", "easting", "northing"):
                setattr(meta_data.location, key, float(val))
            elif key == "epsg":
                meta_data.location.epsg = int(val)
            elif key == "nodata":
                meta_data.nodata = float(val)
            elif key == "fields":
                meta_data.fields = self.__parseFields(val.split())
            elif key == "station_name":
                optional_meta_data.station_name = val
            elif key == "tz":
                optional_meta_data.tz = int(val)
            elif key in ("slope_angle", "slope_azi"):
                setattr(optional_meta_data, key, float(val))
            elif key in ("creation", "source", "comment"):
                setattr(optional_meta_data, key, val)
            elif key in ("units_offset", "units_multiplier"):
                setattr(optional_meta_data, key, list(map(float, val.split())))
            elif "acdd" in key:
                self.acdd_meta_data.set_attribute(key, val)
            else:
                self.other_meta_data[key] = val
    unknown_metadata = list(self.other_meta_data)
    if unknown_metadata:
        if self.fun:
            print(bad())
            print(empire())
        print(f"Unknown metadata: {unknown_metadata}")
    meta_data.checkValidity(self.fun)
    self.optional_meta_data = optional_meta_data
    return meta_data


# --- next entry in the API listing (was fused onto the end of this source line) ---
# def setData(self, data: pandas.core.frame.DataFrame, colnames: Optional[List[str]] = None) ‑> None-
Sets the data for the SMET object.
Args
data:pd.DataFrame- The data to be set.
colnames:Optional[List[str]], optional- A list of column names to be used as fields in the MetaData. Defaults to None.
Raises
ValueError- If `colnames` is not provided when the first field in the data is "0" or 0.
Returns
None
Expand source code
def setData(self, data: pd.DataFrame, colnames: Optional[List[str]] = None) -> None:
    """Store ``data`` on the object and derive the metadata fields from it.

    When the first column label is "0" or 0 (a frame without meaningful
    column names), ``colnames`` supplies the fields; otherwise the frame's
    own column labels are used.

    Args:
        data (pd.DataFrame): The data to be set.
        colnames (Optional[List[str]], optional): Column names to be used as
            fields in the MetaData. Defaults to None.

    Raises:
        ValueError: If `colnames` is not provided when the first field in the
            data is "0" or 0.

    Returns:
        None
    """
    self.data = data
    columns = data.columns.to_list()
    unnamed = columns[0] == "0" or columns[0] == 0
    if unnamed and not colnames:
        raise ValueError("Please provide a meaningful list of fields in the MetaData")
    chosen = colnames if unnamed else columns
    print("Using fields: ", chosen)
    self.meta_data.fields = chosen


# --- next entry in the API listing (was fused onto the end of this source line) ---
# def setIdentifier(self, version=1.1) ‑> None-
Set the identifier for the SMET file.
The identifier is the first line in a SMET file and is used to identify the file format. It is generated based on the provided version.
Args
version:float, optional- The version number of the SMET file. Default is 1.1.
Returns
None
Expand source code
def setIdentifier(self, version=1.1) -> None:
    """Store the identifier line of a SMET file for the given version.

    The identifier is the first line in a SMET file. The header-line counter
    ``num_header_lines`` is increased by one on every call.

    Args:
        version (float, optional): The version number of the SMET file.
            Default is 1.1.

    Returns:
        None
    """
    self.num_header_lines = self.num_header_lines + 1
    identifier = f"SMET {version} ASCII"
    self.identifier = identifier


# --- next entry in the API listing (was fused onto the end of this source line) ---
# def setMetaData(self, key: str, value)-
Sets the metadata attribute with the given key to the specified value. If the metadata attribute does not exist, it is added to the other_meta_data dictionary.
Args
key:str- The key of the metadata attribute.
value- The value to set for the metadata attribute.
Necessary Metadata: station_id: str location: Location nodata: float fields: List[str]
Supported ACDD Keys: title summary keywords conventions id naming_authority source history comment date_created creator_name creator_url creator_email institution processing_level project geospatial_bounds geospatial_lat_min geospatial_lat_max geospatial_lon_min geospatial_lon_max geospatial_vertical_min geospatial_vertical_max time_coverage_start time_coverage_end Wigos ID
Returns
None
Expand source code
def setMetaData(self, key: str, value):
    """Store ``value`` under ``key`` in the matching metadata group.

    Lookup order: an existing attribute of the mandatory metadata, then ACDD
    metadata (key contains "acdd" or is a known ACDD attribute), then an
    existing attribute of the optional metadata. Anything else is added to
    ``other_meta_data``. The header-line counter is increased by one.

    Args:
        key (str): The key of the metadata attribute.
        value: The value to set for the metadata attribute.

    Necessary Metadata:
        station_id: str
        location: Location
        nodata: float
        fields: List[str]

    Supported ACDD Keys:
        title summary keywords conventions id naming_authority source history
        comment date_created creator_name creator_url creator_email
        institution processing_level project geospatial_bounds
        geospatial_lat_min geospatial_lat_max geospatial_lon_min
        geospatial_lon_max geospatial_vertical_min geospatial_vertical_max
        time_coverage_start time_coverage_end Wigos ID

    Returns:
        None
    """
    if hasattr(self.meta_data, key):
        setattr(self.meta_data, key, value)
    else:
        is_acdd = "acdd" in key or (key in self.acdd_meta_data.attributes.keys())
        if is_acdd:
            print(f"Adding {key} to acdd_meta_data.")
            self.acdd_meta_data.set_attribute(key, value)
        elif hasattr(self.optional_meta_data, key):
            setattr(self.optional_meta_data, key, value)
        else:
            print(
                f"MetaData does not have attribute {key}. Adding to other_meta_data."
            )
            self.other_meta_data[key] = value
    self.num_header_lines = self.num_header_lines + 1


# --- next entry in the API listing (was fused onto the end of this source line) ---
# def toDf(self) ‑> pandas.core.frame.DataFrame-
Return the data as a pandas DataFrame.
This method returns the data stored in the instance as a pandas DataFrame.
Returns
pd.DataFrame- The data as a pandas DataFrame.
Examples
>>> instance = pysmet.read("path/to/file.smet")
>>> df = instance.toDf()
Expand source code
def toDf(self) -> pd.DataFrame:
    """Expose the table held by this instance as a pandas DataFrame.

    The stored frame itself is handed back (no copy is made).

    Returns:
        pd.DataFrame: The data as a pandas DataFrame.

    Examples:
        >>> instance = pysmet.read("path/to/file.smet")
        >>> df = instance.toDf()
    """
    frame = self.data
    return frame


# Rendered signature of the next documented member:
#   def toNumpy(self) -> numpy.ndarray
Return a Numpy array of the data.
This method uses the pandas DataFrame's to_numpy() method to convert the DataFrame to a Numpy array.
Returns
np.ndarray- The data as a Numpy array.
Examples
>>> instance = pysmet.read("path/to/file.smet")
>>> array = instance.toNumpy()
Expand source code
def toNumpy(self) -> np.ndarray:
    """Convert the stored table to a Numpy array.

    The conversion is delegated to the DataFrame's own ``to_numpy()``
    method.

    Returns:
        np.ndarray: The data as a Numpy array.

    Examples:
        >>> instance = pysmet.read("path/to/file.smet")
        >>> array = instance.toNumpy()
    """
    table = self.data
    return table.to_numpy()


# Rendered signature of the next documented member:
#   def toXarray(self, time_name: str = 'time') -> xarray.core.dataarray.DataArray
Return the data as an xarray DataArray.
This method returns the data stored in the instance as an xarray DataArray.
Returns
xr.DataArray- The data as an xarray DataArray.
Examples
>>> instance = pysmet.read("path/to/file.smet")
>>> ds = instance.toXarray()
Expand source code
def toXarray(self, time_name: str = "time") -> xr.DataArray:
    """Return the data as a two-dimensional xarray DataArray.

    The ``timestamp`` column supplies the coordinate of the first dimension
    (named ``time_name``). Every remaining column becomes one entry along
    the second dimension, ``"variables"``, labelled with its column name.

    Args:
        time_name (str, optional): Name of the time dimension and of its
            coordinate. Defaults to "time".

    Returns:
        xr.DataArray: The data as an xarray DataArray with dimensions
        ``(time_name, "variables")``.

    Raises:
        KeyError: If the data has no ``timestamp`` column.

    Examples:
        >>> instance = pysmet.read("path/to/file.smet")
        >>> da = instance.toXarray()
    """
    df = self.data.copy()
    # Drop the time column once and reuse the result for both the values
    # and the labels of the "variables" dimension.
    variables = df.drop(columns="timestamp")
    return xr.DataArray(
        variables.values,  # Data values
        dims=[time_name, "variables"],  # Names of dimensions
        coords={
            time_name: df["timestamp"].values,  # 'timestamp' as coordinate
            "variables": variables.columns,  # Column names as coordinate
        },
    )


# Rendered signature of the next documented member:
#   def write(self, output_filename: str = None)
Writes the SMET file to disk.
This method writes the SMET file with the given metadata. If the 'timestamp' column exists in the data, it is formatted as an ISO 8601 string.
Args
output_filename:str, optional- The path to the output file. If not provided, the original filename is used.
Returns
None
Expand source code
def write(self, output_filename: str = None):
    """Writes the SMET file to disk.

    The file consists of the identifier line, a ``[HEADER]`` section with
    the mandatory, optional, ACDD and other metadata, and a ``[DATA]``
    section with one tab-separated line per row.

    Before writing:

    * if ``meta_data.fields`` differs from the data columns, a notice is
      printed and the fields are replaced by the data columns;
    * missing values are replaced by ``meta_data.nodata`` (whenever it is
      not ``None``, so a nodata value of ``0`` is honoured);
    * a ``timestamp`` column is formatted as ISO 8601 strings; if there is
      no such column but the index is a datetime index, the index is used
      instead.

    Args:
        output_filename (str, optional): The path to the output file.
            If not provided, the original filename is used.

    Returns:
        None
    """
    output_filename = output_filename if output_filename else self.filename

    # Check if fields in MetaData match columns in data
    data_columns = self.data.columns.to_list()
    if self.meta_data.fields != data_columns:
        print("Fields in MetaData do not match columns in data. Using data columns.")
        print("MetaData fields: ", self.meta_data.fields)
        print("Data columns: ", data_columns)
        self.meta_data.fields = data_columns

    self.meta_data.checkValidity()

    out_data = self.data.copy()
    # Compare against None: a nodata value of 0 is falsy but still valid.
    if self.meta_data.nodata is not None:
        out_data = out_data.fillna(self.meta_data.nodata)

    if "timestamp" in out_data.columns:
        stamps = out_data["timestamp"]
        if pd.api.types.is_datetime64_any_dtype(stamps.dtype):
            out_data["timestamp"] = stamps.apply(lambda x: x.isoformat())
        elif pd.api.types.is_string_dtype(stamps.dtype):
            out_data["timestamp"] = pd.to_datetime(stamps, errors="coerce").apply(
                lambda x: x.isoformat() if pd.notnull(x) else "Invalid timestamp"
            )
            if out_data["timestamp"].str.contains("Invalid timestamp").any():
                print("Some timestamps could not be converted to datetime.")
        else:
            print("The 'timestamp' column is neither in string nor datetime format.")
    elif pd.api.types.is_datetime64_any_dtype(out_data.index):
        # NOTE(review): the column is appended after the existing ones and is
        # not part of meta_data.fields, which was fixed above - confirm that
        # this layout is what readers of the file expect.
        print("Using datetime index as 'timestamp' column.")
        out_data["timestamp"] = out_data.index.to_series().apply(lambda x: x.isoformat())
    else:
        print("Info: You are not using any timestamp information.")

    # Assemble the complete header before touching the file, so a failing
    # metadata accessor cannot leave a truncated file behind.
    header = [self.identifier + "\n", "[HEADER]\n"]
    header.extend(
        f"{key} = {value}\n" for key, value in self.meta_data.combined_dict.items()
    )
    # Write optional metadata if it exists
    if self.optional_meta_data:
        header.extend(
            f"{key} = {value}\n"
            for key, value in self.optional_meta_data.adjusted_dict.items()
        )
    # Write ACDD metadata if it exists
    if self.acdd_meta_data:
        header.extend(
            f"{key} = {value}\n"
            for key, value in self.acdd_meta_data.adjusted_dict.items()
        )
    # Write any other metadata
    if self.other_meta_data:
        header.extend(f"{key} = {value}\n" for key, value in self.other_meta_data.items())
    header.append("[DATA]\n")

    # Column widths do not change from row to row: the timestamp field is
    # padded to 20 characters and every other field to 10. Without a
    # 'timestamp' column in the stored data, every field is padded to 20.
    if "timestamp" in self.data.columns:
        widths = [20 if field == "timestamp" else 10 for field in self.data.columns]
    else:
        widths = [20] * len(out_data.columns)

    with open(output_filename, "w") as f:
        f.writelines(header)
        for row in out_data.itertuples(index=False):
            f.write(
                "\t".join(f"{str(item):<{width}}" for item, width in zip(row, widths))
                + "\n"
            )