Commit 0a3a90e7 authored by Markus Gaug's avatar Markus Gaug
Browse files

Ferrans updates

parent 6af19899
Pipeline #6760 failed with stages
in 4 minutes and 23 seconds
# Ignore compiled python files
*.pyc
*.zip
# Ignore emacs autosave files
*~
......
from abc import ABC, abstractmethod
class AtmosphericDataReader(ABC):
    """
    Abstract base class for every atmospheric data file reader.

    Attributes
    ----------
    name : str
        General name of the reader/file format (e.g. "ecwmf").
    filenames : list
        Names of the files to read from.
    pressure_levels : int
        Number of pressure levels per block of data in the file.
    atmospheric_data : dict
        Contains all the data from the read file(s).
        keys: datetime.datetime
        values: tuple of header info and block info

    Constants
    ---------
    Every subclass will need to have this information at least, even if
    some columns are full of zeros.

    HEADER_NAMES : list of str
        Column names for the header data.
    BLOCK_NAMES : list of str
        Column names for the hour-block data.
    """

    HEADER_NAMES = [
        "press_at_surface", "P",  # P is here pressure reduced to mean sea level
        "precipitation",          # accumulated precipitation (6 h accumulation) in m
        "moment_u", "moment_v",   # u,v-component of momentum flux (3- or 6-h average)
        "heat_flux", "radiation_flux",  # sensible heat net flux at surface, downward short wave radiation flux (3- or 6-h average)
        "rh",                     # relative humidity at 2 m AGL
        "u", "v",                 # U,V-component of wind at 10 m AGL
        "T",                      # temperature at 2 m AGL
        "cloud_cover",            # total cloud cover (3- or 6-h average)
        "alt",                    # geopotential height (geopotential meters)
    ]
    BLOCK_NAMES = [
        "geopotential_height",  # geopotential height (geopotential meters)
        "T_average",            # temperature (K)
        "u", "v",               # U,V-component of wind with respect to grid (m/s)
        "w",                    # pressure vertical velocity
        "rh",                   # relative humidity
    ]

    # Instance attributes every concrete subclass must set.
    name: str
    # Fixed: previously annotated (and documented) as a single ``str``, but
    # readers such as ECWMFReader store a list of file names here.
    filenames: list
    pressure_levels: int
    atmospheric_data: dict

    @abstractmethod
    def _read_data(self) -> None:
        """Read the input file(s) and populate ``atmospheric_data``."""
        pass
\ No newline at end of file
This diff is collapsed.
import datetime
import pandas as pd
from atmospheric_data_reader import AtmosphericDataReader
class ECWMFReaderError(Exception):
    """Raised when ECWMFReader cannot open or parse an input file."""
class ECWMFReader(AtmosphericDataReader):
    """
    Custom ECWMF format parser.

    Attributes
    ----------
    name : str
        Reader name, always "ecwmf".
    filenames : list
        Names of the ECWMF text files to read.
    pressure_levels : int
        Number of pressure levels of data for an hour block.
    atmospheric_data : dict
        key: datetime.datetime
            date Y/M/D/H of the hour block
        value: tuple (header, block)
            header is an empty DataFrame carrying HEADER_NAMES columns;
            block is a pandas DataFrame with ``pressure_levels`` rows of
            per-level data, ordered so that height increases with row index.

    Raises
    ------
    ECWMFReaderError
        If ``filenames`` is not a list, or a file cannot be read/parsed.
    """

    def __init__(self, filenames: list, pressure_levels: int = 37) -> None:
        """Instantiate class attributes and read the ECWMF files."""
        if not isinstance(filenames, list):
            raise ECWMFReaderError("Pass a list of filenames to open")
        self.name = "ecwmf"
        self.filenames = filenames
        self.pressure_levels = pressure_levels
        self.atmospheric_data = {}
        self._read_data()

    def _read_data(self) -> None:
        """Read data from each ECWMF file and save it into ``atmospheric_data``."""
        for filename in self.filenames:
            try:
                # The first line holds the column names, optionally prefixed
                # by '#'.  Open read-only ('r', not 'r+'): the file is never
                # written, and this also works on read-only files.
                with open(filename, 'r') as f:
                    first_line = f.readline().replace(',', '').split(' ')
                if '#' in first_line:
                    first_line = first_line[1:]
                # raw string avoids the invalid-escape warning for '\s+'
                df = pd.read_csv(filename, skiprows=1, names=first_line,
                                 delimiter=r'\s+')
            except Exception as e:
                # chain the original exception so the traceback is preserved
                raise ECWMFReaderError(e) from e
            for i in range(0, len(df), self.pressure_levels):
                # Parse columns 1 to 4 (year, month, day, hour) as a datetime.
                date = datetime.datetime.strptime(
                    "/".join(df.iloc[i, 1:5].astype('uint32')
                             .to_string(index=False).split()),
                    '%Y/%m/%d/%H')
                # Create an empty header frame with the expected columns.
                header = pd.DataFrame(columns=self.HEADER_NAMES)
                # Add any block columns missing from the file, filled with 0.
                for x in self.BLOCK_NAMES:
                    if x not in df.columns:
                        df[x] = 0
                # Save the block in reverse order so that height increases
                # with the row index; .copy() decouples it from ``df`` and
                # avoids pandas' SettingWithCopyWarning on the next line.
                block = df.iloc[i:i + self.pressure_levels, 5:].iloc[::-1].copy()
                # Convert height in m to km.
                # NOTE(review): assumes the input file provides a 'height'
                # column -- it is not in BLOCK_NAMES, so a file without it
                # raises a KeyError here; confirm against real input files.
                block['height'] = block['height'].div(1000)
                self.atmospheric_data[date] = (header, block)
\ No newline at end of file
{
"Linear Approx": [
null,
[
5000,
2000
],
[
-Infinity,
Infinity
]
],
"Quadratic Approx": [
[
0,
-0.95,
-0.17
],
[
Infinity,
3000
],
[
-Infinity,
Infinity
]
],
"Cubic Approx": [
[],
[
Infinity,
3000
],
[
-Infinity,
Infinity
]
],
"Quartic Approx": [
[],
[
Infinity,
3000
],
[
-Infinity,
Infinity
]
],
"Quintic Approx": [
[],
[
Infinity,
3000
],
[
-Infinity,
Infinity
]
],
"Quadratic Sine Approx": [
[
0.2,
0.3,
1.0
],
[
21000,
3000
],
[
[
-Infinity,
-Infinity,
-0.45,
-25,
0.2,
-30
],
[
Infinity,
Infinity,
0.40,
35,
2.2,
15
]
]
]
}
\ No newline at end of file
"""
Reads 1-degree binned Global Data Assimilation System (GDAS1) data in
A description of the file format can be found here:
https://www.ready.noaa.gov/gdas1.php
The data can be downloaded from the ARL file server here:
ftp://arlftp.arlhq.noaa.gov/pub/archives/gdas1/
.. moduleauthor:: Scott Griffiths <sgriffiths@ifae.es>
"""
from __future__ import division, print_function
from fortran_style_parser import parse_string
from collections import namedtuple
from datetime import datetime
import numpy as np
import struct
# Field names for the 50-byte ASCII label that precedes every GDAS record.
GDASLabelFields = ['year', 'month', 'day', 'hour', 'forecast', 'level', 'grid',
                   'variable', 'exponent', 'precision', 'value11']
GDASLabelBase = namedtuple('GDASLabel', GDASLabelFields)


class GDASLabel(GDASLabelBase):
    """
    A namedtuple holding the contents of one GDAS record label.

    parameters
    ----------
    year, month, day, hour : int
        Greenwich date for which the data are valid.
    forecast : int
        Hours forecast; zero for analysis, -1 for missing data.
    level : int
        Level from the surface (level 0) up.
    grid : int
        Grid identification (should be 99 for GDAS data).
    variable : string
        Variable label (e.g., INDX, PRSS, TEMP, RELH).
    exponent : float
        Scaling exponent needed for unpacking.
    precision : float
        Precision of the unpacked data.
    value11 : float
        Unpacked data value at grid point (1, 1).
    """
    __slots__ = ()

    def __repr__(self):
        # Render every field as name=value in declaration order; this yields
        # exactly the same string as spelling all eleven fields out by hand.
        pairs = ('{0}={1!r}'.format(name, value)
                 for name, value in zip(self._fields, self))
        return 'GDASLabel({0})'.format(', '.join(pairs))
# Field names for the 108-byte fixed header of a GDAS INDX record.
GDASHeaderFields = ['data_source', 'forecast_hour', 'forecast_min',
                    'pole_lat', 'pole_lon', 'ref_lat', 'ref_lon',
                    'grid_size', 'orientation', 'cone_angle',
                    'sync_xp', 'sync_yp', 'sync_lat', 'sync_lon', 'reserved',
                    'nx', 'ny', 'nlevels', 'k_flag', 'index_record_length']
GDASHeaderBase = namedtuple('GDASHeader', GDASHeaderFields)


class GDASHeader(GDASHeaderBase):
    """
    A namedtuple holding the extended header of a GDAS INDX record.

    parameters
    ----------
    data_source : string
        Four-character identifier of the source of the meteorological
        data (e.g., 'GDAS').
    forecast_hour, forecast_min : int
        Hour and minute time stamp of the forecast.
    pole_lat, pole_lon : float
        Pole position of the grid projection. Most projections are defined
        at either +90 or -90 depending upon the hemisphere; the longitude
        is the point 180 degrees from which the projection is cut.
    ref_lat, ref_lon : float
        Reference position at which the grid spacing is defined.
    grid_size : float
        Grid spacing in km at the reference position.
    orientation : float
        Grid orientation, i.e. the longitude of the meridian which is
        parallel to the up-down direction of the grid.
    cone_angle : float
        Angle between the axis and the surface of the cone. For regular
        projections it equals the latitude at which the grid is tangent
        to the earth's surface: 90 or -90 for a polar stereographic grid,
        0 for Mercator, in between for Lambert Conformal. An oblique
        stereographic projection has a cone angle of 90.
    sync_xp, sync_yp : float
        Grid position equated with the earth position given in sync_lat
        and sync_lon. Any position is acceptable; it need not even be on
        the grid.
    sync_lat, sync_lon : float
        Earth position matching (sync_xp, sync_yp).
    reserved : float
        Currently unused; reserved for future use.
    nx, ny : int
        Number of grid points in each direction.
    nlevels : int
        Number of levels in the vertical, including the surface level.
    k_flag : int
        Integer identifying the vertical coordinate system:
        1 = pressure sigma (fraction)
        2 = pressure absolute (mb)
        3 = terrain sigma (fraction)
        4 = hybrid sigma (mb - offset*fraction)
    index_record_length : int
        Length of the index record (excluding the initial 50-byte label).
    """
    __slots__ = ()

    def __repr__(self):
        # Render name=value for each field in declaration order; the output
        # is identical to listing all twenty fields by hand.
        pairs = ('{0}={1!r}'.format(name, value)
                 for name, value in zip(self._fields, self))
        return 'GDASHeader({0})'.format(', '.join(pairs))
class GDASReader:
    """
    Reader for packed GDAS1 (ARL format) meteorology files.

    On construction the INDX label, the fixed-size index-record header and
    the variable-length loop section of the index record are parsed; after
    that, individual data records can be read and unpacked on demand.
    """

    label_length = 50    # length of the ASCII label in bytes; precedes every record
    header_length = 108  # length of the INDX record's fixed header in bytes

    def __init__(self, fname):
        """Open *fname*, parse the index record and cache the file layout."""
        self.fname = fname
        with open(fname, 'rb') as f:
            self._read_index_label(f)
            self._read_header(f)
            self._read_index_record(f)
        self._compute_record_cycle()

    def _read_index_label(self, fstream):
        """
        Read the INDX file label, which is contained in the first 50 bytes.

        Format example:
            17 322 0 0 099INDX 0 0.0000000E+00 0.0000000E+00

        Raises an Exception for old-format files whose first record is not
        an 'INDX' record.
        """
        label = fstream.read(self.label_length)
        # parse_string needs a text string; the binary stream yields bytes
        label = str(label, "utf-8")
        year, month, day, hour, ifc, kvar = parse_string(label, '5I2,4X,A4')
        if kvar != 'INDX':
            raise Exception("Old format GDAS file. Header:\n" + label)
        # NOTE(review): the file stores a 2-digit year, so this datetime has
        # e.g. year 17 rather than 2017 -- confirm whether a century offset
        # should be applied.
        self.date = datetime(year, month, day, hour)

    def _read_header(self, fstream):
        """
        Read the index record header (the first 108 bytes of the index
        record) and derive the total per-record length in bytes.
        """
        data = fstream.read(self.header_length)
        # NOTE(review): unlike _read_index_label, the raw bytes are handed
        # straight to parse_string here -- confirm parse_string accepts both
        # str and bytes input.
        parsed_data = parse_string(data, 'A4,I3,I2,12F7.0,3I3,I2,I4')
        self.header = GDASHeader(*parsed_data)
        nxy = self.header.nx * self.header.ny
        self.record_length = self.label_length + nxy

    def _read_index_record(self, fstream):
        """
        Reads the loop section of the index record and stores the results in
        the following dictionary fields:

        level_height_dict:
            Correlates the level number with the "height" of the pressure
            surface, measured in hPa. Larger level numbers are at lower
            pressures, and therefore occur at higher altitudes.
        level_variable_dict:
            Contains the four-letter variable labels for the variables
            that are measured at each level.
        variable_checksum_dict:
            Contains the rotating checksum for each (level, variable) tuple.
        variable_order_list:
            (level, variable) tuples in the order their records appear in
            the file, starting with the index record itself.
        """
        self.variable_order_list = [(0, 'INDX')]
        self.level_height_dict = {}
        self.level_variable_dict = {}
        self.variable_checksum_dict = {}
        length = self.header.index_record_length - self.header_length
        index_record = fstream.read(length)
        # Walk the buffer with an offset instead of repeatedly re-slicing
        # (re-slicing copied the remainder of the buffer on every field).
        offset = 0
        for level in range(self.header.nlevels):
            height, nvars = parse_string(
                index_record[offset:offset + 8], 'F6,I2')
            offset += 8
            self.level_height_dict[level] = height
            self.level_variable_dict[level] = []
            for _ in range(nvars):
                variable, checksum = parse_string(
                    index_record[offset:offset + 8], 'A4,I3,1X')
                offset += 8
                self.variable_order_list.append((level, variable))
                self.level_variable_dict[level].append(variable)
                self.variable_checksum_dict[(level, variable)] = checksum

    def _compute_record_cycle(self):
        """Count the records per time step (one per variable per level)."""
        # dict.iteritems() was Python 2 only; use values() so this also runs
        # under Python 3, which the rest of the class already targets.
        self.record_cycle = sum(
            len(variables) for variables in self.level_variable_dict.values())

    def read_record(self, rec_number):
        """
        Read record *rec_number* (0-based) from the file.

        Returns None past the end of the file, the GDASLabel alone for
        index records, and a (GDASLabel, ndarray) tuple for data records.
        """
        with open(self.fname, 'rb') as f:
            f.seek(rec_number * self.record_length, 0)
            record = f.read(self.record_length)
        if not record:
            return None
        record_label = record[:self.label_length]
        record_body = record[self.label_length:]
        label = self.parse_label(record_label)
        if label.variable == 'INDX':
            return label
        data = self.unpack_record(record_body, label.exponent, label.value11)
        # self.rotating_checksum(label, record_body)
        return label, data

    def iter_record(self, start=0, cycle_length=1):
        """Yield records starting at *start*, stepping by *cycle_length*."""
        index = start
        while True:
            record = self.read_record(index)
            if not record:
                break
            index += cycle_length
            yield record

    def parse_label(self, label):
        """
        Read the record label, which is ASCII text contained in the first 50
        bytes of each GDAS record.

        See the GDASLabel docstring for details of what is contained in the
        label.

        Label example:
            17 322 0 0 099INDX 0 0.0000000E+00 0.0000000E+00
        """
        parsed_data = parse_string(label, '7I2,A4,I4,2E14.7')
        return GDASLabel(*parsed_data)

    def unpack_record(self, record, exponent, first_value):
        """
        Unpacks data contained in a GDAS record and returns a numpy array.

        The data is packed according to the formula:

            Pi,j = (Ri,j - Ri-1,j)*(2**(7-N))

        where Pi,j is the packed data byte, Ri-1,j and Ri,j are unpacked
        data, and N is the exponent. The data is unpacked iteratively using
        first_value as the initial Ri-1,j.
        """
        scale = 2.0 ** (7 - exponent)
        unpacked = np.zeros((self.header.nx, self.header.ny))
        unpacked_prev = first_value
        index = 0
        for j in range(self.header.ny):
            for i in range(self.header.nx):
                # Indexing a bytes object already yields an int on Python 3;
                # the former struct.unpack('B', record[index]) raised a
                # TypeError there.
                packed = record[index] - 127
                unpacked[i, j] = packed / scale + unpacked_prev
                unpacked_prev = unpacked[i, j]
                index += 1
            # each row is unpacked relative to the first value of the
            # previous row
            unpacked_prev = unpacked[0, j]
        return unpacked

    def rotating_checksum(self, label, record):
        """
        Compute the 8-bit rotating checksum of *record* and print it next to
        the value stored in the index record for comparison.
        """
        checksum = 0
        for byte in record:
            # iterating bytes yields ints on Python 3; the former
            # struct.unpack('B', byte) call raised a TypeError there
            checksum += byte
            if checksum >= 256:
                checksum -= 255
        header_cksum = self.variable_checksum_dict[(label.level, label.variable)]
        print("Calculated checksum:", checksum, " From header:", header_cksum)
        return checksum

    def get_datetimes(self):
        """Return the datetime of every index record in the file."""
        cyclen = len(self.variable_order_list)
        # Stepping by the full cycle length visits only the INDX records,
        # whose first four label fields are year, month, day, hour.
        return [datetime(*x[:4]) for x in self.iter_record(cycle_length=cyclen)]

    def get_variable_at_level(self, variable, level):
        """Iterate over all records of *variable* at *level* (incomplete)."""
        cycle_start = self.variable_order_list.index((level, variable))
        cycle_length = len(self.variable_order_list)
        for label, data in self.iter_record(cycle_start, cycle_length):
            time = datetime(*label[:4])
            # TODO: add ability to get list of variables; return pandas table
if __name__ == "__main__":
    # Smoke test: open a sample GDAS file and dump its layout and time steps.
    reader = GDASReader("../gdas/gdas1.nov15.w3_magic1")
    opened = reader.date
    print('Opened file: {year} {month} {day} {hour}'.format(
        year=opened.year, month=opened.month,
        day=opened.day, hour=opened.hour))
    print('Grid size and lrec:', reader.header.nx, reader.header.ny,
          reader.record_length)
    print('Header record size:', reader.header.index_record_length)
    for stamp in reader.get_datetimes():
        print(stamp.isoformat())
"""
Reads 1-degree binned Global Data Assimilation System (GDAS1) data in
A description of the file format can be found here:
https://www.ready.noaa.gov/gdas1.php
The data can be downloaded from the ARL file server here:
ftp://arlftp.arlhq.noaa.gov/pub/archives/gdas1/
.. moduleauthor:: Scott Griffiths <sgriffiths@ifae.es>
"""
from __future__ import print_function, division
from fortran_style_parser import parse_string
from collections import namedtuple
from datetime import datetime
import numpy as np
import struct
import os
# Field names for the 50-byte ASCII label that precedes every GDAS record.
GDASLabelFields = [
    'year', 'month', 'day', 'hour',
    'forecast', 'level', 'grid', 'variable',
    'exponent', 'precision', 'value11',
]
GDASLabelBase = namedtuple('GDASLabel', GDASLabelFields)
class GDASLabel(GDASLabelBase):
"""
A namedtuple to contain data from GDAS record labels.