Commit 8916f146 authored by Markus Gaug's avatar Markus Gaug
Browse files

Markus updates

parent 0a3a90e7
Pipeline #6762 failed with stages
in 4 minutes and 4 seconds
{ "NUMBER_CHANNELS_ZERO": 10 ,
"ALL_CHANNELS_ZERO": 11 ,
"ALL_PC_CHANNELS_ZERO": 12 ,
"CHANNEL_NOT_CONNECTED": 13 ,
"INEFFICIENT_PC_CHANNEL": 14 ,
"OSCILLATING_CHANNEL": 15
}
def savitzky_golay(y, window_size, order, deriv=0, rate=1):
    """Smooth (and optionally differentiate) data with a Savitzky-Golay filter.

    The Savitzky-Golay filter removes high frequency noise from data.
    It has the advantage of preserving the original shape and
    features of the signal better than other types of filtering
    approaches, such as moving averages techniques.

    Parameters
    ----------
    y : array_like, shape (N,)
        the values of the time history of the signal.
    window_size : int
        the length of the window. Must be an odd integer number.
    order : int
        the order of the polynomial used in the filtering.
        Must be less than `window_size` - 1.
    deriv : int
        the order of the derivative to compute (default = 0 means only smoothing)
    rate : float
        sampling rate used to scale the derivative coefficients (default = 1).

    Returns
    -------
    ys : ndarray, shape (N)
        the smoothed signal (or its n-th derivative).

    Raises
    ------
    ValueError
        if `window_size` or `order` cannot be converted to int.
    TypeError
        if `window_size` is not a positive odd number, or is too small
        for the requested polynomial order.
        (TypeError kept for backward compatibility with existing callers.)

    Notes
    -----
    The Savitzky-Golay is a type of low-pass filter, particularly
    suited for smoothing noisy data. The main idea behind this
    approach is to make for each point a least-square fit with a
    polynomial of high order over an odd-sized window centered at
    the point.

    Examples
    --------
    t = np.linspace(-4, 4, 500)
    y = np.exp( -t**2 ) + np.random.normal(0, 0.05, t.shape)
    ysg = savitzky_golay(y, window_size=31, order=4)
    import matplotlib.pyplot as plt
    plt.plot(t, y, label='Noisy signal')
    plt.plot(t, np.exp(-t**2), 'k', lw=1.5, label='Original signal')
    plt.plot(t, ysg, 'r', label='Filtered signal')
    plt.legend()
    plt.show()

    References
    ----------
    .. [1] A. Savitzky, M. J. E. Golay, Smoothing and Differentiation of
       Data by Simplified Least Squares Procedures. Analytical
       Chemistry, 1964, 36 (8), pp 1627-1639.
    .. [2] Numerical Recipes 3rd Edition: The Art of Scientific Computing
       W.H. Press, S.A. Teukolsky, W.T. Vetterling, B.P. Flannery
       Cambridge University Press ISBN-13: 9780521880688
    """
    import numpy as np
    from math import factorial

    try:
        # `np.int` was removed from NumPy (>= 1.24); the builtin int is the
        # correct cast here.  The original also used Python-2 exception
        # syntax (`except ValueError, msg`), which is a SyntaxError on Py3.
        window_size = np.abs(int(window_size))
        order = np.abs(int(order))
    except ValueError:
        raise ValueError("window_size and order have to be of type int")
    if window_size % 2 != 1 or window_size < 1:
        raise TypeError("window_size size must be a positive odd number")
    if window_size < order + 2:
        raise TypeError("window_size is too small for the polynomials order")
    order_range = range(order + 1)
    half_window = (window_size - 1) // 2
    # precompute coefficients: Vandermonde matrix of the window offsets.
    # np.array replaces the deprecated np.mat; pinv of a 2-D array already
    # returns an ndarray, so the matrix-only `.A` accessor is not needed.
    b = np.array([[k**i for i in order_range]
                  for k in range(-half_window, half_window + 1)])
    m = np.linalg.pinv(b)[deriv] * rate**deriv * factorial(deriv)
    # pad the signal at the extremes with
    # values taken from the signal itself
    firstvals = y[0] - np.abs(y[1:half_window+1][::-1] - y[0])
    lastvals = y[-1] + np.abs(y[-half_window-1:-1][::-1] - y[-1])
    y = np.concatenate((firstvals, y, lastvals))
    return np.convolve(m[::-1], y, mode='valid')
......@@ -4,7 +4,8 @@ import matplotlib.pyplot as plt
from datetime import datetime
import struct
import sys
from scipy.optimize import curve_fit
from scipy.stats import trimboth
class LICEL:
def __init__(self, fname):
......@@ -92,11 +93,13 @@ class LICEL:
recorders used to make the measurement.
"""
h = list(map(int, header_str.split()))
assert len(h) == 5
# assert len(h) == 5
self.laser1 = {'shots': h[0], 'freq': h[1]}
self.laser2 = {'shots': h[2], 'freq': h[3]}
self.num_channels = h[4]
def print_header(self):
"""
Prints the header information contained in the first three lines
......@@ -118,6 +121,10 @@ class LICEL:
print("Latitude:", self.latitude)
print("Zenith:", self.zenith_ang)
def get_day(self):
return start_datetime.day
def _read_channel_headers(self, fin):
"""
Read the header line corresponding to each channel.
......@@ -133,11 +140,13 @@ class LICEL:
assert empty_line.isspace()
return channel_headers
class LICELChannel:
LICEL_SAMPLING_FREQ = 20 # MHz
_lidar_type_dict = {'1': 'Elastic', '2': 'Raman'}
_polarization_dict = {'o': 'None', 'p': 'Parallel', 's': 'Perpendicular'}
_dataset_dict = {'BT': 'Analog', 'BC': 'Photon Counting'}
_dataset_dict = {'BC': 'Analog', 'BT': 'Photon Counting'}
def __init__(self, header_str, fstream):
"""
......@@ -152,6 +161,18 @@ class LICEL:
self._parse_header(header_str)
self._read_data(fstream)
"""
Defaults:
---------
offset_range : Default offset range (first 3 bins)
bg_range: Default background range (last 500 bins)
"""
self.offset_range = 6
self.bg_range = 500
if self.analog is False:
self._apply_pulse_pileup_correction()
def _parse_header(self, header_str):
"""
Parses a line of LICEL channel header text.
......@@ -182,6 +203,7 @@ class LICEL:
* PMT high-voltage field is only used when using a LICEL PMT power supply.
* Device identifier (BT/BC) is equivalent to data set type flag (analog/photon counting).
"""
header = header_str.split()
assert 16 == len(header)
self.active = bool(int(header[0]))
......@@ -200,10 +222,12 @@ class LICEL:
self.num_shots = int(header[13])
self.adc_discrim = float(header[14])
self.dataset_type = self._dataset_dict[header[15][:2]]
if self.dataset_type == 'Analog':
assert self.analog is True
elif self.dataset_type == 'Photon Counting':
assert self.analog is False
#if self.dataset_type == 'Analog':
# assert self.analog is True
#self.analog = True
#elif self.dataset_type == 'Photon Counting':
# assert self.analog is False
#self.analog = False
self.address = int(header[15][-1], base=16)
def print_header(self):
......@@ -237,19 +261,243 @@ class LICEL:
"""
if self.active is False:
return
data_bytes = fin.read(4*self.num_data) # 4-byte integers
raw_data = struct.unpack(str(self.num_data) + 'i', data_bytes)
data_bytes = fin.read(4*(self.num_data)) # 4-byte integers
raw_data = struct.unpack(str(self.num_data) + 'I', data_bytes)
empty_line = fin.readline() # read <CRLF> after data set
assert empty_line.isspace()
if (not empty_line.isspace()):
print ('EMPTY LINE EXCEPTED, instead got: ',empty_line)
print ('num_data is: ',self.num_data)
#assert empty_line.isspace()
# convert to physical units
if self.analog:
dScale = self.num_shots*(2**self.adc_bits)/(self.adc_discrim*1000)
# converts raw ADC bits to mV
dScale = self.num_shots*(2**self.adc_bits)/(self.adc_discrim*1000)
else:
# converts raw ADC bits to MHz
dScale = self.num_shots / self.LICEL_SAMPLING_FREQ
#dScale = self.num_shots # convert to sampling rate per 7.5m bin
self.data = np.array(raw_data) / dScale
def plot_data(self, raw=False):
def _apply_pulse_pileup_correction(self,deadtime=0.0062):
"""
Applies the deadtime correction according to the Licel manual "Analog + Photon Counting" from B. Mielke
or: R. K. Newsom, D. D. Turner, B. Mielke, M. Clayton, R. Ferrare, and C. Sivaraman.
Simultaneous analog and photon counting detection for Raman lidar.
Appl. Opt., 48(20):3903–3914, Jul 2009.
Eq.
deadtime: must be in micro-seconds! Default: 0.0062 mus for the Hamamatsu R11920-100 with 1500 V cathode voltage
Data (in physical units) are stored in a numpy array (self.data).
"""
self.data = np.where(self.data * deadtime < 1,self.data/(1-self.data*deadtime),self.data)
def _apply_rebinning(self,x,y,offset,offseterr,rebin_start, rebin_thres, rebin_seq, rebin_fac, re_scale=1):
    """
    Return the offset-corrected and re-binned data.

    Arguments:
    * x: Range
    * y: Offset-corrected raw data
    * offset: Previously applied offset (needed for calculation of uncertainties)
    * offseterr: Stat. uncertainty of offset (needed for calculation of uncertainties)
    * rebin_start: Array index up to which no rebinning is applied
    * rebin_thres: If not zero, applies an additional dynamic rebinning,
                   for data below the given threshold (in pe).
    * rebin_seq: If rebin_thres>0, apply rebinning by increasing bin every rebin_seq bins
    * rebin_fac: If rebin_thres>0, multiply binsize with rebin_fac every rebin_seq bins,
                 otherwise collapse all 'rebin_fac' bins into one
    * re_scale: extra factor applied to num_shots when scaling uncertainties
                (default 1)

    Returns a tuple (xx, yy, yyerr) of the rebinned range axis, rebinned
    signal and its statistical uncertainty, each truncated to the number
    of filled output bins.
    """
    # Output buffers, sized like the input; truncated with np.resize at the end.
    xx = np.zeros(len(x))
    yy = np.zeros(len(y))
    yyerr = np.zeros(len(y))
    scale = self.num_shots * re_scale
    # Bins below rebin_start are copied through unchanged.
    xx[:rebin_start] = x[:rebin_start]
    yy[:rebin_start] = y[:rebin_start]
    yyerr[:rebin_start] = np.sqrt((y[:rebin_start]+offset)/scale + offseterr**2)
    ii = rebin_start          # input read index
    cnt = rebin_start         # output write index
    binsize = rebin_fac       # current number of input bins per output bin
    # "Pure" rebinning: if rebin_seq covers the whole array, the dynamic
    # threshold branch below is never taken and every group of `binsize`
    # bins is simply collapsed into one.
    pure_rebin = (rebin_seq >= len(y))
    current_bins = 0          # input bins accumulated into the pending output bin
    current_seq = 0           # output bins emitted since the last binsize increase
    summ_t = 0                # accumulated range values of the pending bin
    summ_v = 0                # accumulated signal values of the pending bin
    while (ii < len(x)):
        time = x[ii]
        cont = y[ii]
        ii = ii + 1
        # Dynamic mode: a bin above threshold is emitted on its own,
        # after first flushing any partially accumulated output bin.
        if ((not pure_rebin) and (cont / re_scale > rebin_thres)):
            #print ('time =',time, ' cont= ',cont, 'scale=',scale, 'cont/re_scale=',cont/re_scale, 'rebin_thres=',rebin_thres)
            if (current_bins != 0):
                # flush the partial accumulation as one output bin
                xx[cnt] = summ_t / (current_bins)
                yy[cnt] = summ_v / (current_bins)
                yyerr[cnt] = np.sqrt(np.maximum(0.5,(summ_v + current_bins*offset) * scale)/current_bins**2/scale**3 + current_bins * offseterr**2)
                #print ('1 binsize=',binsize, ' cnt=',cnt, ' xx=',xx[cnt],' summ_t=',summ_t,' yy=',yy[cnt],', summ_v=',summ_v)
                if (current_seq == rebin_seq):
                    # grow the bin size after every rebin_seq emitted bins
                    binsize = binsize * rebin_fac
                    current_seq = 0
                # reset sums
                summ_t = 0
                summ_v = 0
                current_bins = 0
                cnt = cnt + 1
                current_seq = current_seq + 1
            # emit the above-threshold bin unbinned
            xx[cnt] = time
            yy[cnt] = cont
            yyerr[cnt] = np.sqrt(np.maximum(0.5,(cont+offset) * scale)/scale**3 + offseterr**2)
            cnt = cnt + 1
            continue
        # Below threshold (or pure rebinning): accumulate into the pending bin.
        summ_t = summ_t + time
        summ_v = summ_v + cont
        current_bins = current_bins + 1
        if (current_bins == binsize):
            # pending bin is full: emit its averages and uncertainty
            xx[cnt] = summ_t / binsize
            yy[cnt] = summ_v / binsize
            yyerr[cnt] = np.sqrt(np.maximum(0.5,(summ_v + binsize*offset) * scale)/binsize**2/scale**3 + binsize * offseterr**2)
            #print ('2 binsize=',binsize, ' cnt=',cnt, ' xx=',xx[cnt],' summ_t=',summ_t,' yy=',yy[cnt],', summ_v=',summ_v)
            if (current_seq == rebin_seq):
                binsize = binsize * rebin_fac
                current_seq = 0
            # reset sums
            summ_t = 0
            summ_v = 0
            current_bins = 0
            cnt = cnt + 1
            current_seq = current_seq + 1
    return (np.resize(xx,cnt), np.resize(yy,cnt), np.resize(yyerr,cnt))
def _mad(self,arr):
""" Median Absolute Deviation: a "Robust" version of standard deviation.
Indices variabililty of the sample.
https://en.wikipedia.org/wiki/Median_absolute_deviation
"""
arr = np.ma.array(arr).compressed() # should be faster to not use masked arrays.
med = np.median(arr)
return np.median(np.abs(arr - med))
def get_data(self,rebin=6,bg_range=500,rebin_thres=0,rebin_seq=10,rebin_fac=2, verbose=False):
    """
    Return the offset-corrected and re-binned data.

    Arguments:
    * rebin: Number of bins collapsed into one
             (default=6 which yields 7.5m*6 = 45m bin width)
    * bg_range: Number of bins to calculate the background offset.
             If positive, taken from the end of the profile; if negative,
             from the beginning (default=500, i.e. 7.5m*500 = 3.75 km).
    * rebin_thres: If not zero, applies an additional dynamic rebinning,
             for data below the given threshold (in pe).
    * rebin_seq: If rebin_thres>0, apply rebinning by increasing bin every rebin_seq bins
    * rebin_fac: If rebin_thres>0, multiply binsize with rebin_fac every rebin_seq bins
    * verbose: If True, print diagnostic information.

    Returns:
    (x, y, yerr, offset, offset_err): range axis, offset-corrected signal,
    its statistical uncertainty, and the subtracted background offset with
    its uncertainty.  Returns None for an inactive channel.
    """
    if self.active is False:
        print("No data returned: channel %i is not active." % self.address)
        return
    offset = 0
    offset_err = 0
    if (bg_range > 0):
        # Background estimated from the far end of the profile; 5% trimmed
        # on each side for robustness.  NOTE(review): the slice stops at -2,
        # i.e. the last two bins are excluded — confirm this is intended.
        off = trimboth(self.data[-bg_range:-2],0.05) # use the trimmed array here for more robust results on mean and std
        offset = np.mean(off)
        #offset = np.mean(self.data[-bg_range:])
        offset_err = np.std (off) / np.sqrt(off.size)
        if (verbose):
            print ('offset=',offset,' +- ',offset_err)
    elif (bg_range < 0):
        # Negative bg_range: estimate the background from the first |bg_range| bins instead.
        off = trimboth(self.data[0:-bg_range],0.05) # use the trimmed array here for more robust results on mean and std
        offset = np.mean(off)
        #offset = np.mean(self.data[-bg_range:])
        offset_err = np.std (off) / np.sqrt(off.size)
        if (verbose):
            print ('offset=',offset,' +- ',offset_err)
    x = self.bin_width * np.arange(self.num_data) # correct for wrong estimate of Sampling frequency!
    y = np.zeros(self.num_data)
    # Shift the offset-subtracted data by self.offset_range bins to align
    # the signal with the range axis; sign selects the shift direction.
    if (self.offset_range == 0):
        y = self.data - offset
    elif (self.offset_range < 0):
        y[-self.offset_range:] = self.data[:self.offset_range] - offset
    else:
        y[:-self.offset_range] = self.data[self.offset_range:] - offset
    # Poisson-style uncertainty (floored at 0.5 counts) combined with the offset uncertainty.
    yerr = np.sqrt(np.maximum(0.5,y * self.num_shots)/self.num_shots**2 + offset_err**2)
    if (rebin>0):
        # rebinning for MAGIC style
        if (verbose):
            print ('collapse ',rebin,' time slices into one')
        (x,y,yerr) = self._apply_rebinning(x,y,offset,offset_err,0,0.,len(y),rebin, 1.)
        # offset and its uncertainty scale with the number of collapsed bins
        offset = offset * rebin
        offset_err = offset_err * np.sqrt(rebin)
        if (verbose):
            print ('new offset=',offset,' +- ',offset_err)
    # optional additional general rebinning
    rebin_start = 0 # bin number
    #rebin_thres = 4 # threshold in p.e.
    #rebin_seq = 10 # increase re-binning every 50 bins
    #rebin_fac = 2 # multiply binsize by 3 eveny rebin_seq
    #print (y,yerr,rebin_thres)
    if (rebin_thres>0):
        if (verbose):
            print ('Starting with bin',rebin_start,'and if orig. size<',rebin_thres,' MHz, bin size will increase by',rebin_fac,'every',rebin_seq,'slices')
        (x,y,yerr) = self._apply_rebinning(x,y,offset,offset_err,rebin_start,rebin_thres,rebin_seq,rebin_fac,rebin)
        #print (y,yerr)
    return (x,y,yerr,offset,offset_err)
def print_data(self, raw=False, errs=False, offsets=False):
    """Print the data for this channel.

    * raw: if True, print the raw (un-rebinned) data shifted by
      self.offset_range and return; otherwise print the processed
      output of get_data().
    * errs: also print the statistical uncertainties (processed mode only).
    * offsets: also print the background offset and its uncertainty
      (processed mode only).
    """
    if self.active is False:
        print("Print skipped: channel %i is not active." % self.address)
        return
    if raw:
        ranges = self.bin_width * np.arange(self.num_data - self.offset_range) # correct for wrong estimate of Sampling frequency!
        signal = self.data[self.offset_range:]
        for rng, sig in zip(ranges, signal):
            print ('Range= ', rng, ' Signal= ', sig)
        return
    x, y, yerr, offset, offset_err = self.get_data()
    print ('Ranges:' , x)
    print ('Signal:' , y)
    if offsets:
        print ('Offset: ', offset, ' +- ', offset_err)
    if errs:
        print ('Errs:', yerr)
def plot_data(self, raw=False, errs=False):
"""
Plots the data for this channel.
......@@ -261,20 +509,48 @@ class LICEL:
if self.active is False:
print("Plot skipped: channel %i is not active." % self.address)
return
x = self.bin_width*np.arange(self.num_data)
y = self.data
if not raw:
offset = np.mean(y[:6])
y = (y - offset)*x*x
y = np.where(y > 0, y, np.ones(len(y)))
plt.semilogy(x, y)
(x,y,yerr,offset,offset_err) = self.get_data()
#print (xx,yy,cnt)
yyr2 = y*x*x
yyerrr2 = yerr*x*x
yyr2 = np.where(yyr2 > 0, yyr2, np.ones(len(yyr2)))
yyr2 = np.log(yyr2)
yyerr_up = np.abs(yerr/y)
if raw:
if (errs):
plt.errorbar(x, y, yerr=[y-np.maximum(1.e-5,y-yerr),yerr])
else:
plt.plot(x, y)
plt.yscale('log')
else:
#y = y*x*x
#y = np.where(y > 0, y, np.ones(len(y)))
#plt.semilogy(x, y)
if (errs):
plt.errorbar(x, yyr2,yerr=[yyr2-np.maximum(1., yyr2 - yyerr_up), yyerr_up])
else:
plt.plot(x, yyr2)
plt.xlabel("Range [m]")
if raw:
plt.ylabel("Signal [phe/bin]")
else:
plt.ylabel(u"Signal [phe/bin] x R\u00B2") # \u00B2 is unicode for superscript 2
title_info = (self.fname, self.address, self.dataset_type)
plt.title("%s - Channel %i %s" % title_info)
plt.ylabel(u"ln(Signal [phe/bin] x R\u00B2)") # \u00B2 is unicode for superscript 2
ids = np.where((x >= 1500.) & (x <= 10000.))
func = lambda x, a, b: a -b * x
a, pcov = curve_fit(func, x[ids], yyr2[ids],p0=[12,1.23e-4]);
plt.plot(x[ids], func(x[ids],*a), label='y = {:.1f}-{:.3g}*R'.format(a[0],a[1]));
plt.legend()
title_info = (self.fname, self.wavelength_nm, self.num_shots)
plt.title("%s - %i nm - %i shots @ 10 Hz" % title_info)
plt.xlim(-100.,8100.)
plt.grid()
plt.show()
......
from __future__ import print_function, division
import numpy as np
from licel_reader import LICEL
from scipy.optimize import minimize
from scipy.signal import savgol_filter
from scipy.integrate import cumtrapz
from matplotlib import pyplot as plt
# alpha_0's in units of m^-1
# NOTE(review): presumably molecular extinction coefficients keyed by the
# laser/Raman wavelength in nm (355/387/532/607) — confirm against the
# atmospheric model used to derive these values.
alpha_dict = { 355 : 7.0101e-5 ,
               387 : 4.88111e-5,
               532 : 1.3128e-5,
               607 : 7.66815e-6 }
# 8*pi/3 — presumably the molecular (Rayleigh) lidar ratio in sr; confirm usage.
LR_mol = 8.*np.pi/3.
def dynamic_rebinning(lic_channel,x,y,offset,offseterr,pc_thres=9.,rebin_start=0,verbose=False):
    """
    Return the offset-corrected and re-binned data.

    Bins are merged until the accumulated signal exceeds pc_thres, so the
    output bin width grows dynamically where the signal is weak.

    Arguments:
    * lic_channel: channel object providing num_shots and bin_width
    * x: Range axis
    * y: Offset-corrected data
    * offset: Previously applied offset (needed for the uncertainties)
    * offseterr: Stat. uncertainty of the offset
    * pc_thres: Minimum number of p.e. in a bin
                (determines the bin width)
    * rebin_start: If rebin_start>0, apply rebinning only after the array index 'rebin_start'
    * verbose: If True, print each emitted bin.

    Returns (xx, yy, yyerr, b_idx, bin_widths): rebinned range, signal and
    uncertainty (truncated to the filled length), the per-input-bin output
    index map (full input length), and the output bin widths (truncated).
    """
    assert (x.shape == y.shape)
    # Output buffers, truncated with np.resize at the end.
    xx = np.zeros(len(x))
    yy = np.zeros(len(y))
    yyerr = np.zeros(len(y))
    b_idx = np.zeros(len(y))        # maps each input bin to its output bin index
    bin_widths = np.zeros(len(y))   # physical width of each output bin
    scale = lic_channel.num_shots #* re_scale
    # Bins below rebin_start are copied through unchanged.
    xx[:rebin_start] = x[:rebin_start]
    yy[:rebin_start] = y[:rebin_start]
    yyerr[:rebin_start] = np.sqrt((y[:rebin_start]+offset)/scale + offseterr**2)
    b_idx[:rebin_start] = np.arange(rebin_start)
    bin_widths[:rebin_start] = np.ones(rebin_start) * lic_channel.bin_width
    ii = rebin_start          # input read index
    cnt = rebin_start         # output write index
    current_bins = 1          # input bins accumulated into the pending output bin
    summ_t = 0                # accumulated range values
    summ_v = 0                # accumulated signal values
    while (ii < len(x)):
        time = x[ii]
        cont = y[ii]
        b_idx[ii] = cnt
        summ_t = summ_t + time
        summ_v = summ_v + cont
        #print ('ii=', ii, 'cnt=', cnt, ' cont=', cont, ' summ_v=', summ_v, ' time=', time)
        if (summ_v > pc_thres):
            # Accumulated enough signal: emit one output bin as the average
            # of the merged input bins.
            xx[cnt] = summ_t / (current_bins)
            yy[cnt] = summ_v / (current_bins)
            bin_widths[cnt] = lic_channel.bin_width * current_bins
            if (verbose):
                print ('time =',xx[cnt], ' cont= ',yy[cnt], 'pc_thres=',pc_thres, ' ii=', ii, ' cnt=', cnt)
            yyerr[cnt] = np.sqrt(np.maximum(0.5,(summ_v + current_bins*offset))/current_bins**2 + current_bins * offseterr**2)
            #print ('1 binsize=',binsize, ' cnt=',cnt, ' xx=',xx[cnt],' summ_t=',summ_t,' yy=',yy[cnt],', summ_v=',summ_v)
            # reset sums
            summ_t = 0.
            summ_v = 0.
            current_bins = 0
            cnt = cnt + 1
        ii = ii + 1
        current_bins = current_bins + 1
    return (np.resize(xx,cnt), np.resize(yy,cnt), np.resize(yyerr,cnt), b_idx, np.resize(bin_widths,cnt))
def impose_rebinning(y,b_idx,offset,offseterr,verbose=False):
"""
Impose a re-binning to y
Arguments:
* y: Array of data before re-binning
* b_idx: Array of indices to fall into a same bin
* offset:
"""
assert (y.shape == b_idx.shape)
yy = np.zeros(len(y))
yyerr = np.zeros(len(y))
cnt = 0
summ_v = 0
current_bins = 1
for (ii,cont) in enumerate(y):
if (ii < (len(y)-1) and b_idx[ii+1] == cnt):
summ_v = summ_v + cont
if (verbose):
print ('ii=',ii,' cnt=', cnt, ' b_idx[ii]=',b_idx[ii], ' summ_v=', summ_v, ' cont=',cont)
current_bins = current_bins + 1
continue
summ_v = summ_v + cont
yy[cnt] = summ_v / current_bins
if (verbose):
print ('cnt=', cnt, ' cont=', cont,' yy[cnt]=',yy[cnt])
yyerr[cnt] = np.sqrt(np.maximum(0.5,(summ_v + current_bins*offset))/current_bins**2 + current_bins * offseterr**2)
# reset counters and sums
summ_v = 0
current_bins = 1
cnt = cnt + 1
if (verbose):
print ('CHECK: ', len(yy), cnt-1)
# take care of the last (not yet rebinned) bin:
if (b_idx[len(y)-1] == b_idx[len(y)-2]):
cnt = cnt - 1