SUNA_read_data.m (184 lines, 174 loc, 6.5 KB)
function [cfg,data,meta] = SUNA_read_data(cfg,file)
% FUNCTION SUNA_read_data
%
% Syntax:
% [cfg,data,meta] = SUNA_read_data(cfg)
% [cfg,data,meta] = SUNA_read_data(cfg,file)
% "cfg" is a structure containing information about the deployment and
% the instrument. "file" (optional) is the path to a single CSV file to
% read instead of scanning the SUNA_L0 directory.
%
% Description:
% Reads SUNA CSV files from the SUNA_L0 directory, or only the file
% passed in "file".
%
% See also:
% User manual Submersible Ultraviolet Nitrate Analyzer SUNA V2, 2017
%
% Authors:
% Brita K Irving <[email protected]>
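%
% Example:
%   A minimal usage sketch. The field values below are illustrative only;
%   actual paths, dates, and file names depend on the deployment:
%     cfg.datadir             = '/path/to/deployment';   % parent of the SUNA_L0 directory
%     cfg.project             = 'example_project';       % project name used in log messages
%     cfg.mooring.deploydate  = '2019-05-01';            % deployment date
%     cfg.mooring.recoverdate = '2019-09-15';            % recovery date
%     cfg.SUNA.path.data_raw  = fullfile(cfg.datadir,'SUNA_data_raw.mat'); % output .mat file
%     [cfg,data,meta] = SUNA_read_data(cfg);             % read all CSV files in SUNA_L0
%   or, to read a single file without date filtering:
%     [cfg,data,meta] = SUNA_read_data(cfg,fullfile(cfg.datadir,'SUNA_L0','D2019123.CSV'));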
%% Check whether a specific file was passed in as an argument
if nargin > 1
read_single_file = 1;
else
read_single_file = 0;
end
%% Define CSV column fieldnames
% from the user manual section 6.1.2.1 "File Types"
cols_spec = strcat('spec',compose('%d',1:256)); % generate fieldnames of spectrum channels
cols_start = {'hdr' 'yyyydoy' 'hr' 'NO3_uM' 'NO3_mgNL' 'abs_254nm' 'abs_350nm' 'bromide_mgL' 'spec_avg' 'dark' 'int_fact'};
cols_end = {'int_temp' 'spec_temp' 'lamp_temp' 'cum_lampon_sec' 'rel_hum' 'volt' 'lamp_volt' 'int_volt' 'current' 'fit_aux1' 'fit_aux2' 'fit_base1' 'fit_base2' 'fit_RMSE'...
'ctd_time' 'ctd_sal' 'ctd_temp' 'ctd_press' 'check_sum'};
cols = [cols_start cols_spec cols_end];
% add units/description
cols_start_units = {'Header and serial number' 'yearDOY' 'hours' 'uM' 'mgN/L' 'nm' 'nm' 'mg/L' 'spectrum average' 'dark value used for fit' 'integration time factor'};
cols_spec_units = strcat('spectrum channel',compose('%d',1:256)); % generate fieldnames of spectrum channels
cols_end_units = {'degC' 'degC' 'degC' 'sec' '%' 'V' 'V' 'V' 'mA' 'fit aux 1' 'fit aux 2' 'fit base 1' 'fit base 2' 'fit RMSE' 'sec' 'PSU' 'degC' 'dbar' 'nodim'};
cols_units = [cols_start_units cols_spec_units cols_end_units];
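% sanity check: 11 leading + 256 spectrum + 19 trailing columns = 286 names,
% which must match the column-count check applied to each file below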
%% Check for data directory existence
if read_single_file == 0
raw_dir = fullfile(cfg.datadir,'SUNA_L0');
if ~exist(raw_dir,'dir')
fprintf('Directory not found: %s\n',raw_dir)
keyboard
end
%% Get mooring deployment date in yyyyDDD format (year day-of-year)
if isfield(cfg,'mooring')
deploydate = datetime( cfg.mooring.deploydate,'Format','yyyyDDD');
else % default to today
deploydate = datetime('now','Format','yyyyDDD');
end
if isfield(cfg,'mooring') && isfield(cfg.mooring,'recoverdate')
recoverdate = datetime( cfg.mooring.recoverdate,'Format','yyyyDDD');
else % default to today
recoverdate = datetime('now','Format','yyyyDDD');
end
% convert to numeric value instead of datetime
deploydate = str2double(char(deploydate));
recoverdate = str2double(char(recoverdate));
%% Read all raw files
files = dir(fullfile(raw_dir,'*.CSV'));
% remove files with erroneous names
ignore_files = contains({files.name},{'..' '._'});
files(ignore_files) = [];
files([files.bytes] == 0) = []; % throw out empty files
% skip files containing data before deployment
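% file names are expected to follow the Dyyyyddd.CSV convention (e.g. D2019123.CSV),
% so stripping the 'D' and '.CSV' leaves the numeric yearDOY stamp to compare against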
yyyyDDD = str2double(erase({files.name},{'D' '.CSV'}));
files(yyyyDDD < deploydate | yyyyDDD > recoverdate) = [];
else
files = struct();
for nf = 1:size(file,1)
[folder,name,ext] = fileparts(file(nf,:));
files(nf).folder = folder;
files(nf).name = [name ext];
end
end
%% Initialize meta structure
% stores filenames and other pertinent information
meta = struct();
% add column names and units to meta
for nc = 1:numel(cols)
meta.(cols{nc}).units = cols_units{nc};
end
% save files to meta structure
meta.csv_filenames = {files.name}';
for nf = 1:numel(files)
filenam = fullfile(files(nf).folder,files(nf).name);
fprintf('Reading raw SUNA datafile: %s\n',files(nf).name)
opts = detectImportOptions(filenam);
if numel(opts.VariableNames) == 286
opts.VariableNames = cols;
else
fprintf('... unexpected first column (datetime) exists in file...\n')
fprintf('... may cause errors ...\n')
opts.VariableNames = ['datetime' cols];
end
% read CTD variables as doubles so missing entries become NaN instead of ''
opts.VariableTypes(contains(opts.VariableNames,{'ctd_sal' 'ctd_temp' 'ctd_press'})) = {'double'};
%% First read header information
inst = struct(); % initialize inst and store all coefficients
fileid = fopen(filenam,'r');
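% each pre-data header line is comma-separated, with the field name in the
% 2nd token and its value in the 3rd; the name is sanitized into a valid
% struct fieldname below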
for nl = 1:opts.DataLine-1
try
str = fgetl(fileid);
str = strsplit(str,',');
hdr_field = strrep(str{2},'-','_');
hdr_field = regexprep(hdr_field,' +',' '); % replace multiple spaces with single space
hdr_field = strtrim(hdr_field);
hdr_field = strrep(hdr_field,' ','_');
inst.(hdr_field) = strtrim(str{3});
catch
fprintf('Error parsing header line in %s\n',filenam)
keyboard
end
end
fclose(fileid);
%% Next read data
d = readtable(filenam,opts);
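% tag every row with the source filename (stored as a char matrix, one row per sample)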
d.filename = repmat(files(nf).name,size(d.hdr));
if ~read_single_file
try
if d.yyyydoy(end) < deploydate || d.yyyydoy(end) > recoverdate
% remove this filename from meta since its data falls outside the deployment window
meta.csv_filenames(contains(meta.csv_filenames,files(nf).name)) = [];
continue
end
catch
fprintf('stopped here: %s\n',filenam);
keyboard
end
% check to make sure header information is the same
if exist('data','var') % i.e. one file has already been read
hdr_fields = fieldnames(inst);
for ncal = 1:numel(hdr_fields)
hdr_field = hdr_fields{ncal};
if ~strcmp(inst_prev.(hdr_field),inst.(hdr_field))
fprintf('Header info different than previous! %s\n',filenam)
fprintf(' previous file %s = %s\n',hdr_field,inst_prev.(hdr_field))
fprintf(' this file %s = %s\n',hdr_field,inst.(hdr_field))
skip_file = input('Do you want to skip this file? <1/0> ');
if isempty(skip_file); skip_file = 1; end
if skip_file
meta.csv_filenames(contains(meta.csv_filenames,files(nf).name)) = [];
inst = inst_prev;
continue
else
fprintf('stopped here...\n')
keyboard
end
end
end
end
end
inst_prev = inst; % save so the next file's header information can be checked against this one
if ~exist('data','var')
data = d;
else
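% append this file's rows; all files must share the same variable names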
data = [data;d];
end
end
% Add datenum and datetime
data.datetime = datetime(string(data.yyyydoy),'InputFormat','yyyyDDD') + hours(data.hr);
data.datenum = datenum(data.datetime);
% store CSV header information to meta structure
meta.csv_headers = inst;
% if ~strcmp(meta.csv_headers.Calibration_File,cfg.SUNA.cal1.calfile)
% fprintf('Different calibration files...\n')
% keyboard
% end
%% Save data
if ~read_single_file
meta_raw = meta;
data_raw = data;
fprintf('Calibration file for %s = %s\n',cfg.project,meta_raw.csv_headers.Calibration_File)
fprintf('Saving raw data to file: %s\n',cfg.SUNA.path.data_raw)
save(cfg.SUNA.path.data_raw,'data_raw','meta_raw','cfg','-v7.3');
end
end