function [] = get_model_and_obs_stats_mod( varname,              ...
    run_months, exp_path, exp_run, domain, start_year, end_year,   ...
    dt_assim, species, hscale, time_of_day_in_hours )

%
% get_model_and_obs_stats_mod.m
%
% Compute mean, var, skewness, min, max, and histogram of model
% (synthetic obs) and observations from tile-based "innov" files.
%
% The main purpose of this function is to aggregate the information
% from the "innov" files so that scaling (cdf-matching) parameters
% can be derived with get_cdf_match.m - this implies that the
% histogram should be computed with very fine resolution, even if
% this means that the histogram as such would not be useful.  It
% will implicitly be aggregated further in get_cdf_match by way
% of computing the cdf.
%
% Stats output file contains tile ID, number of
% data, mean, variance, skewness, min, max, and histogram
% for each catchment (see write statement).  For every tile two
% lines are written: first the observation stats, then the model
% (obspred) stats.
%
% A second ("zscore") text file is always (re-)written; it contains
% one line each for tile_id, com_lon, com_lat, obs mean, obs std,
% obspred mean, and obspred std.
%
% Uses *constant* (pre-defined) edges for histogram
%
% Increase edge_dx if memory (RAM) is a problem
%
% Inputs:
%
%  varname     : 'sfmc', 'Tb' (also accepted with literal quotes,
%                i.e. '''Tb'''), or 'Tskin'; selects histogram edges
%                and the tag used in the output file name
%  run_months  : vector of month numbers (1-12) to be processed
%  exp_path    : experiment root path   (string)
%  exp_run     : experiment run name    (string)
%  domain      : domain name            (string)
%  start_year,
%  end_year    : first and last year to be processed
%  dt_assim    : step [s] between the files that are read within
%                each day (files are named with HHMM only, so the
%                step must resolve to whole minutes)
%  species     : vector of obs species IDs to be processed together
%                (see ens_upd_inputs.nml for a list; only species
%                that represent observations of the same model
%                prognostic or diagnostic can be processed together!)
%  hscale      : radius for spatial aggregation, in the units of
%                tile_coord.com_lon/com_lat (presumably degrees -
%                TODO confirm); hscale<=0 means no aggregation
%  time_of_day_in_hours : (optional) if present, only files for
%                this hour of the day are processed
%
% Requires read_tilecoord, read_ObsFcstAna, and days_in_month on
% the MATLAB path.
%
% reichle, 22 Aug 2005
%
% - "species" now input argument
% - added "time_of_day_in_hours" for Tskin stats
% reichle, 30 Sep 2005
%
% rewritten to permit spatial aggregation
% reichle, 26 jan 2006
%
% updated, GDL, 16 nov 2010
% updated CSD, 31 Jan 2014
%
% -------------------------------------------------------------------
% begin user-defined inputs
% -------------------------------------------------------------------

old_output_style = 0;   % switch for old output file and dir conventions

nodata     = -9999;     % no-data-value expected in the input files
nodata_tol = 1e-4;      % tolerance when comparing against nodata

disp(varname)

% edges for histogram

if strcmp(varname,'sfmc')         % surface soil moisture
    
    % NOTE: edge_dx should be chosen ridiculously small. Then the
    % resulting polynomial fit in get_cdf_match.m will approximate
    % most closely what would be obtained by fitting the ranked
    % scatter diagram.  (The latter cannot be obtained easily for
    % large domains and many observations because it is unlikely
    % to fit into memory, and looping over each tile would require
    % reading the entire data set of "innov" files once for each
    % tile.) - reichle, 22 Aug 2005
    
    edge_min = 0;
    edge_max = 0.5;
    edge_dx  = 0.001;    % increase edge_dx if memory (RAM) is a problem
    spec_tag = 'SM';
    
    % Alternative (disabled) settings kept for reference:
    %
    % coarse histogram for SMMR vs. AMSR-E stats - CANNOT be used
    % with get_cdf_match.m!!!
    % also increase edge_max for stats of scaled data
    % (reichle, 27 Jan 2006)
    %
    %   edge_min = 0;     edge_max = 0.6;   edge_dx = 0.03;
    %   edge_min = -.18;  edge_max = 0.72;  edge_dx = 0.03;
    
elseif any(strcmp(varname, {'Tb', '''Tb'''}))
    
    % the quoted form is what the original code tested for; the plain
    % form is accepted in addition
    
    edge_min = 200;
    edge_max = 400;
    edge_dx  = 0.5;
    spec_tag = 'Tb';
    
elseif strcmp(varname,'Tskin')
    
    edge_min = 200;
    edge_max = 400;
    edge_dx  = 0.5;
    spec_tag = 'Tskin';
    
else
    
    % without edges and spec_tag nothing below can work, so stop here
    
    error('get_model_and_obs_stats_mod:unknownVarname', ...
        'Unknown species requested: %s', varname);
    
end

for s=1:length(species)
    spec_tag = [spec_tag,'_',num2str(species(s))];
end

% minimum number of data points to include in statistics

N_data_min = 2; % leave decision about "good" stats for later

% no-data-value for points that don't have good statistics

no_data_stats = -9999.;

% -------------------------------------------------------------------
% end user-defined inputs
% -------------------------------------------------------------------

edges   = edge_min:edge_dx:edge_max;

N_edges = length(edges);

N_bins  = N_edges-1;

% -------------------------------------------------------------

% assemble input and output paths

inpath  = [ exp_path, '/', exp_run, '/', domain ];

outpath = [ inpath, '/stats/'   ];

% create outpath (including missing parent dirs) if it doesn't exist

if exist(outpath,'dir')~=7
    [mk_ok, mk_msg] = mkdir(outpath);
    if ~mk_ok
        error('get_model_and_obs_stats_mod:mkdirFailed', ...
            'Could not create %s: %s', outpath, mk_msg);
    end
end

% -------------------------------------------------------------

% assemble output file name

month_string = {'Jan'; 'Feb'; 'Mar'; 'Apr'; 'May'; 'Jun'; ...
    'Jul'; 'Aug'; 'Sep'; 'Oct'; 'Nov'; 'Dec'};

% common stem, shared by the stats file and the z-score file

fname_stem = [ outpath, '/', exp_run, '.stats.',                   ...
    'hscale_', num2str(hscale,'%2.2f'), '_',             ...
    num2str(start_year), '-', num2str(end_year), '.' ];

fname_out = fname_stem;

if length(run_months)==12
    fname_out = [fname_out, 'all_months' ];
else
    for i=1:length(run_months)
        fname_out = [fname_out, month_string{run_months(i)}];
    end
end

fname_out = [fname_out, spec_tag];

% evaluated once here; also used inside the time loop below

use_time_of_day = exist( 'time_of_day_in_hours', 'var');

if use_time_of_day
    
    fname_out = [fname_out, '_', num2str(time_of_day_in_hours,'%2.2d'), 'z'];
    
end

fname_out = [fname_out, '.dat'];

% check whether output file exists
%
% NOTE: the function does *not* return if the stats file exists.  All
% data are still processed because the z-score file is written
% regardless; only the stats file itself is protected.

if (exist(fname_out,'file')==2)
    
    disp('output file exists. not overwriting (z-score file is still written)')
    disp(fname_out)
    fexists=1;
    
else
    
    disp(['creating ', fname_out])
    fexists=0;
end

% -------------------------------------------------------------

% load catchment coordinates

if old_output_style
    
    fname = [inpath, '/', exp_run, '_tile_coord.dat'];
    
else
    
    fname = [ exp_path '/' exp_run '/output/' domain '/rc_out/' exp_run '.ldas_tilecoord.bin'];
    
end

[ tile_coord ] = read_tilecoord( fname );

N_tile = length(tile_coord.tile_id);

% -------------------------------------------------------------

% initialize accumulators (raw sums, normalized after the time loop)

N_data   = zeros(N_tile,1);               % same for obs and obspred

obs_sum1 = zeros(N_tile,1);               % sum of x
obs_sum2 = zeros(N_tile,1);               % sum of x^2
obs_sum3 = zeros(N_tile,1);               % sum of x^3

obs_min  =  inf*ones(N_tile,1);
obs_max  = -inf*ones(N_tile,1);

obs_hist = zeros(N_tile,N_bins);


obspred_sum1 = zeros(N_tile,1);
obspred_sum2 = zeros(N_tile,1);
obspred_sum3 = zeros(N_tile,1);

obspred_min  =  inf*ones(N_tile,1);
obspred_max  = -inf*ones(N_tile,1);

obspred_hist = zeros(N_tile,N_bins);


% determine tiles to whose statistics the current obs will contribute

disp('pre-computing index for regional averaging')

hscale_ind = cell(N_tile,1);

for i=1:N_tile
    
    if hscale>0
        
        this_lat = tile_coord.com_lat(i);
        this_lon = tile_coord.com_lon(i);
        
        tmp_sq_distance =                           ...
            (tile_coord.com_lon - this_lon).^2 +    ...
            (tile_coord.com_lat - this_lat).^2;
        
        hscale_ind{i} = find( tmp_sq_distance <= hscale^2 );
        
    else
        
        hscale_ind{i} = i;
        
    end
    
end


% -------------------------------------------------------------

% (:)' forces row orientation so that the for-loops iterate over the
% individual elements even if a column vector was passed in

month_list   = run_months(:)';
species_list = species(:)';

for year = start_year:end_year
    
    for month = month_list
        
        for day = 1:days_in_month( year, month)
            
            for seconds_in_day = 0:dt_assim:(86400-1)
                
                hour    = floor(seconds_in_day/3600);
                
                % check if diurnal stats are needed
                
                if use_time_of_day
                    tmp_hour = time_of_day_in_hours;
                else
                    tmp_hour = hour;       % all hours of day will be included
                end
                
                if hour==tmp_hour
                    
                    minute  = floor( (seconds_in_day-hour*3600)/60 );
                    
                    seconds = seconds_in_day-hour*3600-minute*60;
                    
                    % file names only carry HHMM
                    
                    if (seconds~=0)
                        error('get_model_and_obs_stats_mod:badTimeStep', ...
                            'something is wrong! dt_assim=%g yields non-zero seconds', ...
                            dt_assim);
                    end
                    
                    YYYYMMDD = [ num2str(year,   '%4.4d'),     ...
                        num2str(month,  '%2.2d'),     ...
                        num2str(day,    '%2.2d')  ];
                    
                    HHMM     = [ num2str(hour,   '%2.2d'),     ...
                        num2str(minute, '%2.2d')  ];
                    
                    % read innov files
                    
                    if (old_output_style)
                        
                        fname = [ inpath, '/ens_avg/',                 ...
                            'Y', YYYYMMDD(1:4), '/',             ...
                            'M', YYYYMMDD(5:6), '/',             ...
                            exp_run, '.ens_avg.innov.',          ...
                            YYYYMMDD, '_', HHMM ];
                        
                    else
                        
                        fname = [ exp_path '/' exp_run '/output/' domain '/ana/ens_avg/',                  ...
                            'Y', YYYYMMDD(1:4), '/',                  ...
                            'M', YYYYMMDD(5:6), '/',                  ...
                            exp_run, '.ens_avg.ldas_ObsFcstAna.',        ...
                            YYYYMMDD, '_', HHMM, 'z.bin' ];
                        
                    end
                    
                    % NOTE(review): behavior for a missing file (e.g.
                    % irregular swaths) depends on read_ObsFcstAna -
                    % confirm that it returns empty arrays in that case.
                    
                    [date_time,              ...
                        obs_assim,              ...
                        obs_species,            ...
                        obs_tilenum,            ...
                        obs_lon,                ...
                        obs_lat,                ...
                        obs_obs,                ...
                        obs_obsvar,             ...
                        obs_fcst,               ...
                        obs_fcstvar,            ...
                        obs_ana,                ...
                        obs_anavar              ...
                        ] =                      ...
                        read_ObsFcstAna( fname );
                    
                    % extract species of interest
                    
                    spec_ind = [];
                    
                    for this_species = species_list
                        
                        spec_ind = [ spec_ind;  find( obs_species(:) == this_species) ];
                        
                    end
                    
                    sel_tilenum = obs_tilenum(spec_ind);
                    sel_obs     = obs_obs(spec_ind);
                    sel_fcst    = obs_fcst(spec_ind);
                    innov       = sel_fcst-sel_obs;
                    
                    for i=1:length(sel_obs)
                        
                        if ( abs(sel_obs(i)  - nodata) > nodata_tol && ...
                                abs(sel_fcst(i) - nodata) > nodata_tol && ...
                                abs(innov(i)    - nodata) > nodata_tol)
                            
                            % all tiles (within hscale) that this
                            % observation contributes to
                            
                            tile_ind = hscale_ind{sel_tilenum(i)};
                            
                            % add up data
                            
                            N_data(tile_ind) = N_data(tile_ind) + 1;  % same for obs and obspred
                            
                            % observations
                            
                            obs_value = sel_obs(i);
                            
                            obs_sum1(tile_ind) = obs_sum1(tile_ind) + obs_value;
                            
                            obs_sum2(tile_ind) = obs_sum2(tile_ind) + obs_value^2;
                            
                            obs_sum3(tile_ind) = obs_sum3(tile_ind) + obs_value^3;
                            
                            obs_min(tile_ind)  = min(obs_min(tile_ind), obs_value);
                            obs_max(tile_ind)  = max(obs_max(tile_ind), obs_value);
                            
                            % histogram: edges are increasing, so the
                            % number of edges below the value is the bin
                            % index; values at or below the min edge go
                            % into the first bin, values above the max
                            % edge into the last bin
                            
                            j = min( max( sum( obs_value > edges ), 1), N_bins );
                            
                            obs_hist(tile_ind,j) = obs_hist(tile_ind,j) + 1;
                            
                            % again for model (synth observations)
                            
                            obspred_value = sel_fcst(i);
                            
                            obspred_sum1(tile_ind) = obspred_sum1(tile_ind) + obspred_value;
                            
                            obspred_sum2(tile_ind) = obspred_sum2(tile_ind) + obspred_value^2;
                            
                            obspred_sum3(tile_ind) = obspred_sum3(tile_ind) + obspred_value^3;
                            
                            obspred_min(tile_ind)  = min(obspred_min(tile_ind), obspred_value);
                            obspred_max(tile_ind)  = max(obspred_max(tile_ind), obspred_value);
                            
                            j = min( max( sum( obspred_value > edges ), 1), N_bins );
                            
                            obspred_hist(tile_ind,j) = obspred_hist(tile_ind,j) + 1;
                            
                        end % condition on valid data
                        
                    end    % loop through observations/innovations
                    
                end    % time_of_day_in_hours
                
            end      % seconds_in_day
        end        % day
    end          % month
end            % year

% normalize sums

% pick catchments with at least N_data_min data points

disp(' ')
disp(['total number of catchments = ', num2str(N_tile)])

ind_incl = find(N_data >= N_data_min);
ind_excl = find(N_data <  N_data_min);

N_tile_nodata = length(find(N_data==0));

disp(['number of catchments with at least ', num2str(N_data_min), ...
    ' data points = ', num2str(length(ind_incl))])

disp(['number of catchments without any data points = ', ...
    num2str(N_tile_nodata)])

disp(['number of catchments with 1 to ', num2str(N_data_min-1), ...
    ' data points = ', num2str(length(ind_excl)-N_tile_nodata)])


% compute stats only for catchments with at least N_data_min data
% points, then expand back to arrays for all catchments with
% no-data-values inserted for points without statistics

NN = N_data(ind_incl);

[tmp_mean, tmp_var, tmp_skew] = local_moments(                     ...
    obs_sum1(ind_incl), obs_sum2(ind_incl), obs_sum3(ind_incl), NN );

obs_mean = local_expand( tmp_mean, ind_incl, N_tile, no_data_stats );
obs_var  = local_expand( tmp_var,  ind_incl, N_tile, no_data_stats );
obs_skew = local_expand( tmp_skew, ind_incl, N_tile, no_data_stats );

[tmp_mean, tmp_var, tmp_skew] = local_moments(                     ...
    obspred_sum1(ind_incl), obspred_sum2(ind_incl),                ...
    obspred_sum3(ind_incl), NN );

obspred_mean = local_expand( tmp_mean, ind_incl, N_tile, no_data_stats );
obspred_var  = local_expand( tmp_var,  ind_incl, N_tile, no_data_stats );
obspred_skew = local_expand( tmp_skew, ind_incl, N_tile, no_data_stats );


% set no-data for min/max

obs_min( isinf(obs_min)) = no_data_stats;
obs_max( isinf(obs_max)) = no_data_stats;

obspred_min( isinf(obspred_min)) = no_data_stats;
obspred_max( isinf(obspred_max)) = no_data_stats;


% additional quality control
% (variance of moisture content may be close to zero and become negative
%  due to roundoff error; the skewness is normalized by the variance and
%  is therefore meaningless for the same catchments)

ind_neg = find( obs_var(ind_incl)<0 );
obs_var( ind_incl(ind_neg)) = no_data_stats;
obs_skew(ind_incl(ind_neg)) = no_data_stats;

if ~isempty(ind_neg)
    disp(['found negative obs variance ', ...
        ' for catchments ', num2str(ind_incl(ind_neg)') ])
    disp('setting respective components of obs_var and obs_skew to no-data-value')
end


ind_neg = find( obspred_var(ind_incl)<0 );
obspred_var( ind_incl(ind_neg)) = no_data_stats;
obspred_skew(ind_incl(ind_neg)) = no_data_stats;

if ~isempty(ind_neg)
    disp(['found negative obspred variance ', ...
        ' for catchments ', num2str(ind_incl(ind_neg)') ])
    disp('setting respective components of obspred_var and obspred_skew to no-data-value')
end

clear ind_neg

% write output file (only if it did not exist at start)

if (fexists==0)
    
    disp(' ')
    disp(['writing ', fname_out])
    
    ofp = fopen(fname_out, 'w');
    
    if ofp<0
        error('get_model_and_obs_stats_mod:fopenFailed', ...
            'Could not open %s for writing', fname_out);
    end
    
    format_string = [ '%10d%7d%13.5e%13.5e%13.5e%13.5e%13.5e%8.3f%8.3f', ...
        repmat(' %d', 1, N_bins), '\n' ];
    
    for k=1:N_tile
        
        % line 1 for this tile: observations
        
        fprintf(ofp, format_string, ...
            tile_coord.tile_id(k), N_data(k),                        ...
            obs_mean(k), obs_var(k), obs_skew(k),         ...
            obs_min(k), obs_max(k),                       ...
            edge_min, edge_max, ...
            obs_hist(k,:) );
        
        % line 2 for this tile: model (synthetic observations)
        
        fprintf(ofp, format_string, ...
            tile_coord.tile_id(k), N_data(k),                        ...
            obspred_mean(k), obspred_var(k), obspred_skew(k),         ...
            obspred_min(k), obspred_max(k),                       ...
            edge_min, edge_max, ...
            obspred_hist(k,:) );
        
    end
    
    fclose(ofp);
end

%--------------------------------------------------------------------
% write z-score file, consistent with expected by LDAS
%
%    read(10) sclprm_tile_id
%    read(10) sclprm_lon
%    read(10) sclprm_lat
%    read(10) sclprm_mean_obs
%    read(10) sclprm_std_obs
%    read(10) sclprm_mean_mod
%    read(10) sclprm_std_mod
%
% NOTE: the z-score file name does not include months, species, or
% time of day, and the file is overwritten on every call.
% -------------------------------------------------------------------

fname_out = [ fname_stem, 'zscore.dat' ];

disp(' ')
disp(['writing ', fname_out])

% standard deviations; no-data (negative) variances are passed through
% unchanged because sqrt() of the no-data-value would be complex

obs_std              = obs_var;
ind_ok               = obs_var>=0;
obs_std(ind_ok)      = sqrt(obs_var(ind_ok));

obspred_std          = obspred_var;
ind_ok               = obspred_var>=0;
obspred_std(ind_ok)  = sqrt(obspred_var(ind_ok));

ofp = fopen(fname_out, 'w');

if ofp<0
    error('get_model_and_obs_stats_mod:fopenFailed', ...
        'Could not open %s for writing', fname_out);
end

% one line per variable, one column per tile

format_string = [ repmat('%10d', 1, N_tile), '\n' ];

fprintf(ofp, format_string, tile_coord.tile_id);

format_string = [ repmat('%13.5e', 1, N_tile), '\n' ];

fprintf(ofp, format_string, tile_coord.com_lon);
fprintf(ofp, format_string, tile_coord.com_lat);
fprintf(ofp, format_string, obs_mean);
fprintf(ofp, format_string, obs_std);
fprintf(ofp, format_string, obspred_mean);
fprintf(ofp, format_string, obspred_std);

fclose(ofp);

% echo some diagnostics

% figure out what fraction of data was thrown to satisfy N_data_min
%  and other quality control
% (relies on the no-data-value being negative and on valid obs means
%  being non-negative)

N_data_elim  = sum( N_data( obs_mean<0 ) );

N_data_total = sum( N_data );

disp(' ')
disp(['N_data_total = ', num2str(N_data_total)])
disp(['N_data_elim  = ', num2str(N_data_elim)])
disp(['fraction of lost data points = ', num2str(N_data_elim/N_data_total)])


% ==================== local helper functions ===========================

function [x_mean, x_var, x_skew] = local_moments( sum1, sum2, sum3, NN )

% Convert raw sums of x, x^2, x^3 (NN data points each) into mean,
% variance, and skewness.
%
% skew = ( mean(x.^3) - 3*mean(x.^2)*mean(x) + 2*mean(x)^3)/std(x)^3
%
% NOTE normalization of skewness is *not* from textbook
%      (result differs slightly from matlab function)

x_mean = sum1./NN;

x_skew = sum3./(NN-1) - 3*sum2./NN.*x_mean + 2*x_mean.^3;

x_var  = (sum2 - NN.*x_mean.^2)./(NN-1);

% abs() keeps the result real if roundoff makes the variance slightly
% negative (identical result for non-negative variance); the caller
% flags catchments with negative variance as no-data

x_skew = x_skew./(abs(x_var).^(1.5));


function full_vec = local_expand( vals, ind_incl, N_tile, fill_value )

% Put vals at positions ind_incl of an N_tile-by-1 column vector; all
% other elements are set to fill_value.

full_vec = fill_value*ones(N_tile,1);

full_vec(ind_incl) = vals;


% ==================== EOF ==============================================
