1math::statistics(n) Tcl Math Library math::statistics(n)
2
3
4
5______________________________________________________________________________
6
8 math::statistics - Basic statistical functions and procedures
9
11 package require Tcl 8.5
12
13 package require math::statistics 1
14
15 ::math::statistics::mean data
16
17 ::math::statistics::min data
18
19 ::math::statistics::max data
20
21 ::math::statistics::number data
22
23 ::math::statistics::stdev data
24
25 ::math::statistics::var data
26
27 ::math::statistics::pstdev data
28
29 ::math::statistics::pvar data
30
31 ::math::statistics::median data
32
33 ::math::statistics::basic-stats data
34
35 ::math::statistics::histogram limits values ?weights?
36
37 ::math::statistics::histogram-alt limits values ?weights?
38
39 ::math::statistics::corr data1 data2
40
41 ::math::statistics::interval-mean-stdev data confidence
42
43 ::math::statistics::t-test-mean data est_mean est_stdev alpha
44
45 ::math::statistics::test-normal data significance
46
47 ::math::statistics::lillieforsFit data
48
49 ::math::statistics::test-Duckworth list1 list2 significance
50
51 ::math::statistics::test-anova-F alpha args
52
53 ::math::statistics::test-Tukey-range alpha args
54
55 ::math::statistics::test-Dunnett alpha control args
56
57 ::math::statistics::quantiles data confidence
58
59 ::math::statistics::quantiles limits counts confidence
60
61 ::math::statistics::autocorr data
62
63 ::math::statistics::crosscorr data1 data2
64
65 ::math::statistics::mean-histogram-limits mean stdev number
66
67 ::math::statistics::minmax-histogram-limits min max number
68
69 ::math::statistics::linear-model xdata ydata intercept
70
71 ::math::statistics::linear-residuals xdata ydata intercept
72
73 ::math::statistics::test-2x2 n11 n21 n12 n22
74
75 ::math::statistics::print-2x2 n11 n21 n12 n22
76
77 ::math::statistics::control-xbar data ?nsamples?
78
79 ::math::statistics::control-Rchart data ?nsamples?
80
81 ::math::statistics::test-xbar control data
82
83 ::math::statistics::test-Rchart control data
84
85 ::math::statistics::test-Kruskal-Wallis confidence args
86
87 ::math::statistics::analyse-Kruskal-Wallis args
88
89 ::math::statistics::test-Levene groups
90
91 ::math::statistics::test-Brown-Forsythe groups
92
93 ::math::statistics::group-rank args
94
95 ::math::statistics::test-Wilcoxon sample_a sample_b
96
97 ::math::statistics::spearman-rank sample_a sample_b
98
99 ::math::statistics::spearman-rank-extended sample_a sample_b
100
101 ::math::statistics::kernel-density data opt -option value ...
102
103 ::math::statistics::bootstrap data sampleSize ?numberSamples?
104
105 ::math::statistics::wasserstein-distance prob1 prob2
106
107 ::math::statistics::kl-divergence prob1 prob2
108
109 ::math::statistics::logistic-model xdata ydata
110
111 ::math::statistics::logistic-probability coeffs x
112
113 ::math::statistics::tstat dof ?alpha?
114
115 ::math::statistics::mv-wls wt1 weights_and_values
116
117 ::math::statistics::mv-ols values
118
119 ::math::statistics::pdf-normal mean stdev value
120
121 ::math::statistics::pdf-lognormal mean stdev value
122
123 ::math::statistics::pdf-exponential mean value
124
125 ::math::statistics::pdf-uniform xmin xmax value
126
127 ::math::statistics::pdf-triangular xmin xmax value
128
129 ::math::statistics::pdf-symmetric-triangular xmin xmax value
130
131 ::math::statistics::pdf-gamma alpha beta value
132
133 ::math::statistics::pdf-poisson mu k
134
135 ::math::statistics::pdf-chisquare df value
136
137 ::math::statistics::pdf-student-t df value
138
139 ::math::statistics::pdf-gamma a b value
140
141 ::math::statistics::pdf-beta a b value
142
143 ::math::statistics::pdf-weibull scale shape value
144
145 ::math::statistics::pdf-gumbel location scale value
146
147 ::math::statistics::pdf-pareto scale shape value
148
149 ::math::statistics::pdf-cauchy location scale value
150
151 ::math::statistics::pdf-laplace location scale value
152
153 ::math::statistics::pdf-kumaraswamy a b value
154
155 ::math::statistics::pdf-negative-binomial r p value
156
157 ::math::statistics::cdf-normal mean stdev value
158
159 ::math::statistics::cdf-lognormal mean stdev value
160
161 ::math::statistics::cdf-exponential mean value
162
163 ::math::statistics::cdf-uniform xmin xmax value
164
165 ::math::statistics::cdf-triangular xmin xmax value
166
167 ::math::statistics::cdf-symmetric-triangular xmin xmax value
168
169 ::math::statistics::cdf-students-t degrees value
170
171 ::math::statistics::cdf-gamma alpha beta value
172
173 ::math::statistics::cdf-poisson mu k
174
175 ::math::statistics::cdf-beta a b value
176
177 ::math::statistics::cdf-weibull scale shape value
178
179 ::math::statistics::cdf-gumbel location scale value
180
181 ::math::statistics::cdf-pareto scale shape value
182
183 ::math::statistics::cdf-cauchy location scale value
184
185 ::math::statistics::cdf-F nf1 nf2 value
186
187 ::math::statistics::cdf-laplace location scale value
188
189 ::math::statistics::cdf-kumaraswamy a b value
190
191 ::math::statistics::cdf-negative-binomial r p value
192
193 ::math::statistics::empirical-distribution values
194
195 ::math::statistics::random-normal mean stdev number
196
197 ::math::statistics::random-lognormal mean stdev number
198
199 ::math::statistics::random-exponential mean number
200
201 ::math::statistics::random-uniform xmin xmax number
202
203 ::math::statistics::random-triangular xmin xmax number
204
205 ::math::statistics::random-symmetric-triangular xmin xmax number
206
207 ::math::statistics::random-gamma alpha beta number
208
209 ::math::statistics::random-poisson mu number
210
211 ::math::statistics::random-chisquare df number
212
213 ::math::statistics::random-student-t df number
214
215 ::math::statistics::random-beta a b number
216
217 ::math::statistics::random-weibull scale shape number
218
219 ::math::statistics::random-gumbel location scale number
220
221 ::math::statistics::random-pareto scale shape number
222
223 ::math::statistics::random-cauchy location scale number
224
225 ::math::statistics::random-laplace location scale number
226
227 ::math::statistics::random-kumaraswamy a b number
228
229 ::math::statistics::random-negative-binomial r p number
230
231 ::math::statistics::histogram-uniform xmin xmax limits number
232
233 ::math::statistics::incompleteGamma x p ?tol?
234
235 ::math::statistics::incompleteBeta a b x ?tol?
236
237 ::math::statistics::estimate-pareto values
238
239 ::math::statistics::estimate-exponential values
240
241 ::math::statistics::estimate-laplace values
242
243 ::math::statistics::estimate-negative-binomial r values
244
245 ::math::statistics::filter varname data expression
246
247 ::math::statistics::map varname data expression
248
249 ::math::statistics::samplescount varname list expression
250
251 ::math::statistics::subdivide
252
253 ::math::statistics::plot-scale canvas xmin xmax ymin ymax
254
255 ::math::statistics::plot-xydata canvas xdata ydata tag
256
257 ::math::statistics::plot-xyline canvas xdata ydata tag
258
259 ::math::statistics::plot-tdata canvas tdata tag
260
261 ::math::statistics::plot-tline canvas tdata tag
262
263 ::math::statistics::plot-histogram canvas counts limits tag
264
265______________________________________________________________________________
266
268 The math::statistics package contains functions and procedures for ba‐
269 sic statistical data analysis, such as:
270
271 • Descriptive statistical parameters (mean, minimum, maximum,
272 standard deviation)
273
274 • Estimates of the distribution in the form of histograms and
275 quantiles
276
277 • Basic testing of hypotheses
278
279 • Probability density and cumulative distribution functions
280
281 It is meant to help in developing data analysis applications or doing
282 ad hoc data analysis; it is not in itself a full application, nor is it
283 intended to rival full (non-)commercial statistical packages.
284
285 The purpose of this document is to describe the implemented procedures
286 and provide some examples of their usage. As there is ample literature
287 on the algorithms involved, we refer to relevant text books for more
288 explanations. The package contains a fairly large number of public
289 procedures. They can be divided into four sets: general proce‐
290 dures, procedures that deal with specific statistical distributions,
291 list procedures to select or transform data and simple plotting proce‐
292 dures (these require Tk). Note: The data that need to be analyzed are
293 always contained in a simple list. Missing values are represented as
294 empty list elements. Note: With version 1.0.1 a mistake in the procs
295 pdf-lognormal, cdf-lognormal and random-lognormal has been corrected.
296 In previous versions the argument for the standard deviation was actu‐
297 ally used as if it was the variance.
298
300 The general statistical procedures are:
301
302 ::math::statistics::mean data
303 Determine the mean value of the given list of data.
304
305 list data
306 - List of data
307
308
309 ::math::statistics::min data
310 Determine the minimum value of the given list of data.
311
312 list data
313 - List of data
314
315
316 ::math::statistics::max data
317 Determine the maximum value of the given list of data.
318
319 list data
320 - List of data
321
322
323 ::math::statistics::number data
324 Determine the number of non-missing data in the given list
325
326 list data
327 - List of data
328
329
330 ::math::statistics::stdev data
331 Determine the sample standard deviation of the data in the given
332 list
333
334 list data
335 - List of data
336
337
338 ::math::statistics::var data
339 Determine the sample variance of the data in the given list
340
341 list data
342 - List of data
343
344
345 ::math::statistics::pstdev data
346 Determine the population standard deviation of the data in the
347 given list
348
349 list data
350 - List of data
351
352
353 ::math::statistics::pvar data
354 Determine the population variance of the data in the given list
355
356 list data
357 - List of data
358
359
360 ::math::statistics::median data
361 Determine the median of the data in the given list (Note that
362 this requires sorting the data, which may be a costly operation)
363
364 list data
365 - List of data
366
367
368 ::math::statistics::basic-stats data
369 Determine a list of all the descriptive parameters: mean, mini‐
370 mum, maximum, number of data, sample standard deviation, sample
371 variance, population standard deviation and population variance.
372
373 (This routine is called whenever any or all of the basic sta‐
374 tistical parameters are required. Hence all calculations are
375 done and the relevant values are returned.)
376
377 list data
378 - List of data
379
380
381 ::math::statistics::histogram limits values ?weights?
382 Determine histogram information for the given list of data. Re‐
383 turns a list consisting of the number of values that fall into
384 each interval. (The first interval consists of all values lower
385 than the first limit, the last interval consists of all values
386 greater than the last limit. There is one more interval than
387 there are limits.)
388
389 Optionally, you can use weights to influence the histogram.
390
391 list limits
392 - List of upper limits (in ascending order) for the in‐
393 tervals of the histogram.
394
395 list values
396 - List of data
397
398 list weights
399 - List of weights, one weight per value
400
401
402 ::math::statistics::histogram-alt limits values ?weights?
403 Alternative implementation of the histogram procedure: the open
404 end of the intervals is at the lower bound instead of the upper
405 bound.
406
407 list limits
408 - List of upper limits (in ascending order) for the in‐
409 tervals of the histogram.
410
411 list values
412 - List of data
413
414 list weights
415 - List of weights, one weight per value
416
417
418 ::math::statistics::corr data1 data2
419 Determine the correlation coefficient between two sets of data.
420
421 list data1
422 - First list of data
423
424 list data2
425 - Second list of data
426
427
428 ::math::statistics::interval-mean-stdev data confidence
429 Return the interval containing the mean value and one containing
430 the standard deviation with a certain level of confidence (as‐
431 suming a normal distribution)
432
433 list data
434 - List of raw data values (small sample)
435
436 float confidence
437 - Confidence level (0.95 or 0.99 for instance)
438
439
440 ::math::statistics::t-test-mean data est_mean est_stdev alpha
441 Test whether the mean value of a sample is in accordance with
442 the estimated normal distribution with a certain probability.
443 Returns 1 if the test succeeds or 0 if the mean is unlikely to
444 fit the given distribution.
445
446 list data
447 - List of raw data values (small sample)
448
449 float est_mean
450 - Estimated mean of the distribution
451
452 float est_stdev
453 - Estimated stdev of the distribution
454
455 float alpha
456 - Probability level (0.95 or 0.99 for instance)
457
458
459 ::math::statistics::test-normal data significance
460 Test whether the given data follow a normal distribution with a
461 certain level of significance. Returns 1 if the data are nor‐
462 mally distributed within the level of significance, returns 0 if
463 not. The underlying test is the Lilliefors test. Smaller values
464 of the significance mean a stricter testing.
465
466 list data
467 - List of raw data values
468
469 float significance
470 - Significance level (one of 0.01, 0.05, 0.10, 0.15 or
471 0.20). For compatibility reasons the values "1-signifi‐
472 cance", 0.80, 0.85, 0.90, 0.95 or 0.99 are also accepted.
473
474 Compatibility issue: the original implementation and documentation used
475 the term "confidence" and used a value 1-significance (see ticket
476 2812473fff). This has been corrected as of version 0.9.3.
477
478
479 ::math::statistics::lillieforsFit data
480 Returns the goodness of fit to a normal distribution according
481 to Lilliefors. The higher the number, the more likely the data
482 are indeed normally distributed. The test requires at least five
483 data points.
484
485 list data
486 - List of raw data values
487
488
489 ::math::statistics::test-Duckworth list1 list2 significance
490 Determine if two data sets have the same median according to the
491 Tukey-Duckworth test. The procedure returns 0 if the medians
492 are unequal, 1 if they are equal, -1 if the test can not be con‐
493 ducted (the smallest value must be in a different set than the
494 greatest value).
503
504 list list1
505 - First list of data
506
507 list list2
508 - Second list of data
509
510 float significance
511 - Significance level (either 0.05, 0.01 or 0.001)
512
513
514 ::math::statistics::test-anova-F alpha args
515 Determine if two or more groups with normally distributed data
516 have the same means. The procedure returns 0 if the means are
517 likely unequal, 1 if they are likely equal. This is a one-way
518 ANOVA test. The groups may also be stored in a nested list:
525
526
527 test-anova-F 0.05 $A $B $C
528 #
529 # Or equivalently:
530 #
531 test-anova-F 0.05 [list $A $B $C]
532
533
534 float alpha
535 - Significance level
536
537 list args
538 - Two or more groups of data to be checked
539
540
541 ::math::statistics::test-Tukey-range alpha args
542 Determine if two or more groups with normally distributed data
543 have the same means, using Tukey's range test. It is complemen‐
544 tary to the ANOVA test. The procedure returns a list of the
545 comparison results for each pair of groups. Each element of this
546 list contains: the index of the first group and that of the sec‐
547 ond group, whether the means are likely to be different (1) or
548 not (0) and the confidence interval the conclusion is based on.
549 The groups may also be stored in a nested list, just as with the
550 ANOVA test.
551
552 float alpha
553 - Significance level - either 0.05 or 0.01
554
555 list args
556 - Two or more groups of data to be checked
557
558
559 ::math::statistics::test-Dunnett alpha control args
560 Determine if one or more groups with normally distributed data
561 have the same means as the group of control data, using Dun‐
562 nett's test. It is complementary to the ANOVA test. The proce‐
563 dure returns a list of the comparison results for each group
564 with the control group. Each element of this list contains:
565 whether the means are likely to be different (1) or not (0) and
566 the confidence interval the conclusion is based on. The groups
567 may also be stored in a nested list, just as with the ANOVA
568 test.
569
570 Note: some care is required if there is only one group to com‐
571 pare the control with:
572
573
574 test-Dunnett 0.05 $control [list $A]
575
576
577 Otherwise the group A is split up into groups of one element -
578 this is due to an ambiguity.
579
580 float alpha
581 - Significance level - either 0.05 or 0.01
582
583 list args
584 - One or more groups of data to be checked
585
586
587 ::math::statistics::quantiles data confidence
588 Return the quantiles for a given set of data
589
590 list data
591 - List of raw data values
592
593
594 float confidence
595 - Confidence level (0.95 or 0.99 for instance) or a list
596 of confidence levels.
597
598
599
600 ::math::statistics::quantiles limits counts confidence
601 Return the quantiles based on histogram information (alternative
602 to the call with two arguments)
603
604 list limits
605 - List of upper limits from histogram
606
607 list counts
608 - List of counts for each interval in histogram
609
610 float confidence
611 - Confidence level (0.95 or 0.99 for instance) or a list
612 of confidence levels.
613
614
615 ::math::statistics::autocorr data
616 Return the autocorrelation function as a list of values (assum‐
617 ing equidistance between samples, about 1/2 of the number of raw
618 data)
619
620 The correlation is determined in such a way that the first value
621 is always 1 and all others are equal to or smaller than 1. The
622 number of values involved will diminish as the "time" (the index
623 in the list of returned values) increases
624
625 list data
626 - Raw data for which the autocorrelation must be deter‐
627 mined
628
629
630 ::math::statistics::crosscorr data1 data2
631 Return the cross-correlation function as a list of values (as‐
632 suming equidistance between samples, about 1/2 of the number of
633 raw data)
634
635 The correlation is determined in such a way that the values can
636 never exceed 1 in magnitude. The number of values involved will
637 diminish as the "time" (the index in the list of returned val‐
638 ues) increases.
639
640 list data1
641 - First list of data
642
643 list data2
644 - Second list of data
645
646
647 ::math::statistics::mean-histogram-limits mean stdev number
648 Determine reasonable limits based on mean and standard deviation
649 for a histogram. Convenience function - the result is suitable
650 for the histogram function.
651
652 float mean
653 - Mean of the data
654
655 float stdev
656 - Standard deviation
657
658 int number
659 - Number of limits to generate (defaults to 8)
660
661
662 ::math::statistics::minmax-histogram-limits min max number
663 Determine reasonable limits based on a minimum and maximum for a
664 histogram
665
666 Convenience function - the result is suitable for the histogram
667 function.
668
669 float min
670 - Expected minimum
671
672 float max
673 - Expected maximum
674
675 int number
676 - Number of limits to generate (defaults to 8)
677
678
679 ::math::statistics::linear-model xdata ydata intercept
680 Determine the coefficients for a linear regression between two
681 series of data (the model: Y = A + B*X). Returns a list of pa‐
682 rameters describing the fit
683
684 list xdata
685 - List of independent data
686
687 list ydata
688 - List of dependent data to be fitted
689
690 boolean intercept
691 - (Optional) compute the intercept (1, default) or fit to
692 a line through the origin (0)
693
694 The result consists of the following list:
695
696 • (Estimate of) Intercept A
697
698 • (Estimate of) Slope B
699
700 • Standard deviation of Y relative to fit
701
702 • Correlation coefficient R2
703
704 • Number of degrees of freedom df
705
706 • Standard error of the intercept A
707
708 • Significance level of A
709
710 • Standard error of the slope B
711
712 • Significance level of B
713
714
715 ::math::statistics::linear-residuals xdata ydata intercept
716 Determine the difference between actual data and predicted from
717 the linear model.
718
719 Returns a list of the differences between the actual data and
720 the predicted values.
721
722 list xdata
723 - List of independent data
724
725 list ydata
726 - List of dependent data to be fitted
727
728 boolean intercept
729 - (Optional) compute the intercept (1, default) or fit to
730 a line through the origin (0)
731
732
733 ::math::statistics::test-2x2 n11 n21 n12 n22
734 Determine if two sets of samples, each from a binomial distribu‐
735 tion, differ significantly or not (implying a different parame‐
736 ter).
737
738 Returns the "chi-square" value, which can be used to deter‐
739 mine the significance.
740
741 int n11
742 - Number of outcomes with the first value from the first
743 sample.
744
745 int n21
746 - Number of outcomes with the first value from the second
747 sample.
748
749 int n12
750 - Number of outcomes with the second value from the first
751 sample.
752
753 int n22
754 - Number of outcomes with the second value from the sec‐
755 ond sample.
756
757
758 ::math::statistics::print-2x2 n11 n21 n12 n22
759 Determine if two sets of samples, each from a binomial distribu‐
760 tion, differ significantly or not (implying a different parame‐
761 ter).
762
763 Returns a short report, useful in an interactive session.
764
765 int n11
766 - Number of outcomes with the first value from the first
767 sample.
768
769 int n21
770 - Number of outcomes with the first value from the second
771 sample.
772
773 int n12
774 - Number of outcomes with the second value from the first
775 sample.
776
777 int n22
778 - Number of outcomes with the second value from the sec‐
779 ond sample.
780
781
782 ::math::statistics::control-xbar data ?nsamples?
783 Determine the control limits for an xbar chart. The number of
784 data in each subsample defaults to 4. At least 20 subsamples are
785 required.
786
787 Returns the mean, the lower limit, the upper limit and the num‐
788 ber of data per subsample.
789
790 list data
791 - List of observed data
792
793 int nsamples
794 - Number of data per subsample
795
796
797 ::math::statistics::control-Rchart data ?nsamples?
798 Determine the control limits for an R chart. The number of data
799 in each subsample (nsamples) defaults to 4. At least 20 subsam‐
800 ples are required.
801
802 Returns the mean range, the lower limit, the upper limit and the
803 number of data per subsample.
804
805 list data
806 - List of observed data
807
808 int nsamples
809 - Number of data per subsample
810
811
812 ::math::statistics::test-xbar control data
813 Determine if the data exceed the control limits for the xbar
814 chart.
815
816 Returns a list of subsamples (their indices) that indeed violate
817 the limits.
818
819 list control
820 - Control limits as returned by the "control-xbar" proce‐
821 dure
822
823 list data
824 - List of observed data
825
826
827 ::math::statistics::test-Rchart control data
828 Determine if the data exceed the control limits for the R chart.
829
830 Returns a list of subsamples (their indices) that indeed violate
831 the limits.
832
833 list control
834 - Control limits as returned by the "control-Rchart" pro‐
835 cedure
836
837 list data
838 - List of observed data
839
840
841 ::math::statistics::test-Kruskal-Wallis confidence args
842 Check if the population medians of two or more groups are equal
843 with a given confidence level, using the Kruskal-Wallis test.
844
845 float confidence
846 - Confidence level to be used (0-1)
847
848 list args
849 - Two or more lists of data
850
851
852 ::math::statistics::analyse-Kruskal-Wallis args
853 Compute the statistical parameters for the Kruskal-Wallis test.
854 Returns the Kruskal-Wallis statistic and the probability that
855 that value would occur assuming the medians of the populations
856 are equal.
857
858 list args
859 - Two or more lists of data
860
861
862 ::math::statistics::test-Levene groups
863 Compute the Levene statistic to determine if groups of data have
864 the same variance (are homoscedastic) or not. The data are or‐
865 ganised in groups. This version uses the mean of the data as the
866 measure to determine the deviations. The statistic is equivalent
867 to an F statistic with degrees of freedom k-1 and N-k, k being
868 the number of groups and N the total number of data.
869
870 list groups
871 - List of groups of data
872
873
874 ::math::statistics::test-Brown-Forsythe groups
875 Compute the Brown-Forsythe statistic to determine if groups of
876 data have the same variance (are homoscedastic) or not. Like the
877 Levene test, but this version uses the median of the data.
878
879 list groups
880 - List of groups of data
881
882
883 ::math::statistics::group-rank args
884 Rank the groups of data with respect to the complete set. Re‐
885 turns a list consisting of the group ID, the value and the rank
886 (possibly a rational number, in case of ties) for each data
887 item.
888
889 list args
890 - Two or more lists of data
891
892
893 ::math::statistics::test-Wilcoxon sample_a sample_b
894 Compute the Wilcoxon test statistic to determine if two samples
895 have the same median or not. (The statistic can be regarded as
896 standard normal, if the sample sizes are both larger than 10.)
897 Returns the value of this statistic.
898
899 list sample_a
900 - List of data comprising the first sample
901
902 list sample_b
903 - List of data comprising the second sample
904
905
906 ::math::statistics::spearman-rank sample_a sample_b
907 Return the Spearman rank correlation as an alternative to the
908 ordinary (Pearson's) correlation coefficient. The two samples
909 should have the same number of data.
910
911 list sample_a
912 - First list of data
913
914 list sample_b
915 - Second list of data
916
917
918 ::math::statistics::spearman-rank-extended sample_a sample_b
919 Return the Spearman rank correlation as an alternative to the
920 ordinary (Pearson's) correlation coefficient as well as addi‐
921 tional data. The two samples should have the same number of
922 data. The procedure returns the correlation coefficient, the
923 number of data pairs used and the z-score, an approximately
924 standard normal statistic, indicating the significance of the
925 correlation.
926
927 list sample_a
928 - First list of data
929
930 list sample_b
931 - Second list of data
932
933 ::math::statistics::kernel-density data opt -option value ...
934 Return the density function based on kernel density estimation.
935 The procedure is controlled by a small set of options, each of
936 which is given a reasonable default.
937
938 The return value consists of three lists: the centres of the
939 bins, the associated probability density and a list of computa‐
940 tional parameters (begin and end of the interval, mean and stan‐
941 dard deviation and the used bandwidth). The computational param‐
942 eters can be used for further analysis.
943
944 list data
945 - The data to be examined
946
947 list args
948 - Option-value pairs:
949
950 -weights weights
951 Per data point the weight (default: 1 for all
952 data)
953
954 -bandwidth value
955 Bandwidth to be used for the estimation (default:
956 determined from standard deviation)
957
958 -number value
959 Number of bins to be returned (default: 100)
960
961 -interval {begin end}
962 Begin and end of the interval for which the den‐
963 sity is returned (default: mean +/- 3*standard de‐
964 viation)
965
966 -kernel function
967 Kernel to be used (One of: gaussian, cosine,
968 epanechnikov, uniform, triangular, biweight, lo‐
969 gistic; default: gaussian)
970
971 ::math::statistics::bootstrap data sampleSize ?numberSamples?
972 Create a subsample or subsamples from a given list of data. The
973 data in the samples are chosen from this list - multiples may
974 occur. If there is only one subsample, the sample itself is re‐
975 turned (as a list of "sampleSize" values), otherwise a list of
976 samples is returned.
977
978 list data
979 List of values to choose from
980
981 int sampleSize
982 Number of values per sample
983
984 int numberSamples
985 Number of samples (default: 1)
986
987 ::math::statistics::wasserstein-distance prob1 prob2
988 Compute the Wasserstein distance or earth mover's distance for
989 two equidistantly spaced histograms or probability densities. The
990 histograms need not be normalised to sum to one, but they
991 must have the same number of entries.
992
993 Note: the histograms are assumed to be based on the same
994 equidistant intervals. As the bounds are not passed, the value
995 is expressed in the length of the intervals.
996
997 list prob1
998 List of values for the first histogram/probability den‐
999 sity
1000
1001 list prob2
1002 List of values for the second histogram/probability den‐
1003 sity
1004
1005 ::math::statistics::kl-divergence prob1 prob2
1006 Compute the Kullback-Leibler (KL) divergence for two equidis‐
1007 tantly spaced histograms or probability densities. The his‐
1008 tograms need not be normalised to sum to one, but they must
1009 have the same number of entries.
1010
1011 Note: the histograms are assumed to be based on the same
1012 equidistant intervals. As the bounds are not passed, the value
1013 is expressed in the length of the intervals.
1014
1015 Note also that the KL divergence is not symmetric and that the
1016 second histogram should not contain zeroes in places where the
1017 first histogram has non-zero values.
1018
1019 list prob1
1020 List of values for the first histogram/probability den‐
1021 sity
1022
1023 list prob2
1024 List of values for the second histogram/probability den‐
1025 sity
1026
1027 ::math::statistics::logistic-model xdata ydata
1028 Estimate the coefficients of the logistic model that fits the
1029 data best. The data consist of independent x-values and the out‐
1030 come 0 or 1 for each of the x-values. The result can be used to
1031 estimate the probability that a certain x-value gives 1.
1032
1033 list xdata
1034 List of values for which the success (1) or failure (0)
1035 is known
1036
1037 list ydata
1038 List of successes or failures corresponding to each value
1039 in xdata.
1040
1041 ::math::statistics::logistic-probability coeffs x
1042 Calculate the probability of success for the value x given the
1043 coefficients of the logistic model.
1044
1045 list coeffs
1046 List of coefficients as determined by the logistic-model
1047 command
1048
1049 float x
1050 X-value for which the probability needs to be determined
1051
1053 Besides the linear regression with a single independent variable, the
1054 statistics package provides two procedures for doing ordinary least
1055 squares (OLS) and weighted least squares (WLS) linear regression with
1056 several variables. They were written by Eric Kemp-Benedict.
1057
1058 In addition to these two, it provides a procedure (tstat) for calculat‐
1059 ing the value of the t-statistic for the specified number of degrees of
1060 freedom that is required to demonstrate a given level of significance.
1061
1062 Note: These procedures depend on the math::linearalgebra package.
1063
1064 Description of the procedures
1065
1066 ::math::statistics::tstat dof ?alpha?
1067 Returns the value of the t-distribution t* satisfying
1068
1069
1070 P(t*) = 1 - alpha/2
1071 P(-t*) = alpha/2
1072
1073
1074 for the number of degrees of freedom dof.
1075
1076 Given a sample of normally-distributed data x, with an estimate
1077 xbar for the mean and sbar for the standard deviation, the alpha
1078 confidence interval for the estimate of the mean can be calcu‐
1079 lated as
1080
1081
1082 ( xbar - t* sbar , xbar + t* sbar)
1083
1084
1085 The return values from this procedure can be compared to an es‐
1086 timated t-statistic to determine whether the estimated value of
1087 a parameter is significantly different from zero at the given
1088 confidence level.
1089
1090 int dof
1091 Number of degrees of freedom
1092
1093 float alpha
1094 Confidence level of the t-distribution. Defaults to 0.05.
1095
1096
1097 ::math::statistics::mv-wls wt1 weights_and_values
1098 Carries out a weighted least squares linear regression for the
1099 data points provided, with weights assigned to each point.
1100
1101 The linear model is of the form
1102
1103
1104 y = b0 + b1 * x1 + b2 * x2 + ... + bN * xN + error
1105
1106
1107 and each point satisfies
1108
1109
1110 yi = b0 + b1 * xi1 + b2 * xi2 + ... + bN * xiN + Residual_i
1111
1112
1113 The procedure returns a list with the following elements:
1114
1115 • The r-squared statistic
1116
1117 • The adjusted r-squared statistic
1118
1119 • A list containing the estimated coefficients b1, ... bN,
1120 b0 (The constant b0 comes last in the list.)
1121
1122 • A list containing the standard errors of the coefficients
1123
1124 • A list containing the 95% confidence bounds of the coef‐
1125 ficients, with each set of bounds returned as a list with
1126 two values
1127
1128 Arguments:
1129
1130 list weights_and_values
1131 A list consisting of: the weight for the first observa‐
1132 tion, the data for the first observation (as a sublist),
1133 the weight for the second observation, the data for the
1134 second observation (as a sublist) and so on. The sublists
1135 of data are organised as lists of the value of the depen‐
1136 dent variable y and the independent variables x1, x2 to xN.
1137
1138
1139 ::math::statistics::mv-ols values
1140 Carries out an ordinary least squares linear regression for the
1141 data points provided.
1142
1143 This procedure simply calls ::math::statistics::mv-wls with the
1144 weights set to 1.0, and returns the same information.
1145
1146 Example of the use:
1147
1148
1149 # Store the value of the unicode value for the "+/-" character
1150 set pm "\u00B1"
1151
1152 # Provide some data
1153 set data {{ -.67 14.18 60.03 -7.5 }
1154 { 36.97 15.52 34.24 14.61 }
1155 {-29.57 21.85 83.36 -7. }
1156 {-16.9 11.79 51.67 -6.56 }
1157 { 14.09 16.24 36.97 -12.84}
1158 { 31.52 20.93 45.99 -25.4 }
1159 { 24.05 20.69 50.27 17.27}
1160 { 22.23 16.91 45.07 -4.3 }
1161 { 40.79 20.49 38.92 -.73 }
1162 {-10.35 17.24 58.77 18.78}}
1163
1164 # Call the ols routine
1165 set results [::math::statistics::mv-ols $data]
1166
1167 # Pretty-print the results
1168 puts "R-squared: [lindex $results 0]"
1169 puts "Adj R-squared: [lindex $results 1]"
1170 puts "Coefficients $pm s.e. -- \[95% confidence interval\]:"
1171 foreach val [lindex $results 2] se [lindex $results 3] bounds [lindex $results 4] {
1172 set lb [lindex $bounds 0]
1173 set ub [lindex $bounds 1]
1174 puts " $val $pm $se -- \[$lb to $ub\]"
1175 }
1176
1177
1179 In the literature a large number of probability distributions can be
1180 found. The statistics package supports:
1181
1182 • The normal or Gaussian distribution as well as the log-normal
1183 distribution
1184
1185 • The uniform distribution - equal probability for all data within
1186 a given interval
1187
1188 • The exponential distribution - useful as a model for certain ex‐
1189 treme-value distributions.
1190
1191 • The gamma distribution - based on the incomplete Gamma integral
1192
1193 • The beta distribution
1194
1195 • The chi-square distribution
1196
1197 • The Student's t distribution
1198
1199 • The Poisson distribution
1200
1201 • The Pareto distribution
1202
1203 • The Gumbel distribution
1204
1205 • The Weibull distribution
1206
1207 • The Cauchy distribution
1208
1209 • The F distribution (only the cumulative density function)
1210
1211 • PM - binomial.
1212
1213 In principle for each distribution one has procedures for:
1214
1215 • The probability density (pdf-*)
1216
1217 • The cumulative density (cdf-*)
1218
1219 • Quantiles for the given distribution (quantiles-*)
1220
1221 • Histograms for the given distribution (histogram-*)
1222
1223 • List of random values with the given distribution (random-*)
1224
1225 The following procedures have been implemented:
1226
1227 ::math::statistics::pdf-normal mean stdev value
1228 Return the probability of a given value for a normal distribu‐
1229 tion with given mean and standard deviation.
1230
1231 float mean
1232 - Mean value of the distribution
1233
1234 float stdev
1235 - Standard deviation of the distribution
1236
1237 float value
1238 - Value for which the probability is required
1239
1240
1241 ::math::statistics::pdf-lognormal mean stdev value
1242 Return the probability of a given value for a log-normal distri‐
1243 bution with given mean and standard deviation.
1244
1245 float mean
1246 - Mean value of the distribution
1247
1248 float stdev
1249 - Standard deviation of the distribution
1250
1251 float value
1252 - Value for which the probability is required
1253
1254
1255 ::math::statistics::pdf-exponential mean value
1256 Return the probability of a given value for an exponential dis‐
1257 tribution with given mean.
1258
1259 float mean
1260 - Mean value of the distribution
1261
1262 float value
1263 - Value for which the probability is required
1264
1265
1266 ::math::statistics::pdf-uniform xmin xmax value
1267 Return the probability of a given value for a uniform distribu‐
1268 tion with given extremes.
1269
1270 float xmin
1271 - Minimum value of the distribution
1272
1273 float xmax
1274 - Maximum value of the distribution
1275
1276 float value
1277 - Value for which the probability is required
1278
1279
1280 ::math::statistics::pdf-triangular xmin xmax value
1281 Return the probability of a given value for a triangular distri‐
1282 bution with given extremes. If the argument xmin is lower than
1283 the argument xmax, then smaller values have higher probability
1284 and vice versa. In the first case the probability density func‐
1285 tion is of the form f(x) = 2(1-x) and in the other case it is of
1286 the form f(x) = 2x.
1287
1288 float xmin
1289 - Minimum value of the distribution
1290
1291 float xmax
1292 - Maximum value of the distribution
1293
1294 float value
1295 - Value for which the probability is required
1296
1297
1298 ::math::statistics::pdf-symmetric-triangular xmin xmax value
1299 Return the probability of a given value for a symmetric triangu‐
1300 lar distribution with given extremes.
1301
1302 float xmin
1303 - Minimum value of the distribution
1304
1305 float xmax
1306 - Maximum value of the distribution
1307
1308 float value
1309 - Value for which the probability is required
1310
1311
1312 ::math::statistics::pdf-gamma alpha beta value
1313 Return the probability of a given value for a Gamma distribution
1314 with given shape and rate parameters
1315
1316 float alpha
1317 - Shape parameter
1318
1319 float beta
1320 - Rate parameter
1321
1322 float value
1323 - Value for which the probability is required
1324
1325
1326 ::math::statistics::pdf-poisson mu k
1327 Return the probability of a given number of occurrences in the
1328 same interval (k) for a Poisson distribution with given mean
1329 (mu)
1330
1331 float mu
1332 - Mean number of occurrences
1333
1334 int k - Number of occurrences
1335
1336
1337 ::math::statistics::pdf-chisquare df value
1338 Return the probability of a given value for a chi square distri‐
1339 bution with given degrees of freedom
1340
1341 float df
1342 - Degrees of freedom
1343
1344 float value
1345 - Value for which the probability is required
1346
1347
1348 ::math::statistics::pdf-student-t df value
1349 Return the probability of a given value for a Student's t dis‐
1350 tribution with given degrees of freedom
1351
1352 float df
1353 - Degrees of freedom
1354
1355 float value
1356 - Value for which the probability is required
1357
1358
1359 ::math::statistics::pdf-gamma a b value
1360 Return the probability of a given value for a Gamma distribution
1361 with given shape and rate parameters
1362
1363 float a
1364 - Shape parameter
1365
1366 float b
1367 - Rate parameter
1368
1369 float value
1370 - Value for which the probability is required
1371
1372
1373 ::math::statistics::pdf-beta a b value
1374 Return the probability of a given value for a Beta distribution
1375 with given shape parameters
1376
1377 float a
1378 - First shape parameter
1379
1380 float b
1381 - Second shape parameter
1382
1383 float value
1384 - Value for which the probability is required
1385
1386
1387 ::math::statistics::pdf-weibull scale shape value
1388 Return the probability of a given value for a Weibull distribu‐
1389 tion with given scale and shape parameters
1390
1391 float scale
1392 - Scale parameter
1393
1394 float shape
1395 - Shape parameter
1396
1397 float value
1398 - Value for which the probability is required
1399
1400
1401 ::math::statistics::pdf-gumbel location scale value
1402 Return the probability of a given value for a Gumbel distribu‐
1403 tion with given location and scale parameters
1404
1405 float location
1406 - Location parameter
1407
1408 float scale
1409 - Scale parameter
1410
1411 float value
1412 - Value for which the probability is required
1413
1414
1415 ::math::statistics::pdf-pareto scale shape value
1416 Return the probability of a given value for a Pareto distribu‐
1417 tion with given scale and shape parameters
1418
1419 float scale
1420 - Scale parameter
1421
1422 float shape
1423 - Shape parameter
1424
1425 float value
1426 - Value for which the probability is required
1427
1428
1429 ::math::statistics::pdf-cauchy location scale value
1430 Return the probability of a given value for a Cauchy distribu‐
1431 tion with given location and scale parameters. Note that the
1432 Cauchy distribution has no finite higher-order moments.
1433
1434 float location
1435 - Location parameter
1436
1437 float scale
1438 - Scale parameter
1439
1440 float value
1441 - Value for which the probability is required
1442
1443
1444 ::math::statistics::pdf-laplace location scale value
1445 Return the probability of a given value for a Laplace distribu‐
1446 tion with given location and scale parameters. The Laplace dis‐
1447 tribution consists of two exponential functions, is peaked and
1448 has heavier tails than the normal distribution.
1449
1450 float location
1451 - Location parameter (mean)
1452
1453 float scale
1454 - Scale parameter
1455
1456 float value
1457 - Value for which the probability is required
1458
1459
1460 ::math::statistics::pdf-kumaraswamy a b value
1461 Return the probability of a given value for a Kumaraswamy dis‐
1462 tribution with given parameters a and b. The Kumaraswamy distri‐
1463 bution is related to the Beta distribution, but has a tractable
1464 cumulative distribution function.
1465
1466 float a
1467 - Parameter a
1468
1469 float b
1470 - Parameter b
1471
1472 float value
1473 - Value for which the probability is required
1474
1475
1476 ::math::statistics::pdf-negative-binomial r p value
1477 Return the probability of a given value for a negative binomial
1478 distribution with an allowed number of failures and the proba‐
1479 bility of success.
1480
1481 int r - Allowed number of failures (at least 1)
1482
1483 float p
1484 - Probability of success
1485
1486 int value
1487 - Number of successes for which the probability is to be
1488 returned
1489
1490
1491 ::math::statistics::cdf-normal mean stdev value
1492 Return the cumulative probability of a given value for a normal
1493 distribution with given mean and standard deviation, that is the
1494 probability for values up to the given one.
1495
1496 float mean
1497 - Mean value of the distribution
1498
1499 float stdev
1500 - Standard deviation of the distribution
1501
1502 float value
1503 - Value for which the probability is required
1504
1505
1506 ::math::statistics::cdf-lognormal mean stdev value
1507 Return the cumulative probability of a given value for a log-
1508 normal distribution with given mean and standard deviation, that
1509 is the probability for values up to the given one.
1510
1511 float mean
1512 - Mean value of the distribution
1513
1514 float stdev
1515 - Standard deviation of the distribution
1516
1517 float value
1518 - Value for which the probability is required
1519
1520
1521 ::math::statistics::cdf-exponential mean value
1522 Return the cumulative probability of a given value for an expo‐
1523 nential distribution with given mean.
1524
1525 float mean
1526 - Mean value of the distribution
1527
1528 float value
1529 - Value for which the probability is required
1530
1531
1532 ::math::statistics::cdf-uniform xmin xmax value
1533 Return the cumulative probability of a given value for a uniform
1534 distribution with given extremes.
1535
1536 float xmin
1537 - Minimum value of the distribution
1538
1539 float xmax
1540 - Maximum value of the distribution
1541
1542 float value
1543 - Value for which the probability is required
1544
1545
1546 ::math::statistics::cdf-triangular xmin xmax value
1547 Return the cumulative probability of a given value for a trian‐
1548 gular distribution with given extremes. If xmin < xmax, then
1549 lower values have a higher probability and vice versa, see also
1550 pdf-triangular.
1551
1552 float xmin
1553 - Minimum value of the distribution
1554
1555 float xmax
1556 - Maximum value of the distribution
1557
1558 float value
1559 - Value for which the probability is required
1560
1561
1562 ::math::statistics::cdf-symmetric-triangular xmin xmax value
1563 Return the cumulative probability of a given value for a symmet‐
1564 ric triangular distribution with given extremes.
1565
1566 float xmin
1567 - Minimum value of the distribution
1568
1569 float xmax
1570 - Maximum value of the distribution
1571
1572 float value
1573 - Value for which the probability is required
1574
1575
1576 ::math::statistics::cdf-students-t degrees value
1577 Return the cumulative probability of a given value for a Stu‐
1578 dent's t distribution with given number of degrees of freedom.
1579
1580 int degrees
1581 - Number of degrees of freedom
1582
1583 float value
1584 - Value for which the probability is required
1585
1586
1587 ::math::statistics::cdf-gamma alpha beta value
1588 Return the cumulative probability of a given value for a Gamma
1589 distribution with given shape and rate parameters.
1590
1591 float alpha
1592 - Shape parameter
1593
1594 float beta
1595 - Rate parameter
1596
1597 float value
1598 - Value for which the cumulative probability is required
1599
1600
1601 ::math::statistics::cdf-poisson mu k
1602 Return the cumulative probability of a given number of occur‐
1603 rences in the same interval (k) for a Poisson distribution with
1604 given mean (mu).
1605
1606 float mu
1607 - Mean number of occurrences
1608
1609 int k - Number of occurrences
1610
1611
1612 ::math::statistics::cdf-beta a b value
1613 Return the cumulative probability of a given value for a Beta
1614 distribution with given shape parameters
1615
1616 float a
1617 - First shape parameter
1618
1619 float b
1620 - Second shape parameter
1621
1622 float value
1623 - Value for which the probability is required
1624
1625
1626 ::math::statistics::cdf-weibull scale shape value
1627 Return the cumulative probability of a given value for a Weibull
1628 distribution with given scale and shape parameters.
1629
1630 float scale
1631 - Scale parameter
1632
1633 float shape
1634 - Shape parameter
1635
1636 float value
1637 - Value for which the probability is required
1638
1639
1640 ::math::statistics::cdf-gumbel location scale value
1641 Return the cumulative probability of a given value for a Gumbel
1642 distribution with given location and scale parameters.
1643
1644 float location
1645 - Location parameter
1646
1647 float scale
1648 - Scale parameter
1649
1650 float value
1651 - Value for which the probability is required
1652
1653
1654 ::math::statistics::cdf-pareto scale shape value
1655 Return the cumulative probability of a given value for a Pareto
1656 distribution with given scale and shape parameters
1657
1658 float scale
1659 - Scale parameter
1660
1661 float shape
1662 - Shape parameter
1663
1664 float value
1665 - Value for which the probability is required
1666
1667
1668 ::math::statistics::cdf-cauchy location scale value
1669 Return the cumulative probability of a given value for a Cauchy
1670 distribution with given location and scale parameters.
1671
1672 float location
1673 - Location parameter
1674
1675 float scale
1676 - Scale parameter
1677
1678 float value
1679 - Value for which the probability is required
1680
1681
1682 ::math::statistics::cdf-F nf1 nf2 value
1683 Return the cumulative probability of a given value for an F dis‐
1684 tribution with nf1 and nf2 degrees of freedom.
1685
1686 float nf1
1687 - Degrees of freedom for the numerator
1688
1689 float nf2
1690 - Degrees of freedom for the denominator
1691
1692 float value
1693 - Value for which the probability is required
1694
1695
1696 ::math::statistics::cdf-laplace location scale value
1697 Return the cumulative probability of a given value for a Laplace
1698 distribution with given location and scale parameters. The
1699 Laplace distribution consists of two exponential functions, is
1700 peaked and has heavier tails than the normal distribution.
1701
1702 float location
1703 - Location parameter (mean)
1704
1705 float scale
1706 - Scale parameter
1707
1708 float value
1709 - Value for which the probability is required
1710
1711
1712 ::math::statistics::cdf-kumaraswamy a b value
1713 Return the cumulative probability of a given value for a Ku‐
1714 maraswamy distribution with given parameters a and b. The Ku‐
1715 maraswamy distribution is related to the Beta distribution, but
1716 has a tractable cumulative distribution function.
1717
1718 float a
1719 - Parameter a
1720
1721 float b
1722 - Parameter b
1723
1724 float value
1725 - Value for which the probability is required
1726
1727
1728 ::math::statistics::cdf-negative-binomial r p value
1729 Return the cumulative probability of a given value for a nega‐
1730 tive binomial distribution with an allowed number of failures
1731 and the probability of success.
1732
1733 int r - Allowed number of failures (at least 1)
1734
1735 float p
1736 - Probability of success
1737
1738 int value
1739 - Greatest number of successes
1740
1741
1742 ::math::statistics::empirical-distribution values
1743 Return a list of values and their empirical probability. The
1744 values are sorted in increasing order. (The implementation fol‐
1745 lows the description at the corresponding Wikipedia page)
1746
1747 list values
1748 - List of data to be examined
1749
1750
1751 ::math::statistics::random-normal mean stdev number
1752 Return a list of "number" random values satisfying a normal dis‐
1753 tribution with given mean and standard deviation.
1754
1755 float mean
1756 - Mean value of the distribution
1757
1758 float stdev
1759 - Standard deviation of the distribution
1760
1761 int number
1762 - Number of values to be returned
1763
1764
1765 ::math::statistics::random-lognormal mean stdev number
1766 Return a list of "number" random values satisfying a log-normal
1767 distribution with given mean and standard deviation.
1768
1769 float mean
1770 - Mean value of the distribution
1771
1772 float stdev
1773 - Standard deviation of the distribution
1774
1775 int number
1776 - Number of values to be returned
1777
1778
1779 ::math::statistics::random-exponential mean number
1780 Return a list of "number" random values satisfying an exponen‐
1781 tial distribution with given mean.
1782
1783 float mean
1784 - Mean value of the distribution
1785
1786 int number
1787 - Number of values to be returned
1788
1789
1790 ::math::statistics::random-uniform xmin xmax number
1791 Return a list of "number" random values satisfying a uniform
1792 distribution with given extremes.
1793
1794 float xmin
1795 - Minimum value of the distribution
1796
1797 float xmax
1798 - Maximum value of the distribution
1799
1800 int number
1801 - Number of values to be returned
1802
1803
1804 ::math::statistics::random-triangular xmin xmax number
1805 Return a list of "number" random values satisfying a triangular
1806 distribution with given extremes. If xmin < xmax, then lower
1807 values have a higher probability and vice versa (see also pdf-
1808 triangular).
1809
1810 float xmin
1811 - Minimum value of the distribution
1812
1813 float xmax
1814 - Maximum value of the distribution
1815
1816 int number
1817 - Number of values to be returned
1818
1819
1820 ::math::statistics::random-symmetric-triangular xmin xmax number
1821 Return a list of "number" random values satisfying a symmetric
1822 triangular distribution with given extremes.
1823
1824 float xmin
1825 - Minimum value of the distribution
1826
1827 float xmax
1828 - Maximum value of the distribution
1829
1830 int number
1831 - Number of values to be returned
1832
1833
1834 ::math::statistics::random-gamma alpha beta number
1835 Return a list of "number" random values satisfying a Gamma dis‐
1836 tribution with given shape and rate parameters.
1837
1838 float alpha
1839 - Shape parameter
1840
1841 float beta
1842 - Rate parameter
1843
1844 int number
1845 - Number of values to be returned
1846
1847
1848 ::math::statistics::random-poisson mu number
1849 Return a list of "number" random values satisfying a Poisson
1850 distribution with given mean.
1851
1852 float mu
1853 - Mean of the distribution
1854
1855 int number
1856 - Number of values to be returned
1857
1858
1859 ::math::statistics::random-chisquare df number
1860 Return a list of "number" random values satisfying a chi square
1861 distribution with given degrees of freedom.
1862
1863 float df
1864 - Degrees of freedom
1865
1866 int number
1867 - Number of values to be returned
1868
1869
1870 ::math::statistics::random-student-t df number
1871 Return a list of "number" random values satisfying a Student's t
1872 distribution with given degrees of freedom.
1873
1874 float df
1875 - Degrees of freedom
1876
1877 int number
1878 - Number of values to be returned
1879
1880
1881 ::math::statistics::random-beta a b number
1882 Return a list of "number" random values satisfying a Beta dis‐
1883 tribution with given shape parameters.
1884
1885 float a
1886 - First shape parameter
1887
1888 float b
1889 - Second shape parameter
1890
1891 int number
1892 - Number of values to be returned
1893
1894
1895 ::math::statistics::random-weibull scale shape number
1896 Return a list of "number" random values satisfying a Weibull
1897 distribution with given scale and shape parameters.
1898
1899 float scale
1900 - Scale parameter
1901
1902 float shape
1903 - Shape parameter
1904
1905 int number
1906 - Number of values to be returned
1907
1908
1909 ::math::statistics::random-gumbel location scale number
1910 Return a list of "number" random values satisfying a Gumbel dis‐
1911 tribution with given location and scale parameters.
1912
1913 float location
1914 - Location parameter
1915
1916 float scale
1917 - Scale parameter
1918
1919 int number
1920 - Number of values to be returned
1921
1922
1923 ::math::statistics::random-pareto scale shape number
1924 Return a list of "number" random values satisfying a Pareto dis‐
1925 tribution with given scale and shape parameters.
1926
1927 float scale
1928 - Scale parameter
1929
1930 float shape
1931 - Shape parameter
1932
1933 int number
1934 - Number of values to be returned
1935
1936
1937 ::math::statistics::random-cauchy location scale number
1938 Return a list of "number" random values satisfying a Cauchy dis‐
1939 tribution with given location and scale parameters.
1940
1941 float location
1942 - Location parameter
1943
1944 float scale
1945 - Scale parameter
1946
1947 int number
1948 - Number of values to be returned
1949
1950
1951 ::math::statistics::random-laplace location scale number
1952 Return a list of "number" random values satisfying a Laplace
1953 distribution with given location and scale parameters. The
1954 Laplace distribution consists of two exponential functions, is
1955 peaked and has heavier tails than the normal distribution.
1956
1957 float location
1958 - Location parameter (mean)
1959
1960 float scale
1961 - Scale parameter
1962
1963 int number
1964 - Number of values to be returned
1965
1966
1967 ::math::statistics::random-kumaraswamy a b number
1968 Return a list of "number" random values satisfying a Kumaraswamy
1969 distribution with given parameters a and b. The Kumaraswamy dis‐
1970 tribution is related to the Beta distribution, but has a
1971 tractable cumulative distribution function.
1972
1973 float a
1974 - Parameter a
1975
1976 float b
1977 - Parameter b
1978
1979 int number
1980 - Number of values to be returned
1981
1982
1983 ::math::statistics::random-negative-binomial r p number
1984 Return a list of "number" random values satisfying a negative bi‐
1985 nomial distribution.
1986
1987 int r - Allowed number of failures (at least 1)
1988
1989 float p
1990 - Probability of success
1991
1992 int number
1993 - Number of values to be returned
1994
1995
1996 ::math::statistics::histogram-uniform xmin xmax limits number
1997 Return the expected histogram for a uniform distribution.
1998
1999 float xmin
2000 - Minimum value of the distribution
2001
2002 float xmax
2003 - Maximum value of the distribution
2004
2005 list limits
2006 - Upper limits for the buckets in the histogram
2007
2008 int number
2009 - Total number of "observations" in the histogram
2010
2011
2012 ::math::statistics::incompleteGamma x p ?tol?
2013 Evaluate the incomplete Gamma integral
2014
2015
2017 P(p,x) = \frac{1}{\Gamma(p)} \int_{0}^{x} e^{-t} \, t^{p-1} \, dt
2019
2020
2021 float x
2022 - Value of x (limit of the integral)
2023
2024 float p
2025 - Value of p in the integrand
2026
2027 float tol
2028 - Required tolerance (default: 1.0e-9)
2029
2030
2031 ::math::statistics::incompleteBeta a b x ?tol?
2032 Evaluate the incomplete Beta integral
2033
2034 float a
2035 - First shape parameter
2036
2037 float b
2038 - Second shape parameter
2039
2040 float x
2041 - Value of x (limit of the integral)
2042
2043 float tol
2044 - Required tolerance (default: 1.0e-9)
2045
2046
2047 ::math::statistics::estimate-pareto values
2048 Estimate the parameters for the Pareto distribution that comes
2049 closest to the given values. Returns the estimated scale and
2050 shape parameters, as well as the standard error for the shape
2051 parameter.
2052
2053 list values
2054 - List of values, assumed to be distributed according to
2055 a Pareto distribution
2056
2057
2058 ::math::statistics::estimate-exponential values
2059 Estimate the parameter for the exponential distribution that
2060 comes closest to the given values. Returns an estimate of the
2061 one parameter and of the standard error.
2062
2063 list values
2064 - List of values, assumed to be distributed according to
2065 an exponential distribution
2066
2067
2068 ::math::statistics::estimate-laplace values
2069 Estimate the parameters for the Laplace distribution that comes
2070 closest to the given values. Returns an estimate of respec‐
2071 tively the location and scale parameters, based on maximum like‐
2072 lihood.
2073
2074 list values
2075 - List of values, assumed to be distributed according to
2076 a Laplace distribution
2077
2078
2079 ::math::statistics::estimate-negative-binomial r values
2080 Estimate the probability of success for the negative binomial
2081 distribution that comes closest to the given values. The al‐
2082 lowed number of failures must be given.
2083
2084 int r - Allowed number of failures (at least 1)
2085
2086 list values
2087 - List of values, assumed to be distributed according to
2088 a negative binomial distribution.
2089
2090
2091 TO DO: more function descriptions to be added
2092
2094 The data manipulation procedures act on lists or lists of lists:
2095
2096 ::math::statistics::filter varname data expression
2097 Return a list consisting of the data for which the logical ex‐
2098 pression is true (this command works analogously to the command
2099 foreach).
2100
2101 string varname
2102 - Name of the variable used in the expression
2103
2104 list data
2105 - List of data
2106
2107 string expression
2108 - Logical expression using the variable name
2109
2110
2111 ::math::statistics::map varname data expression
2112 Return a list consisting of the data that are transformed via
2113 the expression.
2114
2115 string varname
2116 - Name of the variable used in the expression
2117
2118 list data
2119 - List of data
2120
2121 string expression
2122 - Expression to be used to transform (map) the data
2123
2124
2125 ::math::statistics::samplescount varname list expression
2126 Return a list consisting of the counts of all data in the sub‐
2127 lists of the "list" argument for which the expression is true.
2128
2129 string varname
2130 - Name of the variable used in the expression
2131
2132 list list
2133 - List of sublists, each containing the data
2134
2135 string expression
2136 - Logical expression to test the data (defaults to
2137 "true").
2138
2139
2140 ::math::statistics::subdivide
2141 Routine PM - not implemented yet
2142
2143
2145 The following simple plotting procedures are available:
2146
2147 ::math::statistics::plot-scale canvas xmin xmax ymin ymax
2148 Set the scale for a plot in the given canvas. All plot routines
2149 expect this function to be called first. There is no automatic
2150 scaling provided.
2151
2152 widget canvas
2153 - Canvas widget to use
2154
2155 float xmin
2156 - Minimum x value
2157
2158 float xmax
2159 - Maximum x value
2160
2161 float ymin
2162 - Minimum y value
2163
2164 float ymax
2165 - Maximum y value
2166
2167
2168 ::math::statistics::plot-xydata canvas xdata ydata tag
2169 Create a simple XY plot in the given canvas - the data are shown
2170 as a collection of dots. The tag can be used to manipulate the
2171 appearance.
2172
2173 widget canvas
2174 - Canvas widget to use
2175
2176 list xdata
2177 - Series of independent data
2178
2179 list ydata
2180 - Series of dependent data
2181
2182 string tag
2183 - Tag to give to the plotted data (defaults to xyplot)
2184
2185
2186 ::math::statistics::plot-xyline canvas xdata ydata tag
2187 Create a simple XY plot in the given canvas - the data are shown
2188 as a line through the data points. The tag can be used to manip‐
2189 ulate the appearance.
2190
2191 widget canvas
2192 - Canvas widget to use
2193
2194 list xdata
2195 - Series of independent data
2196
2197 list ydata
2198 - Series of dependent data
2199
2200 string tag
2201 - Tag to give to the plotted data (defaults to xyplot)
2202
2203
2204 ::math::statistics::plot-tdata canvas tdata tag
2205 Create a simple XY plot in the given canvas - the data are shown
2206 as a collection of dots. The horizontal coordinate is equal to
2207 the index. The tag can be used to manipulate the appearance.
2208 This type of presentation is suitable, for instance, for plot‐
2209 ting autocorrelation functions or for inspecting time-dependent
2210 behaviour.
2211
2212 widget canvas
2213 - Canvas widget to use
2214
2215 list tdata
2216 - Series of dependent data
2217
2218 string tag
2219 - Tag to give to the plotted data (defaults to xyplot)
2220
2221
2222 ::math::statistics::plot-tline canvas tdata tag
2223 Create a simple XY plot in the given canvas - the data are shown
2224 as a line. See plot-tdata for an explanation.
2225
2226 widget canvas
2227 - Canvas widget to use
2228
2229 list tdata
2230 - Series of dependent data
2231
2232 string tag
2233 - Tag to give to the plotted data (defaults to xyplot)
2234
2235
2236 ::math::statistics::plot-histogram canvas counts limits tag
2237 Create a simple histogram in the given canvas
2238
2239 widget canvas
2240 - Canvas widget to use
2241
2242 list counts
2243 - Series of bucket counts
2244
2245 list limits
2246 - Series of upper limits for the buckets
2247
2248 string tag
2249 - Tag to give to the plotted data (defaults to xyplot)
2250
2251
2253 The following procedures are yet to be implemented:
2254
2255 • F-test-stdev
2256
2257 • interval-mean-stdev
2258
2259 • histogram-normal
2260
2261 • histogram-exponential
2262
2263 • test-histogram
2264
2265 • test-corr
2266
2267 • quantiles-*
2268
2269 • fourier-coeffs
2270
2271 • fourier-residuals
2272
2273 • onepar-function-fit
2274
2275 • onepar-function-residuals
2276
2277 • plot-linear-model
2278
2279 • subdivide
2280
2282 The code below is a small example of how you can examine a set of data:
2283
2284 # Simple example:
2285 # - Generate data (as a cheap way of getting some)
2286 # - Perform statistical analysis to describe the data
2287 #
2288 package require math::statistics
2289
2290 #
2291 # Two auxiliary procs
2292 #
2293 proc pause {time} {
2294 set wait 0
2295 after [expr {$time*1000}] {set ::wait 1}
2296 vwait wait
2297 }
2298
2299 proc print-histogram {counts limits} {
2300 foreach count $counts limit $limits {
2301 if { $limit != {} } {
2302 puts [format "<%12.4g\t%d" $limit $count]
2303 set prev_limit $limit
2304 } else {
2305 puts [format ">%12.4g\t%d" $prev_limit $count]
2306 }
2307 }
2308 }
2309
2310 #
2311 # Our source of arbitrary data
2312 #
2313 proc generateData { data1 data2 } {
2314 upvar 1 $data1 _data1
2315 upvar 1 $data2 _data2
2316
2317 set d1 0.0
2318 set d2 0.0
2319 for { set i 0 } { $i < 100 } { incr i } {
2320 set d1 [expr {10.0-2.0*cos(2.0*3.1415926*$i/24.0)+3.5*rand()}]
2321 set d2 [expr {0.7*$d2+0.3*$d1+0.7*rand()}]
2322 lappend _data1 $d1
2323 lappend _data2 $d2
2324 }
2325 return {}
2326 }
2327
2328 #
2329 # The analysis session
2330 #
2331 package require Tk
2332 console show
2333 canvas .plot1
2334 canvas .plot2
2335 pack .plot1 .plot2 -fill both -side top
2336
2337 generateData data1 data2
2338
2339 puts "Basic statistics:"
2340 set b1 [::math::statistics::basic-stats $data1]
2341 set b2 [::math::statistics::basic-stats $data2]
2342 foreach label {mean min max number stdev var} v1 $b1 v2 $b2 {
2343 puts "$label\t$v1\t$v2"
2344 }
2345 puts "Plot the data as function of \"time\" and against each other"
2346 ::math::statistics::plot-scale .plot1 0 100 0 20
2347 ::math::statistics::plot-scale .plot2 0 20 0 20
2348 ::math::statistics::plot-tline .plot1 $data1
2349 ::math::statistics::plot-tline .plot1 $data2
2350 ::math::statistics::plot-xydata .plot2 $data1 $data2
2351
2352 puts "Correlation coefficient:"
2353 puts [::math::statistics::corr $data1 $data2]
2354
2355 pause 2
2356 puts "Plot histograms"
2357 .plot2 delete all
2358 ::math::statistics::plot-scale .plot2 0 20 0 100
2359 set limits [::math::statistics::minmax-histogram-limits 7 16]
2360 set histogram_data [::math::statistics::histogram $limits $data1]
2361 ::math::statistics::plot-histogram .plot2 $histogram_data $limits
2362
2363 puts "First series:"
2364 print-histogram $histogram_data $limits
2365
2366 pause 2
2367 set limits [::math::statistics::minmax-histogram-limits 0 15 10]
2368 set histogram_data [::math::statistics::histogram $limits $data2]
2369 ::math::statistics::plot-histogram .plot2 $histogram_data $limits d2
2370 .plot2 itemconfigure d2 -fill red
2371
2372 puts "Second series:"
2373 print-histogram $histogram_data $limits
2374
2375 puts "Autocorrelation function:"
2376 set autoc [::math::statistics::autocorr $data1]
2377 puts [::math::statistics::map $autoc {[format "%.2f" $x]}]
2378 puts "Cross-correlation function:"
2379 set crossc [::math::statistics::crosscorr $data1 $data2]
2380 puts [::math::statistics::map $crossc {[format "%.2f" $x]}]
2381
2382 ::math::statistics::plot-scale .plot1 0 100 -1 4
2383 ::math::statistics::plot-tline .plot1 $autoc "autoc"
2384 ::math::statistics::plot-tline .plot1 $crossc "crossc"
2385 .plot1 itemconfigure autoc -fill green
2386 .plot1 itemconfigure crossc -fill yellow
2387
2388 puts "Quantiles: 0.1, 0.2, 0.5, 0.8, 0.9"
2389 puts "First: [::math::statistics::quantiles $data1 {0.1 0.2 0.5 0.8 0.9}]"
2390 puts "Second: [::math::statistics::quantiles $data2 {0.1 0.2 0.5 0.8 0.9}]"
2391
2392
2393 If you run this example, then the following should be clear:
2394
2395 • There is a strong correlation between the two time series, as dis‐
2396 played by the raw data and especially by the correlation func‐
2397 tions.
2398
2399 • Both time series show a significant periodic component
2400
2401 • The histograms are not very useful in identifying the nature of
2402 the time series - they do not show the periodic nature.
2403
2405 This document, and the package it describes, will undoubtedly contain
2406 bugs and other problems. Please report such in the category math ::
2407 statistics of the Tcllib Trackers [http://core.tcl.tk/tcllib/re‐
2408 portlist]. Please also report any ideas for enhancements you may have
2409 for either package and/or documentation.
2410
2411 When proposing code changes, please provide unified diffs, i.e. the out‐
2412 put of diff -u.
2413
2414 Note further that attachments are strongly preferred over inlined
2415 patches. Attachments can be made by going to the Edit form of the
2416 ticket immediately after its creation, and then using the left-most
2417 button in the secondary navigation bar.
2418
2420 data analysis, mathematics, statistics
2421
2423 Mathematics
2424
2425
2426
2427tcllib 1 math::statistics(n)