1math::statistics(n) Tcl Math Library math::statistics(n)
2
3
4
5______________________________________________________________________________
6
8 math::statistics - Basic statistical functions and procedures
9
11 package require Tcl 8.4
12
13 package require math::statistics 1
14
15 ::math::statistics::mean data
16
17 ::math::statistics::min data
18
19 ::math::statistics::max data
20
21 ::math::statistics::number data
22
23 ::math::statistics::stdev data
24
25 ::math::statistics::var data
26
27 ::math::statistics::pstdev data
28
29 ::math::statistics::pvar data
30
31 ::math::statistics::median data
32
33 ::math::statistics::basic-stats data
34
35 ::math::statistics::histogram limits values ?weights?
36
37 ::math::statistics::histogram-alt limits values ?weights?
38
39 ::math::statistics::corr data1 data2
40
41 ::math::statistics::interval-mean-stdev data confidence
42
43 ::math::statistics::t-test-mean data est_mean est_stdev alpha
44
45 ::math::statistics::test-normal data significance
46
47 ::math::statistics::lillieforsFit data
48
49 ::math::statistics::test-Duckworth list1 list2 significance
50
51 ::math::statistics::test-anova-F alpha args
52
53 ::math::statistics::test-Tukey-range alpha args
54
55 ::math::statistics::test-Dunnett alpha control args
56
57 ::math::statistics::quantiles data confidence
58
59 ::math::statistics::quantiles limits counts confidence
60
61 ::math::statistics::autocorr data
62
63 ::math::statistics::crosscorr data1 data2
64
65 ::math::statistics::mean-histogram-limits mean stdev number
66
67 ::math::statistics::minmax-histogram-limits min max number
68
69 ::math::statistics::linear-model xdata ydata intercept
70
71 ::math::statistics::linear-residuals xdata ydata intercept
72
73 ::math::statistics::test-2x2 n11 n21 n12 n22
74
75 ::math::statistics::print-2x2 n11 n21 n12 n22
76
77 ::math::statistics::control-xbar data ?nsamples?
78
79 ::math::statistics::control-Rchart data ?nsamples?
80
81 ::math::statistics::test-xbar control data
82
83 ::math::statistics::test-Rchart control data
84
85 ::math::statistics::test-Kruskal-Wallis confidence args
86
87 ::math::statistics::analyse-Kruskal-Wallis args
88
89 ::math::statistics::group-rank args
90
91 ::math::statistics::test-Wilcoxon sample_a sample_b
92
93 ::math::statistics::spearman-rank sample_a sample_b
94
95 ::math::statistics::spearman-rank-extended sample_a sample_b
96
97 ::math::statistics::kernel-density data ?-option value ...?
98
99 ::math::statistics::tstat dof ?alpha?
100
101 ::math::statistics::mv-wls wt1 weights_and_values
102
103 ::math::statistics::mv-ols values
104
105 ::math::statistics::pdf-normal mean stdev value
106
107 ::math::statistics::pdf-lognormal mean stdev value
108
109 ::math::statistics::pdf-exponential mean value
110
111 ::math::statistics::pdf-uniform xmin xmax value
112
113 ::math::statistics::pdf-gamma alpha beta value
114
115 ::math::statistics::pdf-poisson mu k
116
117 ::math::statistics::pdf-chisquare df value
118
119 ::math::statistics::pdf-student-t df value
120
121 ::math::statistics::pdf-gamma a b value
122
123 ::math::statistics::pdf-beta a b value
124
125 ::math::statistics::pdf-weibull scale shape value
126
127 ::math::statistics::pdf-gumbel location scale value
128
129 ::math::statistics::pdf-pareto scale shape value
130
131 ::math::statistics::pdf-cauchy location scale value
132
133 ::math::statistics::cdf-normal mean stdev value
134
135 ::math::statistics::cdf-lognormal mean stdev value
136
137 ::math::statistics::cdf-exponential mean value
138
139 ::math::statistics::cdf-uniform xmin xmax value
140
141 ::math::statistics::cdf-students-t degrees value
142
143 ::math::statistics::cdf-gamma alpha beta value
144
145 ::math::statistics::cdf-poisson mu k
146
147 ::math::statistics::cdf-beta a b value
148
149 ::math::statistics::cdf-weibull scale shape value
150
151 ::math::statistics::cdf-gumbel location scale value
152
153 ::math::statistics::cdf-pareto scale shape value
154
155 ::math::statistics::cdf-cauchy location scale value
156
157 ::math::statistics::cdf-F nf1 nf2 value
158
159 ::math::statistics::empirical-distribution values
160
161 ::math::statistics::random-normal mean stdev number
162
163 ::math::statistics::random-lognormal mean stdev number
164
165 ::math::statistics::random-exponential mean number
166
167 ::math::statistics::random-uniform xmin xmax number
168
169 ::math::statistics::random-gamma alpha beta number
170
171 ::math::statistics::random-poisson mu number
172
173 ::math::statistics::random-chisquare df number
174
175 ::math::statistics::random-student-t df number
176
177 ::math::statistics::random-beta a b number
178
179 ::math::statistics::random-weibull scale shape number
180
181 ::math::statistics::random-gumbel location scale number
182
183 ::math::statistics::random-pareto scale shape number
184
185 ::math::statistics::random-cauchy location scale number
186
187 ::math::statistics::histogram-uniform xmin xmax limits number
188
189 ::math::statistics::incompleteGamma x p ?tol?
190
191 ::math::statistics::incompleteBeta a b x ?tol?
192
193 ::math::statistics::estimate-pareto values
194
195 ::math::statistics::filter varname data expression
196
197 ::math::statistics::map varname data expression
198
199 ::math::statistics::samplescount varname list expression
200
201 ::math::statistics::subdivide
202
203 ::math::statistics::plot-scale canvas xmin xmax ymin ymax
204
205 ::math::statistics::plot-xydata canvas xdata ydata tag
206
207 ::math::statistics::plot-xyline canvas xdata ydata tag
208
209 ::math::statistics::plot-tdata canvas tdata tag
210
211 ::math::statistics::plot-tline canvas tdata tag
212
213 ::math::statistics::plot-histogram canvas counts limits tag
214
215______________________________________________________________________________
216
218 The math::statistics package contains functions and procedures for
219 basic statistical data analysis, such as:
220
221 · Descriptive statistical parameters (mean, minimum, maximum,
222 standard deviation)
223
224 · Estimates of the distribution in the form of histograms and
225 quantiles
226
227 · Basic testing of hypotheses
228
229 · Probability and cumulative density functions
230
231 It is meant to help in developing data analysis applications or doing
232 ad hoc data analysis; it is not in itself a full application, nor is it
233 intended to rival full (non-)commercial statistical packages.
234
235 The purpose of this document is to describe the implemented procedures
236 and provide some examples of their usage. As there is ample literature
237 on the algorithms involved, we refer to relevant text books for more
238 explanations. The package contains a fairly large number of public
239 procedures. They can be divided into four sets: general proce‐
240 dures, procedures that deal with specific statistical distributions,
241 list procedures to select or transform data and simple plotting proce‐
242 dures (these require Tk). Note: The data that need to be analyzed are
243 always contained in a simple list. Missing values are represented as
244 empty list elements. Note: With version 1.0.1 a mistake in the procs
245 pdf-lognormal, cdf-lognormal and random-lognormal has been corrected.
246 In previous versions the argument for the standard deviation was actu‐
247 ally used as if it was the variance.
248
250 The general statistical procedures are:
251
252 ::math::statistics::mean data
253 Determine the mean value of the given list of data.
254
255 list data
256 - List of data
257
258
259 ::math::statistics::min data
260 Determine the minimum value of the given list of data.
261
262 list data
263 - List of data
264
265
266 ::math::statistics::max data
267 Determine the maximum value of the given list of data.
268
269 list data
270 - List of data
271
272
273 ::math::statistics::number data
274 Determine the number of non-missing data in the given list
275
276 list data
277 - List of data
278
279
280 ::math::statistics::stdev data
281 Determine the sample standard deviation of the data in the given
282 list
283
284 list data
285 - List of data
286
287
288 ::math::statistics::var data
289 Determine the sample variance of the data in the given list
290
291 list data
292 - List of data
293
294
295 ::math::statistics::pstdev data
296 Determine the population standard deviation of the data in the
297 given list
298
299 list data
300 - List of data
301
302
303 ::math::statistics::pvar data
304 Determine the population variance of the data in the given list
305
306 list data
307 - List of data
308
309
310 ::math::statistics::median data
311 Determine the median of the data in the given list (Note that
312 this requires sorting the data, which may be a costly operation)
313
314 list data
315 - List of data
316
317
318 ::math::statistics::basic-stats data
319 Determine a list of all the descriptive parameters: mean, mini‐
320 mum, maximum, number of data, sample standard deviation, sample
321 variance, population standard deviation and population variance.
322
323 (This routine is called whenever either or all of the basic sta‐
324 tistical parameters are required. Hence all calculations are
325 done and the relevant values are returned.)
326
327 list data
328 - List of data
329
330
331 ::math::statistics::histogram limits values ?weights?
332 Determine histogram information for the given list of data.
333 Returns a list consisting of the number of values that fall into
334 each interval. (The first interval consists of all values lower
335 than the first limit, the last interval consists of all values
336 greater than the last limit. There is one more interval than
337 there are limits.)
338
339 Optionally, you can use weights to influence the histogram.
340
341 list limits
342 - List of upper limits (in ascending order) for the
343 intervals of the histogram.
344
345 list values
346 - List of data
347
348 list weights
349 - List of weights, one weight per value
350
351
352 ::math::statistics::histogram-alt limits values ?weights?
353 Alternative implementation of the histogram procedure: the open
354 end of the intervals is at the lower bound instead of the upper
355 bound.
356
357 list limits
358 - List of upper limits (in ascending order) for the
359 intervals of the histogram.
360
361 list values
362 - List of data
363
364 list weights
365 - List of weights, one weight per value
366
367
368 ::math::statistics::corr data1 data2
369 Determine the correlation coefficient between two sets of data.
370
371 list data1
372 - First list of data
373
374 list data2
375 - Second list of data
376
377
378 ::math::statistics::interval-mean-stdev data confidence
379 Return the interval containing the mean value and one containing
380 the standard deviation with a certain level of confidence
381 (assuming a normal distribution)
382
383 list data
384 - List of raw data values (small sample)
385
386 float confidence
387 - Confidence level (0.95 or 0.99 for instance)
388
389
390 ::math::statistics::t-test-mean data est_mean est_stdev alpha
391 Test whether the mean value of a sample is in accordance with
392 the estimated normal distribution with a certain probability.
393 Returns 1 if the test succeeds or 0 if the mean is unlikely to
394 fit the given distribution.
395
396 list data
397 - List of raw data values (small sample)
398
399 float est_mean
400 - Estimated mean of the distribution
401
402 float est_stdev
403 - Estimated stdev of the distribution
404
405 float alpha
406 - Probability level (0.95 or 0.99 for instance)
407
408
409 ::math::statistics::test-normal data significance
410 Test whether the given data follow a normal distribution with a
411 certain level of significance. Returns 1 if the data are nor‐
412 mally distributed within the level of significance, returns 0 if
413 not. The underlying test is the Lilliefors test. Smaller values
414 of the significance mean a stricter testing.
415
416 list data
417 - List of raw data values
418
419 float significance
420 - Significance level (one of 0.01, 0.05, 0.10, 0.15 or
421 0.20). For compatibility reasons the values "1-signifi‐
422 cance", 0.80, 0.85, 0.90, 0.95 or 0.99 are also accepted.
423
424 Compatibility issue: the original implementation and documentation used
425 the term "confidence" and used a value 1-significance (see ticket
426 2812473fff). This has been corrected as of version 0.9.3.
427
428
429 ::math::statistics::lillieforsFit data
430 Returns the goodness of fit to a normal distribution according
431 to Lilliefors. The higher the number, the more likely the data
432 are indeed normally distributed. The test requires at least five
433 data points.
434
435 list data
436 - List of raw data values
437
438
439 ::math::statistics::test-Duckworth list1 list2 significance
440 Determine if two data sets have the same median according to the
441 Tukey-Duckworth test. The procedure returns 0 if the medians
442 are unequal, 1 if they are equal, -1 if the test can not be con‐
443 ducted (the smallest value must be in a different set than the
444 greatest value).
453
454 list list1
455 - First list of data
456
457 list list2
458 - Second list of data
459
460 float significance
461 - Significance level (either 0.05, 0.01 or 0.001)
462
463
464 ::math::statistics::test-anova-F alpha args
465 Determine if two or more groups with normally distributed data
466 have the same means. The procedure returns 0 if the means are
467 likely unequal, 1 if they are likely equal. This is a one-way
468 ANOVA test. The groups may also be stored in a nested list:
475
476
477 test-anova-F 0.05 $A $B $C
478 #
479 # Or equivalently:
480 #
481 test-anova-F 0.05 [list $A $B $C]
482
483
484 float alpha
485 - Significance level
486
487 list args
488 - Two or more groups of data to be checked
489
490
491 ::math::statistics::test-Tukey-range alpha args
492 Determine if two or more groups with normally distributed data
493 have the same means, using Tukey's range test. It is complemen‐
494 tary to the ANOVA test. The procedure returns a list of the
495 comparison results for each pair of groups. Each element of this
496 list contains: the index of the first group and that of the sec‐
497 ond group, whether the means are likely to be different (1) or
498 not (0) and the confidence interval the conclusion is based on.
499 The groups may also be stored in a nested list, just as with the
500 ANOVA test.
501
502 float alpha
503 - Significance level - either 0.05 or 0.01
504
505 list args
506 - Two or more groups of data to be checked
507
508
509 ::math::statistics::test-Dunnett alpha control args
510 Determine if one or more groups with normally distributed data
511 have the same means as the group of control data, using Dun‐
512 nett's test. It is complementary to the ANOVA test. The proce‐
513 dure returns a list of the comparison results for each group
514 with the control group. Each element of this list contains:
515 whether the means are likely to be different (1) or not (0) and
516 the confidence interval the conclusion is based on. The groups
517 may also be stored in a nested list, just as with the ANOVA
518 test.
519
520 Note: some care is required if there is only one group to com‐
521 pare the control with:
522
523
524 test-Dunnett 0.05 $control [list $A]
525
526
527 Otherwise the group A is split up into groups of one element -
528 this is due to an ambiguity.
529
530 float alpha
531 - Significance level - either 0.05 or 0.01
532
533 list args
534 - One or more groups of data to be checked
535
536
537 ::math::statistics::quantiles data confidence
538 Return the quantiles for a given set of data
539
540
541 list data
542 - List of raw data values
543
544
545 float confidence
546 - Confidence level (0.95 or 0.99 for instance) or a list
547 of confidence levels.
548
549
550
551 ::math::statistics::quantiles limits counts confidence
552 Return the quantiles based on histogram information (alternative
553 to the call with two arguments)
554
555 list limits
556 - List of upper limits from histogram
557
558 list counts
559 - List of counts for each interval in histogram
560
561 float confidence
562 - Confidence level (0.95 or 0.99 for instance) or a list
563 of confidence levels.
564
565
566 ::math::statistics::autocorr data
567 Return the autocorrelation function as a list of values (assum‐
568 ing equidistance between samples, about 1/2 of the number of raw
569 data)
570
571 The correlation is determined in such a way that the first value
572 is always 1 and all others are equal to or smaller than 1. The
573 number of values involved will diminish as the "time" (the index
574 in the list of returned values) increases
575
576 list data
577 - Raw data for which the autocorrelation must be deter‐
578 mined
579
580
581 ::math::statistics::crosscorr data1 data2
582 Return the cross-correlation function as a list of values
583 (assuming equidistance between samples, about 1/2 of the number
584 of raw data)
585
586 The correlation is determined in such a way that the values can
587 never exceed 1 in magnitude. The number of values involved will
588 diminish as the "time" (the index in the list of returned val‐
589 ues) increases.
590
591 list data1
592 - First list of data
593
594 list data2
595 - Second list of data
596
597
598 ::math::statistics::mean-histogram-limits mean stdev number
599 Determine reasonable limits based on mean and standard deviation
600 for a histogram. Convenience function - the result is suitable
601 for the histogram function.
602
603 float mean
604 - Mean of the data
605
606 float stdev
607 - Standard deviation
608
609 int number
610 - Number of limits to generate (defaults to 8)
611
612
613 ::math::statistics::minmax-histogram-limits min max number
614 Determine reasonable limits based on a minimum and maximum for a
615 histogram
616
617 Convenience function - the result is suitable for the histogram
618 function.
619
620 float min
621 - Expected minimum
622
623 float max
624 - Expected maximum
625
626 int number
627 - Number of limits to generate (defaults to 8)
628
629
630 ::math::statistics::linear-model xdata ydata intercept
631 Determine the coefficients for a linear regression between two
632 series of data (the model: Y = A + B*X). Returns a list of
633 parameters describing the fit
634
635 list xdata
636 - List of independent data
637
638 list ydata
639 - List of dependent data to be fitted
640
641 boolean intercept
642 - (Optional) compute the intercept (1, default) or fit to
643 a line through the origin (0)
644
645 The result consists of the following list:
646
647 · (Estimate of) Intercept A
648
649 · (Estimate of) Slope B
650
651 · Standard deviation of Y relative to fit
652
653 · Correlation coefficient R2
654
655 · Number of degrees of freedom df
656
657 · Standard error of the intercept A
658
659 · Significance level of A
660
661 · Standard error of the slope B
662
663 · Significance level of B
664
665
666 ::math::statistics::linear-residuals xdata ydata intercept
667 Determine the difference between actual data and predicted from
668 the linear model.
669
670 Returns a list of the differences between the actual data and
671 the predicted values.
672
673 list xdata
674 - List of independent data
675
676 list ydata
677 - List of dependent data to be fitted
678
679 boolean intercept
680 - (Optional) compute the intercept (1, default) or fit to
681 a line through the origin (0)
682
683
684 ::math::statistics::test-2x2 n11 n21 n12 n22
685 Determine if two sets of samples, each from a binomial distribu‐
686 tion, differ significantly or not (implying a different parame‐
687 ter).
688
689 Returns the "chi-square" value, which can be used to deter‐
690 mine the significance.
691
692 int n11
693 - Number of outcomes with the first value from the first
694 sample.
695
696 int n21
697 - Number of outcomes with the first value from the second
698 sample.
699
700 int n12
701 - Number of outcomes with the second value from the first
702 sample.
703
704 int n22
705 - Number of outcomes with the second value from the sec‐
706 ond sample.
707
708
709 ::math::statistics::print-2x2 n11 n21 n12 n22
710 Determine if two sets of samples, each from a binomial distribu‐
711 tion, differ significantly or not (implying a different parame‐
712 ter).
713
714 Returns a short report, useful in an interactive session.
715
716 int n11
717 - Number of outcomes with the first value from the first
718 sample.
719
720 int n21
721 - Number of outcomes with the first value from the second
722 sample.
723
724 int n12
725 - Number of outcomes with the second value from the first
726 sample.
727
728 int n22
729 - Number of outcomes with the second value from the sec‐
730 ond sample.
731
732
733 ::math::statistics::control-xbar data ?nsamples?
734 Determine the control limits for an xbar chart. The number of
735 data in each subsample defaults to 4. At least 20 subsamples are
736 required.
737
738 Returns the mean, the lower limit, the upper limit and the num‐
739 ber of data per subsample.
740
741 list data
742 - List of observed data
743
744 int nsamples
745 - Number of data per subsample
746
747
748 ::math::statistics::control-Rchart data ?nsamples?
749 Determine the control limits for an R chart. The number of data
750 in each subsample (nsamples) defaults to 4. At least 20 subsam‐
751 ples are required.
752
753 Returns the mean range, the lower limit, the upper limit and the
754 number of data per subsample.
755
756 list data
757 - List of observed data
758
759 int nsamples
760 - Number of data per subsample
761
762
763 ::math::statistics::test-xbar control data
764 Determine if the data exceed the control limits for the xbar
765 chart.
766
767 Returns a list of subsamples (their indices) that indeed violate
768 the limits.
769
770 list control
771 - Control limits as returned by the "control-xbar" proce‐
772 dure
773
774 list data
775 - List of observed data
776
777
778 ::math::statistics::test-Rchart control data
779 Determine if the data exceed the control limits for the R chart.
780
781 Returns a list of subsamples (their indices) that indeed violate
782 the limits.
783
784 list control
785 - Control limits as returned by the "control-Rchart" pro‐
786 cedure
787
788 list data
789 - List of observed data
790
791
792 ::math::statistics::test-Kruskal-Wallis confidence args
793 Check if the population medians of two or more groups are equal
794 with a given confidence level, using the Kruskal-Wallis test.
795
796 float confidence
797 - Confidence level to be used (0-1)
798
799 list args
800 - Two or more lists of data
801
802
803 ::math::statistics::analyse-Kruskal-Wallis args
804 Compute the statistical parameters for the Kruskal-Wallis test.
805 Returns the Kruskal-Wallis statistic and the probability that
806 that value would occur assuming the medians of the populations
807 are equal.
808
809 list args
810 - Two or more lists of data
811
812
813 ::math::statistics::group-rank args
814 Rank the groups of data with respect to the complete set.
815 Returns a list consisting of the group ID, the value and the
816 rank (possibly a rational number, in case of ties) for each data
817 item.
818
819 list args
820 - Two or more lists of data
821
822
823 ::math::statistics::test-Wilcoxon sample_a sample_b
824 Compute the Wilcoxon test statistic to determine if two samples
825 have the same median or not. (The statistic can be regarded as
826 standard normal, if the sample sizes are both larger than 10.)
827 Returns the value of this statistic.
828
829 list sample_a
830 - List of data comprising the first sample
831
832 list sample_b
833 - List of data comprising the second sample
834
835
836 ::math::statistics::spearman-rank sample_a sample_b
837 Return the Spearman rank correlation as an alternative to the
838 ordinary (Pearson's) correlation coefficient. The two samples
839 should have the same number of data.
840
841 list sample_a
842 - First list of data
843
844 list sample_b
845 - Second list of data
846
847
848 ::math::statistics::spearman-rank-extended sample_a sample_b
849 Return the Spearman rank correlation as an alternative to the
850 ordinary (Pearson's) correlation coefficient as well as addi‐
851 tional data. The two samples should have the same number of
852 data. The procedure returns the correlation coefficient, the
853 number of data pairs used and the z-score, an approximately
854 standard normal statistic, indicating the significance of the
855 correlation.
856
857 list sample_a
858 - First list of data
859
860 list sample_b
861 - Second list of data
862
863 ::math::statistics::kernel-density data ?-option value ...?
864 Return the density function based on kernel density estima‐
865 tion. The procedure is controlled by a small set of options,
866 each of which is given a reasonable default.
867
868 The return value consists of three lists: the centres of the
869 bins, the associated probability density and a list of computa‐
870 tional parameters (begin and end of the interval, mean and stan‐
871 dard deviation and the used bandwidth). The computational param‐
872 eters can be used for further analysis.
873
874 list data
875 - The data to be examined
876
877 list args
878 - Option-value pairs:
879
880 -weights weights
881 Per data point the weight (default: 1 for all
882 data)
883
884 -bandwidth value
885 Bandwidth to be used for the estimation (default:
886 determined from standard deviation)
887
888 -number value
889 Number of bins to be returned (default: 100)
890
891 -interval {begin end}
892 Begin and end of the interval for which the den‐
893 sity is returned (default: mean +/- 3*standard
894 deviation)
895
896 -kernel function
897 Kernel to be used (One of: gaussian, cosine,
898 epanechnikov, uniform, triangular, biweight,
899 logistic; default: gaussian)
900
902 Besides the linear regression with a single independent variable, the
903 statistics package provides two procedures for doing ordinary least
904 squares (OLS) and weighted least squares (WLS) linear regression with
905 several variables. They were written by Eric Kemp-Benedict.
906
907 In addition to these two, it provides a procedure (tstat) for calculat‐
908 ing the value of the t-statistic for the specified number of degrees of
909 freedom that is required to demonstrate a given level of significance.
910
911 Note: These procedures depend on the math::linearalgebra package.
912
913 Description of the procedures
914
915 ::math::statistics::tstat dof ?alpha?
916 Returns the value of the t-distribution t* satisfying
917
918
919 P(t*) = 1 - alpha/2
920 P(-t*) = alpha/2
921
922
923 for the number of degrees of freedom dof.
924
925 Given a sample of normally-distributed data x, with an estimate
926 xbar for the mean and sbar for the standard deviation, the alpha
927 confidence interval for the estimate of the mean can be calcu‐
928 lated as
929
930
931 ( xbar - t* sbar , xbar + t* sbar)
932
933
934 The return values from this procedure can be compared to an
935 estimated t-statistic to determine whether the estimated value
936 of a parameter is significantly different from zero at the given
937 confidence level.
938
939 int dof
940 Number of degrees of freedom
941
942 float alpha
943 Confidence level of the t-distribution. Defaults to 0.05.
944
945
946 ::math::statistics::mv-wls wt1 weights_and_values
947 Carries out a weighted least squares linear regression for the
948 data points provided, with weights assigned to each point.
949
950 The linear model is of the form
951
952
953 y = b0 + b1 * x1 + b2 * x2 ... + bN * xN + error
954
955
956 and each point satisfies
957
958
959 yi = b0 + b1 * xi1 + b2 * xi2 + ... + bN * xiN + Residual_i
960
961
962 The procedure returns a list with the following elements:
963
964 · The r-squared statistic
965
966 · The adjusted r-squared statistic
967
968 · A list containing the estimated coefficients b1, ... bN,
969 b0 (The constant b0 comes last in the list.)
970
971 · A list containing the standard errors of the coefficients
972
973 · A list containing the 95% confidence bounds of the coef‐
974 ficients, with each set of bounds returned as a list with
975 two values
976
977 Arguments:
978
979 list weights_and_values
980 A list consisting of: the weight for the first observa‐
981 tion, the data for the first observation (as a sublist),
982 the weight for the second observation, the data for the
983 second observation (as a sublist) and so on. The sublists of
984 data are organised as lists of the value of the dependent
985 variable y and the independent variables x1, x2 to xN.
986
987
988 ::math::statistics::mv-ols values
989 Carries out an ordinary least squares linear regression for the
990 data points provided.
991
992 This procedure simply calls ::math::statistics::mv-wls with the
993 weights set to 1.0, and returns the same information.
994
995 Example of the use:
996
997
998 # Store the unicode value for the "+/-" character
999 set pm "\u00B1"
1000
1001 # Provide some data
1002 set data {{ -.67 14.18 60.03 -7.5 }
1003 { 36.97 15.52 34.24 14.61 }
1004 {-29.57 21.85 83.36 -7. }
1005 {-16.9 11.79 51.67 -6.56 }
1006 { 14.09 16.24 36.97 -12.84}
1007 { 31.52 20.93 45.99 -25.4 }
1008 { 24.05 20.69 50.27 17.27}
1009 { 22.23 16.91 45.07 -4.3 }
1010 { 40.79 20.49 38.92 -.73 }
1011 {-10.35 17.24 58.77 18.78}}
1012
1013 # Call the ols routine
1014 set results [::math::statistics::mv-ols $data]
1015
1016 # Pretty-print the results
1017 puts "R-squared: [lindex $results 0]"
1018 puts "Adj R-squared: [lindex $results 1]"
1019 puts "Coefficients $pm s.e. -- \[95% confidence interval\]:"
1020 foreach val [lindex $results 2] se [lindex $results 3] bounds [lindex $results 4] {
1021 set lb [lindex $bounds 0]
1022 set ub [lindex $bounds 1]
1023 puts " $val $pm $se -- \[$lb to $ub\]"
1024 }
1025
1026
1028 In the literature a large number of probability distributions can be
1029 found. The statistics package supports:
1030
1031 · The normal or Gaussian distribution as well as the log-normal
1032 distribution
1033
1034 · The uniform distribution - equal probability for all data within
1035 a given interval
1036
1037 · The exponential distribution - useful as a model for certain
1038 extreme-value distributions.
1039
1040 · The gamma distribution - based on the incomplete Gamma integral
1041
1042 · The beta distribution
1043
1044 · The chi-square distribution
1045
1046 · The student's T distribution
1047
1048 · The Poisson distribution
1049
1050 · The Pareto distribution
1051
1052 · The Gumbel distribution
1053
1054 · The Weibull distribution
1055
1056 · The Cauchy distribution
1057
1058 · The F distribution (only the cumulative density function)
1059
1060 · PM - binomial.
1061
1062 In principle for each distribution one has procedures for:
1063
1064 · The probability density (pdf-*)
1065
1066 · The cumulative density (cdf-*)
1067
1068 · Quantiles for the given distribution (quantiles-*)
1069
1070 · Histograms for the given distribution (histogram-*)
1071
1072 · List of random values with the given distribution (random-*)
1073
1074 The following procedures have been implemented:
1075
1076 ::math::statistics::pdf-normal mean stdev value
1077 Return the probability of a given value for a normal distribu‐
1078 tion with given mean and standard deviation.
1079
1080 float mean
1081 - Mean value of the distribution
1082
1083 float stdev
1084 - Standard deviation of the distribution
1085
1086 float value
1087 - Value for which the probability is required
1088
1089
1090 ::math::statistics::pdf-lognormal mean stdev value
1091 Return the probability of a given value for a log-normal distri‐
1092 bution with given mean and standard deviation.
1093
1094 float mean
1095 - Mean value of the distribution
1096
1097 float stdev
1098 - Standard deviation of the distribution
1099
1100 float value
1101 - Value for which the probability is required
1102
1103
1104 ::math::statistics::pdf-exponential mean value
1105 Return the probability of a given value for an exponential dis‐
1106 tribution with given mean.
1107
1108 float mean
1109 - Mean value of the distribution
1110
1111 float value
1112 - Value for which the probability is required
1113
1114
1115 ::math::statistics::pdf-uniform xmin xmax value
1116 Return the probability of a given value for a uniform distribu‐
1117 tion with given extremes.
1118
1119 float xmin
1120 - Minimum value of the distribution
1121
1122 float xmax
1123 - Maximum value of the distribution
1124
1125 float value
1126 - Value for which the probability is required
1127
1128
1129 ::math::statistics::pdf-gamma alpha beta value
1130 Return the probability of a given value for a Gamma distribution
1131 with given shape and rate parameters
1132
1133 float alpha
1134 - Shape parameter
1135
1136 float beta
1137 - Rate parameter
1138
1139 float value
1140 - Value for which the probability is required
1141
1142
1143 ::math::statistics::pdf-poisson mu k
1144 Return the probability of a given number of occurrences in the
1145 same interval (k) for a Poisson distribution with given mean
1146 (mu)
1147
1148 float mu
1149 - Mean number of occurrences
1150
1151 int k - Number of occurrences
1152
1153
1154 ::math::statistics::pdf-chisquare df value
1155 Return the probability of a given value for a chi square distri‐
1156 bution with given degrees of freedom
1157
1158 float df
1159 - Degrees of freedom
1160
1161 float value
1162 - Value for which the probability is required
1163
1164
1165 ::math::statistics::pdf-student-t df value
1166 Return the probability of a given value for a Student's t dis‐
1167 tribution with given degrees of freedom
1168
1169 float df
1170 - Degrees of freedom
1171
1172 float value
1173 - Value for which the probability is required
1174
1175
1176 ::math::statistics::pdf-gamma a b value
1177 Return the probability of a given value for a Gamma distribution
1178 with given shape and rate parameters
1179
1180 float a
1181 - Shape parameter
1182
1183 float b
1184 - Rate parameter
1185
1186 float value
1187 - Value for which the probability is required
1188
1189
1190 ::math::statistics::pdf-beta a b value
1191 Return the probability of a given value for a Beta distribution
1192 with given shape parameters
1193
1194 float a
1195 - First shape parameter
1196
1197 float b
1198 - Second shape parameter
1199
1200 float value
1201 - Value for which the probability is required
1202
1203
1204 ::math::statistics::pdf-weibull scale shape value
1205 Return the probability of a given value for a Weibull distribu‐
1206 tion with given scale and shape parameters
1207
1208 float scale
1209 - Scale parameter
1210
1211 float shape
1212 - Shape parameter
1213
1214 float value
1215 - Value for which the probability is required
1216
1217
1218 ::math::statistics::pdf-gumbel location scale value
1219 Return the probability of a given value for a Gumbel distribu‐
1220 tion with given location and scale parameters
1221
1222 float location
1223 - Location parameter
1224
1225 float scale
1226 - Scale parameter
1227
1228 float value
1229 - Value for which the probability is required
1230
1231
1232 ::math::statistics::pdf-pareto scale shape value
1233 Return the probability of a given value for a Pareto distribu‐
1234 tion with given scale and shape parameters
1235
1236 float scale
1237 - Scale parameter
1238
1239 float shape
1240 - Shape parameter
1241
1242 float value
1243 - Value for which the probability is required
1244
1245
1246 ::math::statistics::pdf-cauchy location scale value
1247 Return the probability of a given value for a Cauchy distribu‐
1248 tion with given location and scale parameters. Note that the
1249 Cauchy distribution has no finite higher-order moments.
1250
1251 float location
1252 - Location parameter
1253
1254 float scale
1255 - Scale parameter
1256
1257 float value
1258 - Value for which the probability is required
1259
1260
1261 ::math::statistics::cdf-normal mean stdev value
1262 Return the cumulative probability of a given value for a normal
1263 distribution with given mean and standard deviation, that is the
1264 probability for values up to the given one.
1265
1266 float mean
1267 - Mean value of the distribution
1268
1269 float stdev
1270 - Standard deviation of the distribution
1271
1272 float value
1273 - Value for which the probability is required
1274
1275
1276 ::math::statistics::cdf-lognormal mean stdev value
1277 Return the cumulative probability of a given value for a log-
1278 normal distribution with given mean and standard deviation, that
1279 is the probability for values up to the given one.
1280
1281 float mean
1282 - Mean value of the distribution
1283
1284 float stdev
1285 - Standard deviation of the distribution
1286
1287 float value
1288 - Value for which the probability is required
1289
1290
1291 ::math::statistics::cdf-exponential mean value
1292 Return the cumulative probability of a given value for an expo‐
1293 nential distribution with given mean.
1294
1295 float mean
1296 - Mean value of the distribution
1297
1298 float value
1299 - Value for which the probability is required
1300
1301
1302 ::math::statistics::cdf-uniform xmin xmax value
1303 Return the cumulative probability of a given value for a uniform
1304 distribution with given extremes.
1305
1306 float xmin
1307 - Minimum value of the distribution
1308
1309 float xmax
1310 - Maximum value of the distribution
1311
1312 float value
1313 - Value for which the probability is required
1314
1315
1316 ::math::statistics::cdf-students-t degrees value
1317 Return the cumulative probability of a given value for a Stu‐
1318 dent's t distribution with given number of degrees of freedom.
1319
1320 int degrees
1321 - Number of degrees of freedom
1322
1323 float value
1324 - Value for which the probability is required
1325
1326
1327 ::math::statistics::cdf-gamma alpha beta value
1328 Return the cumulative probability of a given value for a Gamma
1329 distribution with given shape and rate parameters.
1330
1331 float alpha
1332 - Shape parameter
1333
1334 float beta
1335 - Rate parameter
1336
1337 float value
1338 - Value for which the cumulative probability is required
1339
1340
1341 ::math::statistics::cdf-poisson mu k
1342 Return the cumulative probability of a given number of occur‐
1343 rences in the same interval (k) for a Poisson distribution with
1344 given mean (mu).
1345
1346 float mu
1347 - Mean number of occurrences
1348
1349 int k - Number of occurrences
1350
1351
1352 ::math::statistics::cdf-beta a b value
1353 Return the cumulative probability of a given value for a Beta
1354 distribution with given shape parameters
1355
1356 float a
1357 - First shape parameter
1358
1359 float b
1360 - Second shape parameter
1361
1362 float value
1363 - Value for which the probability is required
1364
1365
1366 ::math::statistics::cdf-weibull scale shape value
1367 Return the cumulative probability of a given value for a Weibull
1368 distribution with given scale and shape parameters.
1369
1370 float scale
1371 - Scale parameter
1372
1373 float shape
1374 - Shape parameter
1375
1376 float value
1377 - Value for which the probability is required
1378
1379
1380 ::math::statistics::cdf-gumbel location scale value
1381 Return the cumulative probability of a given value for a Gumbel
1382 distribution with given location and scale parameters.
1383
1384 float location
1385 - Location parameter
1386
1387 float scale
1388 - Scale parameter
1389
1390 float value
1391 - Value for which the probability is required
1392
1393
1394 ::math::statistics::cdf-pareto scale shape value
1395 Return the cumulative probability of a given value for a Pareto
1396 distribution with given scale and shape parameters
1397
1398 float scale
1399 - Scale parameter
1400
1401 float shape
1402 - Shape parameter
1403
1404 float value
1405 - Value for which the probability is required
1406
1407
1408 ::math::statistics::cdf-cauchy location scale value
1409 Return the cumulative probability of a given value for a Cauchy
1410 distribution with given location and scale parameters.
1411
1412 float location
1413 - Location parameter
1414
1415 float scale
1416 - Scale parameter
1417
1418 float value
1419 - Value for which the probability is required
1420
1421
1422 ::math::statistics::cdf-F nf1 nf2 value
1423 Return the cumulative probability of a given value for an F dis‐
1424 tribution with nf1 and nf2 degrees of freedom.
1425
1426 float nf1
1427 - Degrees of freedom for the numerator
1428
1429 float nf2
1430 - Degrees of freedom for the denominator
1431
1432 float value
1433 - Value for which the probability is required
1434
1435
1436 ::math::statistics::empirical-distribution values
1437 Return a list of values and their empirical probability. The
1438 values are sorted in increasing order. (The implementation fol‐
1439 lows the description at the corresponding Wikipedia page)
1440
1441 list values
1442 - List of data to be examined
1443
1444
1445 ::math::statistics::random-normal mean stdev number
1446 Return a list of "number" random values satisfying a normal dis‐
1447 tribution with given mean and standard deviation.
1448
1449 float mean
1450 - Mean value of the distribution
1451
1452 float stdev
1453 - Standard deviation of the distribution
1454
1455 int number
1456 - Number of values to be returned
1457
1458
1459 ::math::statistics::random-lognormal mean stdev number
1460 Return a list of "number" random values satisfying a log-normal
1461 distribution with given mean and standard deviation.
1462
1463 float mean
1464 - Mean value of the distribution
1465
1466 float stdev
1467 - Standard deviation of the distribution
1468
1469 int number
1470 - Number of values to be returned
1471
1472
1473 ::math::statistics::random-exponential mean number
1474 Return a list of "number" random values satisfying an exponen‐
1475 tial distribution with given mean.
1476
1477 float mean
1478 - Mean value of the distribution
1479
1480 int number
1481 - Number of values to be returned
1482
1483
1484 ::math::statistics::random-uniform xmin xmax number
1485 Return a list of "number" random values satisfying a uniform
1486 distribution with given extremes.
1487
1488 float xmin
1489 - Minimum value of the distribution
1490
1491 float xmax
1492 - Maximum value of the distribution
1493
1494 int number
1495 - Number of values to be returned
1496
1497
1498 ::math::statistics::random-gamma alpha beta number
1499 Return a list of "number" random values satisfying a Gamma dis‐
1500 tribution with given shape and rate parameters.
1501
1502 float alpha
1503 - Shape parameter
1504
1505 float beta
1506 - Rate parameter
1507
1508 int number
1509 - Number of values to be returned
1510
1511
1512 ::math::statistics::random-poisson mu number
1513 Return a list of "number" random values satisfying a Poisson
1514 distribution with given mean.
1515
1516 float mu
1517 - Mean of the distribution
1518
1519 int number
1520 - Number of values to be returned
1521
1522
1523 ::math::statistics::random-chisquare df number
1524 Return a list of "number" random values satisfying a chi square
1525 distribution with given degrees of freedom.
1526
1527 float df
1528 - Degrees of freedom
1529
1530 int number
1531 - Number of values to be returned
1532
1533
1534 ::math::statistics::random-student-t df number
1535 Return a list of "number" random values satisfying a Student's t
1536 distribution with given degrees of freedom.
1537
1538 float df
1539 - Degrees of freedom
1540
1541 int number
1542 - Number of values to be returned
1543
1544
1545 ::math::statistics::random-beta a b number
1546 Return a list of "number" random values satisfying a Beta dis‐
1547 tribution with given shape parameters.
1548
1549 float a
1550 - First shape parameter
1551
1552 float b
1553 - Second shape parameter
1554
1555 int number
1556 - Number of values to be returned
1557
1558
1559 ::math::statistics::random-weibull scale shape number
1560 Return a list of "number" random values satisfying a Weibull
1561 distribution with given scale and shape parameters.
1562
1563 float scale
1564 - Scale parameter
1565
1566 float shape
1567 - Shape parameter
1568
1569 int number
1570 - Number of values to be returned
1571
1572
1573 ::math::statistics::random-gumbel location scale number
1574 Return a list of "number" random values satisfying a Gumbel dis‐
1575 tribution with given location and scale parameters.
1576
1577 float location
1578 - Location parameter
1579
1580 float scale
1581 - Scale parameter
1582
1583 int number
1584 - Number of values to be returned
1585
1586
1587 ::math::statistics::random-pareto scale shape number
1588 Return a list of "number" random values satisfying a Pareto dis‐
1589 tribution with given scale and shape parameters.
1590
1591 float scale
1592 - Scale parameter
1593
1594 float shape
1595 - Shape parameter
1596
1597 int number
1598 - Number of values to be returned
1599
1600
1601 ::math::statistics::random-cauchy location scale number
1602 Return a list of "number" random values satisfying a Cauchy dis‐
1603 tribution with given location and scale parameters.
1604
1605 float location
1606 - Location parameter
1607
1608 float scale
1609 - Scale parameter
1610
1611 int number
1612 - Number of values to be returned
1613
1614
1615 ::math::statistics::histogram-uniform xmin xmax limits number
1616 Return the expected histogram for a uniform distribution.
1617
1618 float xmin
1619 - Minimum value of the distribution
1620
1621 float xmax
1622 - Maximum value of the distribution
1623
1624 list limits
1625 - Upper limits for the buckets in the histogram
1626
1627 int number
1628 - Total number of "observations" in the histogram
1629
1630
1631 ::math::statistics::incompleteGamma x p ?tol?
1632 Evaluate the incomplete Gamma integral
1633
1634
1635 $$P(p,x) = \frac{1}{\Gamma(p)} \int_{0}^{x} e^{-t} \, t^{p-1} \, dt$$
1638
1639
1640 float x
1641 - Value of x (limit of the integral)
1642
1643 float p
1644 - Value of p in the integrand
1645
1646 float tol
1647 - Required tolerance (default: 1.0e-9)
1648
1649
1650 ::math::statistics::incompleteBeta a b x ?tol?
1651 Evaluate the incomplete Beta integral
1652
1653 float a
1654 - First shape parameter
1655
1656 float b
1657 - Second shape parameter
1658
1659 float x
1660 - Value of x (limit of the integral)
1661
1662 float tol
1663 - Required tolerance (default: 1.0e-9)
1664
1665
1666 ::math::statistics::estimate-pareto values
1667 Estimate the parameters for the Pareto distribution that comes
1668 closest to the given values. Returns the estimated scale and
1669 shape parameters, as well as the standard error for the shape
1670 parameter.
1671
1672 list values
1673 - List of values, assumed to be distributed according to
1674 a Pareto distribution
1675
1676
1677 TO DO: more function descriptions to be added
1678
1680 The data manipulation procedures act on lists or lists of lists:
1681
1682 ::math::statistics::filter varname data expression
1683 Return a list consisting of the data for which the logical
1684 expression is true (this command works analogously to the com‐
1685 mand foreach).
1686
1687 string varname
1688 - Name of the variable used in the expression
1689
1690 list data
1691 - List of data
1692
1693 string expression
1694 - Logical expression using the variable name
1695
1696
1697 ::math::statistics::map varname data expression
1698 Return a list consisting of the data that are transformed via
1699 the expression.
1700
1701 string varname
1702 - Name of the variable used in the expression
1703
1704 list data
1705 - List of data
1706
1707 string expression
1708 - Expression to be used to transform (map) the data
1709
1710
1711 ::math::statistics::samplescount varname list expression
1712 Return a list consisting of the counts of all data in the sub‐
1713 lists of the "list" argument for which the expression is true.
1714
1715 string varname
1716 - Name of the variable used in the expression
1717
1718 list list
1719 - List of sublists, each containing the data
1720
1721 string expression
1722 - Logical expression to test the data (defaults to
1723 "true").
1724
1725
1726 ::math::statistics::subdivide
1727 Routine PM - not implemented yet
1728
1729
1731 The following simple plotting procedures are available:
1732
1733 ::math::statistics::plot-scale canvas xmin xmax ymin ymax
1734 Set the scale for a plot in the given canvas. All plot routines
1735 expect this function to be called first. There is no automatic
1736 scaling provided.
1737
1738 widget canvas
1739 - Canvas widget to use
1740
1741 float xmin
1742 - Minimum x value
1743
1744 float xmax
1745 - Maximum x value
1746
1747 float ymin
1748 - Minimum y value
1749
1750 float ymax
1751 - Maximum y value
1752
1753
1754 ::math::statistics::plot-xydata canvas xdata ydata tag
1755 Create a simple XY plot in the given canvas - the data are shown
1756 as a collection of dots. The tag can be used to manipulate the
1757 appearance.
1758
1759 widget canvas
1760 - Canvas widget to use
1761
1762 list xdata
1763 - Series of independent data
1764
1765 list ydata
1766 - Series of dependent data
1767
1768 string tag
1769 - Tag to give to the plotted data (defaults to xyplot)
1770
1771
1772 ::math::statistics::plot-xyline canvas xdata ydata tag
1773 Create a simple XY plot in the given canvas - the data are shown
1774 as a line through the data points. The tag can be used to manip‐
1775 ulate the appearance.
1776
1777 widget canvas
1778 - Canvas widget to use
1779
1780 list xdata
1781 - Series of independent data
1782
1783 list ydata
1784 - Series of dependent data
1785
1786 string tag
1787 - Tag to give to the plotted data (defaults to xyplot)
1788
1789
1790 ::math::statistics::plot-tdata canvas tdata tag
1791 Create a simple XY plot in the given canvas - the data are shown
1792 as a collection of dots. The horizontal coordinate is equal to
1793 the index. The tag can be used to manipulate the appearance.
1794 This type of presentation is suitable for autocorrelation func‐
1795 tions for instance or for inspecting the time-dependent behav‐
1796 iour.
1797
1798 widget canvas
1799 - Canvas widget to use
1800
1801 list tdata
1802 - Series of dependent data
1803
1804 string tag
1805 - Tag to give to the plotted data (defaults to xyplot)
1806
1807
1808 ::math::statistics::plot-tline canvas tdata tag
1809 Create a simple XY plot in the given canvas - the data are shown
1810 as a line. See plot-tdata for an explanation.
1811
1812 widget canvas
1813 - Canvas widget to use
1814
1815 list tdata
1816 - Series of dependent data
1817
1818 string tag
1819 - Tag to give to the plotted data (defaults to xyplot)
1820
1821
1822 ::math::statistics::plot-histogram canvas counts limits tag
1823 Create a simple histogram in the given canvas
1824
1825 widget canvas
1826 - Canvas widget to use
1827
1828 list counts
1829 - Series of bucket counts
1830
1831 list limits
1832 - Series of upper limits for the buckets
1833
1834 string tag
1835 - Tag to give to the plotted data (defaults to xyplot)
1836
1837
1839 The following procedures are yet to be implemented:
1840
1841 · F-test-stdev
1842
1843 · interval-mean-stdev
1844
1845 · histogram-normal
1846
1847 · histogram-exponential
1848
1849 · test-histogram
1850
1851 · test-corr
1852
1853 · quantiles-*
1854
1855 · fourier-coeffs
1856
1857 · fourier-residuals
1858
1859 · onepar-function-fit
1860
1861 · onepar-function-residuals
1862
1863 · plot-linear-model
1864
1865 · subdivide
1866
1868 The code below is a small example of how you can examine a set of data:
1869
1870 # Simple example:
1871 # - Generate data (as a cheap way of getting some)
1872 # - Perform statistical analysis to describe the data
1873 #
1874 package require math::statistics
1875
1876 #
1877 # Two auxiliary procs
1878 #
1879 proc pause {time} {
1880 set wait 0
1881 after [expr {$time*1000}] {set ::wait 1}
1882 vwait wait
1883 }
1884
1885 proc print-histogram {counts limits} {
1886 foreach count $counts limit $limits {
1887 if { $limit != {} } {
1888 puts [format "<%12.4g\t%d" $limit $count]
1889 set prev_limit $limit
1890 } else {
1891 puts [format ">%12.4g\t%d" $prev_limit $count]
1892 }
1893 }
1894 }
1895
1896 #
1897 # Our source of arbitrary data
1898 #
1899 proc generateData { data1 data2 } {
1900 upvar 1 $data1 _data1
1901 upvar 1 $data2 _data2
1902
1903 set d1 0.0
1904 set d2 0.0
1905 for { set i 0 } { $i < 100 } { incr i } {
1906 set d1 [expr {10.0-2.0*cos(2.0*3.1415926*$i/24.0)+3.5*rand()}]
1907 set d2 [expr {0.7*$d2+0.3*$d1+0.7*rand()}]
1908 lappend _data1 $d1
1909 lappend _data2 $d2
1910 }
1911 return {}
1912 }
1913
1914 #
1915 # The analysis session
1916 #
1917 package require Tk
1918 console show
1919 canvas .plot1
1920 canvas .plot2
1921 pack .plot1 .plot2 -fill both -side top
1922
1923 generateData data1 data2
1924
1925 puts "Basic statistics:"
1926 set b1 [::math::statistics::basic-stats $data1]
1927 set b2 [::math::statistics::basic-stats $data2]
1928 foreach label {mean min max number stdev var} v1 $b1 v2 $b2 {
1929 puts "$label\t$v1\t$v2"
1930 }
1931 puts "Plot the data as function of \"time\" and against each other"
1932 ::math::statistics::plot-scale .plot1 0 100 0 20
1933 ::math::statistics::plot-scale .plot2 0 20 0 20
1934 ::math::statistics::plot-tline .plot1 $data1
1935 ::math::statistics::plot-tline .plot1 $data2
1936 ::math::statistics::plot-xydata .plot2 $data1 $data2
1937
1938 puts "Correlation coefficient:"
1939 puts [::math::statistics::corr $data1 $data2]
1940
1941 pause 2
1942 puts "Plot histograms"
1943 .plot2 delete all
1944 ::math::statistics::plot-scale .plot2 0 20 0 100
1945 set limits [::math::statistics::minmax-histogram-limits 7 16]
1946 set histogram_data [::math::statistics::histogram $limits $data1]
1947 ::math::statistics::plot-histogram .plot2 $histogram_data $limits
1948
1949 puts "First series:"
1950 print-histogram $histogram_data $limits
1951
1952 pause 2
1953 set limits [::math::statistics::minmax-histogram-limits 0 15 10]
1954 set histogram_data [::math::statistics::histogram $limits $data2]
1955 ::math::statistics::plot-histogram .plot2 $histogram_data $limits d2
1956 .plot2 itemconfigure d2 -fill red
1957
1958 puts "Second series:"
1959 print-histogram $histogram_data $limits
1960
1961 puts "Autocorrelation function:"
1962 set autoc [::math::statistics::autocorr $data1]
1963 puts [::math::statistics::map x $autoc {[format "%.2f" $x]}]
1964 puts "Cross-correlation function:"
1965 set crossc [::math::statistics::crosscorr $data1 $data2]
1966 puts [::math::statistics::map x $crossc {[format "%.2f" $x]}]
1967
1968 ::math::statistics::plot-scale .plot1 0 100 -1 4
1969 ::math::statistics::plot-tline .plot1 $autoc "autoc"
1970 ::math::statistics::plot-tline .plot1 $crossc "crossc"
1971 .plot1 itemconfigure autoc -fill green
1972 .plot1 itemconfigure crossc -fill yellow
1973
1974 puts "Quantiles: 0.1, 0.2, 0.5, 0.8, 0.9"
1975 puts "First: [::math::statistics::quantiles $data1 {0.1 0.2 0.5 0.8 0.9}]"
1976 puts "Second: [::math::statistics::quantiles $data2 {0.1 0.2 0.5 0.8 0.9}]"
1977
1978
1979 If you run this example, then the following should be clear:
1980
1981 · There is a strong correlation between the two time series, as dis‐
1982 played by the raw data and especially by the correlation func‐
1983 tions.
1984
1985 · Both time series show a significant periodic component
1986
1987 · The histograms are not very useful in identifying the nature of
1988 the time series - they do not show the periodic nature.
1989
1991 This document, and the package it describes, will undoubtedly contain
1992 bugs and other problems. Please report such in the category math ::
1993 statistics of the Tcllib Trackers
1994 [http://core.tcl.tk/tcllib/reportlist]. Please also report any ideas
1995 for enhancements you may have for either package and/or documentation.
1996
1997 When proposing code changes, please provide unified diffs, i.e. the out‐
1998 put of diff -u.
1999
2000 Note further that attachments are strongly preferred over inlined
2001 patches. Attachments can be made by going to the Edit form of the
2002 ticket immediately after its creation, and then using the left-most
2003 button in the secondary navigation bar.
2004
2006 data analysis, mathematics, statistics
2007
2009 Mathematics
2010
2011
2012
2013tcllib 1 math::statistics(n)