1Stdlib.Bytes(3) OCaml library Stdlib.Bytes(3)
2
3
4
6 Stdlib.Bytes - operations on mutable byte sequences
7
9 Module Stdlib.Bytes
10
12 Module Bytes
13 : (module Stdlib__bytes)
14
15
16
17
18
19
20
21
22 val length : bytes -> int
23
24 Return the length (number of bytes) of the argument.
25
26
27
28 val get : bytes -> int -> char
29
30
31 get s n returns the byte at index n in argument s .
32
33
34 Raises Invalid_argument if n is not a valid index in s .
35
36
37
38 val set : bytes -> int -> char -> unit
39
40
41 set s n c modifies s in place, replacing the byte at index n with c .
42
43
44 Raises Invalid_argument if n is not a valid index in s .
45
46
47
48 val create : int -> bytes
49
50
51 create n returns a new byte sequence of length n . The sequence is
52 uninitialized and contains arbitrary bytes.
53
54
55 Raises Invalid_argument if n < 0 or n > Sys.max_string_length .
56
57
58
59 val make : int -> char -> bytes
60
61
62 make n c returns a new byte sequence of length n , filled with the byte
63 c .
64
65
66 Raises Invalid_argument if n < 0 or n > Sys.max_string_length .
67
68
69
70 val init : int -> (int -> char) -> bytes
71
72
73 init n f returns a fresh byte sequence of length n , with character i
74 initialized to the result of f i (in increasing index order).
75
76
77 Raises Invalid_argument if n < 0 or n > Sys.max_string_length .
78
79
80
81 val empty : bytes
82
83 A byte sequence of size 0.
84
85
86
87 val copy : bytes -> bytes
88
89 Return a new byte sequence that contains the same bytes as the argu‐
90 ment.
91
92
93
94 val of_string : string -> bytes
95
96 Return a new byte sequence that contains the same bytes as the given
97 string.
98
99
100
101 val to_string : bytes -> string
102
103 Return a new string that contains the same bytes as the given byte se‐
104 quence.
105
106
107
108 val sub : bytes -> int -> int -> bytes
109
110
111 sub s pos len returns a new byte sequence of length len , containing
112 the subsequence of s that starts at position pos and has length len .
113
114
115 Raises Invalid_argument if pos and len do not designate a valid range
116 of s .
117
118
119
120 val sub_string : bytes -> int -> int -> string
121
122 Same as Bytes.sub but return a string instead of a byte sequence.
123
124
125
126 val extend : bytes -> int -> int -> bytes
127
128
129 extend s left right returns a new byte sequence that contains the bytes
130 of s , with left uninitialized bytes prepended and right uninitialized
131 bytes appended to it. If left or right is negative, then bytes are re‐
132 moved (instead of appended) from the corresponding side of s .
133
134
135 Since 4.05.0 in BytesLabels
136
137
138 Raises Invalid_argument if the result length is negative or longer than
139 Sys.max_string_length bytes.
140
141
142
143 val fill : bytes -> int -> int -> char -> unit
144
145
146 fill s pos len c modifies s in place, replacing len characters with c ,
147 starting at pos .
148
149
150 Raises Invalid_argument if pos and len do not designate a valid range
151 of s .
152
153
154
155 val blit : bytes -> int -> bytes -> int -> int -> unit
156
157
158 blit src src_pos dst dst_pos len copies len bytes from sequence src ,
159 starting at index src_pos , to sequence dst , starting at index dst_pos .
160 It works correctly even if src and dst are the same byte sequence,
161 and the source and destination intervals overlap.
162
163
164 Raises Invalid_argument if src_pos and len do not designate a valid
165 range of src , or if dst_pos and len do not designate a valid range of
166 dst .
167
168
169
170 val blit_string : string -> int -> bytes -> int -> int -> unit
171
172
173 blit_string src src_pos dst dst_pos len copies len bytes from string
174 src , starting at index src_pos , to byte sequence dst , starting at
175 index dst_pos .
176
177
178 Since 4.05.0 in BytesLabels
179
180
181 Raises Invalid_argument if src_pos and len do not designate a valid
182 range of src , or if dst_pos and len do not designate a valid range of
183 dst .
184
185
186
187 val concat : bytes -> bytes list -> bytes
188
189
190 concat sep sl concatenates the list of byte sequences sl , inserting
191 the separator byte sequence sep between each, and returns the result as
192 a new byte sequence.
193
194
195 Raises Invalid_argument if the result is longer than
196 Sys.max_string_length bytes.
197
198
199
200 val cat : bytes -> bytes -> bytes
201
202
203 cat s1 s2 concatenates s1 and s2 and returns the result as a new byte
204 sequence.
205
206
207 Since 4.05.0 in BytesLabels
208
209
210 Raises Invalid_argument if the result is longer than
211 Sys.max_string_length bytes.
212
213
214
215 val iter : (char -> unit) -> bytes -> unit
216
217
218 iter f s applies function f in turn to all the bytes of s . It is
219 equivalent to f (get s 0); f (get s 1); ...; f (get s
220 (length s - 1)); () .
221
222
223
224 val iteri : (int -> char -> unit) -> bytes -> unit
225
226 Same as Bytes.iter , but the function is applied to the index of the
227 byte as first argument and the byte itself as second argument.
228
229
230
231 val map : (char -> char) -> bytes -> bytes
232
233
234 map f s applies function f in turn to all the bytes of s (in increasing
235 index order) and stores the resulting bytes in a new sequence that is
236 returned as the result.
237
238
239
240 val mapi : (int -> char -> char) -> bytes -> bytes
241
242
243 mapi f s calls f with each character of s and its index (in increasing
244 index order) and stores the resulting bytes in a new sequence that is
245 returned as the result.
246
247
248
249 val trim : bytes -> bytes
250
251 Return a copy of the argument, without leading and trailing whitespace.
252 The bytes regarded as whitespace are the ASCII characters ' ' , '\012'
253 (form feed; OCaml's '\ddd' escapes are decimal), '\n' , '\r' , and '\t' .
254
255
256
257 val escaped : bytes -> bytes
258
259 Return a copy of the argument, with special characters represented by
260 escape sequences, following the lexical conventions of OCaml. All
261 characters outside the ASCII printable range (32..126) are escaped, as
262 well as backslash and double-quote.
263
264
265 Raises Invalid_argument if the result is longer than
266 Sys.max_string_length bytes.
267
268
269
270 val index : bytes -> char -> int
271
272
273 index s c returns the index of the first occurrence of byte c in s .
274
275
276 Raises Not_found if c does not occur in s .
277
278
279
280 val index_opt : bytes -> char -> int option
281
282
283 index_opt s c returns the index of the first occurrence of byte c in s
284 or None if c does not occur in s .
285
286
287 Since 4.05
288
289
290
291 val rindex : bytes -> char -> int
292
293
294 rindex s c returns the index of the last occurrence of byte c in s .
295
296
297 Raises Not_found if c does not occur in s .
298
299
300
301 val rindex_opt : bytes -> char -> int option
302
303
304 rindex_opt s c returns the index of the last occurrence of byte c in s
305 or None if c does not occur in s .
306
307
308 Since 4.05
309
310
311
312 val index_from : bytes -> int -> char -> int
313
314
315 index_from s i c returns the index of the first occurrence of byte c in
316 s after position i . index s c is equivalent to index_from s 0 c .
317
318
319 Raises Invalid_argument if i is not a valid position in s .
320
321
322 Raises Not_found if c does not occur in s after position i .
323
324
325
326 val index_from_opt : bytes -> int -> char -> int option
327
328
329 index_from_opt s i c returns the index of the first occurrence of byte
330 c in s after position i or None if c does not occur in s after position
331 i . index_opt s c is equivalent to index_from_opt s 0 c .
332
333
334 Since 4.05
335
336
337 Raises Invalid_argument if i is not a valid position in s .
338
339
340
341 val rindex_from : bytes -> int -> char -> int
342
343
344 rindex_from s i c returns the index of the last occurrence of byte c in
345 s before position i+1 . rindex s c is equivalent to rindex_from s
346 (length s - 1) c .
347
348
349 Raises Invalid_argument if i+1 is not a valid position in s .
350
351
352 Raises Not_found if c does not occur in s before position i+1 .
353
354
355
356 val rindex_from_opt : bytes -> int -> char -> int option
357
358
359 rindex_from_opt s i c returns the index of the last occurrence of byte
360 c in s before position i+1 or None if c does not occur in s before po‐
361 sition i+1 . rindex_opt s c is equivalent to rindex_from_opt s
362 (length s - 1) c .
363
364
365 Since 4.05
366
367
368 Raises Invalid_argument if i+1 is not a valid position in s .
369
370
371
372 val contains : bytes -> char -> bool
373
374
375 contains s c tests if byte c appears in s .
376
377
378
379 val contains_from : bytes -> int -> char -> bool
380
381
382 contains_from s start c tests if byte c appears in s after position
383 start . contains s c is equivalent to contains_from
384 s 0 c .
385
386
387 Raises Invalid_argument if start is not a valid position in s .
388
389
390
391 val rcontains_from : bytes -> int -> char -> bool
392
393
394 rcontains_from s stop c tests if byte c appears in s before position
395 stop+1 .
396
397
398 Raises Invalid_argument if stop < 0 or stop+1 is not a valid position
399 in s .
400
401
402
403 val uppercase : bytes -> bytes
404
405 Deprecated. Functions operating on Latin-1 character set are depre‐
406 cated.
407
408
409 Return a copy of the argument, with all lowercase letters translated to
410 uppercase, including accented letters of the ISO Latin-1 (8859-1) char‐
411 acter set.
412
413
414
415 val lowercase : bytes -> bytes
416
417 Deprecated. Functions operating on Latin-1 character set are depre‐
418 cated.
419
420
421 Return a copy of the argument, with all uppercase letters translated to
422 lowercase, including accented letters of the ISO Latin-1 (8859-1) char‐
423 acter set.
424
425
426
427 val capitalize : bytes -> bytes
428
429 Deprecated. Functions operating on Latin-1 character set are depre‐
430 cated.
431
432
433 Return a copy of the argument, with the first character set to upper‐
434 case, using the ISO Latin-1 (8859-1) character set.
435
436
437
438 val uncapitalize : bytes -> bytes
439
440 Deprecated. Functions operating on Latin-1 character set are depre‐
441 cated.
442
443
444 Return a copy of the argument, with the first character set to lower‐
445 case, using the ISO Latin-1 (8859-1) character set.
446
447
448
449 val uppercase_ascii : bytes -> bytes
450
451 Return a copy of the argument, with all lowercase letters translated to
452 uppercase, using the US-ASCII character set.
453
454
455 Since 4.03.0 (4.05.0 in BytesLabels)
456
457
458
459 val lowercase_ascii : bytes -> bytes
460
461 Return a copy of the argument, with all uppercase letters translated to
462 lowercase, using the US-ASCII character set.
463
464
465 Since 4.03.0 (4.05.0 in BytesLabels)
466
467
468
469 val capitalize_ascii : bytes -> bytes
470
471 Return a copy of the argument, with the first character set to upper‐
472 case, using the US-ASCII character set.
473
474
475 Since 4.03.0 (4.05.0 in BytesLabels)
476
477
478
479 val uncapitalize_ascii : bytes -> bytes
480
481 Return a copy of the argument, with the first character set to lower‐
482 case, using the US-ASCII character set.
483
484
485 Since 4.03.0 (4.05.0 in BytesLabels)
486
487
488 type t = bytes
489
490
491 An alias for the type of byte sequences.
492
493
494
495 val compare : t -> t -> int
496
497 The comparison function for byte sequences, with the same specification
498 as Stdlib.compare . Along with the type t , this function compare allows
499 the module Bytes to be passed as argument to the functors Set.Make and
500 Map.Make .
501
502
503
504 val equal : t -> t -> bool
505
506 The equality function for byte sequences.
507
508
509 Since 4.03.0 (4.05.0 in BytesLabels)
510
511
512
513
514 Unsafe conversions (for advanced users)
515 This section describes unsafe, low-level conversion functions between
516 bytes and string . They do not copy the internal data; used improperly,
517 they can break the immutability invariant on strings provided by the
518 -safe-string option. They are available for expert library authors, but
519 for most purposes you should use the always-correct Bytes.to_string and
520 Bytes.of_string instead.
521
522 val unsafe_to_string : bytes -> string
523
524 Unsafely convert a byte sequence into a string.
525
526 To reason about the use of unsafe_to_string , it is convenient to con‐
527 sider an "ownership" discipline. A piece of code that manipulates some
528 data "owns" it; there are several disjoint ownership modes, including:
529
530 -Unique ownership: the data may be accessed and mutated
531
532 -Shared ownership: the data has several owners, that may only access
533 it, not mutate it.
534
535 Unique ownership is linear: passing the data to another piece of code
536 means giving up ownership (we cannot write the data again). A unique
537 owner may decide to make the data shared (giving up mutation rights on
538 it), but shared data may not become uniquely-owned again.
539
540
541 unsafe_to_string s can only be used when the caller owns the byte se‐
542 quence s -- either uniquely or as shared immutable data. The caller
543 gives up ownership of s , and gains ownership of the returned string.
544
545 There are two valid use-cases that respect this ownership discipline:
546
547 1. Creating a string by initializing and mutating a byte sequence that
548 is never changed after initialization is performed.
549
550
551 let string_init len f : string =
552 let s = Bytes.create len in
553 for i = 0 to len - 1 do Bytes.set s i (f i) done;
554 Bytes.unsafe_to_string s
555
556
557 This function is safe because the byte sequence s will never be ac‐
558 cessed or mutated after unsafe_to_string is called. The string_init
559 code gives up ownership of s , and returns the ownership of the result‐
560 ing string to its caller.
561
562 Note that it would be unsafe if s was passed as an additional parameter
563 to the function f as it could escape this way and be mutated in the fu‐
564 ture -- string_init would give up ownership of s to pass it to f , and
565 could not call unsafe_to_string safely.
566
567 We have provided the String.init , String.map and String.mapi functions
568 to cover most cases of building new strings. You should prefer those
569 over to_string or unsafe_to_string whenever applicable.
570
571 2. Temporarily giving ownership of a byte sequence to a function that
572 expects a uniquely owned string and returns ownership back, so that we
573 can mutate the sequence again after the call ended.
574
575
576 let bytes_length (s : bytes) =
577 String.length (Bytes.unsafe_to_string s)
578
579
580 In this use-case, we do not promise that s will never be mutated after
581 the call to bytes_length s . The String.length function temporarily
582 borrows unique ownership of the byte sequence (and sees it as a string
583 ), but returns this ownership back to the caller, which may assume that
584 s is still a valid byte sequence after the call. Note that this is only
585 correct because we know that String.length does not capture its argu‐
586 ment -- it could escape by a side-channel such as a memoization combi‐
587 nator.
588
589 The caller may not mutate s while the string is borrowed (it has tempo‐
590 rarily given up ownership). This affects concurrent programs, but also
591 higher-order functions: if String.length returned a closure to be
592 called later, s should not be mutated until this closure is fully ap‐
593 plied and returns ownership.
594
595
596
597 val unsafe_of_string : string -> bytes
598
599 Unsafely convert a shared string to a byte sequence that should not be
600 mutated.
601
602 The same ownership discipline that makes unsafe_to_string correct ap‐
603 plies to unsafe_of_string : you may use it if you were the owner of the
604 string value, and you will own the return bytes in the same mode.
605
606 In practice, unique ownership of string values is extremely difficult
607 to reason about correctly. You should always assume strings are shared,
608 never uniquely owned.
609
610 For example, string literals are implicitly shared by the compiler, so
611 you never uniquely own them.
612
613
614 let incorrect = Bytes.unsafe_of_string "hello"
615 let s = Bytes.of_string "hello"
616
617
618 The first declaration is incorrect, because the string literal "hello"
619 could be shared by the compiler with other parts of the program, and
620 mutating incorrect is a bug. You must always use the second version,
621 which performs a copy and is thus correct.
622
623 Assuming unique ownership of strings that are not string literals, but
624 are (partly) built from string literals, is also incorrect. For exam‐
625 ple, mutating unsafe_of_string ("foo" ^ s) could mutate the shared
626 string "foo" -- assuming a rope-like representation of strings. More
627 generally, functions operating on strings will assume shared ownership,
628 they do not preserve unique ownership. It is thus incorrect to assume
629 unique ownership of the result of unsafe_of_string .
630
631 The only case we are reasonably confident is safe is when the produced
632 bytes value is shared -- used as an immutable byte sequence. This may be
633 useful for incremental migration of low-level programs that manipulate
634 immutable sequences of bytes (for example Marshal.from_bytes ) and pre‐
635 viously used the string type for this purpose.
636
637
638
639
640 Iterators
641 val to_seq : t -> char Seq.t
642
643 Iterate on the byte sequence, in increasing index order. Modifications
644 of the byte sequence during iteration will be reflected in the iterator.
645
646
647 Since 4.07
648
649
650
651 val to_seqi : t -> (int * char) Seq.t
652
653 Iterate on the byte sequence, in increasing index order, yielding each
654 index along with the byte at that index.
655
656
657 Since 4.07
658
659
660
661 val of_seq : char Seq.t -> t
662
663 Create a byte sequence from the given sequence of characters.
664
665
666 Since 4.07
667
668
669
670
671 Binary encoding/decoding of integers
672 The functions in this section binary encode and decode integers to and
673 from byte sequences.
674
675 All following functions raise Invalid_argument if the space needed at
676 index i to decode or encode the integer is not available.
677
678 Little-endian (resp. big-endian) encoding means that least (resp. most)
679 significant bytes are stored first. Big-endian is also known as net‐
680 work byte order. Native-endian encoding is either little-endian or
681 big-endian depending on Sys.big_endian .
682
683 32-bit and 64-bit integers are represented by the int32 and int64
684 types, which can be interpreted either as signed or unsigned numbers.
685
686 8-bit and 16-bit integers are represented by the int type, which has
687 more bits than the binary encoding. These extra bits are handled as
688 follows:
689
690 -Functions that decode signed (resp. unsigned) 8-bit or 16-bit integers
691 represented by int values sign-extend (resp. zero-extend) their result.
692
693 -Functions that encode 8-bit or 16-bit integers represented by int val‐
694 ues truncate their input to their least significant bytes.
695
696
697 val get_uint8 : bytes -> int -> int
698
699
700 get_uint8 b i is b 's unsigned 8-bit integer starting at byte index i .
701
702
703 Since 4.08
704
705
706
707 val get_int8 : bytes -> int -> int
708
709
710 get_int8 b i is b 's signed 8-bit integer starting at byte index i .
711
712
713 Since 4.08
714
715
716
717 val get_uint16_ne : bytes -> int -> int
718
719
720 get_uint16_ne b i is b 's native-endian unsigned 16-bit integer start‐
721 ing at byte index i .
722
723
724 Since 4.08
725
726
727
728 val get_uint16_be : bytes -> int -> int
729
730
731 get_uint16_be b i is b 's big-endian unsigned 16-bit integer starting
732 at byte index i .
733
734
735 Since 4.08
736
737
738
739 val get_uint16_le : bytes -> int -> int
740
741
742 get_uint16_le b i is b 's little-endian unsigned 16-bit integer start‐
743 ing at byte index i .
744
745
746 Since 4.08
747
748
749
750 val get_int16_ne : bytes -> int -> int
751
752
753 get_int16_ne b i is b 's native-endian signed 16-bit integer starting
754 at byte index i .
755
756
757 Since 4.08
758
759
760
761 val get_int16_be : bytes -> int -> int
762
763
764 get_int16_be b i is b 's big-endian signed 16-bit integer starting at
765 byte index i .
766
767
768 Since 4.08
769
770
771
772 val get_int16_le : bytes -> int -> int
773
774
775 get_int16_le b i is b 's little-endian signed 16-bit integer starting
776 at byte index i .
777
778
779 Since 4.08
780
781
782
783 val get_int32_ne : bytes -> int -> int32
784
785
786 get_int32_ne b i is b 's native-endian 32-bit integer starting at byte
787 index i .
788
789
790 Since 4.08
791
792
793
794 val get_int32_be : bytes -> int -> int32
795
796
797 get_int32_be b i is b 's big-endian 32-bit integer starting at byte in‐
798 dex i .
799
800
801 Since 4.08
802
803
804
805 val get_int32_le : bytes -> int -> int32
806
807
808 get_int32_le b i is b 's little-endian 32-bit integer starting at byte
809 index i .
810
811
812 Since 4.08
813
814
815
816 val get_int64_ne : bytes -> int -> int64
817
818
819 get_int64_ne b i is b 's native-endian 64-bit integer starting at byte
820 index i .
821
822
823 Since 4.08
824
825
826
827 val get_int64_be : bytes -> int -> int64
828
829
830 get_int64_be b i is b 's big-endian 64-bit integer starting at byte in‐
831 dex i .
832
833
834 Since 4.08
835
836
837
838 val get_int64_le : bytes -> int -> int64
839
840
841 get_int64_le b i is b 's little-endian 64-bit integer starting at byte
842 index i .
843
844
845 Since 4.08
846
847
848
849 val set_uint8 : bytes -> int -> int -> unit
850
851
852 set_uint8 b i v sets b 's unsigned 8-bit integer starting at byte index
853 i to v .
854
855
856 Since 4.08
857
858
859
860 val set_int8 : bytes -> int -> int -> unit
861
862
863 set_int8 b i v sets b 's signed 8-bit integer starting at byte index i
864 to v .
865
866
867 Since 4.08
868
869
870
871 val set_uint16_ne : bytes -> int -> int -> unit
872
873
874 set_uint16_ne b i v sets b 's native-endian unsigned 16-bit integer
875 starting at byte index i to v .
876
877
878 Since 4.08
879
880
881
882 val set_uint16_be : bytes -> int -> int -> unit
883
884
885 set_uint16_be b i v sets b 's big-endian unsigned 16-bit integer start‐
886 ing at byte index i to v .
887
888
889 Since 4.08
890
891
892
893 val set_uint16_le : bytes -> int -> int -> unit
894
895
896 set_uint16_le b i v sets b 's little-endian unsigned 16-bit integer
897 starting at byte index i to v .
898
899
900 Since 4.08
901
902
903
904 val set_int16_ne : bytes -> int -> int -> unit
905
906
907 set_int16_ne b i v sets b 's native-endian signed 16-bit integer start‐
908 ing at byte index i to v .
909
910
911 Since 4.08
912
913
914
915 val set_int16_be : bytes -> int -> int -> unit
916
917
918 set_int16_be b i v sets b 's big-endian signed 16-bit integer starting
919 at byte index i to v .
920
921
922 Since 4.08
923
924
925
926 val set_int16_le : bytes -> int -> int -> unit
927
928
929 set_int16_le b i v sets b 's little-endian signed 16-bit integer start‐
930 ing at byte index i to v .
931
932
933 Since 4.08
934
935
936
937 val set_int32_ne : bytes -> int -> int32 -> unit
938
939
940 set_int32_ne b i v sets b 's native-endian 32-bit integer starting at
941 byte index i to v .
942
943
944 Since 4.08
945
946
947
948 val set_int32_be : bytes -> int -> int32 -> unit
949
950
951 set_int32_be b i v sets b 's big-endian 32-bit integer starting at byte
952 index i to v .
953
954
955 Since 4.08
956
957
958
959 val set_int32_le : bytes -> int -> int32 -> unit
960
961
962 set_int32_le b i v sets b 's little-endian 32-bit integer starting at
963 byte index i to v .
964
965
966 Since 4.08
967
968
969
970 val set_int64_ne : bytes -> int -> int64 -> unit
971
972
973 set_int64_ne b i v sets b 's native-endian 64-bit integer starting at
974 byte index i to v .
975
976
977 Since 4.08
978
979
980
981 val set_int64_be : bytes -> int -> int64 -> unit
982
983
984 set_int64_be b i v sets b 's big-endian 64-bit integer starting at byte
985 index i to v .
986
987
988 Since 4.08
989
990
991
992 val set_int64_le : bytes -> int -> int64 -> unit
993
994
995 set_int64_le b i v sets b 's little-endian 64-bit integer starting at
996 byte index i to v .
997
998
999 Since 4.08
1000
1001
1002
1003
1004
1005OCamldoc 2021-07-22 Stdlib.Bytes(3)