1Stdlib.Bytes(3) OCaml library Stdlib.Bytes(3)
2
3
4
6 Stdlib.Bytes - no description
7
9 Module Stdlib.Bytes
10
12 Module Bytes
13 : (module Stdlib__bytes)
14
15
16
17
18
19
20
21
22 val length : bytes -> int
23
24 Return the length (number of bytes) of the argument.
25
26
27
28 val get : bytes -> int -> char
29
30
31 get s n returns the byte at index n in argument s .
32
33
34 Raises Invalid_argument if n is not a valid index in s .
35
36
37
38 val set : bytes -> int -> char -> unit
39
40
41 set s n c modifies s in place, replacing the byte at index n with c .
42
43
44 Raises Invalid_argument if n is not a valid index in s .
45
46
47
48 val create : int -> bytes
49
50
51 create n returns a new byte sequence of length n . The sequence is
52 uninitialized and contains arbitrary bytes.
53
54
55 Raises Invalid_argument if n < 0 or n > Sys.max_string_length .
56
57
58
59 val make : int -> char -> bytes
60
61
62 make n c returns a new byte sequence of length n , filled with the byte
63 c .
64
65
66 Raises Invalid_argument if n < 0 or n > Sys.max_string_length .
67
68
69
70 val init : int -> (int -> char) -> bytes
71
72
73 Bytes.init n f returns a fresh byte sequence of length n , with charac‐
74 ter i initialized to the result of f i (in increasing index order).
75
76
77 Raises Invalid_argument if n < 0 or n > Sys.max_string_length .
78
79
80
81 val empty : bytes
82
83 A byte sequence of size 0.
84
85
86
87 val copy : bytes -> bytes
88
89 Return a new byte sequence that contains the same bytes as the argu‐
90 ment.
91
92
93
94 val of_string : string -> bytes
95
96 Return a new byte sequence that contains the same bytes as the given
97 string.
98
99
100
101 val to_string : bytes -> string
102
103 Return a new string that contains the same bytes as the given byte
104 sequence.
105
106
107
108 val sub : bytes -> int -> int -> bytes
109
110
111 sub s start len returns a new byte sequence of length len , containing
112 the subsequence of s that starts at position start and has length len .
113
114
115 Raises Invalid_argument if start and len do not designate a valid range
116 of s .
117
118
119
120 val sub_string : bytes -> int -> int -> string
121
122 Same as sub but return a string instead of a byte sequence.
123
124
125
126 val extend : bytes -> int -> int -> bytes
127
128
129 extend s left right returns a new byte sequence that contains the bytes
130 of s , with left uninitialized bytes prepended and right uninitialized
131 bytes appended to it. If left or right is negative, then bytes are
132 removed (instead of appended) from the corresponding side of s .
133
134
135 Raises Invalid_argument if the result length is negative or longer than
136 Sys.max_string_length bytes.
137
138
139
140 val fill : bytes -> int -> int -> char -> unit
141
142
143 fill s start len c modifies s in place, replacing len characters with c
144 , starting at start .
145
146
147 Raises Invalid_argument if start and len do not designate a valid range
148 of s .
149
150
151
152 val blit : bytes -> int -> bytes -> int -> int -> unit
153
154
155 blit src srcoff dst dstoff len copies len bytes from sequence src ,
156 starting at index srcoff , to sequence dst , starting at index dstoff .
157 It works correctly even if src and dst are the same byte sequence, and
158 the source and destination intervals overlap.
159
160
161 Raises Invalid_argument if srcoff and len do not designate a valid
162 range of src , or if dstoff and len do not designate a valid range of
163 dst .
164
165
166
167 val blit_string : string -> int -> bytes -> int -> int -> unit
168
169
170 blit_string src srcoff dst dstoff len copies len bytes from string src
171 , starting at index srcoff , to byte sequence dst , starting at index
172 dstoff .
173
174
175 Raises Invalid_argument if srcoff and len do not designate a valid
176 range of src , or if dstoff and len do not designate a valid range of
177 dst .
178
179
180
181 val concat : bytes -> bytes list -> bytes
182
183
184 concat sep sl concatenates the list of byte sequences sl , inserting
185 the separator byte sequence sep between each, and returns the result as
186 a new byte sequence.
187
188
189 Raises Invalid_argument if the result is longer than
190 Sys.max_string_length bytes.
191
192
193
194 val cat : bytes -> bytes -> bytes
195
196
197 cat s1 s2 concatenates s1 and s2 and returns the result as a new byte
198 sequence.
199
200
201 Raises Invalid_argument if the result is longer than
202 Sys.max_string_length bytes.
203
204
205
206 val iter : (char -> unit) -> bytes -> unit
207
208
209 iter f s applies function f in turn to all the bytes of s . It is
210 equivalent to f (get s 0); f (get s 1); ...; f (get s
211 (length s - 1)); () .
212
213
214
215 val iteri : (int -> char -> unit) -> bytes -> unit
216
217 Same as Bytes.iter , but the function is applied to the index of the
218 byte as first argument and the byte itself as second argument.
219
220
221
222 val map : (char -> char) -> bytes -> bytes
223
224
225 map f s applies function f in turn to all the bytes of s (in increasing
226 index order) and stores the resulting bytes in a new sequence that is
227 returned as the result.
228
229
230
231 val mapi : (int -> char -> char) -> bytes -> bytes
232
233
234 mapi f s calls f with each character of s and its index (in increasing
235 index order) and stores the resulting bytes in a new sequence that is
236 returned as the result.
237
238
239
240 val trim : bytes -> bytes
241
242 Return a copy of the argument, without leading and trailing whitespace.
243 The bytes regarded as whitespace are the ASCII characters ' ' , '\012'
244 , '\n' , '\r' , and '\t' .
245
246
247
248 val escaped : bytes -> bytes
249
250 Return a copy of the argument, with special characters represented by
251 escape sequences, following the lexical conventions of OCaml. All
252 characters outside the ASCII printable range (32..126) are escaped, as
253 well as backslash and double-quote.
254
255
256 Raises Invalid_argument if the result is longer than
257 Sys.max_string_length bytes.
258
259
260
261 val index : bytes -> char -> int
262
263
264 index s c returns the index of the first occurrence of byte c in s .
265
266
267 Raises Not_found if c does not occur in s .
268
269
270
271 val index_opt : bytes -> char -> int option
272
273
274 index_opt s c returns the index of the first occurrence of byte c in s
275 or None if c does not occur in s .
276
277
278 Since 4.05
279
280
281
282 val rindex : bytes -> char -> int
283
284
285 rindex s c returns the index of the last occurrence of byte c in s .
286
287
288 Raises Not_found if c does not occur in s .
289
290
291
292 val rindex_opt : bytes -> char -> int option
293
294
295 rindex_opt s c returns the index of the last occurrence of byte c in s
296 or None if c does not occur in s .
297
298
299 Since 4.05
300
301
302
303 val index_from : bytes -> int -> char -> int
304
305
306 index_from s i c returns the index of the first occurrence of byte c in
307 s after position i . Bytes.index s c is equivalent to Bytes.index_from
308 s 0 c .
309
310
311 Raises Invalid_argument if i is not a valid position in s .
312
313
314 Raises Not_found if c does not occur in s after position i .
315
316
317
318 val index_from_opt : bytes -> int -> char -> int option
319
320
321 index_from_opt s i c returns the index of the first occurrence of byte
322 c in s after position i or None if c does not occur in s after position
323 i . Bytes.index_opt s c is equivalent to Bytes.index_from_opt s 0 c .
324
325
326 Since 4.05
327
328
329 Raises Invalid_argument if i is not a valid position in s .
330
331
332
333 val rindex_from : bytes -> int -> char -> int
334
335
336 rindex_from s i c returns the index of the last occurrence of byte c in
337 s before position i+1 . rindex s c is equivalent to rindex_from s
338 (Bytes.length s - 1) c .
339
340
341 Raises Invalid_argument if i+1 is not a valid position in s .
342
343
344 Raises Not_found if c does not occur in s before position i+1 .
345
346
347
348 val rindex_from_opt : bytes -> int -> char -> int option
349
350
351 rindex_from_opt s i c returns the index of the last occurrence of byte
352 c in s before position i+1 or None if c does not occur in s before
353 position i+1 . rindex_opt s c is equivalent to rindex_from_opt s
354 (Bytes.length s - 1) c .
355
356
357 Since 4.05
358
359
360 Raises Invalid_argument if i+1 is not a valid position in s .
361
362
363
364 val contains : bytes -> char -> bool
365
366
367 contains s c tests if byte c appears in s .
368
369
370
371 val contains_from : bytes -> int -> char -> bool
372
373
374 contains_from s start c tests if byte c appears in s after position
375 start . contains s c is equivalent to contains_from
376 s 0 c .
377
378
379 Raises Invalid_argument if start is not a valid position in s .
380
381
382
383 val rcontains_from : bytes -> int -> char -> bool
384
385
386 rcontains_from s stop c tests if byte c appears in s before position
387 stop+1 .
388
389
390 Raises Invalid_argument if stop < 0 or stop+1 is not a valid position
391 in s .
392
393
394
395 val uppercase : bytes -> bytes
396
397 Deprecated. Functions operating on Latin-1 character set are depre‐
398 cated.
399
400
401 Return a copy of the argument, with all lowercase letters translated to
402 uppercase, including accented letters of the ISO Latin-1 (8859-1) char‐
403 acter set.
404
405
406
407 val lowercase : bytes -> bytes
408
409 Deprecated. Functions operating on Latin-1 character set are depre‐
410 cated.
411
412
413 Return a copy of the argument, with all uppercase letters translated to
414 lowercase, including accented letters of the ISO Latin-1 (8859-1) char‐
415 acter set.
416
417
418
419 val capitalize : bytes -> bytes
420
421 Deprecated. Functions operating on Latin-1 character set are depre‐
422 cated.
423
424
425 Return a copy of the argument, with the first character set to upper‐
426 case, using the ISO Latin-1 (8859-1) character set.
427
428
429
430 val uncapitalize : bytes -> bytes
431
432 Deprecated. Functions operating on Latin-1 character set are depre‐
433 cated.
434
435
436 Return a copy of the argument, with the first character set to lower‐
437 case, using the ISO Latin-1 (8859-1) character set.
438
439
440
441 val uppercase_ascii : bytes -> bytes
442
443 Return a copy of the argument, with all lowercase letters translated to
444 uppercase, using the US-ASCII character set.
445
446
447 Since 4.03.0
448
449
450
451 val lowercase_ascii : bytes -> bytes
452
453 Return a copy of the argument, with all uppercase letters translated to
454 lowercase, using the US-ASCII character set.
455
456
457 Since 4.03.0
458
459
460
461 val capitalize_ascii : bytes -> bytes
462
463 Return a copy of the argument, with the first character set to upper‐
464 case, using the US-ASCII character set.
465
466
467 Since 4.03.0
468
469
470
471 val uncapitalize_ascii : bytes -> bytes
472
473 Return a copy of the argument, with the first character set to lower‐
474 case, using the US-ASCII character set.
475
476
477 Since 4.03.0
478
479
480 type t = bytes
481
482
483 An alias for the type of byte sequences.
484
485
486
487 val compare : t -> t -> int
488
489 The comparison function for byte sequences, with the same specification
490 as compare . Along with the type t , this function compare allows the
491 module Bytes to be passed as argument to the functors Set.Make and
492 Map.Make .
493
494
495
496 val equal : t -> t -> bool
497
498 The equality function for byte sequences.
499
500
501 Since 4.03.0
502
503
504
505
506 Unsafe conversions (for advanced users)
507 This section describes unsafe, low-level conversion functions between
508 bytes and string . They do not copy the internal data; used improperly,
509 they can break the immutability invariant on strings provided by the
510 -safe-string option. They are available for expert library authors, but
511 for most purposes you should use the always-correct Bytes.to_string and
512 Bytes.of_string instead.
513
514 val unsafe_to_string : bytes -> string
515
516 Unsafely convert a byte sequence into a string.
517
518 To reason about the use of unsafe_to_string , it is convenient to con‐
519 sider an "ownership" discipline. A piece of code that manipulates some
520 data "owns" it; there are several disjoint ownership modes, including:
521
522 -Unique ownership: the data may be accessed and mutated
523
524 -Shared ownership: the data has several owners, that may only access
525 it, not mutate it.
526
527 Unique ownership is linear: passing the data to another piece of code
528 means giving up ownership (we cannot write the data again). A unique
529 owner may decide to make the data shared (giving up mutation rights on
530 it), but shared data may not become uniquely-owned again.
531
532
533 unsafe_to_string s can only be used when the caller owns the byte
534 sequence s -- either uniquely or as shared immutable data. The caller
535 gives up ownership of s , and gains ownership of the returned string.
536
537 There are two valid use-cases that respect this ownership discipline:
538
539 1. Creating a string by initializing and mutating a byte sequence that
540 is never changed after initialization is performed.
541
542
543 let string_init len f : string =
544 let s = Bytes.create len in
545 for i = 0 to len - 1 do Bytes.set s i (f i) done;
546 Bytes.unsafe_to_string s
547
548
549 This function is safe because the byte sequence s will never be
550 accessed or mutated after unsafe_to_string is called. The string_init
551 code gives up ownership of s , and returns the ownership of the result‐
552 ing string to its caller.
553
554 Note that it would be unsafe if s was passed as an additional parameter
555 to the function f as it could escape this way and be mutated in the
556 future -- string_init would give up ownership of s to pass it to f ,
557 and could not call unsafe_to_string safely.
558
559 We have provided the String.init , String.map and String.mapi functions
560 to cover most cases of building new strings. You should prefer those
561 over to_string or unsafe_to_string whenever applicable.
562
563 2. Temporarily giving ownership of a byte sequence to a function that
564 expects a uniquely owned string and returns ownership back, so that we
565 can mutate the sequence again after the call ended.
566
567
568 let bytes_length (s : bytes) =
569 String.length (Bytes.unsafe_to_string s)
570
571
572 In this use-case, we do not promise that s will never be mutated after
573 the call to bytes_length s . The String.length function temporarily
574 borrows unique ownership of the byte sequence (and sees it as a string
575 ), but returns this ownership back to the caller, which may assume that
576 s is still a valid byte sequence after the call. Note that this is only
577 correct because we know that String.length does not capture its argu‐
578 ment -- it could escape by a side-channel such as a memoization combi‐
579 nator.
580
581 The caller may not mutate s while the string is borrowed (it has tempo‐
582 rarily given up ownership). This affects concurrent programs, but also
583 higher-order functions: if String.length returned a closure to be
584 called later, s should not be mutated until this closure is fully
585 applied and returns ownership.
586
587
588
589 val unsafe_of_string : string -> bytes
590
591 Unsafely convert a shared string to a byte sequence that should not be
592 mutated.
593
594 The same ownership discipline that makes unsafe_to_string correct
595 applies to unsafe_of_string : you may use it if you were the owner of
596 the string value, and you will own the return bytes in the same mode.
597
598 In practice, unique ownership of string values is extremely difficult
599 to reason about correctly. You should always assume strings are shared,
600 never uniquely owned.
601
602 For example, string literals are implicitly shared by the compiler, so
603 you never uniquely own them.
604
605
606 let incorrect = Bytes.unsafe_of_string "hello"
607 let s = Bytes.of_string "hello"
608
609
610 The first declaration is incorrect, because the string literal "hello"
611 could be shared by the compiler with other parts of the program, and
612 mutating incorrect is a bug. You must always use the second version,
613 which performs a copy and is thus correct.
614
615 Assuming unique ownership of strings that are not string literals, but
616 are (partly) built from string literals, is also incorrect. For exam‐
617 ple, mutating unsafe_of_string ("foo" ^ s) could mutate the shared
618 string "foo" -- assuming a rope-like representation of strings. More
619 generally, functions operating on strings will assume shared ownership,
620 they do not preserve unique ownership. It is thus incorrect to assume
621 unique ownership of the result of unsafe_of_string .
622
623 The only case we have reasonable confidence is safe is if the produced
624 bytes is shared -- used as an immutable byte sequence. This is possibly
625 useful for incremental migration of low-level programs that manipulate
626 immutable sequences of bytes (for example Marshal.from_bytes ) and pre‐
627 viously used the string type for this purpose.
628
629
630
631
632 Iterators
633 val to_seq : t -> char Seq.t
634
635 Iterate on the byte sequence, in increasing index order. Modifications
636 of the byte sequence during iteration will be reflected in the iterator.
637
638
639 Since 4.07
640
641
642
643 val to_seqi : t -> (int * char) Seq.t
644
645 Iterate on the byte sequence, in increasing index order, yielding
646 indices along with chars.
647
648
649 Since 4.07
650
651
652
653 val of_seq : char Seq.t -> t
654
655 Create a byte sequence from the generator.
656
657
658 Since 4.07
659
660
661
662
663 Binary encoding/decoding of integers
664 The functions in this section binary encode and decode integers to and
665 from byte sequences.
666
667 All following functions raise Invalid_argument if the space needed at
668 index i to decode or encode the integer is not available.
669
670 Little-endian (resp. big-endian) encoding means that least (resp. most)
671 significant bytes are stored first. Big-endian is also known as net‐
672 work byte order. Native-endian encoding is either little-endian or
673 big-endian depending on Sys.big_endian .
674
675 32-bit and 64-bit integers are represented by the int32 and int64
676 types, which can be interpreted either as signed or unsigned numbers.
677
678 8-bit and 16-bit integers are represented by the int type, which has
679 more bits than the binary encoding. These extra bits are handled as
680 follows:
681
682 -Functions that decode signed (resp. unsigned) 8-bit or 16-bit integers
683 represented by int values sign-extend (resp. zero-extend) their result.
684
685 -Functions that encode 8-bit or 16-bit integers represented by int val‐
686 ues truncate their input to their least significant bytes.
687
688
689 val get_uint8 : bytes -> int -> int
690
691
692 get_uint8 b i is b 's unsigned 8-bit integer starting at byte index i .
693
694
695 Since 4.08
696
697
698
699 val get_int8 : bytes -> int -> int
700
701
702 get_int8 b i is b 's signed 8-bit integer starting at byte index i .
703
704
705 Since 4.08
706
707
708
709 val get_uint16_ne : bytes -> int -> int
710
711
712 get_uint16_ne b i is b 's native-endian unsigned 16-bit integer start‐
713 ing at byte index i .
714
715
716 Since 4.08
717
718
719
720 val get_uint16_be : bytes -> int -> int
721
722
723 get_uint16_be b i is b 's big-endian unsigned 16-bit integer starting
724 at byte index i .
725
726
727 Since 4.08
728
729
730
731 val get_uint16_le : bytes -> int -> int
732
733
734 get_uint16_le b i is b 's little-endian unsigned 16-bit integer start‐
735 ing at byte index i .
736
737
738 Since 4.08
739
740
741
742 val get_int16_ne : bytes -> int -> int
743
744
745 get_int16_ne b i is b 's native-endian signed 16-bit integer starting
746 at byte index i .
747
748
749 Since 4.08
750
751
752
753 val get_int16_be : bytes -> int -> int
754
755
756 get_int16_be b i is b 's big-endian signed 16-bit integer starting at
757 byte index i .
758
759
760 Since 4.08
761
762
763
764 val get_int16_le : bytes -> int -> int
765
766
767 get_int16_le b i is b 's little-endian signed 16-bit integer starting
768 at byte index i .
769
770
771 Since 4.08
772
773
774
775 val get_int32_ne : bytes -> int -> int32
776
777
778 get_int32_ne b i is b 's native-endian 32-bit integer starting at byte
779 index i .
780
781
782 Since 4.08
783
784
785
786 val get_int32_be : bytes -> int -> int32
787
788
789 get_int32_be b i is b 's big-endian 32-bit integer starting at byte
790 index i .
791
792
793 Since 4.08
794
795
796
797 val get_int32_le : bytes -> int -> int32
798
799
800 get_int32_le b i is b 's little-endian 32-bit integer starting at byte
801 index i .
802
803
804 Since 4.08
805
806
807
808 val get_int64_ne : bytes -> int -> int64
809
810
811 get_int64_ne b i is b 's native-endian 64-bit integer starting at byte
812 index i .
813
814
815 Since 4.08
816
817
818
819 val get_int64_be : bytes -> int -> int64
820
821
822 get_int64_be b i is b 's big-endian 64-bit integer starting at byte
823 index i .
824
825
826 Since 4.08
827
828
829
830 val get_int64_le : bytes -> int -> int64
831
832
833 get_int64_le b i is b 's little-endian 64-bit integer starting at byte
834 index i .
835
836
837 Since 4.08
838
839
840
841 val set_uint8 : bytes -> int -> int -> unit
842
843
844 set_uint8 b i v sets b 's unsigned 8-bit integer starting at byte index
845 i to v .
846
847
848 Since 4.08
849
850
851
852 val set_int8 : bytes -> int -> int -> unit
853
854
855 set_int8 b i v sets b 's signed 8-bit integer starting at byte index i
856 to v .
857
858
859 Since 4.08
860
861
862
863 val set_uint16_ne : bytes -> int -> int -> unit
864
865
866 set_uint16_ne b i v sets b 's native-endian unsigned 16-bit integer
867 starting at byte index i to v .
868
869
870 Since 4.08
871
872
873
874 val set_uint16_be : bytes -> int -> int -> unit
875
876
877 set_uint16_be b i v sets b 's big-endian unsigned 16-bit integer start‐
878 ing at byte index i to v .
879
880
881 Since 4.08
882
883
884
885 val set_uint16_le : bytes -> int -> int -> unit
886
887
888 set_uint16_le b i v sets b 's little-endian unsigned 16-bit integer
889 starting at byte index i to v .
890
891
892 Since 4.08
893
894
895
896 val set_int16_ne : bytes -> int -> int -> unit
897
898
899 set_int16_ne b i v sets b 's native-endian signed 16-bit integer start‐
900 ing at byte index i to v .
901
902
903 Since 4.08
904
905
906
907 val set_int16_be : bytes -> int -> int -> unit
908
909
910 set_int16_be b i v sets b 's big-endian signed 16-bit integer starting
911 at byte index i to v .
912
913
914 Since 4.08
915
916
917
918 val set_int16_le : bytes -> int -> int -> unit
919
920
921 set_int16_le b i v sets b 's little-endian signed 16-bit integer start‐
922 ing at byte index i to v .
923
924
925 Since 4.08
926
927
928
929 val set_int32_ne : bytes -> int -> int32 -> unit
930
931
932 set_int32_ne b i v sets b 's native-endian 32-bit integer starting at
933 byte index i to v .
934
935
936 Since 4.08
937
938
939
940 val set_int32_be : bytes -> int -> int32 -> unit
941
942
943 set_int32_be b i v sets b 's big-endian 32-bit integer starting at byte
944 index i to v .
945
946
947 Since 4.08
948
949
950
951 val set_int32_le : bytes -> int -> int32 -> unit
952
953
954 set_int32_le b i v sets b 's little-endian 32-bit integer starting at
955 byte index i to v .
956
957
958 Since 4.08
959
960
961
962 val set_int64_ne : bytes -> int -> int64 -> unit
963
964
965 set_int64_ne b i v sets b 's native-endian 64-bit integer starting at
966 byte index i to v .
967
968
969 Since 4.08
970
971
972
973 val set_int64_be : bytes -> int -> int64 -> unit
974
975
976 set_int64_be b i v sets b 's big-endian 64-bit integer starting at byte
977 index i to v .
978
979
980 Since 4.08
981
982
983
984 val set_int64_le : bytes -> int -> int64 -> unit
985
986
987 set_int64_le b i v sets b 's little-endian 64-bit integer starting at
988 byte index i to v .
989
990
991 Since 4.08
992
993
994
995
996
997OCamldoc 2020-09-01 Stdlib.Bytes(3)