1Stdlib.Bytes(3) OCaml library Stdlib.Bytes(3)
2
3
4
6 Stdlib.Bytes - no description
7
9 Module Stdlib.Bytes
10
12 Module Bytes
13 : (module Stdlib__bytes)
14
15
16
17
18
19
20
21
22 val length : bytes -> int
23
24 Return the length (number of bytes) of the argument.
25
26
27
28 val get : bytes -> int -> char
29
30
31 get s n returns the byte at index n in argument s .
32
33 Raise Invalid_argument if n is not a valid index in s .
34
35
36
37 val set : bytes -> int -> char -> unit
38
39
40 set s n c modifies s in place, replacing the byte at index n with c .
41
42 Raise Invalid_argument if n is not a valid index in s .
43
44
45
46 val create : int -> bytes
47
48
49 create n returns a new byte sequence of length n . The sequence is
50 uninitialized and contains arbitrary bytes.
51
52 Raise Invalid_argument if n < 0 or n > Sys.max_string_length .
53
54
55
56 val make : int -> char -> bytes
57
58
59 make n c returns a new byte sequence of length n , filled with the byte
60 c .
61
62 Raise Invalid_argument if n < 0 or n > Sys.max_string_length .
63
64
65
66 val init : int -> (int -> char) -> bytes
67
68
69 Bytes.init n f returns a fresh byte sequence of length n , with charac‐
70 ter i initialized to the result of f i (in increasing index order).
71
72 Raise Invalid_argument if n < 0 or n > Sys.max_string_length .
73
74
75
76 val empty : bytes
77
78 A byte sequence of size 0.
79
80
81
82 val copy : bytes -> bytes
83
84 Return a new byte sequence that contains the same bytes as the argu‐
85 ment.
86
87
88
89 val of_string : string -> bytes
90
91 Return a new byte sequence that contains the same bytes as the given
92 string.
93
94
95
96 val to_string : bytes -> string
97
98 Return a new string that contains the same bytes as the given byte
99 sequence.
100
101
102
103 val sub : bytes -> int -> int -> bytes
104
105
106 sub s start len returns a new byte sequence of length len , containing
107 the subsequence of s that starts at position start and has length len .
108
109 Raise Invalid_argument if start and len do not designate a valid range
110 of s .
111
112
113
114 val sub_string : bytes -> int -> int -> string
115
116 Same as sub but return a string instead of a byte sequence.
117
118
119
120 val extend : bytes -> int -> int -> bytes
121
122
123 extend s left right returns a new byte sequence that contains the bytes
124 of s , with left uninitialized bytes prepended and right uninitialized
125 bytes appended to it. If left or right is negative, then bytes are
126 removed (instead of appended) from the corresponding side of s .
127
128 Raise Invalid_argument if the result length is negative or longer than
129 Sys.max_string_length bytes.
130
131
132
133 val fill : bytes -> int -> int -> char -> unit
134
135
136 fill s start len c modifies s in place, replacing len characters with c
137 , starting at start .
138
139 Raise Invalid_argument if start and len do not designate a valid range
140 of s .
141
142
143
144 val blit : bytes -> int -> bytes -> int -> int -> unit
145
146
147 blit src srcoff dst dstoff len copies len bytes from sequence src ,
148 starting at index srcoff , to sequence dst , starting at index dstoff .
149 It works correctly even if src and dst are the same byte sequence, and
150 the source and destination intervals overlap.
151
152 Raise Invalid_argument if srcoff and len do not designate a valid range
153 of src , or if dstoff and len do not designate a valid range of dst .
154
155
156
157 val blit_string : string -> int -> bytes -> int -> int -> unit
158
159
160 blit src srcoff dst dstoff len copies len bytes from string src ,
161 starting at index srcoff , to byte sequence dst , starting at index
162 dstoff .
163
164 Raise Invalid_argument if srcoff and len do not designate a valid range
165 of src , or if dstoff and len do not designate a valid range of dst .
166
167
168
169 val concat : bytes -> bytes list -> bytes
170
171
172 concat sep sl concatenates the list of byte sequences sl , inserting
173 the separator byte sequence sep between each, and returns the result as
174 a new byte sequence.
175
176 Raise Invalid_argument if the result is longer than
177 Sys.max_string_length bytes.
178
179
180
181 val cat : bytes -> bytes -> bytes
182
183
184 cat s1 s2 concatenates s1 and s2 and returns the result as a new byte
185 sequence.
186
187 Raise Invalid_argument if the result is longer than
188 Sys.max_string_length bytes.
189
190
191
192 val iter : (char -> unit) -> bytes -> unit
193
194
195 iter f s applies function f in turn to all the bytes of s . It is
196 equivalent to f (get s 0); f (get s 1); ...; f (get s (length s - 1));
197 () .
198
199
200
201 val iteri : (int -> char -> unit) -> bytes -> unit
202
203 Same as Bytes.iter , but the function is applied to the index of the
204 byte as first argument and the byte itself as second argument.
205
206
207
208 val map : (char -> char) -> bytes -> bytes
209
210
211 map f s applies function f in turn to all the bytes of s (in increasing
212 index order) and stores the resulting bytes in a new sequence that is
213 returned as the result.
214
215
216
217 val mapi : (int -> char -> char) -> bytes -> bytes
218
219
220 mapi f s calls f with each character of s and its index (in increasing
221 index order) and stores the resulting bytes in a new sequence that is
222 returned as the result.
223
224
225
226 val trim : bytes -> bytes
227
228 Return a copy of the argument, without leading and trailing whitespace.
229 The bytes regarded as whitespace are the ASCII characters ' ' , '\012'
230 , '\n' , '\r' , and '\t' .
231
232
233
234 val escaped : bytes -> bytes
235
236 Return a copy of the argument, with special characters represented by
237 escape sequences, following the lexical conventions of OCaml. All
238 characters outside the ASCII printable range (32..126) are escaped, as
239 well as backslash and double-quote.
240
241 Raise Invalid_argument if the result is longer than
242 Sys.max_string_length bytes.
243
244
245
246 val index : bytes -> char -> int
247
248
249 index s c returns the index of the first occurrence of byte c in s .
250
251 Raise Not_found if c does not occur in s .
252
253
254
255 val index_opt : bytes -> char -> int option
256
257
258 index_opt s c returns the index of the first occurrence of byte c in s
259 or None if c does not occur in s .
260
261
262 Since 4.05
263
264
265
266 val rindex : bytes -> char -> int
267
268
269 rindex s c returns the index of the last occurrence of byte c in s .
270
271 Raise Not_found if c does not occur in s .
272
273
274
275 val rindex_opt : bytes -> char -> int option
276
277
278 rindex_opt s c returns the index of the last occurrence of byte c in s
279 or None if c does not occur in s .
280
281
282 Since 4.05
283
284
285
286 val index_from : bytes -> int -> char -> int
287
288
289 index_from s i c returns the index of the first occurrence of byte c in
290 s after position i . Bytes.index s c is equivalent to Bytes.index_from
291 s 0 c .
292
293 Raise Invalid_argument if i is not a valid position in s . Raise
294 Not_found if c does not occur in s after position i .
295
296
297
298 val index_from_opt : bytes -> int -> char -> int option
299
300
301 index_from_opt s i c returns the index of the first occurrence of byte
302 c in s after position i or None if c does not occur in s after position
303 i . Bytes.index_opt s c is equivalent to Bytes.index_from_opt s 0 c .
304
305 Raise Invalid_argument if i is not a valid position in s .
306
307
308 Since 4.05
309
310
311
312 val rindex_from : bytes -> int -> char -> int
313
314
315 rindex_from s i c returns the index of the last occurrence of byte c in
316 s before position i+1 . rindex s c is equivalent to rindex_from s
317 (Bytes.length s - 1) c .
318
319 Raise Invalid_argument if i+1 is not a valid position in s . Raise
320 Not_found if c does not occur in s before position i+1 .
321
322
323
324 val rindex_from_opt : bytes -> int -> char -> int option
325
326
327 rindex_from_opt s i c returns the index of the last occurrence of byte
328 c in s before position i+1 or None if c does not occur in s before
329 position i+1 . rindex_opt s c is equivalent to rindex_from_opt s
330 (Bytes.length s - 1) c .
331
332 Raise Invalid_argument if i+1 is not a valid position in s .
333
334
335 Since 4.05
336
337
338
339 val contains : bytes -> char -> bool
340
341
342 contains s c tests if byte c appears in s .
343
344
345
346 val contains_from : bytes -> int -> char -> bool
347
348
349 contains_from s start c tests if byte c appears in s after position
350 start . contains s c is equivalent to contains_from s 0 c .
351
352 Raise Invalid_argument if start is not a valid position in s .
353
354
355
356 val rcontains_from : bytes -> int -> char -> bool
357
358
359 rcontains_from s stop c tests if byte c appears in s before position
360 stop+1 .
361
362 Raise Invalid_argument if stop < 0 or stop+1 is not a valid position in
363 s .
364
365
366
367 val uppercase : bytes -> bytes
368
369 Deprecated. Functions operating on Latin-1 character set are depre‐
370 cated.
371
372
373 Return a copy of the argument, with all lowercase letters translated to
374 uppercase, including accented letters of the ISO Latin-1 (8859-1) char‐
375 acter set.
376
377
378
379 val lowercase : bytes -> bytes
380
381 Deprecated. Functions operating on Latin-1 character set are depre‐
382 cated.
383
384
385 Return a copy of the argument, with all uppercase letters translated to
386 lowercase, including accented letters of the ISO Latin-1 (8859-1) char‐
387 acter set.
388
389
390
391 val capitalize : bytes -> bytes
392
393 Deprecated. Functions operating on Latin-1 character set are depre‐
394 cated.
395
396
397 Return a copy of the argument, with the first character set to upper‐
398 case, using the ISO Latin-1 (8859-1) character set.
399
400
401
402 val uncapitalize : bytes -> bytes
403
404 Deprecated. Functions operating on Latin-1 character set are depre‐
405 cated.
406
407
408 Return a copy of the argument, with the first character set to lower‐
409 case, using the ISO Latin-1 (8859-1) character set.
410
411
412
413 val uppercase_ascii : bytes -> bytes
414
415 Return a copy of the argument, with all lowercase letters translated to
416 uppercase, using the US-ASCII character set.
417
418
419 Since 4.03.0
420
421
422
423 val lowercase_ascii : bytes -> bytes
424
425 Return a copy of the argument, with all uppercase letters translated to
426 lowercase, using the US-ASCII character set.
427
428
429 Since 4.03.0
430
431
432
433 val capitalize_ascii : bytes -> bytes
434
435 Return a copy of the argument, with the first character set to upper‐
436 case, using the US-ASCII character set.
437
438
439 Since 4.03.0
440
441
442
443 val uncapitalize_ascii : bytes -> bytes
444
445 Return a copy of the argument, with the first character set to lower‐
446 case, using the US-ASCII character set.
447
448
449 Since 4.03.0
450
451
452 type t = bytes
453
454
455 An alias for the type of byte sequences.
456
457
458
459 val compare : t -> t -> int
460
461 The comparison function for byte sequences, with the same specification
462 as compare . Along with the type t , this function compare allows the
463 module Bytes to be passed as argument to the functors Set.Make and
464 Map.Make .
465
466
467
468 val equal : t -> t -> bool
469
470 The equality function for byte sequences.
471
472
473 Since 4.03.0
474
475
476
477
478 Unsafe conversions (for advanced users)
479 This section describes unsafe, low-level conversion functions between
480 bytes and string . They do not copy the internal data; used improperly,
481 they can break the immutability invariant on strings provided by the
482 -safe-string option. They are available for expert library authors, but
483 for most purposes you should use the always-correct Bytes.to_string and
484 Bytes.of_string instead.
485
486 val unsafe_to_string : bytes -> string
487
488 Unsafely convert a byte sequence into a string.
489
490 To reason about the use of unsafe_to_string , it is convenient to con‐
491 sider an "ownership" discipline. A piece of code that manipulates some
492 data "owns" it; there are several disjoint ownership modes, including:
493
494 -Unique ownership: the data may be accessed and mutated
495
496 -Shared ownership: the data has several owners, that may only access
497 it, not mutate it.
498
499 Unique ownership is linear: passing the data to another piece of code
500 means giving up ownership (we cannot write the data again). A unique
501 owner may decide to make the data shared (giving up mutation rights on
502 it), but shared data may not become uniquely-owned again.
503
504
505 unsafe_to_string s can only be used when the caller owns the byte
506 sequence s -- either uniquely or as shared immutable data. The caller
507 gives up ownership of s , and gains ownership of the returned string.
508
509 There are two valid use-cases that respect this ownership discipline:
510
511 1. Creating a string by initializing and mutating a byte sequence that
512 is never changed after initialization is performed.
513
514
515 let string_init len f : string = let s = Bytes.create len in for i = 0
516 to len - 1 do Bytes.set s i (f i) done; Bytes.unsafe_to_string s
517
518 This function is safe because the byte sequence s will never be
519 accessed or mutated after unsafe_to_string is called. The string_init
520 code gives up ownership of s , and returns the ownership of the result‐
521 ing string to its caller.
522
523 Note that it would be unsafe if s was passed as an additional parameter
524 to the function f as it could escape this way and be mutated in the
525 future -- string_init would give up ownership of s to pass it to f ,
526 and could not call unsafe_to_string safely.
527
528 We have provided the String.init , String.map and String.mapi functions
529 to cover most cases of building new strings. You should prefer those
530 over to_string or unsafe_to_string whenever applicable.
531
532 2. Temporarily giving ownership of a byte sequence to a function that
533 expects a uniquely owned string and returns ownership back, so that we
534 can mutate the sequence again after the call ended.
535
536
537 let bytes_length (s : bytes) = String.length (Bytes.unsafe_to_string s)
538
539 In this use-case, we do not promise that s will never be mutated after
540 the call to bytes_length s . The String.length function temporarily
541 borrows unique ownership of the byte sequence (and sees it as a string
542 ), but returns this ownership back to the caller, which may assume that
543 s is still a valid byte sequence after the call. Note that this is only
544 correct because we know that String.length does not capture its argu‐
545 ment -- it could escape by a side-channel such as a memoization combi‐
546 nator.
547
548 The caller may not mutate s while the string is borrowed (it has tempo‐
549 rarily given up ownership). This affects concurrent programs, but also
550 higher-order functions: if String.length returned a closure to be
551 called later, s should not be mutated until this closure is fully
552 applied and returns ownership.
553
554
555
556 val unsafe_of_string : string -> bytes
557
558 Unsafely convert a shared string to a byte sequence that should not be
559 mutated.
560
561 The same ownership discipline that makes unsafe_to_string correct
562 applies to unsafe_of_string : you may use it if you were the owner of
563 the string value, and you will own the return bytes in the same mode.
564
565 In practice, unique ownership of string values is extremely difficult
566 to reason about correctly. You should always assume strings are shared,
567 never uniquely owned.
568
569 For example, string literals are implicitly shared by the compiler, so
570 you never uniquely own them.
571
572
573 let incorrect = Bytes.unsafe_of_string "hello"
574 let s = Bytes.of_string "hello"
575
576 The first declaration is incorrect, because the string literal "hello"
577 could be shared by the compiler with other parts of the program, and
578 mutating incorrect is a bug. You must always use the second version,
579 which performs a copy and is thus correct.
580
581 Assuming unique ownership of strings that are not string literals, but
582 are (partly) built from string literals, is also incorrect. For exam‐
583 ple, mutating unsafe_of_string ("foo" ^ s) could mutate the shared
584 string "foo" -- assuming a rope-like representation of strings. More gen‐
585 erally, functions operating on strings will assume shared ownership,
586 they do not preserve unique ownership. It is thus incorrect to assume
587 unique ownership of the result of unsafe_of_string .
588
589 The only case we have reasonable confidence is safe is if the produced
590 bytes is shared -- used as an immutable byte sequence. This is possibly
591 useful for incremental migration of low-level programs that manipulate
592 immutable sequences of bytes (for example Marshal.from_bytes ) and pre‐
593 viously used the string type for this purpose.
594
595
596
597
598 Iterators
599 val to_seq : t -> char Seq.t
600
601 Iterate on the byte sequence, in increasing index order. Modifications
602 of the byte sequence during iteration will be reflected in the iterator.
603
604
605 Since 4.07
606
607
608
609 val to_seqi : t -> (int * char) Seq.t
610
611 Iterate on the byte sequence, in increasing index order, yielding
612 indices along with chars.
613
614
615 Since 4.07
616
617
618
619 val of_seq : char Seq.t -> t
620
621 Create a byte sequence from the generator.
622
623
624 Since 4.07
625
626
627
628
629 Binary encoding/decoding of integers
630 The functions in this section binary encode and decode integers to and
631 from byte sequences.
632
633 All following functions raise Invalid_argument if the space needed at
634 index i to decode or encode the integer is not available.
635
636 Little-endian (resp. big-endian) encoding means that least (resp. most)
637 significant bytes are stored first. Big-endian is also known as net‐
638 work byte order. Native-endian encoding is either little-endian or
639 big-endian depending on Sys.big_endian .
640
641 32-bit and 64-bit integers are represented by the int32 and int64
642 types, which can be interpreted either as signed or unsigned numbers.
643
644 8-bit and 16-bit integers are represented by the int type, which has
645 more bits than the binary encoding. These extra bits are handled as
646 follows:
647
648 -Functions that decode signed (resp. unsigned) 8-bit or 16-bit integers
649 represented by int values sign-extend (resp. zero-extend) their result.
650
651 -Functions that encode 8-bit or 16-bit integers represented by int val‐
652 ues truncate their input to their least significant bytes.
653
654
655 val get_uint8 : bytes -> int -> int
656
657
658 get_uint8 b i is b 's unsigned 8-bit integer starting at byte index i .
659
660
661 Since 4.08
662
663
664
665 val get_int8 : bytes -> int -> int
666
667
668 get_int8 b i is b 's signed 8-bit integer starting at byte index i .
669
670
671 Since 4.08
672
673
674
675 val get_uint16_ne : bytes -> int -> int
676
677
678 get_uint16_ne b i is b 's native-endian unsigned 16-bit integer start‐
679 ing at byte index i .
680
681
682 Since 4.08
683
684
685
686 val get_uint16_be : bytes -> int -> int
687
688
689 get_uint16_be b i is b 's big-endian unsigned 16-bit integer starting
690 at byte index i .
691
692
693 Since 4.08
694
695
696
697 val get_uint16_le : bytes -> int -> int
698
699
700 get_uint16_le b i is b 's little-endian unsigned 16-bit integer start‐
701 ing at byte index i .
702
703
704 Since 4.08
705
706
707
708 val get_int16_ne : bytes -> int -> int
709
710
711 get_int16_ne b i is b 's native-endian signed 16-bit integer starting
712 at byte index i .
713
714
715 Since 4.08
716
717
718
719 val get_int16_be : bytes -> int -> int
720
721
722 get_int16_be b i is b 's big-endian signed 16-bit integer starting at
723 byte index i .
724
725
726 Since 4.08
727
728
729
730 val get_int16_le : bytes -> int -> int
731
732
733 get_int16_le b i is b 's little-endian signed 16-bit integer starting
734 at byte index i .
735
736
737 Since 4.08
738
739
740
741 val get_int32_ne : bytes -> int -> int32
742
743
744 get_int32_ne b i is b 's native-endian 32-bit integer starting at byte
745 index i .
746
747
748 Since 4.08
749
750
751
752 val get_int32_be : bytes -> int -> int32
753
754
755 get_int32_be b i is b 's big-endian 32-bit integer starting at byte
756 index i .
757
758
759 Since 4.08
760
761
762
763 val get_int32_le : bytes -> int -> int32
764
765
766 get_int32_le b i is b 's little-endian 32-bit integer starting at byte
767 index i .
768
769
770 Since 4.08
771
772
773
774 val get_int64_ne : bytes -> int -> int64
775
776
777 get_int64_ne b i is b 's native-endian 64-bit integer starting at byte
778 index i .
779
780
781 Since 4.08
782
783
784
785 val get_int64_be : bytes -> int -> int64
786
787
788 get_int64_be b i is b 's big-endian 64-bit integer starting at byte
789 index i .
790
791
792 Since 4.08
793
794
795
796 val get_int64_le : bytes -> int -> int64
797
798
799 get_int64_le b i is b 's little-endian 64-bit integer starting at byte
800 index i .
801
802
803 Since 4.08
804
805
806
807 val set_uint8 : bytes -> int -> int -> unit
808
809
810 set_uint8 b i v sets b 's unsigned 8-bit integer starting at byte index
811 i to v .
812
813
814 Since 4.08
815
816
817
818 val set_int8 : bytes -> int -> int -> unit
819
820
821 set_int8 b i v sets b 's signed 8-bit integer starting at byte index i
822 to v .
823
824
825 Since 4.08
826
827
828
829 val set_uint16_ne : bytes -> int -> int -> unit
830
831
832 set_uint16_ne b i v sets b 's native-endian unsigned 16-bit integer
833 starting at byte index i to v .
834
835
836 Since 4.08
837
838
839
840 val set_uint16_be : bytes -> int -> int -> unit
841
842
843 set_uint16_be b i v sets b 's big-endian unsigned 16-bit integer start‐
844 ing at byte index i to v .
845
846
847 Since 4.08
848
849
850
851 val set_uint16_le : bytes -> int -> int -> unit
852
853
854 set_uint16_le b i v sets b 's little-endian unsigned 16-bit integer
855 starting at byte index i to v .
856
857
858 Since 4.08
859
860
861
862 val set_int16_ne : bytes -> int -> int -> unit
863
864
865 set_int16_ne b i v sets b 's native-endian signed 16-bit integer start‐
866 ing at byte index i to v .
867
868
869 Since 4.08
870
871
872
873 val set_int16_be : bytes -> int -> int -> unit
874
875
876 set_int16_be b i v sets b 's big-endian signed 16-bit integer starting
877 at byte index i to v .
878
879
880 Since 4.08
881
882
883
884 val set_int16_le : bytes -> int -> int -> unit
885
886
887 set_int16_le b i v sets b 's little-endian signed 16-bit integer start‐
888 ing at byte index i to v .
889
890
891 Since 4.08
892
893
894
895 val set_int32_ne : bytes -> int -> int32 -> unit
896
897
898 set_int32_ne b i v sets b 's native-endian 32-bit integer starting at
899 byte index i to v .
900
901
902 Since 4.08
903
904
905
906 val set_int32_be : bytes -> int -> int32 -> unit
907
908
909 set_int32_be b i v sets b 's big-endian 32-bit integer starting at byte
910 index i to v .
911
912
913 Since 4.08
914
915
916
917 val set_int32_le : bytes -> int -> int32 -> unit
918
919
920 set_int32_le b i v sets b 's little-endian 32-bit integer starting at
921 byte index i to v .
922
923
924 Since 4.08
925
926
927
928 val set_int64_ne : bytes -> int -> int64 -> unit
929
930
931 set_int64_ne b i v sets b 's native-endian 64-bit integer starting at
932 byte index i to v .
933
934
935 Since 4.08
936
937
938
939 val set_int64_be : bytes -> int -> int64 -> unit
940
941
942 set_int64_be b i v sets b 's big-endian 64-bit integer starting at byte
943 index i to v .
944
945
946 Since 4.08
947
948
949
950 val set_int64_le : bytes -> int -> int64 -> unit
951
952
953 set_int64_le b i v sets b 's little-endian 64-bit integer starting at
954 byte index i to v .
955
956
957 Since 4.08
958
959
960
961
962
963OCamldoc 2019-07-30 Stdlib.Bytes(3)