1Lingua::EN::Inflect(3)User Contributed Perl DocumentationLingua::EN::Inflect(3)
2
3
4

NAME

6       Lingua::EN::Inflect - Convert singular to plural. Select "a" or "an".
7

VERSION

9       This document describes version 1.905 of Lingua::EN::Inflect
10

SYNOPSIS

12        use Lingua::EN::Inflect qw ( PL PL_N PL_V PL_ADJ NO NUM
13                         PL_eq PL_N_eq PL_V_eq PL_ADJ_eq
14                         A AN
15                         PART_PRES
16                         ORD NUMWORDS
17                         WORDLIST
18                         inflect classical
19                         def_noun def_verb def_adj def_a def_an );
20
21
22        # UNCONDITIONALLY FORM THE PLURAL
23
24             print "The plural of ", $word, " is ", PL($word), "\n";
25
26
27        # CONDITIONALLY FORM THE PLURAL
28
29             print "I saw $cat_count ", PL("cat",$cat_count), "\n";
30
31
32        # FORM PLURALS FOR SPECIFIC PARTS OF SPEECH
33
34             print PL_N("I",$N1), PL_V("saw",$N1),
35               PL_ADJ("my",$N2), PL_N("saw",$N2), "\n";
36
37
38        # DEAL WITH "0/1/N" -> "no/1/N" TRANSLATION:
39
40             print "There ", PL_V("was",$errors), NO(" error",$errors), "\n";
41
42
43        # USE DEFAULT COUNTS:
44
45             print NUM($N1,""), PL("I"), PL_V(" saw"), NUM($N2), PL_N(" saw");
46             print "There ", NUM($errors,''), PL_V("was"), NO(" error"), "\n";
47
48
49        # COMPARE TWO WORDS "NUMBER-INSENSITIVELY":
50
51             print "same\n"      if PL_eq($word1, $word2);
52             print "same noun\n" if PL_N_eq($word1, $word2);
53             print "same verb\n" if PL_V_eq($word1, $word2);
54             print "same adj.\n" if PL_ADJ_eq($word1, $word2);
55
56
57        # ADD CORRECT "a" OR "an" FOR A GIVEN WORD:
58
59             print "Did you want ", A($thing), " or ", AN($idea), "\n";
60
61
62        # CONVERT NUMERALS INTO ORDINALS (i.e. 1->1st, 2->2nd, 3->3rd, etc.)
63
64             print "It was ", ORD($position), " from the left\n";
65
66        # CONVERT NUMERALS TO WORDS (i.e. 1->"one", 101->"one hundred and one", etc.)
67        # IN A SCALAR CONTEXT: GET BACK A SINGLE STRING...
68
69           $words = NUMWORDS(1234);      # "one thousand, two hundred and thirty-four"
70           $words = NUMWORDS(ORD(1234)); # "one thousand, two hundred and thirty-fourth"
71
72
73        # IN A LIST CONTEXT: GET BACK A LIST OF STRINGS, ONE FOR EACH "CHUNK"...
74
75           @words = NUMWORDS(1234);    # ("one thousand","two hundred and thirty-four")
76
77
78        # OPTIONAL PARAMETERS CHANGE TRANSLATION:
79
80           $words = NUMWORDS(12345, group=>1);
81                       # "one, two, three, four, five"
82
83           $words = NUMWORDS(12345, group=>2);
84                       # "twelve, thirty-four, five"
85
86           $words = NUMWORDS(12345, group=>3);
87                       # "one twenty-three, forty-five"
88
89           $words = NUMWORDS(1234, 'and'=>'');
90                       # "one thousand, two hundred thirty-four"
91
92           $words = NUMWORDS(1234, 'and'=>', plus');
93                       # "one thousand, two hundred, plus thirty-four"
94
95           $words = NUMWORDS(555_1202, group=>1, zero=>'oh');
96                       # "five, five, five, one, two, oh, two"
97
98           $words = NUMWORDS(555_1202, group=>1, one=>'unity');
99                       # "five, five, five, unity, two, zero, two"
100
101           $words = NUMWORDS(123.456, group=>1, decimal=>'mark');
102                       # "one two three mark four five six"
103
104        # LITERAL STYLE ONLY NAMES NUMBERS LESS THAN A CERTAIN THRESHOLD...
105
106           $words = NUMWORDS(   9, threshold=>10);    # "nine"
107           $words = NUMWORDS(  10, threshold=>10);    # "ten"
108           $words = NUMWORDS(  11, threshold=>10);    # "11"
109           $words = NUMWORDS(1000, threshold=>10);    # "1,000"
110
111        # JOIN WORDS INTO A LIST:
112
113           $list = WORDLIST("apple", "banana", "carrot");
114                       # "apple, banana, and carrot"
115
116           $list = WORDLIST("apple", "banana");
117                       # "apple and banana"
118
119           $list = WORDLIST("apple", "banana", "carrot", {final_sep=>""});
120                       # "apple, banana and carrot"
121
122
123        # REQUIRE "CLASSICAL" PLURALS (EG: "focus"->"foci", "cherub"->"cherubim")
124
125             classical;          # USE ALL CLASSICAL PLURALS
126
127             classical 1;           #  USE ALL CLASSICAL PLURALS
128             classical 0;           #  USE ALL MODERN PLURALS (DEFAULT)
129
130             classical 'zero';      #  "no error" INSTEAD OF "no errors"
131             classical zero=>1;     #  "no error" INSTEAD OF "no errors"
132             classical zero=>0;     #  "no errors" INSTEAD OF "no error"
133
134             classical 'herd';      #  "2 buffalo" INSTEAD OF "2 buffalos"
135             classical herd=>1;     #  "2 buffalo" INSTEAD OF "2 buffalos"
136             classical herd=>0;     #  "2 buffalos" INSTEAD OF "2 buffalo"
137
138             classical 'persons';   # "2 chairpersons" INSTEAD OF "2 chairpeople"
139             classical persons=>1;  # "2 chairpersons" INSTEAD OF "2 chairpeople"
140             classical persons=>0;  # "2 chairpeople" INSTEAD OF "2 chairpersons"
141
142             classical 'ancient';   # "2 formulae" INSTEAD OF "2 formulas"
143             classical ancient=>1;  # "2 formulae" INSTEAD OF "2 formulas"
144             classical ancient=>0;  # "2 formulas" INSTEAD OF "2 formulae"
145
146
147
148        # INTERPOLATE "PL()", "PL_N()", "PL_V()", "PL_ADJ()", "A()", "AN()"
149        # "NUM()" AND "ORD()" WITHIN STRINGS:
150
151             print inflect("The plural of $word is PL($word)\n");
152             print inflect("I saw $cat_count PL(cat,$cat_count)\n");
153             print inflect("PL(I,$N1) PL_V(saw,$N1) PL(a,$N2) PL_N(saw,$N2)\n");
154             print inflect("NUM($N1,)PL(I) PL_V(saw) NUM($N2,)PL(a) PL_N(saw)\n");
155             print inflect("I saw NUM($cat_count) PL(cat)\n");
156             print inflect("There PL_V(was,$errors) NO(error,$errors)\n");
157             print inflect("There NUM($errors,)PL_V(was) NO(error)\n");
158             print inflect("Did you want A($thing) or AN($idea)\n");
159             print inflect("It was ORD($position) from the left\n");
160
161
162        # ADD USER-DEFINED INFLECTIONS (OVERRIDING INBUILT RULES):
163
164             def_noun  "VAX"  => "VAXen";  # SINGULAR => PLURAL
165
166             def_verb  "will" => "shall",  # 1ST PERSON SINGULAR => PLURAL
167                       "will" => "will",   # 2ND PERSON SINGULAR => PLURAL
168                       "will" => "will";   # 3RD PERSON SINGULAR => PLURAL
169
170             def_adj   "hir"  => "their";  # SINGULAR => PLURAL
171
172             def_a     "h";                # "AY HALWAYS SEZ 'HAITCH'!"
173
174             def_an    "horrendous.*";     # "AN HORRENDOUS AFFECTATION"
175

DESCRIPTION

177       [Note: This module is strictly in maintenance mode now.  Take a look at
178       the newer Lingua::EN::Inflexion module, which offers a cleaner and more
179       convenient interface, has many more features (including
180       plural->singular inflexions), and is also much better tested.  If you
181       have existing code that relies on Lingua::EN::Inflect, see the section
182       of the documentation entitled "CONVERTING FROM LINGUA::EN::INFLECT". ]
183
184       The exportable subroutines of Lingua::EN::Inflect provide plural
185       inflections, "a"/"an" selection for English words, and manipulation of
186       numbers as words.
187
188       Plural forms of all nouns, most verbs, and some adjectives are
189       provided. Where appropriate, "classical" variants (for example:
190       "brother" -> "brethren", "dogma" -> "dogmata", etc.) are also provided.
191
192       Pronunciation-based "a"/"an" selection is provided for all English
193       words, and most initialisms.
194
195       It is also possible to inflect numerals (1,2,3) to ordinals (1st, 2nd,
196       3rd) and to English words ("one", "two", "three").
197
198       In generating these inflections, Lingua::EN::Inflect follows the Oxford
199       English Dictionary and the guidelines in Fowler's Modern English Usage,
200       preferring the former where the two disagree.
201
202       The module is built around standard British spelling, but is designed
203       to cope with common American variants as well. Slang, jargon, and other
204       English dialects are not explicitly catered for.
205
206       Where two or more inflected forms exist for a single word (typically a
207       "classical" form and a "modern" form), Lingua::EN::Inflect prefers the
208       more common form (typically the "modern" one), unless "classical"
209       processing has been specified (see "MODERN VS CLASSICAL INFLECTIONS").
210

FORMING PLURALS

212   Inflecting Plurals
213       All of the "PL_..." plural inflection subroutines take the word to be
214       inflected as their first argument and return the corresponding
215       inflection.  Note that all such subroutines expect the singular form of
216       the word. The results of passing a plural form are undefined (and
217       unlikely to be correct).
218
219       The "PL_..." subroutines also take an optional second argument, which
220       indicates the grammatical "number" of the word (or of another word with
221       which the word being inflected must agree). If the "number" argument is
222       supplied and is not 1 (or "one" or "a", or some other adjective that
223       implies the singular), the plural form of the word is returned. If the
224       "number" argument does indicate singularity, the (uninflected) word
225       itself is returned. If the number argument is omitted, the plural form
226       is returned unconditionally.
227
228       The various subroutines are:
229
230       "PL_N($;$)"
231               The exportable subroutine "PL_N()" takes a singular English
232               noun or pronoun and returns its plural. Pronouns in the
233               nominative ("I" -> "we") and accusative ("me" -> "us") cases
234               are handled, as are possessive pronouns ("mine" -> "ours").
235
236       "PL_V($;$)"
237               The exportable subroutine "PL_V()" takes the singular form of a
238               conjugated verb (that is, one which is already in the correct
239               "person" and "mood") and returns the corresponding plural
240               conjugation.
241
242       "PL_ADJ($;$)"
243               The exportable subroutine "PL_ADJ()" takes the singular form of
244               certain types of adjectives and returns the corresponding
245               plural form.  Adjectives that are correctly handled include:
246               "numerical" adjectives ("a" -> "some"), demonstrative
247               adjectives ("this" -> "these", "that" -> "those"), and
248               possessives ("my" -> "our", "cat's" -> "cats'", "child's" ->
249               "children's", etc.)
250
251       "PL($;$)"
252               The exportable subroutine "PL()" takes a singular English noun,
253               pronoun, verb, or adjective and returns its plural form. Where
254               a word has more than one inflection depending on its part of
255               speech (for example, the noun "thought" inflects to "thoughts",
256               the verb "thought" to "thought"), the (singular) noun sense is
257               preferred to the (singular) verb sense.
258
259               Hence "PL("knife")" will return "knives" ("knife" having been
260               treated as a singular noun), whereas "PL("knifes")" will return
261               "knife" ("knifes" having been treated as a 3rd person singular
262               verb).
263
264               The inherent ambiguity of such cases suggests that, where the
265               part of speech is known, "PL_N", "PL_V", and "PL_ADJ" should be
266               used in preference to "PL".
267
268       Note that all these subroutines ignore any whitespace surrounding the
269       word being inflected, but preserve that whitespace when the result is
270       returned. For example, "PL(" cat  ")" returns " cats  ".
271
272   Numbered plurals
273       The "PL_..." subroutines return only the inflected word, not the count
274       that was used to inflect it. Thus, in order to produce "I saw 3 ducks",
275       it is necessary to use:
276
277           print "I saw $N ", PL_N($animal,$N), "\n";
278
279       Since the usual purpose of producing a plural is to make it agree with
280       a preceding count, Lingua::EN::Inflect provides an exportable
281       subroutine ("NO($;$)") which, given a word and a(n optional) count,
282       returns the count followed by the correctly inflected word. Hence the
283       previous example can be rewritten:
284
285           print "I saw ", NO($animal,$N), "\n";
286
287       In addition, if the count is zero (or some other term which implies
288       zero, such as "zero", "nil", etc.) the count is replaced by the word
289       "no". Hence, if $N had the value zero, the previous example would print
290       the somewhat more elegant:
291
292           I saw no animals
293
294       rather than:
295
296           I saw 0 animals
297
298       Note that the name of the subroutine is a pun: the subroutine returns
299       either a number (a No.) or a "no", in front of the inflected word.
300
301       Wordy and comma'd plurals
302
303       The "NO()" subroutine takes an optional third argument: a hash of named
304       options that configure its behaviour.
305
306       The 'words_below' option informs "NO()" what other numbers (i.e.  apart
307       from zero) it should convert to words. For example:
308
309           for my $count (0..12) {
310               print NO('cat', $count, {words_below => 10}), "\n";
311           }
312
313       would print:
314
315           no cats
316           one cat
317           two cats
318           three cats
319           four cats
320           five cats
321           six cats
322           seven cats
323           eight cats
324           nine cats
325           10 cats
326           11 cats
327           12 cats
328
329       The 'comma' and 'comma_every' options determine whether or not the
330       numbers produced by "NO()" have commas in them. That is:
331
332           2001 space odysseys
333
334       versus:
335
336           2,001 space odysseys
337
338       Normally, numbers are produced without commas, but if 'comma' or
339       'comma_every' is specified, then commas are added as requested.
340
341       The 'comma' option specifies which character to use as a comma.  It
342       defaults to ',', but may be set to anything convenient:
343
344           print NO('Euro', $amount, {comma=>'.'});
345
346           # prints:  1.000.000 Euros
347
348       The 'comma_every' option specifies how many characters between commas.
349       It defaults to 3, but may be set to any positive number:
350
351           print NO('Euro', $amount, {comma_every=>4});
352
353           # prints:  100,0000 Euros
354
355       Note that you can set both options at once, if you wish:
356
357           print NO('Euro', $amount, {comma_every=>2, comma=>'_'});
358
359           # prints:  1_00_00_00 Euros
360
361   Reducing the number of counts required
362       In some contexts, the need to supply an explicit count to the various
363       "PL_..." subroutines makes for tiresome repetition. For example:
364
365           print PL_ADJ("This",$errors), PL_N(" error",$errors),
366                 PL_V(" was",$errors), " fatal.\n";
367
368       Lingua::EN::Inflect therefore provides an exportable subroutine
369       ("NUM($;$)") that may be used to set a persistent "default number"
370       value. If such a value is set, it is subsequently used whenever an
371       optional second "number" argument is omitted. The default value thus
372       set can subsequently be removed by calling "NUM()" with no arguments.
373       Hence we could rewrite the previous example:
374
375           NUM($errors);
376           print PL_ADJ("This"), PL_N(" error"), PL_V(" was"), " fatal.\n";
377           NUM();
378
379       Normally, "NUM()" returns its first argument, so that it may also be
380       "inlined" in contexts like:
381
382           print NUM($errors), PL_N(" error"), PL_V(" was"), " detected.\n";
383           print PL_ADJ("This"), PL_N(" error"), PL_V(" was"), " fatal.\n"
384               if $severity > 1;
385
386       However, in certain contexts (see "INTERPOLATING INFLECTIONS IN
387       STRINGS") it is preferable that "NUM()" return an empty string. Hence
388       "NUM()" provides an optional second argument. If that argument is
389       supplied (that is, if it is defined) and evaluates to false, "NUM"
390       returns an empty string instead of its first argument. For example:
391
392           print NUM($errors,0), NO("error"), PL_V(" was"), " detected.\n";
393           print PL_ADJ("This"), PL_N(" error"), PL_V(" was"), " fatal.\n"
394               if $severity > 1;
395
396   Number-insensitive equality
397       Lingua::EN::Inflect also provides a solution to the problem of
398       comparing words of differing plurality through the exportable
399       subroutines "PL_eq($$)", "PL_N_eq($$)", "PL_V_eq($$)", and
400       "PL_ADJ_eq($$)".  Each  of these subroutines takes two strings, and
401       compares them using the corresponding plural-inflection subroutine
402       ("PL()", "PL_N()", "PL_V()", and "PL_ADJ()" respectively).
403
404       The comparison returns true if:
405
406       •       the strings are "eq"-equal, or
407
408       •       one string is "eq"-equal to a plural form of the other, or
409
410       •       the strings are two different plural forms of the one word.
411
412       Hence all of the following return true:
413
414           PL_eq("index","index")      # RETURNS "eq"
415           PL_eq("index","indexes")    # RETURNS "s:p"
416           PL_eq("index","indices")    # RETURNS "s:p"
417           PL_eq("indexes","index")    # RETURNS "p:s"
418           PL_eq("indices","index")    # RETURNS "p:s"
419           PL_eq("indices","indexes")  # RETURNS "p:p"
420           PL_eq("indexes","indices")  # RETURNS "p:p"
421           PL_eq("indices","indices")  # RETURNS "eq"
422
423       As indicated by the comments in the previous example, the actual value
424       returned by the various "PL_eq" subroutines encodes which of the three
425       equality rules succeeded: "eq" is returned if the strings were
426       identical, "s:p" if the strings were singular and plural respectively,
427       "p:s" for plural and singular, and "p:p" for two distinct plurals.
428       Inequality is indicated by returning an empty string.
429
430       It should be noted that two distinct singular words which happen to
431       take the same plural form are not considered equal, nor are cases where
432       one (singular) word's plural is the other (plural) word's singular.
433       Hence all of the following return false:
434
435           PL_eq("base","basis")       # ALTHOUGH BOTH -> "bases"
436           PL_eq("syrinx","syringe")   # ALTHOUGH BOTH -> "syringes"
437           PL_eq("she","he")       # ALTHOUGH BOTH -> "they"
438
439           PL_eq("opus","operas")      # ALTHOUGH "opus" -> "opera" -> "operas"
440           PL_eq("taxi","taxes")       # ALTHOUGH "taxi" -> "taxis" -> "taxes"
441
442       Note too that, although the comparison is "number-insensitive" it is
443       not case-insensitive (that is, "PL_eq("time","Times")" returns false). To
444       obtain both number and case insensitivity, prefix both arguments with
445       "lc" (that is, "PL_eq(lc "time", lc "Times")" returns true).
446

OTHER VERB FORMS

448   Present participles
449       "Lingua::EN::Inflect" also provides the "PART_PRES" subroutine, which
450       can take a 3rd person singular verb and correctly inflect it to its
451       present participle:
452
453           PART_PRES("runs")   # "running"
454           PART_PRES("loves")  # "loving"
455           PART_PRES("eats")   # "eating"
456           PART_PRES("bats")   # "batting"
457           PART_PRES("spies")  # "spying"
458

PROVIDING INDEFINITE ARTICLES

460   Selecting indefinite articles
461       Lingua::EN::Inflect provides two exportable subroutines ("A($;$)" and
462       "AN($;$)") which will correctly prepend the appropriate indefinite
463       article to a word, depending on its pronunciation. For example:
464
465           A("cat")        # -> "a cat"
466           AN("cat")       # -> "a cat"
467           A("euphemism")      # -> "a euphemism"
468           A("Euler number")   # -> "an Euler number"
469           A("hour")       # -> "an hour"
470           A("houri")      # -> "a houri"
471
472       The two subroutines are identical in function and may be used
473       interchangeably. The only reason that two versions are provided is to
474       enhance the readability of code such as:
475
476           print "That is ", AN($errortype), " error\n";
477           print "That is ", A($fataltype), " fatal error\n";
478
479       Note that in both cases the actual article provided depends only on the
480       pronunciation of the first argument, not on the name of the subroutine.
481
482       "A()" and "AN()" will ignore any indefinite article that already exists
483       at the start of the string. Thus:
484
485           @half_arked = (
486               "a elephant",
487               "a giraffe",
488               "an ewe",
489               "a orangutan",
490           );
491
492           print A($_), "\n" for @half_arked;
493
494           # prints:
495           #     an elephant
496           #     a giraffe
497           #     a ewe
498           #     an orangutan
499
500       "A()" and "AN()" both take an optional second argument. As with the
501       "PL_..." subroutines, this second argument is a "number" specifier. If
502       its value is 1 (or some other value implying singularity), "A()" and
503       "AN()" insert "a" or "an" as appropriate. If the number specifier
504       implies plurality, "A()" and "AN()" insert the actual second argument
505       instead.  For example:
506
507           A("cat",1)      # -> "a cat"
508           A("cat",2)      # -> "2 cat"
509           A("cat","one")      # -> "one cat"
510           A("cat","no")       # -> "no cat"
511
512       Note that, as implied by the previous examples, "A()" and "AN()" both
513       assume that their job is merely to provide the correct qualifier for a
514       word (that is: "a", "an", or the specified count).  In other words,
515       they assume that the word they are given has already been correctly
516       inflected for plurality. Hence, if $N has the value 2, then:
517
518             print A("cat",$N);
519
520       prints "2 cat", instead of "2 cats". The correct approach is to use:
521
522             print A(PL("cat",$N),$N);
523
524       or, better still:
525
526             print NO("cat",$N);
527
528       Note too that, like the various "PL_..." subroutines, whenever "A()"
529       and "AN()" are called with only one argument they are subject to the
530       effects of any preceding call to "NUM()". Hence, another possible
531       solution is:
532
533             NUM($N);
534             print A(PL("cat"));
535
536   Indefinite articles and initialisms
537       "Initialisms" (sometimes inaccurately called "acronyms") are terms
538       which have been formed from the initial letters of words in a phrase
539       (for example, "NATO", "NBL", "S.O.S.", "SCUBA", etc.)
540
541       Such terms present a particular challenge when selecting between "a"
542       and "an", since they are sometimes pronounced as if they were a single
543       word ("nay-tow", "sku-ba") and sometimes as a series of letter names
544       ("en-eff-ell", "ess-oh-ess").
545
546       "A()" and "AN()" cope with this dichotomy using a series of inbuilt
547       rules, which may be summarized as:
548
549       1.      If the word starts with a single letter, followed by a period
550               or dash (for example, "R.I.P.", "C.O.D.", "e-mail", "X-ray",
551               "T-square"), then choose the appropriate article for the sound
552               of the first letter ("an R.I.P.", "a C.O.D.", "an e-mail", "an
553               X-ray", "a T-square").
554
555       2.      If the first two letters of the word are capitals, consonants,
556               and do not appear at the start of any known English word, (for
557               example, "LCD", "XML", "YWCA"), then once again choose "a" or
558               "an" depending on the sound of the first letter ("an LCD", "an
559               XML", "a YWCA").
560
561       3.      Otherwise, assume the string is a capitalized word or a
562               pronounceable initialism (for example, "LED", "OPEC", "FAQ",
563               "UNESCO"), and therefore takes "a" or "an" according to the
564               (apparent) pronunciation of the entire word ("a LED", "an
565               OPEC", "a FAQ", "a UNESCO").
566
567       Note that rules 1 and 3 together imply that the presence or absence of
568       punctuation may change the selection of indefinite article for a
569       particular initialism (for example, "a FAQ" but "an F.A.Q.").
570
571   Indefinite articles and "soft H's"
572       Words beginning in the letter 'H' present another type of difficulty
573       when selecting a suitable indefinite article. In a few such words (for
574       example, "hour", "honour", "heir") the 'H' is not voiced at all, and so
575       such words inflect with "an". The remaining cases ("voiced H's") may be
576       divided into two categories: "hard H's" (such as "hangman",
577       "holograph", "hat", etc.) and "soft H's" (such as "hysterical",
578       "horrendous", "holy", etc.)
579
580       Hard H's always take "a" as their indefinite article, and soft H's
581       normally do so as well. But some English speakers prefer "an" for soft
582       H's (although the practice is now generally considered an affectation,
583       rather than a legitimate grammatical alternative).
584
585       At present, the "A()" and "AN()" subroutines ignore soft H's and use
586       "a" for any voiced 'H'. The author would, however, welcome feedback on
587       this decision (envisaging a possible future "soft H" mode).
588

INFLECTING ORDINALS

590       Occasionally it is useful to present an integer value as an ordinal
591       rather than as a numeral. For example:
592
593           Enter password (1st attempt): ********
594           Enter password (2nd attempt): *********
595           Enter password (3rd attempt): *********
596           No 4th attempt. Access denied.
597
598       To this end, Lingua::EN::Inflect provides the "ORD()" subroutine.
599       "ORD()" takes a single argument and forms its ordinal equivalent.  If
600       the argument isn't a numerical integer, it just adds "-th".
601

CONVERTING NUMBERS TO WORDS

603       The exportable subroutine "NUMWORDS" takes a number (cardinal or
604       ordinal) and returns an English representation of that number. In a
605       scalar context a string is returned. Hence:
606
607           use Lingua::EN::Inflect qw( NUMWORDS );
608
609           $words = NUMWORDS(1234567);
610
611       puts the string:
612
613           "one million, two hundred and thirty-four thousand, five hundred and sixty-seven"
614
615       into $words.
616
617       In a list context each comma-separated chunk is returned as a separate
618       element.  Hence:
619
620           @words = NUMWORDS(1234567);
621
622       puts the list:
623
624           ("one million",
625            "two hundred and thirty-four thousand",
626            "five hundred and sixty-seven")
627
628       into @words.
629
630       Note that this also means that:
631
632           print NUMWORDS(1234567);
633
634       will (misprint) print:
635
636           one milliontwo hundred and thirty-four thousandfive hundred and sixty-seven
637
638       To get readable output, make sure the call is in scalar context:
639
640           print scalar NUMWORDS(1234567);
641
642       Non-digits (apart from an optional leading plus or minus sign, any
643       decimal points, and ordinal suffixes -- see below) are silently
644       ignored, so the following all produce identical results:
645
646               NUMWORDS(5551202);
647               NUMWORDS(5_551_202);
648               NUMWORDS("5,551,202");
649               NUMWORDS("555-1202");
650
651       That last case is a little awkward since it's almost certainly a phone
652       number, and "five million, five hundred and fifty-one thousand, two
653       hundred and two" probably isn't what's wanted.
654
655       To overcome this, "NUMWORDS()" takes an optional named argument,
656       'group', which changes how numbers are translated. The argument must be
657       a positive integer less than four, which indicates how the digits of
658       the number are to be grouped. If the argument is 1, then each digit is
659       translated separately. If the argument is 2, pairs of digits (starting
660       from the left) are grouped together. If the argument is 3, triples of
661       numbers (again, from the left) are grouped. Hence:
662
663               NUMWORDS("555-1202", group=>1)
664
665       returns "five, five, five, one, two, zero, two", whilst:
666
667               NUMWORDS("555-1202", group=>2)
668
669       returns "fifty-five, fifty-one, twenty, two", and:
670
671               NUMWORDS("555-1202", group=>3)
672
673       returns "five fifty-five, one twenty, two".
674
675       Phone numbers are often written in words as
676       "five..five..five..one..two..zero..two", which is also easy to achieve:
677
678               join '..', NUMWORDS("555-1202", group=>1)
679
680       "NUMWORDS" also handles decimal fractions. Hence:
681
682               NUMWORDS("1.2345")
683
684       returns "one point two three four five" in a scalar context and
685       "("one","point","two","three","four","five")" in an array context.
686       Exponent form ("1.234e56") is not yet handled.
687
688       Multiple decimal points are only translated in one of the "grouping"
689       modes.  Hence:
690
691               NUMWORDS(101.202.303)
692
693       returns "one hundred and one point two zero two three zero three",
694       whereas:
695
696               NUMWORDS(101.202.303, group=>1)
697
698       returns "one zero one point two zero two point three zero three".
699
700       The digit '0' is unusual in that it may be translated to English as
701       "zero", "oh", or "nought". To cater for this diversity, "NUMWORDS" may
702       be passed a named argument, 'zero', which may be set to the desired
703       translation of '0'. For example:
704
705               print join "..", NUMWORDS("555-1202", group=>1, zero=>'oh')
706
707       prints "five..five..five..one..two..oh..two".  By default, zero is
708       rendered as "zero".
709
710       Likewise, the digit '1' may be rendered as "one" or "a/an" (or very
711       occasionally other variants), depending on the context. So there is a
712       'one' argument as well:
713
714               print NUMWORDS($_, one=>'a solitary', zero=>'no more'),
715                     PL(" bottle of beer on the wall\n", $_)
716                          for (3,2,1,0);
717
718               # prints:
719               #     three bottles of beer on the wall
720               #     two bottles of beer on the wall
721               #     a solitary bottle of beer on the wall
722               #     no more bottles of beer on the wall
723
724       Care is needed if the word "a/an" is to be used as a 'one' value.
725       Unless the next word is known in advance, it's almost always necessary
726       to use the "A" function as well:
727
728               print A( NUMWORDS(1, one=>'a') . " $_\n")
729                for qw(cat aardvark ewe hour);
730
731           # prints:
732           #     a cat
733           #     an aardvark
734           #     a ewe
735           #     an hour
736
737       Another major regional variation in number translation is the use of
738       "and" in certain contexts. The named argument 'and' allows the
739       programmer to specify how "and" should be handled. Hence:
740
741               print scalar NUMWORDS("765", 'and'=>'')
742
743       prints "seven hundred sixty-five", instead of "seven hundred and sixty-
744       five".  By default, the "and" is included.
745
746       The translation of the decimal point is also subject to variation (with
747       "point", "dot", and "decimal" being the favorites).  The named argument
748       'decimal' allows the programmer to specify how the decimal point
749       should be rendered. Hence:
750
751               print scalar NUMWORDS("666.124.64.101", group=>3, decimal=>'dot')
752
753       prints "six sixty-six, dot, one twenty-four, dot, sixty-four, dot, one
754       zero one". By default, the decimal point is rendered as "point".
755
756       "NUMWORDS" also handles the ordinal forms of numbers. So:
757
758               print scalar NUMWORDS('1st');
759               print scalar NUMWORDS('3rd');
760               print scalar NUMWORDS('202nd');
761               print scalar NUMWORDS('1000000th');
762
763       print:
764
765               first
766               third
767               two hundred and second
768               one millionth
769
770       Two common idioms in this regard are:
771
772               print scalar NUMWORDS(ORD($number));
773
774       and:
775
776               print scalar ORD(NUMWORDS($number));
777
778       These are identical in effect, except when $number contains a decimal:
779
780               $number = 99.09;
781               print scalar NUMWORDS(ORD($number));    # ninety-ninth point zero nine
782               print scalar ORD(NUMWORDS($number));    # ninety-nine point zero ninth
783
784       Use whichever you feel is most appropriate.
785

CONVERTING LISTS OF WORDS TO PHRASES

787       When creating a list of words, commas are used between adjacent items,
788       except if the items contain commas, in which case semicolons are used.
789       But if there are fewer than three items, the commas/semicolons are
790       omitted entirely. The final item also has a conjunction (usually "and"
791       or "or") before it. And although it's technically incorrect (and
792       sometimes misleading), some people prefer to omit the comma before that
793       final conjunction, even when there are more than two items.
794
795       That's complicated enough to warrant its own subroutine: "WORDLIST()".
796       This subroutine expects a list of words, possibly with one or more hash
797       references containing options. It returns a string that joins the list
798       together in the normal English usage. For example:
799
800           print "You chose ", WORDLIST(@selected_items), "\n";
801           # You chose barley soup, roast beef, and Yorkshire pudding
802
803           print "You chose ", WORDLIST(@selected_items, {final_sep=>""}), "\n";
804           # You chose barley soup, roast beef and Yorkshire pudding
805
806           print "Please choose ", WORDLIST(@side_orders, {conj=>"or"}), "\n";
807           # Please choose salad, vegetables, or ice-cream
808
809       The available options are:
810
811           Option named    Specifies                Default value
812
813           conj            Final conjunction        "and"
814           sep             Inter-item separator     ","
815           final_sep       Final separator          value of 'sep' option
816

INTERPOLATING INFLECTIONS IN STRINGS

818       By far the commonest use of the inflection subroutines is to produce
819       message strings for various purposes. For example:
820
821               print NUM($errors), PL_N(" error"), PL_V(" was"), " detected.\n";
822               print PL_ADJ("This"), PL_N(" error"), PL_V(" was"), " fatal.\n"
823                       if $severity > 1;
824
825       Unfortunately the need to separate each subroutine call detracts
826       significantly from the readability of the resulting code. To ameliorate
827       this problem, Lingua::EN::Inflect provides an exportable string-
828       interpolating subroutine (inflect($)), which recognizes calls to the
829       various inflection subroutines within a string and interpolates them
830       appropriately.
831
832       Using "inflect" the previous example could be rewritten:
833
834               print inflect "NUM($errors) PL_N(error) PL_V(was) detected.\n";
835               print inflect "PL_ADJ(This) PL_N(error) PL_V(was) fatal.\n"
836                       if $severity > 1;
837
838       Note that "inflect" also correctly handles calls to the "NUM()"
839       subroutine (whether interpolated or antecedent). The "inflect()"
840       subroutine has a related extra feature, in that it automatically
841       cancels any "default number" value before it returns its interpolated
842       string. This means that calls to "NUM()" which are embedded in an
843       "inflect()"-interpolated string do not "escape" and interfere with
844       subsequent inflections.
845

MODERN VS CLASSICAL INFLECTIONS

847       Certain words, mainly of Latin or Ancient Greek origin, can form
848       plurals either using the standard English "-s" suffix, or with their
849       original Latin or Greek inflections. For example:
850
851               PL("stigma")            # -> "stigmas" or "stigmata"
852               PL("torus")             # -> "toruses" or "tori"
853               PL("index")             # -> "indexes" or "indices"
854               PL("millennium")        # -> "millenniums" or "millennia"
855               PL("ganglion")          # -> "ganglions" or "ganglia"
856               PL("octopus")           # -> "octopuses" or "octopodes"
857
858       Lingua::EN::Inflect caters to such words by providing an "alternate
859       state" of inflection known as "classical mode".  By default, words are
860       inflected using their contemporary English plurals, but if classical
861       mode is invoked, the more traditional plural forms are returned
862       instead.
863
864       The exportable subroutine "classical()" controls this feature.  If
865       "classical()" is called with no arguments, it unconditionally invokes
866       classical mode. If it is called with a single argument, it turns all
867       classical inflects on or off (depending on whether the argument is true
868       or false). If called with two or more arguments, those arguments
869       specify which aspects of classical behaviour are to be used.
870
871       Thus:
872
873               classical;                  # SWITCH ON CLASSICAL MODE
874               print PL("formula");        # -> "formulae"
875
876               classical 0;                # SWITCH OFF CLASSICAL MODE
877               print PL("formula");        # -> "formulas"
878
879               classical $cmode;           # CLASSICAL MODE IFF $cmode
880               print PL("formula");        # -> "formulae" (IF $cmode)
881                                           # -> "formulas" (OTHERWISE)
882
883               classical herd=>1;          # SWITCH ON CLASSICAL MODE FOR "HERD" NOUNS
884               print PL("wildebeest");     # -> "wildebeest"
885
886               classical names=>1;         # SWITCH ON CLASSICAL MODE FOR NAMES
887               print PL("sally");          # -> "sallies"
888               print PL("Sally");          # -> "Sallys"
889
890       Note however that "classical()" has no effect on the inflection of
891       words which are now fully assimilated. Hence:
892
893               PL("forum")             # ALWAYS -> "forums"
894               PL("criterion")         # ALWAYS -> "criteria"
895
896       Lingua::EN::Inflect assumes that a capitalized word is a person's name.
897       So it forms the plural according to the rules for names (which is that
898       you don't inflect, you just add -s or -es). You can choose to turn that
899       behaviour off (it's on by default, even when the module isn't in
900       classical mode) by calling "classical(names=>0)".
901

USER-DEFINED INFLECTIONS

903   Adding plurals at run-time
904       Lingua::EN::Inflect provides five exportable subroutines which allow
905       the programmer to override the module's behaviour for specific cases:
906
907       "def_noun($$)"
908               The "def_noun" subroutine takes a pair of string arguments: the
909               singular and plural forms of the noun being specified. The
910               singular form specifies a pattern to be interpolated (as
911               "m/^(?:$first_arg)$/i").  Any noun matching this pattern is
912               then replaced by the string in the second argument. The second
913               argument specifies a string which is interpolated after the
914               match succeeds, and is then used as the plural form. For
915               example:
916
917                     def_noun  'cow'        => 'kine';
918                     def_noun  '(.+i)o'     => '$1i';
919                     def_noun  'spam(mer)?' => '\\$\\%\\@#\\$\\@#!!';
920
921               Note that both arguments should usually be specified in single
922               quotes, so that they are not interpolated when they are
923               specified, but later (when words are compared to them). As
924               indicated by the last example, care also needs to be taken with
925               certain characters in the second argument, to ensure that they
926               are not unintentionally interpolated during comparison.
927
928               The second argument string may also specify a second variant of
929               the plural form, to be used when "classical" plurals have been
930               requested. The beginning of the second variant is marked by a
931               '|' character:
932
933                     def_noun  'cow'        => 'cows|kine';
934                     def_noun  '(.+i)o'     => '$1os|$1i';
935                     def_noun  'spam(mer)?' => '\\$\\%\\@#\\$\\@#!!|varmints';
936
937               If no classical variant is given, the specified plural form is
938               used in both normal and "classical" modes.
939
940               If the second argument is "undef" instead of a string, then the
941               current user definition for the first argument is removed, and
942               the standard plural inflection(s) restored.
943
944               Note that in all cases, later plural definitions for a
945               particular singular form replace earlier definitions of the
946               same form. For example:
947
948                     # FIRST, HIDE THE MODERN FORM....
949                     def_noun  'aviatrix' => 'aviatrices';
950
951                     # LATER, HIDE THE CLASSICAL FORM...
952                     def_noun  'aviatrix' => 'aviatrixes';
953
954                     # FINALLY, RESTORE THE DEFAULT BEHAVIOUR...
955                     def_noun  'aviatrix' => undef;
956
957               Special care is also required when defining general patterns
958               and associated specific exceptions: put the more specific cases
959               after the general pattern. For example:
960
961                     def_noun  '(.+)us' => '$1i';      # EVERY "-us" TO "-i"
962                     def_noun  'bus'    => 'buses';    # EXCEPT FOR "bus"
963
964               This "try-most-recently-defined-first" approach to matching
965               user-defined words is also used by "def_verb", "def_a" and
966               "def_an".
967
968       "def_verb($$$$$$)"
969               The "def_verb" subroutine takes three pairs of string arguments
970               (that is, six arguments in total), specifying the singular and
971               plural forms of the three "persons" of verb. As with
972               "def_noun", the singular forms are specifications of run-time-
973               interpolated patterns, whilst the plural forms are
974               specifications of (up to two) run-time-interpolated strings:
975
976                      def_verb 'am'       => 'are',
977                               'are'      => 'are|art',
978                               'is'       => 'are';
979
980                      def_verb 'have'     => 'have',
981                               'have'     => 'have',
982                               'ha(s|th)' => 'have';
983
984               Note that as with "def_noun", modern/classical variants of
985               plurals may be separately specified, subsequent definitions
986               replace previous ones, and "undef"'ed plural forms revert to
987               the standard behaviour.
988
989       "def_adj($$)"
990               The "def_adj" subroutine takes a pair of string arguments,
991               which specify the singular and plural forms of the adjective
992               being defined.  As with "def_noun" and "def_verb", the singular
993               forms are specifications of run-time-interpolated patterns,
994               whilst the plural forms are specifications of (up to two) run-
995               time-interpolated strings:
996
997                      def_adj  'this'     => 'these';
998                      def_adj  'red'      => 'red|gules';
999
1000               As previously, modern/classical variants of plurals may be
1001               separately specified, subsequent definitions replace previous
1002               ones, and "undef"'ed plural forms revert to the standard
1003               behaviour.
1004
1005       def_a($) and def_an($)
1006               The "def_a" and "def_an" subroutines each take a single
1007               argument, which specifies a pattern. If a word passed to "A()"
1008               or "AN()" matches this pattern, it will be prefixed
1009               (unconditionally) with the corresponding indefinite article.
1010               For example:
1011
1012                     def_a  'error';
1013                     def_a  'in.+';
1014
1015                     def_an 'mistake';
1016                     def_an 'error';
1017
1018               As with the other "def_..." subroutines, such redefinitions are
1019               sequential in effect so that, after the above example, "error"
1020               will be inflected with "an".
1021
1022   The $HOME/.inflectrc file
1023       When it is imported, Lingua::EN::Inflect executes (as Perl code) the
1024       contents of any file named .inflectrc which it finds in the directory
1025       where Lingua/EN/Inflect.pm is installed, or in the current home
1026       directory ($ENV{HOME}), or in both.  Note that the code is executed
1027       within the Lingua::EN::Inflect namespace.
1028
1029       Hence the user or the local Perl guru can make appropriate calls to
1030       "def_noun", "def_verb", etc. in one of these .inflectrc files, to
1031       permanently and universally modify the behaviour of the module. For
1032       example
1033
1034             > cat /usr/local/lib/perl5/Lingua/EN/.inflectrc
1035
1036             def_noun  "UNIX"  => "UN*X|UNICES";
1037
1038             def_verb  "teco"  => "teco",      # LITERALLY: "to edit with TECO"
1039                       "teco"  => "teco",
1040                       "tecos" => "teco";
1041
1042             def_a     "Euler.*";              # "Yewler" TURNS IN HIS GRAVE
1043
1044       Note that calls to the "def_..." subroutines from within a program will
1045       take precedence over the contents of the home directory .inflectrc
1046       file, which in turn takes precedence over the system-wide .inflectrc
1047       file.
1048

DIAGNOSTICS

1050       On loading, if the Perl code in a .inflectrc file is invalid
1051       (syntactically or otherwise), an appropriate fatal error is issued.  A
1052       common problem is not ending the file with something that evaluates to
1053       true (as the five "def_..." subroutines do).
1054
1055       Using the five "def_..." subroutines directly in a program may also
1056       result in fatal diagnostics, if a (singular) pattern or an interpolated
1057       (plural) string is somehow invalid.
1058
1059       Specific diagnostics related to user-defined inflections are:
1060
1061       "Bad user-defined singular pattern:\n\t %s"
1062               The singular form of a user-defined noun or verb (as defined by
1063               a call to "def_noun", "def_verb", "def_adj", "def_a" or
1064               "def_an") is not a valid Perl regular expression. The actual
1065               Perl error message is also given.
1066
1067       "Bad user-defined plural string: '%s'"
1068               The plural form(s) of a user-defined noun or verb (as defined
1069               by a call to "def_noun", "def_verb" or "def_adj") is not a
1070               valid Perl interpolated string (usually because it interpolates
1071               some undefined variable).
1072
1073       "Bad .inflectrc file (%s):\n %s"
1074               Some other problem occurred in loading the named local or
1075               global .inflectrc file. The Perl error message (including the
1076               line number) is also given.
1077
1078       There are no diagnosable run-time error conditions for the actual
1079       inflection subroutines, except "NUMWORDS" and hence no run-time
1080       diagnostics. If the inflection subroutines are unable to form a plural
1081       via a user-definition or an inbuilt rule, they just "guess" the
1082       commonest English inflection: adding "-s" for nouns, removing "-s" for
1083       verbs, and no inflection for adjectives.
1084
1085       "Lingua::EN::Inflect::NUMWORDS()" can "die" with the following
1086       messages:
1087
1088       "Bad grouping option: %s"
1089               The optional argument to "NUMWORDS()" wasn't 1, 2 or 3.
1090
1091       "Number out of range"
1092               "NUMWORDS()" was passed a number larger than the number
1093               represented by 3006 consecutive nines. The words representing
1094               that number are 63,681 characters long, including commas and
1095               spaces. If you're interested in the actual value, see
1096               t/numwords.t.
1097
1098               The reference for the names is
1099               http://en.wikipedia.org/wiki/Names_of_large_numbers
1100
1101               There are no names for any higher numbers.
1102

OTHER ISSUES

1104   2nd Person precedence
1105       If a verb has identical 1st and 2nd person singular forms, but
1106       different 1st and 2nd person plural forms, then when its plural is
1107       constructed, the 2nd person plural form is always preferred.
1108
1109       The author is not currently aware of any such verbs in English, but is
1110       not quite arrogant enough to assume ipso facto that none exist.
1111
1112   Nominative precedence
1113       The singular pronoun "it" presents a special problem because its plural
1114       form can vary, depending on its "case". For example:
1115
1116               It ate my homework       ->  They ate my homework
1117               It ate it                ->  They ate them
1118               I fed my homework to it  ->  I fed my homework to them
1119
1120       As a consequence of this ambiguity, "PL()" and "PL_N()" have been
1121       implemented so that they always return the nominative plural (that is,
1122       "they").
1123
1124       However, when asked for the plural of an unambiguously accusative "it"
1125       (namely, "PL("to it")", "PL_N("from it")", "PL("with it")", etc.), both
1126       subroutines will correctly return the accusative plural ("to them",
1127       "from them", "with them", etc.)
1128
1129   The plurality of zero
1130       The rules governing the choice between:
1131
1132             There were no errors.
1133
1134       and
1135
1136             There was no error.
1137
1138       are complex and often depend more on intent than on content.  Hence
1139       it is infeasible to specify such rules algorithmically.
1140
1141       Therefore, Lingua::EN::Inflect contents itself with the following
1142       compromise: If the governing number is zero, inflections always return
1143       the plural form unless the appropriate "classical" inflection is in
1144       effect, in which case the singular form is always returned.
1145
1146       Thus, the sequence:
1147
1148             NUM(0);
1149             print inflect "There PL(was) NO(choice)";
1150
1151       produces "There were no choices", whereas:
1152
1153             classical 'zero';     # or: classical(zero=>1);
1154             NUM(0);
1155             print inflect "There PL(was) NO(choice)";
1156
1157       will print "There was no choice".
1158
1159   Homographs with heterogeneous plurals
1160       Another context in which intent (and not content) sometimes determines
1161       plurality is where two distinct meanings of a word require different
1162       plurals. For example:
1163
1164             Three basses were stolen from the band's equipment trailer.
1165             Three bass were stolen from the band's aquarium.
1166
1167             I put the mice next to the cheese.
1168             I put the mouses next to the computers.
1169
1170             Several thoughts about leaving crossed my mind.
1171             Several thought about leaving across my lawn.
1172
1173       Lingua::EN::Inflect handles such words in two ways:
1174
1175       •       If both meanings of the word are the same part of speech (for
1176               example, "bass" is a noun in both sentences above), then one
1177               meaning is chosen as the "usual" meaning, and only that
1178               meaning's plural is ever returned by any of the inflection
1179               subroutines.
1180
1181       •       If each meaning of the word is a different part of speech (for
1182               example, "thought" is both a noun and a verb), then the noun's
1183               plural is returned by "PL()" and "PL_N()" and the verb's plural
1184               is returned only by "PL_V()".
1185
1186       Such contexts are, fortunately, uncommon (particularly "same-part-of-
1187       speech" examples). An informal study of nearly 600 "difficult plurals"
1188       indicates that "PL()" can be relied upon to "get it right" about 98% of
1189       the time (although, of course, ichthyophilic guitarists or cyber-
1190       behaviouralists may experience higher rates of confusion).
1191
1192       If the choice of a particular "usual inflection" is considered
1193       inappropriate, it can always be reversed with a preliminary call to the
1194       corresponding "def_..." subroutine.
1195

NOTE

1197       I'm not taking any further correspondence on:
1198
1199       "octopi".
1200           Despite the populist pandering of certain New World dictionaries,
1201           the plural is "octopuses" or (for the pedantic classicist)
1202           "octopodes". The suffix "-pus" is Greek, not Latin, so the plural
1203           is "-podes", not "-pi".
1204
1205       "virus".
1206           Had no plural in Latin (possibly because it was a mass noun).  The
1207           only plural is the Anglicized "viruses".
1208

AUTHOR

1210       Damian Conway (damian@conway.org)
1211

BUGS AND IRRITATIONS

1213       The endless inconsistencies of English.
1214
1215       (Please report words for which the correct plural or indefinite article
1216       is not formed, so that the reliability of Lingua::EN::Inflect can be
1217       improved.)
1218
1220        Copyright (c) 1997-2009, Damian Conway. All Rights Reserved.
1221        This module is free software. It may be used, redistributed
1222            and/or modified under the same terms as Perl itself.
1223
1224
1225
1226perl v5.32.1                      2021-01-27            Lingua::EN::Inflect(3)
Impressum