1Lingua::EN::Inflect(3)User Contributed Perl DocumentationLingua::EN::Inflect(3)
2
3
4

NAME

6       Lingua::EN::Inflect - Convert singular to plural. Select "a" or "an".
7

VERSION

9       This document describes version 1.905 of Lingua::EN::Inflect
10

SYNOPSIS

12        use Lingua::EN::Inflect qw ( PL PL_N PL_V PL_ADJ NO NUM
13                         PL_eq PL_N_eq PL_V_eq PL_ADJ_eq
14                         A AN
15                         PART_PRES
16                         ORD NUMWORDS
17                         WORDLIST
18                         inflect classical
19                         def_noun def_verb def_adj def_a def_an );
20
21
22        # UNCONDITIONALLY FORM THE PLURAL
23
24             print "The plural of ", $word, " is ", PL($word), "\n";
25
26
27        # CONDITIONALLY FORM THE PLURAL
28
29             print "I saw $cat_count ", PL("cat",$cat_count), "\n";
30
31
32        # FORM PLURALS FOR SPECIFIC PARTS OF SPEECH
33
34             print PL_N("I",$N1), PL_V("saw",$N1),
35               PL_ADJ("my",$N2), PL_N("saw",$N2), "\n";
36
37
38        # DEAL WITH "0/1/N" -> "no/1/N" TRANSLATION:
39
40             print "There ", PL_V("was",$errors), NO(" error",$errors), "\n";
41
42
43        # USE DEFAULT COUNTS:
44
45             print NUM($N1,""), PL("I"), PL_V(" saw"), NUM($N2), PL_N(" saw");
46             print "There ", NUM($errors,''), PL_V("was"), NO(" error"), "\n";
47
48
49        # COMPARE TWO WORDS "NUMBER-INSENSITIVELY":
50
51             print "same\n"      if PL_eq($word1, $word2);
52             print "same noun\n" if PL_N_eq($word1, $word2);
53             print "same verb\n" if PL_V_eq($word1, $word2);
54             print "same adj.\n" if PL_ADJ_eq($word1, $word2);
55
56
57        # ADD CORRECT "a" OR "an" FOR A GIVEN WORD:
58
59             print "Did you want ", A($thing), " or ", AN($idea), "\n";
60
61
62        # CONVERT NUMERALS INTO ORDINALS (i.e. 1->1st, 2->2nd, 3->3rd, etc.)
63
64             print "It was ", ORD($position), " from the left\n";
65
66        # CONVERT NUMERALS TO WORDS (i.e. 1->"one", 101->"one hundred and one", etc.)
67        # IN A SCALAR CONTEXT: GET BACK A SINGLE STRING...
68
69           $words = NUMWORDS(1234);      # "one thousand, two hundred and thirty-four"
70           $words = NUMWORDS(ORD(1234)); # "one thousand, two hundred and thirty-fourth"
71
72
73        # IN A LIST CONTEXT: GET BACK A LIST OF STRINGS, ONE FOR EACH "CHUNK"...
74
75           @words = NUMWORDS(1234);    # ("one thousand","two hundred and thirty-four")
76
77
78        # OPTIONAL PARAMETERS CHANGE TRANSLATION:
79
80           $words = NUMWORDS(12345, group=>1);
81                       # "one, two, three, four, five"
82
83           $words = NUMWORDS(12345, group=>2);
84                       # "twelve, thirty-four, five"
85
86           $words = NUMWORDS(12345, group=>3);
87                       # "one twenty-three, forty-five"
88
89           $words = NUMWORDS(1234, 'and'=>'');
90                       # "one thousand, two hundred thirty-four"
91
92           $words = NUMWORDS(1234, 'and'=>', plus');
93                       # "one thousand, two hundred, plus thirty-four"
94
95           $words = NUMWORDS(555_1202, group=>1, zero=>'oh');
96                       # "five, five, five, one, two, oh, two"
97
98           $words = NUMWORDS(555_1202, group=>1, one=>'unity');
99                       # "five, five, five, unity, two, zero, two"
100
101           $words = NUMWORDS(123.456, group=>1, decimal=>'mark');
102                       # "one two three mark four five six"
103
104        # LITERAL STYLE ONLY NAMES NUMBERS LESS THAN A CERTAIN THRESHOLD...
105
106           $words = NUMWORDS(   9, threshold=>10);    # "nine"
107           $words = NUMWORDS(  10, threshold=>10);    # "ten"
108           $words = NUMWORDS(  11, threshold=>10);    # "11"
109           $words = NUMWORDS(1000, threshold=>10);    # "1,000"
110
111        # JOIN WORDS INTO A LIST:
112
113           $list = WORDLIST("apple", "banana", "carrot");
114                       # "apple, banana, and carrot"
115
116           $list = WORDLIST("apple", "banana");
117                       # "apple and banana"
118
119           $list = WORDLIST("apple", "banana", "carrot", {final_sep=>""});
120                       # "apple, banana and carrot"
121
122
123        # REQUIRE "CLASSICAL" PLURALS (EG: "focus"->"foci", "cherub"->"cherubim")
124
125             classical;          # USE ALL CLASSICAL PLURALS
126
127             classical 1;           #  USE ALL CLASSICAL PLURALS
128             classical 0;           #  USE ALL MODERN PLURALS (DEFAULT)
129
130             classical 'zero';      #  "no error" INSTEAD OF "no errors"
131             classical zero=>1;     #  "no error" INSTEAD OF "no errors"
132             classical zero=>0;     #  "no errors" INSTEAD OF "no error"
133
134             classical 'herd';      #  "2 buffalo" INSTEAD OF "2 buffalos"
135             classical herd=>1;     #  "2 buffalo" INSTEAD OF "2 buffalos"
136             classical herd=>0;     #  "2 buffalos" INSTEAD OF "2 buffalo"
137
138             classical 'persons';   # "2 chairpersons" INSTEAD OF "2 chairpeople"
139             classical persons=>1;  # "2 chairpersons" INSTEAD OF "2 chairpeople"
140             classical persons=>0;  # "2 chairpeople" INSTEAD OF "2 chairpersons"
141
142             classical 'ancient';   # "2 formulae" INSTEAD OF "2 formulas"
143             classical ancient=>1;  # "2 formulae" INSTEAD OF "2 formulas"
144             classical ancient=>0;  # "2 formulas" INSTEAD OF "2 formulae"
145
146
147
148        # INTERPOLATE "PL()", "PL_N()", "PL_V()", "PL_ADJ()", "A()", "AN()",
149        # "NUM()" AND "ORD()" WITHIN STRINGS:
150
151             print inflect("The plural of $word is PL($word)\n");
152             print inflect("I saw $cat_count PL(cat,$cat_count)\n");
153             print inflect("PL(I,$N1) PL_V(saw,$N1) PL(a,$N2) PL_N(saw,$N2)\n");
154             print inflect("NUM($N1,)PL(I) PL_V(saw) NUM($N2,)PL(a) PL_N(saw)\n");
155             print inflect("I saw NUM($cat_count) PL(cat)\n");
156             print inflect("There PL_V(was,$errors) NO(error,$errors)\n");
157             print inflect("There NUM($errors,)PL_V(was) NO(error)\n");
158             print inflect("Did you want A($thing) or AN($idea)\n");
159             print inflect("It was ORD($position) from the left\n");
160
161
162        # ADD USER-DEFINED INFLECTIONS (OVERRIDING INBUILT RULES):
163
164             def_noun  "VAX"  => "VAXen";  # SINGULAR => PLURAL
165
166             def_verb  "will" => "shall",  # 1ST PERSON SINGULAR => PLURAL
167                       "will" => "will",   # 2ND PERSON SINGULAR => PLURAL
168                       "will" => "will";   # 3RD PERSON SINGULAR => PLURAL
169
170             def_adj   "hir"  => "their";  # SINGULAR => PLURAL
171
172             def_a     "h";                # "AY HALWAYS SEZ 'HAITCH'!"
173
174             def_an    "horrendous.*";     # "AN HORRENDOUS AFFECTATION"
175

DESCRIPTION

177       [Note: This module is strictly in maintenance mode now.  Take a look at
178       the newer Lingua::EN::Inflexion module, which offers a cleaner and more
179       convenient interface, has many more features (including
180       plural->singular inflexions), and is also much better tested.  If you
181       have existing code that relies on Lingua::EN::Inflect, see the section
182       of the documentation entitled "CONVERTING FROM LINGUA::EN::INFLECT". ]
183
184       The exportable subroutines of Lingua::EN::Inflect provide plural
185       inflections, "a"/"an" selection for English words, and manipulation of
186       numbers as words.
187
188       Plural forms of all nouns, most verbs, and some adjectives are
189       provided. Where appropriate, "classical" variants (for example:
190       "brother" -> "brethren", "dogma" -> "dogmata", etc.) are also provided.
191
192       Pronunciation-based "a"/"an" selection is provided for all English
193       words, and most initialisms.
194
195       It is also possible to inflect numerals (1,2,3) to ordinals (1st, 2nd,
196       3rd) and to English words ("one", "two", "three").
197
198       In generating these inflections, Lingua::EN::Inflect follows the Oxford
199       English Dictionary and the guidelines in Fowler's Modern English Usage,
200       preferring the former where the two disagree.
201
202       The module is built around standard British spelling, but is designed
203       to cope with common American variants as well. Slang, jargon, and other
204       English dialects are not explicitly catered for.
205
206       Where two or more inflected forms exist for a single word (typically a
207       "classical" form and a "modern" form), Lingua::EN::Inflect prefers the
208       more common form (typically the "modern" one), unless "classical"
209       processing has been specified (see "MODERN VS CLASSICAL INFLECTIONS").
210

FORMING PLURALS

212   Inflecting Plurals
213       All of the "PL_..." plural inflection subroutines take the word to be
214       inflected as their first argument and return the corresponding
215       inflection.  Note that all such subroutines expect the singular form of
216       the word. The results of passing a plural form are undefined (and
217       unlikely to be correct).
218
219       The "PL_..." subroutines also take an optional second argument, which
220       indicates the grammatical "number" of the word (or of another word with
221       which the word being inflected must agree). If the "number" argument is
222       supplied and is not 1 (or "one" or "a", or some other adjective that
223       implies the singular), the plural form of the word is returned. If the
224       "number" argument does indicate singularity, the (uninflected) word
225       itself is returned. If the number argument is omitted, the plural form
226       is returned unconditionally.
227
228       The various subroutines are:
229
230       PL_N($;$)
231               The exportable subroutine PL_N() takes a singular English noun
232               or pronoun and returns its plural. Pronouns in the nominative
233               ("I" -> "we") and accusative ("me" -> "us") cases are handled,
234               as are possessive pronouns ("mine" -> "ours").
235
236       PL_V($;$)
237               The exportable subroutine PL_V() takes the singular form of a
238               conjugated verb (that is, one which is already in the correct
239               "person" and "mood") and returns the corresponding plural
240               conjugation.
241
242       PL_ADJ($;$)
243               The exportable subroutine PL_ADJ() takes the singular form of
244               certain types of adjectives and returns the corresponding
245               plural form.  Adjectives that are correctly handled include:
246               "numerical" adjectives ("a" -> "some"), demonstrative
247               adjectives ("this" -> "these", "that" -> "those"), and
248               possessives ("my" -> "our", "cat's" -> "cats'", "child's" ->
249               "childrens'", etc.)
250
251       PL($;$) The exportable subroutine PL() takes a singular English noun,
252               pronoun, verb, or adjective and returns its plural form. Where
253               a word has more than one inflection depending on its part of
254               speech (for example, the noun "thought" inflects to "thoughts",
255               the verb "thought" to "thought"), the (singular) noun sense is
256               preferred to the (singular) verb sense.
257
258               Hence PL("knife") will return "knives" ("knife" having been
259               treated as a singular noun), whereas PL("knifes") will return
260               "knife" ("knifes" having been treated as a 3rd person singular
261               verb).
262
263               The inherent ambiguity of such cases suggests that, where the
264               part of speech is known, "PL_N", "PL_V", and "PL_ADJ" should be
265               used in preference to "PL".
266
267       Note that all these subroutines ignore any whitespace surrounding the
268       word being inflected, but preserve that whitespace when the result is
269       returned. For example, PL(" cat  ") returns " cats  ".
270
271   Numbered plurals
272       The "PL_..." subroutines return only the inflected word, not the count
273       that was used to inflect it. Thus, in order to produce "I saw 3 ducks",
274       it is necessary to use:
275
276           print "I saw $N ", PL_N($animal,$N), "\n";
277
278       Since the usual purpose of producing a plural is to make it agree with
279       a preceding count, Lingua::EN::Inflect provides an exportable
280       subroutine (NO($;$)) which, given a word and a(n optional) count,
281       returns the count followed by the correctly inflected word. Hence the
282       previous example can be rewritten:
283
284           print "I saw ", NO($animal,$N), "\n";
285
286       In addition, if the count is zero (or some other term which implies
287       zero, such as "zero", "nil", etc.) the count is replaced by the word
288       "no". Hence, if $N had the value zero, the previous example would print
289       the somewhat more elegant:
290
291           I saw no animals
292
293       rather than:
294
295           I saw 0 animals
296
297       Note that the name of the subroutine is a pun: the subroutine returns
298       either a number (a No.) or a "no", in front of the inflected word.
299
300       Wordy and comma'd plurals
301
302       The NO() subroutine takes an optional third argument: a hash of named
303       options that configure its behaviour.
304
305       The 'words_below' option informs NO() what other numbers (i.e.  apart
306       from zero) it should convert to words. For example:
307
308           for my $count (0..12) {
309               print NO('cat', $count, {words_below => 10}), "\n";
310           }
311
312       would print:
313
314           no cats
315           one cat
316           two cats
317           three cats
318           four cats
319           five cats
320           six cats
321           seven cats
322           eight cats
323           nine cats
324           10 cats
325           11 cats
326           12 cats
327
328       The 'comma' and 'comma_every' options determine whether or not the
329       numbers produced by NO() have commas in them. That is:
330
331           2001 space odysseys
332
333       versus:
334
335           2,001 space odysseys
336
337       Normally, numbers are produced without commas, but if 'comma' or
338       'comma_every' is specified, then commas are added as requested.
339
340       The 'comma' option specifies which character to use as a comma.  It
341       defaults to ',', but may be set to anything convenient:
342
343           print NO('Euro', $amount, {comma=>'.'});
344
345           # prints:  1.000.000 Euros
346
347       The 'comma_every' option specifies how many characters between commas.
348       It defaults to 3, but may be set to any positive number:
349
350           print NO('Euro', $amount, {comma_every=>4});
351
352           # prints:  100,0000 Euros
353
354       Note that you can set both options at once, if you wish:
355
356           print NO('Euro', $amount, {comma_every=>2, comma=>'_'});
357
358           # prints:  1_00_00_00 Euros
359
360   Reducing the number of counts required
361       In some contexts, the need to supply an explicit count to the various
362       "PL_..." subroutines makes for tiresome repetition. For example:
363
364           print PL_ADJ("This",$errors), PL_N(" error",$errors),
365                 PL_V(" was",$errors), " fatal.\n";
366
367       Lingua::EN::Inflect therefore provides an exportable subroutine
368       (NUM($;$)) that may be used to set a persistent "default number" value.
369       If such a value is set, it is subsequently used whenever an optional
370       second "number" argument is omitted. The default value thus set can
371       subsequently be removed by calling NUM() with no arguments.  Hence we
372       could rewrite the previous example:
373
374           NUM($errors);
375           print PL_ADJ("This"), PL_N(" error"), PL_V(" was"), " fatal.\n";
376           NUM();
377
378       Normally, NUM() returns its first argument, so that it may also be
379       "inlined" in contexts like:
380
381           print NUM($errors), PL_N(" error"), PL_V(" was"), " detected.\n";
382           print PL_ADJ("This"), PL_N(" error"), PL_V(" was"), " fatal.\n"
383               if $severity > 1;
384
385       However, in certain contexts (see "INTERPOLATING INFLECTIONS IN
386       STRINGS") it is preferable that NUM() return an empty string. Hence
387       NUM() provides an optional second argument. If that argument is
388       supplied (that is, if it is defined) and evaluates to false, "NUM"
389       returns an empty string instead of its first argument. For example:
390
391           print NUM($errors,0), NO("error"), PL_V(" was"), " detected.\n";
392           print PL_ADJ("This"), PL_N(" error"), PL_V(" was"), " fatal.\n"
393               if $severity > 1;
394
395   Number-insensitive equality
396       Lingua::EN::Inflect also provides a solution to the problem of
397       comparing words of differing plurality through the exportable
398       subroutines PL_eq($$), PL_N_eq($$), PL_V_eq($$), and PL_ADJ_eq($$).
399       Each  of these subroutines takes two strings, and  compares them using
400       the corresponding plural-inflection subroutine (PL(), PL_N(), PL_V(),
401       and PL_ADJ() respectively).
402
403       The comparison returns true if:
404
405       •       the strings are "eq"-equal, or
406
407       •       one string is "eq"-equal to a plural form of the other, or
408
409       •       the strings are two different plural forms of the one word.
410
411       Hence all of the following return true:
412
413           PL_eq("index","index")      # RETURNS "eq"
414           PL_eq("index","indexes")    # RETURNS "s:p"
415           PL_eq("index","indices")    # RETURNS "s:p"
416           PL_eq("indexes","index")    # RETURNS "p:s"
417           PL_eq("indices","index")    # RETURNS "p:s"
418           PL_eq("indices","indexes")  # RETURNS "p:p"
419           PL_eq("indexes","indices")  # RETURNS "p:p"
420           PL_eq("indices","indices")  # RETURNS "eq"
421
422       As indicated by the comments in the previous example, the actual value
423       returned by the various "PL_eq" subroutines encodes which of the three
424       equality rules succeeded: "eq" is returned if the strings were
425       identical, "s:p" if the strings were singular and plural respectively,
426       "p:s" for plural and singular, and "p:p" for two distinct plurals.
427       Inequality is indicated by returning an empty string.
428
429       It should be noted that two distinct singular words which happen to
430       take the same plural form are not considered equal, nor are cases where
431       one (singular) word's plural is the other (plural) word's singular.
432       Hence all of the following return false:
433
434           PL_eq("base","basis")       # ALTHOUGH BOTH -> "bases"
435           PL_eq("syrinx","syringe")   # ALTHOUGH BOTH -> "syringes"
436           PL_eq("she","he")       # ALTHOUGH BOTH -> "they"
437
438           PL_eq("opus","operas")      # ALTHOUGH "opus" -> "opera" -> "operas"
439           PL_eq("taxi","taxes")       # ALTHOUGH "taxi" -> "taxis" -> "taxes"
440
441       Note too that, although the comparison is "number-insensitive" it is
442       not case-insensitive (that is, "PL_eq("time","Times")" returns false).
443       To obtain both number and case insensitivity, prefix both arguments
444       with "lc" (that is, "PL_eq(lc "time", lc "Times")" returns true).
445

OTHER VERB FORMS

447   Present participles
448       "Lingua::EN::Inflect" also provides the "PART_PRES" subroutine, which
449       can take a 3rd person singular verb and correctly inflect it to its
450       present participle:
451
452           PART_PRES("runs")   # "running"
453           PART_PRES("loves")  # "loving"
454           PART_PRES("eats")   # "eating"
455           PART_PRES("bats")   # "batting"
456           PART_PRES("spies")  # "spying"
457

PROVIDING INDEFINITE ARTICLES

459   Selecting indefinite articles
460       Lingua::EN::Inflect provides two exportable subroutines (A($;$) and
461       AN($;$)) which will correctly prepend the appropriate indefinite
462       article to a word, depending on its pronunciation. For example:
463
464           A("cat")        # -> "a cat"
465           AN("cat")       # -> "a cat"
466           A("euphemism")      # -> "a euphemism"
467           A("Euler number")   # -> "an Euler number"
468           A("hour")       # -> "an hour"
469           A("houri")      # -> "a houri"
470
471       The two subroutines are identical in function and may be used
472       interchangeably. The only reason that two versions are provided is to
473       enhance the readability of code such as:
474
475           print "That is ", AN($errortype), " error\n";
476           print "That is ", A($fataltype), " fatal error\n";
477
478       Note that in both cases the actual article provided depends only on the
479       pronunciation of the first argument, not on the name of the subroutine.
480
481       A() and AN() will ignore any indefinite article that already exists at
482       the start of the string. Thus:
483
484           @half_arked = (
485               "a elephant",
486               "a giraffe",
487               "an ewe",
488               "a orangutan",
489           );
490
491           print A($_), "\n" for @half_arked;
492
493           # prints:
494           #     an elephant
495           #     a giraffe
496           #     a ewe
497           #     an orangutan
498
499       A() and AN() both take an optional second argument. As with the
500       "PL_..." subroutines, this second argument is a "number" specifier. If
501       its value is 1 (or some other value implying singularity), A() and AN()
502       insert "a" or "an" as appropriate. If the number specifier implies
503       plurality, A() and AN() insert the actual second argument instead.
504       For example:
505
506           A("cat",1)      # -> "a cat"
507           A("cat",2)      # -> "2 cat"
508           A("cat","one")      # -> "one cat"
509           A("cat","no")       # -> "no cat"
510
511       Note that, as implied by the previous examples, A() and AN() both
512       assume that their job is merely to provide the correct qualifier for a
513       word (that is: "a", "an", or the specified count).  In other words,
514       they assume that the word they are given has already been correctly
515       inflected for plurality. Hence, if $N has the value 2, then:
516
517             print A("cat",$N);
518
519       prints "2 cat", instead of "2 cats". The correct approach is to use:
520
521             print A(PL("cat",$N),$N);
522
523       or, better still:
524
525             print NO("cat",$N);
526
527       Note too that, like the various "PL_..." subroutines, whenever A() and
528       AN() are called with only one argument they are subject to the effects
529       of any preceding call to NUM(). Hence, another possible solution is:
530
531             NUM($N);
532             print A(PL("cat"));
533
534   Indefinite articles and initialisms
535       "Initialisms" (sometimes inaccurately called "acronyms") are terms
536       which have been formed from the initial letters of words in a phrase
537       (for example, "NATO", "NBL", "S.O.S.", "SCUBA", etc.)
538
539       Such terms present a particular challenge when selecting between "a"
540       and "an", since they are sometimes pronounced as if they were a single
541       word ("nay-tow", "sku-ba") and sometimes as a series of letter names
542       ("en-eff-ell", "ess-oh-ess").
543
544       A() and AN() cope with this dichotomy using a series of inbuilt rules,
545       which may be summarized as:
546
547       1.      If the word starts with a single letter, followed by a period
548               or dash (for example, "R.I.P.", "C.O.D.", "e-mail", "X-ray",
549               "T-square"), then choose the appropriate article for the sound
550               of the first letter ("an R.I.P.", "a C.O.D.", "an e-mail", "an
551               X-ray", "a T-square").
552
553       2.      If the first two letters of the word are capitals, consonants,
554               and do not appear at the start of any known English word, (for
555               example, "LCD", "XML", "YWCA"), then once again choose "a" or
556               "an" depending on the sound of the first letter ("an LCD", "an
557               XML", "a YWCA").
558
559       3.      Otherwise, assume the string is a capitalized word or a
560               pronounceable initialism (for example, "LED", "OPEC", "FAQ",
561               "UNESCO"), and therefore takes "a" or "an" according to the
562               (apparent) pronunciation of the entire word ("a LED", "an
563               OPEC", "a FAQ", "a UNESCO").
564
565       Note that rules 1 and 3 together imply that the presence or absence of
566       punctuation may change the selection of indefinite article for a
567       particular initialism (for example, "a FAQ" but "an F.A.Q.").
568
569   Indefinite articles and "soft H's"
570       Words beginning in the letter 'H' present another type of difficulty
571       when selecting a suitable indefinite article. In a few such words (for
572       example, "hour", "honour", "heir") the 'H' is not voiced at all, and so
573       such words inflect with "an". The remaining cases ("voiced H's") may be
574       divided into two categories: "hard H's" (such as "hangman",
575       "holograph", "hat", etc.) and "soft H's" (such as "hysterical",
576       "horrendous", "holy", etc.)
577
578       Hard H's always take "a" as their indefinite article, and soft H's
579       normally do so as well. But some English speakers prefer "an" for soft
580       H's (although the practice is now generally considered an affectation,
581       rather than a legitimate grammatical alternative).
582
583       At present, the A() and AN() subroutines ignore soft H's and use "a"
584       for any voiced 'H'. The author would, however, welcome feedback on this
585       decision (envisaging a possible future "soft H" mode).
586

INFLECTING ORDINALS

588       Occasionally it is useful to present an integer value as an ordinal
589       rather than as a numeral. For example:
590
591           Enter password (1st attempt): ********
592           Enter password (2nd attempt): *********
593           Enter password (3rd attempt): *********
594           No 4th attempt. Access denied.
595
596       To this end, Lingua::EN::Inflect provides the ORD() subroutine.
597       ORD() takes a single argument and forms its ordinal equivalent.  If
598       the argument isn't a numerical integer, it just adds "-th".
599

CONVERTING NUMBERS TO WORDS

601       The exportable subroutine "NUMWORDS" takes a number (cardinal or
602       ordinal) and returns an English representation of that number. In a
603       scalar context a string is returned. Hence:
604
605           use Lingua::EN::Inflect qw( NUMWORDS );
606
607           $words = NUMWORDS(1234567);
608
609       puts the string:
610
611           "one million, two hundred and thirty-four thousand, five hundred and sixty-seven"
612
613       into $words.
614
615       In a list context each comma-separated chunk is returned as a separate
616       element.  Hence:
617
618           @words = NUMWORDS(1234567);
619
620       puts the list:
621
622           ("one million",
623            "two hundred and thirty-four thousand",
624            "five hundred and sixty-seven")
625
626       into @words.
627
628       Note that this also means that:
629
630           print NUMWORDS(1234567);
631
632       will (misprint) print:
633
634           one milliontwo hundred and thirty-four thousandfive hundred and sixty-seven
635
636       To get readable output, make sure the call is in scalar context:
637
638           print scalar NUMWORDS(1234567);
639
640       Non-digits (apart from an optional leading plus or minus sign, any
641       decimal points, and ordinal suffixes -- see below) are silently
642       ignored, so the following all produce identical results:
643
644               NUMWORDS(5551202);
645               NUMWORDS(5_551_202);
646               NUMWORDS("5,551,202");
647               NUMWORDS("555-1202");
648
649       That last case is a little awkward since it's almost certainly a phone
650       number, and "five million, five hundred and fifty-one thousand, two
651       hundred and two" probably isn't what's wanted.
652
653       To overcome this, NUMWORDS() takes an optional named argument, 'group',
654       which changes how numbers are translated. The argument must be a
655       positive integer less than four, which indicates how the digits of the
656       number are to be grouped. If the argument is 1, then each digit is
657       translated separately. If the argument is 2, pairs of digits (starting
658       from the left) are grouped together. If the argument is 3, triples of
659       numbers (again, from the left) are grouped. Hence:
660
661               NUMWORDS("555-1202", group=>1)
662
663       returns "five, five, five, one, two, zero, two", whilst:
664
665               NUMWORDS("555-1202", group=>2)
666
667       returns "fifty-five, fifty-one, twenty, two", and:
668
669               NUMWORDS("555-1202", group=>3)
670
671       returns "five fifty-five, one twenty, two".
672
673       Phone numbers are often written in words as
674       "five..five..five..one..two..zero..two", which is also easy to achieve:
675
676               join '..', NUMWORDS("555-1202", group=>1)
677
678       "NUMWORDS" also handles decimal fractions. Hence:
679
680               NUMWORDS("1.2345")
681
682       returns "one point two three four five" in a scalar context and
683       "("one","point","two","three","four","five")" in a list context.
684       Exponent form ("1.234e56") is not yet handled.
685
686       Multiple decimal points are only translated in one of the "grouping"
687       modes.  Hence:
688
689               NUMWORDS(101.202.303)
690
691       returns "one hundred and one point two zero two three zero three",
692       whereas:
693
694               NUMWORDS(101.202.303, group=>1)
695
696       returns "one zero one point two zero two point three zero three".
697
698       The digit '0' is unusual in that it may be translated to English as
699       "zero", "oh", or "nought". To cater for this diversity, "NUMWORDS" may
700       be passed a named argument, 'zero', which may be set to the desired
701       translation of '0'. For example:
702
703               print join "..", NUMWORDS("555-1202", group=>1, zero=>'oh')
704
705       prints "five..five..five..one..two..oh..two".  By default, zero is
706       rendered as "zero".
707
708       Likewise, the digit '1' may be rendered as "one" or "a/an" (or very
709       occasionally other variants), depending on the context. So there is a
710       'one' argument as well:
711
712               print NUMWORDS($_, one=>'a solitary', zero=>'no more'),
713                     PL(" bottle of beer on the wall\n", $_)
714                          for (3,2,1,0);
715
716               # prints:
717               #     three bottles of beer on the wall
718               #     two bottles of beer on the wall
719               #     a solitary bottle of beer on the wall
720               #     no more bottles of beer on the wall
721
722       Care is needed if the word "a/an" is to be used as a 'one' value.
723       Unless the next word is known in advance, it's almost always necessary
724       to use the "A" function as well:
725
726               print A( NUMWORDS(1, one=>'a') . " $_\n")
727                for qw(cat aardvark ewe hour);
728
729           # prints:
730           #     a cat
731           #     an aardvark
732           #     a ewe
733           #     an hour
734
735       Another major regional variation in number translation is the use of
736       "and" in certain contexts. The named argument 'and' allows the
737       programmer to specify how "and" should be handled. Hence:
738
739               print scalar NUMWORDS("765", 'and'=>'')
740
741       prints "seven hundred sixty-five", instead of "seven hundred and sixty-
742       five".  By default, the "and" is included.
743
744       The translation of the decimal point is also subject to variation (with
745       "point", "dot", and "decimal" being the favorites).  The named argument
746       'decimal' allows the programmer to specify how the decimal point should be
747       rendered. Hence:
748
749               print scalar NUMWORDS("666.124.64.101", group=>3, decimal=>'dot')
750
751       prints "six sixty-six, dot, one twenty-four, dot, sixty-four, dot, one
752       zero one". By default, the decimal point is rendered as "point".
753
754       "NUMWORDS" also handles the ordinal forms of numbers. So:
755
756               print scalar NUMWORDS('1st');
757               print scalar NUMWORDS('3rd');
758               print scalar NUMWORDS('202nd');
759               print scalar NUMWORDS('1000000th');
760
761       print:
762
763               first
764               third
765               two hundred and second
766               one millionth
767
768       Two common idioms in this regard are:
769
770               print scalar NUMWORDS(ORD($number));
771
772       and:
773
774               print scalar ORD(NUMWORDS($number));
775
776       These are identical in effect, except when $number contains a decimal:
777
778               $number = 99.09;
779               print scalar NUMWORDS(ORD($number));    # ninety-ninth point zero nine
780               print scalar ORD(NUMWORDS($number));    # ninety-nine point zero ninth
781
782       Use whichever you feel is most appropriate.
783

CONVERTING LISTS OF WORDS TO PHRASES

785       When creating a list of words, commas are used between adjacent items,
786       except if the items contain commas, in which case semicolons are used.
787       But if there are fewer than three items, the commas/semicolons are omitted
788       entirely. The final item also has a conjunction (usually "and" or "or")
789       before it. And although it's technically incorrect (and sometimes
790       misleading), some people prefer to omit the comma before that final
791       conjunction, even when there are more than two items.
792
793       That's complicated enough to warrant its own subroutine: WORDLIST().
794       This subroutine expects a list of words, possibly with one or more hash
795       references containing options. It returns a string that joins the list
796       together in the normal English usage. For example:
797
798           print "You chose ", WORDLIST(@selected_items), "\n";
799           # You chose barley soup, roast beef, and Yorkshire pudding
800
801           print "You chose ", WORDLIST(@selected_items, {final_sep=>""}), "\n";
802           # You chose barley soup, roast beef and Yorkshire pudding
803
804           print "Please choose ", WORDLIST(@side_orders, {conj=>"or"}), "\n";
805           # Please choose salad, vegetables, or ice-cream
806
807       The available options are:
808
809           Option name     Specifies                Default value
810
811           conj            Final conjunction        "and"
812           sep             Inter-item separator     ","
813           final_sep       Final separator          value of 'sep' option
814

INTERPOLATING INFLECTIONS IN STRINGS

816       By far the commonest use of the inflection subroutines is to produce
817       message strings for various purposes. For example:
818
819               print NUM($errors), PL_N(" error"), PL_V(" was"), " detected.\n";
820               print PL_ADJ("This"), PL_N(" error"), PL_V(" was"), " fatal.\n"
821                       if $severity > 1;
822
823       Unfortunately the need to separate each subroutine call detracts
824       significantly from the readability of the resulting code. To ameliorate
825       this problem, Lingua::EN::Inflect provides an exportable string-
826       interpolating subroutine (inflect($)), which recognizes calls to the
827       various inflection subroutines within a string and interpolates them
828       appropriately.
829
830       Using "inflect" the previous example could be rewritten:
831
832               print inflect "NUM($errors) PL_N(error) PL_V(was) detected.\n";
833               print inflect "PL_ADJ(This) PL_N(error) PL_V(was) fatal.\n"
834                       if $severity > 1;
835
836       Note that "inflect" also correctly handles calls to the NUM()
837       subroutine (whether interpolated or antecedent). The inflect()
838       subroutine has a related extra feature, in that it automatically
839       cancels any "default number" value before it returns its interpolated
840       string. This means that calls to NUM() which are embedded in an
841       inflect()-interpolated string do not "escape" and interfere with
842       subsequent inflections.
843

MODERN VS CLASSICAL INFLECTIONS

845       Certain words, mainly of Latin or Ancient Greek origin, can form
846       plurals either using the standard English "-s" suffix, or with their
847       original Latin or Greek inflections. For example:
848
849               PL("stigma")            # -> "stigmas" or "stigmata"
850               PL("torus")             # -> "toruses" or "tori"
851               PL("index")             # -> "indexes" or "indices"
852               PL("millennium")        # -> "millenniums" or "millennia"
853               PL("ganglion")          # -> "ganglions" or "ganglia"
854               PL("octopus")           # -> "octopuses" or "octopodes"
855
856       Lingua::EN::Inflect caters to such words by providing an "alternate
857       state" of inflection known as "classical mode".  By default, words are
858       inflected using their contemporary English plurals, but if classical
859       mode is invoked, the more traditional plural forms are returned
860       instead.
861
862       The exportable subroutine classical() controls this feature.  If
863       classical() is called with no arguments, it unconditionally invokes
864       classical mode. If it is called with a single argument, it turns all
865       classical inflections on or off (depending on whether the argument is
866       true or false). If called with two or more arguments, those arguments
867       specify which aspects of classical behaviour are to be used.
868
869       Thus:
870
871               classical;                  # SWITCH ON CLASSICAL MODE
872               print PL("formula");        # -> "formulae"
873
874               classical 0;                # SWITCH OFF CLASSICAL MODE
875               print PL("formula");        # -> "formulas"
876
877               classical $cmode;           # CLASSICAL MODE IFF $cmode
878               print PL("formula");        # -> "formulae" (IF $cmode)
879                                           # -> "formulas" (OTHERWISE)
880
881               classical herd=>1;          # SWITCH ON CLASSICAL MODE FOR "HERD" NOUNS
882               print PL("wildebeest");     # -> "wildebeest"
883
884               classical names=>1;         # SWITCH ON CLASSICAL MODE FOR NAMES
885               print PL("sally");          # -> "sallies"
886               print PL("Sally");          # -> "Sallys"
887
888       Note however that classical() has no effect on the inflection of words
889       which are now fully assimilated. Hence:
890
891               PL("forum")             # ALWAYS -> "forums"
892               PL("criterion")         # ALWAYS -> "criteria"
893
894       Lingua::EN::Inflect assumes that a capitalized word is a person's
895       name. So it forms the plural according to the rules for names (which
896       is that you don't inflect, you just add -s or -es). You can choose to
897       turn that behaviour off (it's on by default, even when the module
898       isn't in classical mode) by calling classical(names=>0).
899

USER-DEFINED INFLECTIONS

901   Adding plurals at run-time
902       Lingua::EN::Inflect provides five exportable subroutines which allow
903       the programmer to override the module's behaviour for specific cases:
904
905       def_noun($$)
906               The "def_noun" subroutine takes a pair of string arguments: the
907               singular and plural forms of the noun being specified. The
908               singular form specifies a pattern to be interpolated (as
909               "m/^(?:$first_arg)$/i").  Any noun matching this pattern is
910               then replaced by the string in the second argument. The second
911               argument specifies a string which is interpolated after the
912               match succeeds, and is then used as the plural form. For
913               example:
914
915                     def_noun  'cow'        => 'kine';
916                     def_noun  '(.+i)o'     => '$1i';
917                     def_noun  'spam(mer)?' => '\\$\\%\\@#\\$\\@#!!';
918
919               Note that both arguments should usually be specified in single
920               quotes, so that they are not interpolated when they are
921               specified, but later (when words are compared to them). As
922               indicated by the last example, care also needs to be taken with
923               certain characters in the second argument, to ensure that they
924               are not unintentionally interpolated during comparison.
925
926               The second argument string may also specify a second variant of
927               the plural form, to be used when "classical" plurals have been
928               requested. The beginning of the second variant is marked by a
929               '|' character:
930
931                     def_noun  'cow'        => 'cows|kine';
932                     def_noun  '(.+i)o'     => '$1os|$1i';
933                     def_noun  'spam(mer)?' => '\\$\\%\\@#\\$\\@#!!|varmints';
934
935               If no classical variant is given, the specified plural form is
936               used in both normal and "classical" modes.
937
938               If the second argument is "undef" instead of a string, then the
939               current user definition for the first argument is removed, and
940               the standard plural inflection(s) restored.
941
942               Note that in all cases, later plural definitions for a
943               particular singular form replace earlier definitions of the
944               same form. For example:
945
946                     # FIRST, HIDE THE MODERN FORM....
947                     def_noun  'aviatrix' => 'aviatrices';
948
949                     # LATER, HIDE THE CLASSICAL FORM...
950                     def_noun  'aviatrix' => 'aviatrixes';
951
952                     # FINALLY, RESTORE THE DEFAULT BEHAVIOUR...
953                     def_noun  'aviatrix' => undef;
954
955               Special care is also required when defining general patterns
956               and associated specific exceptions: put the more specific cases
957               after the general pattern. For example:
958
959                     def_noun  '(.+)us' => '$1i';      # EVERY "-us" TO "-i"
960                     def_noun  'bus'    => 'buses';    # EXCEPT FOR "bus"
961
962               This "try-most-recently-defined-first" approach to matching
963               user-defined words is also used by "def_verb", "def_a" and
964               "def_an".
965
966       def_verb($$$$$$)
967               The "def_verb" subroutine takes three pairs of string arguments
968               (that is, six arguments in total), specifying the singular and
969               plural forms of the three "persons" of verb. As with
970               "def_noun", the singular forms are specifications of run-time-
971               interpolated patterns, whilst the plural forms are
972               specifications of (up to two) run-time-interpolated strings:
973
974                      def_verb 'am'       => 'are',
975                               'are'      => 'are|art',
976                               'is'       => 'are';
977
978                      def_verb 'have'     => 'have',
979                               'have'     => 'have',
980                               'ha(s|th)' => 'have';
981
982               Note that as with "def_noun", modern/classical variants of
983               plurals may be separately specified, subsequent definitions
984               replace previous ones, and "undef"'ed plural forms revert to
985               the standard behaviour.
986
987       def_adj($$)
988               The "def_adj" subroutine takes a pair of string arguments,
989               which specify the singular and plural forms of the adjective
990               being defined.  As with "def_noun" and "def_verb", the singular
991               forms are specifications of run-time-interpolated patterns,
992               whilst the plural forms are specifications of (up to two) run-
993               time-interpolated strings:
994
995                      def_adj  'this'     => 'these';
996                      def_adj  'red'      => 'red|gules';
997
998               As previously, modern/classical variants of plurals may be
999               separately specified, subsequent definitions replace previous
1000               ones, and "undef"'ed plural forms revert to the standard
1001               behaviour.
1002
1003       def_a($) and def_an($)
1004               The "def_a" and "def_an" subroutines each take a single
1005               argument, which specifies a pattern. If a word passed to A() or
1006               AN() matches this pattern, it will be prefixed
1007               (unconditionally) with the corresponding indefinite article.
1008               For example:
1009
1010                     def_a  'error';
1011                     def_a  'in.+';
1012
1013                     def_an 'mistake';
1014                     def_an 'error';
1015
1016               As with the other "def_..." subroutines, such redefinitions are
1017               sequential in effect so that, after the above example, "error"
1018               will be inflected with "an".
1019
1020   The $HOME/.inflectrc file
1021       When it is imported, Lingua::EN::Inflect executes (as Perl code) the
1022       contents of any file named .inflectrc which it finds in the
1023       directory where Lingua/EN/Inflect.pm is installed, or in the current
1024       home directory ($ENV{HOME}), or in both.  Note that the code is
1025       executed within the Lingua::EN::Inflect namespace.
1026
1027       Hence the user or the local Perl guru can make appropriate calls to
1028       "def_noun", "def_verb", etc. in one of these .inflectrc files, to
1029       permanently and universally modify the behaviour of the module. For
1030       example
1031
1032             > cat /usr/local/lib/perl5/Lingua/EN/.inflectrc
1033
1034             def_noun  "UNIX"  => "UN*X|UNICES";
1035
1036             def_verb  "teco"  => "teco",      # LITERALLY: "to edit with TECO"
1037                       "teco"  => "teco",
1038                       "tecos" => "teco";
1039
1040             def_a     "Euler.*";              # "Yewler" TURNS IN HIS GRAVE
1041
1042       Note that calls to the "def_..." subroutines from within a program will
1043       take precedence over the contents of the home directory .inflectrc
1044       file, which in turn takes precedence over the system-wide .inflectrc
1045       file.
1046

DIAGNOSTICS

1048       On loading, if the Perl code in a .inflectrc file is invalid
1049       (syntactically or otherwise), an appropriate fatal error is issued.  A
1050       common problem is not ending the file with something that evaluates to
1051       true (as the five "def_..." subroutines do).
1052
1053       Using the five "def_..." subroutines directly in a program may also
1054       result in fatal diagnostics, if a (singular) pattern or an interpolated
1055       (plural) string is somehow invalid.
1056
1057       Specific diagnostics related to user-defined inflections are:
1058
1059       "Bad user-defined singular pattern:\n\t %s"
1060               The singular form of a user-defined noun or verb (as defined by
1061               a call to "def_noun", "def_verb", "def_adj", "def_a" or
1062               "def_an") is not a valid Perl regular expression. The actual
1063               Perl error message is also given.
1064
1065       "Bad user-defined plural string: '%s'"
1066               The plural form(s) of a user-defined noun or verb (as defined
1067               by a call to "def_noun", "def_verb" or "def_adj") is not a
1068               valid Perl interpolated string (usually because it interpolates
1069               some undefined variable).
1070
1071       "Bad .inflectrc file (%s):\n %s"
1072               Some other problem occurred in loading the named local or
1073               global .inflectrc file. The Perl error message (including the
1074               line number) is also given.
1075
1076       There are no diagnosable run-time error conditions for the actual
1077       inflection subroutines, except "NUMWORDS" and hence no run-time
1078       diagnostics. If the inflection subroutines are unable to form a plural
1079       via a user-definition or an inbuilt rule, they just "guess" the
1080       commonest English inflection: adding "-s" for nouns, removing "-s" for
1081       verbs, and no inflection for adjectives.
1082
1083       Lingua::EN::Inflect::NUMWORDS() can "die" with the following messages:
1084
1085       "Bad grouping option: %s"
1086               The optional argument to NUMWORDS() wasn't 1, 2 or 3.
1087
1088       "Number out of range"
1089               NUMWORDS() was passed a number larger than the number
1090               represented by 3006 consecutive nines. The words representing
1091               that number are 63,681 characters long, including commas and
1092               spaces. If you're interested in the actual value, see
1093               t/numwords.t.
1094
1095               The reference for the names is
1096               http://en.wikipedia.org/wiki/Names_of_large_numbers
1097
1098               There are no names for any higher numbers.
1099

OTHER ISSUES

1101   2nd Person precedence
1102       If a verb has identical 1st and 2nd person singular forms, but
1103       different 1st and 2nd person plural forms, then when its plural is
1104       constructed, the 2nd person plural form is always preferred.
1105
1106       The author is not currently aware of any such verbs in English, but is
1107       not quite arrogant enough to assume ipso facto that none exist.
1108
1109   Nominative precedence
1110       The singular pronoun "it" presents a special problem because its plural
1111       form can vary, depending on its "case". For example:
1112
1113               It ate my homework       ->  They ate my homework
1114               It ate it                ->  They ate them
1115               I fed my homework to it  ->  I fed my homework to them
1116
1117       As a consequence of this ambiguity, PL() and "PL_N" have been
1118       implemented so that they always return the nominative plural (that is,
1119       "they").
1120
1121       However, when asked for the plural of an unambiguously accusative "it"
1122       (namely, "PL("to it")", "PL_N("from it")", "PL("with it")", etc.), both
1123       subroutines will correctly return the accusative plural ("to them",
1124       "from them", "with them", etc.)
1125
1126   The plurality of zero
1127       The rules governing the choice between:
1128
1129             There were no errors.
1130
1131       and
1132
1133             There was no error.
1134
1135       are complex and often depend more on intent than on content.  Hence
1136       it is infeasible to specify such rules algorithmically.
1137
1138       Therefore, Lingua::EN::Inflect contents itself with the following
1139       compromise: If the governing number is zero, inflections always return
1140       the plural form unless the appropriate "classical" inflection is in
1141       effect, in which case the singular form is always returned.
1142
1143       Thus, the sequence:
1144
1145             NUM(0);
1146             print inflect "There PL(was) NO(choice)";
1147
1148       produces "There were no choices", whereas:
1149
1150             classical 'zero';     # or: classical(zero=>1);
1151             NUM(0);
1152             print inflect "There PL(was) NO(choice)";
1153
1154       prints "There was no choice".
1155
1156   Homographs with heterogeneous plurals
1157       Another context in which intent (and not content) sometimes determines
1158       plurality is where two distinct meanings of a word require different
1159       plurals. For example:
1160
1161             Three basses were stolen from the band's equipment trailer.
1162             Three bass were stolen from the band's aquarium.
1163
1164             I put the mice next to the cheese.
1165             I put the mouses next to the computers.
1166
1167             Several thoughts about leaving crossed my mind.
1168             Several thought about leaving across my lawn.
1169
1170       Lingua::EN::Inflect handles such words in two ways:
1171
1172       •       If both meanings of the word are the same part of speech (for
1173               example, "bass" is a noun in both sentences above), then one
1174               meaning is chosen as the "usual" meaning, and only that
1175               meaning's plural is ever returned by any of the inflection
1176               subroutines.
1177
1178       •       If each meaning of the word is a different part of speech (for
1179               example, "thought" is both a noun and a verb), then the noun's
1180               plural is returned by PL() and PL_N() and the verb's plural is
1181               returned only by PL_V().
1182
1183       Such contexts are, fortunately, uncommon (particularly "same-part-of-
1184       speech" examples). An informal study of nearly 600 "difficult plurals"
1185       indicates that PL() can be relied upon to "get it right" about 98% of
1186       the time (although, of course, ichthyophilic guitarists or cyber-
1187       behaviouralists may experience higher rates of confusion).
1188
1189       If the choice of a particular "usual inflection" is considered
1190       inappropriate, it can always be reversed with a preliminary call to the
1191       corresponding "def_..." subroutine.
1192

NOTE

1194       I'm not taking any further correspondence on:
1195
1196       "octopi".
1197           Despite the populist pandering of certain New World dictionaries,
1198           the plural is "octopuses" or (for the pedantic classicist)
1199           "octopodes". The suffix "-pus" is Greek, not Latin, so the plural
1200           is "-podes", not "-pi".
1201
1202       "virus".
1203           Had no plural in Latin (possibly because it was a mass noun).  The
1204           only plural is the Anglicized "viruses".
1205

AUTHOR

1207       Damian Conway (damian@conway.org)
1208

BUGS AND IRRITATIONS

1210       The endless inconsistencies of English.
1211
1212       (Please report words for which the correct plural or indefinite article
1213       is not formed, so that the reliability of Lingua::EN::Inflect can be
1214       improved.)
1215
1217        Copyright (c) 1997-2009, Damian Conway. All Rights Reserved.
1218        This module is free software. It may be used, redistributed
1219            and/or modified under the same terms as Perl itself.
1220
1221
1222
1223perl v5.36.0                      2023-01-20            Lingua::EN::Inflect(3)
Impressum