1Lingua::EN::Inflect(3)User Contributed Perl DocumentationLingua::EN::Inflect(3)
2
3
4

NAME

6       Lingua::EN::Inflect - Convert singular to plural. Select "a" or "an".
7

VERSION

9       This document describes version 1.892 of Lingua::EN::Inflect
10

SYNOPSIS

12        use Lingua::EN::Inflect qw ( PL PL_N PL_V PL_ADJ NO NUM
13                         PL_eq PL_N_eq PL_V_eq PL_ADJ_eq
14                         A AN
15                         PART_PRES
16                         ORD NUMWORDS
17                         WORDLIST
18                         inflect classical
19                         def_noun def_verb def_adj def_a def_an );
20
21
22        # UNCONDITIONALLY FORM THE PLURAL
23
24             print "The plural of ", $word, " is ", PL($word), "\n";
25
26
27        # CONDITIONALLY FORM THE PLURAL
28
29             print "I saw $cat_count ", PL("cat",$cat_count), "\n";
30
31
32        # FORM PLURALS FOR SPECIFIC PARTS OF SPEECH
33
34             print PL_N("I",$N1), PL_V("saw",$N1),
35               PL_ADJ("my",$N2), PL_N("saw",$N2), "\n";
36
37
38        # DEAL WITH "0/1/N" -> "no/1/N" TRANSLATION:
39
40             print "There ", PL_V("was",$errors), NO(" error",$errors), "\n";
41
42
43        # USE DEFAULT COUNTS:
44
45             print NUM($N1,""), PL("I"), PL_V(" saw"), NUM($N2), PL_N(" saw");
46             print "There ", NUM($errors,''), PL_V("was"), NO(" error"), "\n";
47
48
49        # COMPARE TWO WORDS "NUMBER-INSENSITIVELY":
50
51             print "same\n"      if PL_eq($word1, $word2);
52             print "same noun\n" if PL_N_eq($word1, $word2);
53             print "same verb\n" if PL_V_eq($word1, $word2);
54             print "same adj.\n" if PL_ADJ_eq($word1, $word2);
55
56
57        # ADD CORRECT "a" OR "an" FOR A GIVEN WORD:
58
59             print "Did you want ", A($thing), " or ", AN($idea), "\n";
60
61
62        # CONVERT NUMERALS INTO ORDINALS (i.e. 1->1st, 2->2nd, 3->3rd, etc.)
63
64             print "It was ", ORD($position), " from the left\n";
65
66        # CONVERT NUMERALS TO WORDS (i.e. 1->"one", 101->"one hundred and one", etc.)
67        # IN A SCALAR CONTEXT: GET BACK A SINGLE STRING...
68
69           $words = NUMWORDS(1234);      # "one thousand, two hundred and thirty-four"
70           $words = NUMWORDS(ORD(1234)); # "one thousand, two hundred and thirty-fourth"
71
72
73        # IN A LIST CONTEXT: GET BACK A LIST OF STRINGS, ONE FOR EACH "CHUNK"...
74
75           @words = NUMWORDS(1234);    # ("one thousand","two hundred and thirty-four")
76
77
78        # OPTIONAL PARAMETERS CHANGE TRANSLATION:
79
80           $words = NUMWORDS(12345, group=>1);
81                       # "one, two, three, four, five"
82
83           $words = NUMWORDS(12345, group=>2);
84                       # "twelve, thirty-four, five"
85
86           $words = NUMWORDS(12345, group=>3);
87                       # "one twenty-three, forty-five"
88
89           $words = NUMWORDS(1234, 'and'=>'');
90                       # "one thousand, two hundred thirty-four"
91
92           $words = NUMWORDS(1234, 'and'=>', plus');
93                       # "one thousand, two hundred, plus thirty-four"
94
95           $words = NUMWORDS(555_1202, group=>1, zero=>'oh');
96                       # "five, five, five, one, two, oh, two"
97
98           $words = NUMWORDS(555_1202, group=>1, one=>'unity');
99                       # "five, five, five, unity, two, zero, two"
100
101           $words = NUMWORDS(123.456, group=>1, decimal=>'mark');
102                       # "one two three mark four five six"
103
104       # LITERAL STYLE ONLY NAMES NUMBERS LESS THAN A CERTAIN THRESHOLD...
105
106           $words = NUMWORDS(   9, threshold=>10);    # "nine"
107           $words = NUMWORDS(  10, threshold=>10);    # "ten"
108           $words = NUMWORDS(  11, threshold=>10);    # "11"
109           $words = NUMWORDS(1000, threshold=>10);    # "1,000"
110
111        # JOIN WORDS INTO A LIST:
112
113           $list = WORDLIST("apple", "banana", "carrot");
114                       # "apple, banana, and carrot"
115
116           $list = WORDLIST("apple", "banana");
117                       # "apple and banana"
118
119           $list = WORDLIST("apple", "banana", "carrot", {final_sep=>""});
120                       # "apple, banana and carrot"
121
122
123        # REQUIRE "CLASSICAL" PLURALS (EG: "focus"->"foci", "cherub"->"cherubim")
124
125             classical;          # USE ALL CLASSICAL PLURALS
126
127             classical 1;           #  USE ALL CLASSICAL PLURALS
128             classical 0;           #  USE ALL MODERN PLURALS (DEFAULT)
129
130             classical 'zero';      #  "no error" INSTEAD OF "no errors"
131             classical zero=>1;     #  "no error" INSTEAD OF "no errors"
132             classical zero=>0;     #  "no errors" INSTEAD OF "no error"
133
134             classical 'herd';      #  "2 buffalo" INSTEAD OF "2 buffalos"
135             classical herd=>1;     #  "2 buffalo" INSTEAD OF "2 buffalos"
136             classical herd=>0;     #  "2 buffalos" INSTEAD OF "2 buffalo"
137
138             classical 'persons';   # "2 chairpersons" INSTEAD OF "2 chairpeople"
139             classical persons=>1;  # "2 chairpersons" INSTEAD OF "2 chairpeople"
140             classical persons=>0;  # "2 chairpeople" INSTEAD OF "2 chairpersons"
141
142             classical 'ancient';   # "2 formulae" INSTEAD OF "2 formulas"
143             classical ancient=>1;  # "2 formulae" INSTEAD OF "2 formulas"
144             classical ancient=>0;  # "2 formulas" INSTEAD OF "2 formulae"
145
146
147
148        # INTERPOLATE "PL()", "PL_N()", "PL_V()", "PL_ADJ()", "A()", "AN()",
149        # "NUM()" AND "ORD()" WITHIN STRINGS:
150
151             print inflect("The plural of $word is PL($word)\n");
152             print inflect("I saw $cat_count PL(cat,$cat_count)\n");
153             print inflect("PL(I,$N1) PL_V(saw,$N1) PL(a,$N2) PL_N(saw,$N2)");
154             print inflect("NUM($N1,)PL(I) PL_V(saw) NUM($N2,)PL(a) PL_N(saw)");
155             print inflect("I saw NUM($cat_count) PL(cat)\nNUM()");
156             print inflect("There PL_V(was,$errors) NO(error,$errors)\n");
157             print inflect("There NUM($errors,) PL_V(was) NO(error)\n");
158             print inflect("Did you want A($thing) or AN($idea)\n");
159             print inflect("It was ORD($position) from the left\n");
160
161
162        # ADD USER-DEFINED INFLECTIONS (OVERRIDING INBUILT RULES):
163
164             def_noun  "VAX"  => "VAXen";  # SINGULAR => PLURAL
165
166             def_verb  "will" => "shall",  # 1ST PERSON SINGULAR => PLURAL
167                       "will" => "will",   # 2ND PERSON SINGULAR => PLURAL
168                       "will" => "will";   # 3RD PERSON SINGULAR => PLURAL
169
170             def_adj   "hir"  => "their";  # SINGULAR => PLURAL
171
172             def_a "h";        # "AY HALWAYS SEZ 'HAITCH'!"
173
174             def_an    "horrendous.*";     # "AN HORRENDOUS AFFECTATION"
175

DESCRIPTION

177       The exportable subroutines of Lingua::EN::Inflect provide plural
178       inflections, "a"/"an" selection for English words, and manipulation of
179       numbers as words.
180
181       Plural forms of all nouns, most verbs, and some adjectives are
182       provided. Where appropriate, "classical" variants (for example:
183       "brother" -> "brethren", "dogma" -> "dogmata", etc.) are also provided.
184
185       Pronunciation-based "a"/"an" selection is provided for all English
186       words, and most initialisms.
187
188       It is also possible to inflect numerals (1,2,3) to ordinals (1st, 2nd,
189       3rd) and to English words ("one", "two", "three").
190
191       In generating these inflections, Lingua::EN::Inflect follows the Oxford
192       English Dictionary and the guidelines in Fowler's Modern English Usage,
193       preferring the former where the two disagree.
194
195       The module is built around standard British spelling, but is designed
196       to cope with common American variants as well. Slang, jargon, and other
197       English dialects are not explicitly catered for.
198
199       Where two or more inflected forms exist for a single word (typically a
200       "classical" form and a "modern" form), Lingua::EN::Inflect prefers the
201       more common form (typically the "modern" one), unless "classical"
202       processing has been specified (see "MODERN VS CLASSICAL INFLECTIONS").
203

FORMING PLURALS

205   Inflecting Plurals
206       All of the "PL_..." plural inflection subroutines take the word to be
207       inflected as their first argument and return the corresponding
208       inflection.  Note that all such subroutines expect the singular form of
209       the word. The results of passing a plural form are undefined (and
210       unlikely to be correct).
211
212       The "PL_..." subroutines also take an optional second argument, which
213       indicates the grammatical "number" of the word (or of another word with
214       which the word being inflected must agree). If the "number" argument is
215       supplied and is not 1 (or "one" or "a", or some other adjective that
216       implies the singular), the plural form of the word is returned. If the
217       "number" argument does indicate singularity, the (uninflected) word
218       itself is returned. If the number argument is omitted, the plural form
219       is returned unconditionally.
220
221       The various subroutines are:
222
223       "PL_N($;$)"
224               The exportable subroutine "PL_N()" takes a singular English
225               noun or pronoun and returns its plural. Pronouns in the
226               nominative ("I" -> "we") and accusative ("me" -> "us") cases
227               are handled, as are possessive pronouns ("mine" -> "ours").
228
229       "PL_V($;$)"
230               The exportable subroutine "PL_V()" takes the singular form of a
231               conjugated verb (that is, one which is already in the correct
232               "person" and "mood") and returns the corresponding plural
233               conjugation.
234
235       "PL_ADJ($;$)"
236               The exportable subroutine "PL_ADJ()" takes the singular form of
237               certain types of adjectives and returns the corresponding
238               plural form.  Adjectives that are correctly handled include:
239               "numerical" adjectives ("a" -> "some"), demonstrative
240               adjectives ("this" -> "these", "that" -> "those"), and
241               possessives ("my" -> "our", "cat's" -> "cats'", "child's" ->
242               "children's", etc.)
243
244       "PL($;$)"
245               The exportable subroutine "PL()" takes a singular English noun,
246               pronoun, verb, or adjective and returns its plural form. Where
247               a word has more than one inflection depending on its part of
248               speech (for example, the noun "thought" inflects to "thoughts",
249               the verb "thought" to "thought"), the (singular) noun sense is
250               preferred to the (singular) verb sense.
251
252               Hence "PL("knife")" will return "knives" ("knife" having been
253               treated as a singular noun), whereas "PL("knifes")" will return
254               "knife" ("knifes" having been treated as a 3rd person singular
255               verb).
256
257               The inherent ambiguity of such cases suggests that, where the
258               part of speech is known, "PL_N", "PL_V", and "PL_ADJ" should be
259               used in preference to "PL".
260
261       Note that all these subroutines ignore any whitespace surrounding the
262       word being inflected, but preserve that whitespace when the result is
263       returned. For example, "PL(" cat  ")" returns " cats  ".
264
265   Numbered plurals
266       The "PL_..." subroutines return only the inflected word, not the count
267       that was used to inflect it. Thus, in order to produce "I saw 3 ducks",
268       it is necessary to use:
269
270           print "I saw $N ", PL_N($animal,$N), "\n";
271
272       Since the usual purpose of producing a plural is to make it agree with
273       a preceding count, Lingua::EN::Inflect provides an exportable
274       subroutine ("NO($;$)") which, given a word and a(n optional) count,
275       returns the count followed by the correctly inflected word. Hence the
276       previous example can be rewritten:
277
278           print "I saw ", NO($animal,$N), "\n";
279
280       In addition, if the count is zero (or some other term which implies
281       zero, such as "zero", "nil", etc.) the count is replaced by the word
282       "no". Hence, if $N had the value zero, the previous example would print
283       the somewhat more elegant:
284
285           I saw no animals
286
287       rather than:
288
289           I saw 0 animals
290
291       Note that the name of the subroutine is a pun: the subroutine returns
292       either a number (a No.) or a "no", in front of the inflected word.
293
294   Reducing the number of counts required
295       In some contexts, the need to supply an explicit count to the various
296       "PL_..." subroutines makes for tiresome repetition. For example:
297
298           print PL_ADJ("This",$errors), PL_N(" error",$errors),
299                 PL_V(" was",$errors), " fatal.\n";
300
301       Lingua::EN::Inflect therefore provides an exportable subroutine
302       ("NUM($;$)") which may be used to set a persistent "default number"
303       value. If such a value is set, it is subsequently used whenever an
304       optional second "number" argument is omitted. The default value thus
305       set can subsequently be removed by calling "NUM()" with no arguments.
306       Hence we could rewrite the previous example:
307
308           NUM($errors);
309           print PL_ADJ("This"), PL_N(" error"), PL_V(" was"), " fatal.\n";
310           NUM();
311
312       Normally, "NUM()" returns its first argument, so that it may also be
313       "inlined" in contexts like:
314
315           print NUM($errors), PL_N(" error"), PL_V(" was"), " detected.\n";
316           print PL_ADJ("This"), PL_N(" error"), PL_V(" was"), " fatal.\n"
317               if $severity > 1;
318
319       However, in certain contexts (see "INTERPOLATING INFLECTIONS IN
320       STRINGS") it is preferable that "NUM()" return an empty string. Hence
321       "NUM()" provides an optional second argument. If that argument is
322       supplied (that is, if it is defined) and evaluates to false, "NUM"
323       returns an empty string instead of its first argument. For example:
324
325           print NUM($errors,0), NO("error"), PL_V(" was"), " detected.\n";
326           print PL_ADJ("This"), PL_N(" error"), PL_V(" was"), " fatal.\n"
327               if $severity > 1;
328
329   Number-insensitive equality
330       Lingua::EN::Inflect also provides a solution to the problem of
331       comparing words of differing plurality through the exportable
332       subroutines "PL_eq($$)", "PL_N_eq($$)", "PL_V_eq($$)", and
333       "PL_ADJ_eq($$)".  Each  of these subroutines takes two strings, and
334       compares them using the corresponding plural-inflection subroutine
335       ("PL()", "PL_N()", "PL_V()", and "PL_ADJ()" respectively).
336
337       The comparison returns true if:
338
339       ·       the strings are "eq"-equal, or
340
341       ·       one string is "eq"-equal to a plural form of the other, or
342
343       ·       the strings are two different plural forms of the one word.
344
345       Hence all of the following return true:
346
347           PL_eq("index","index")      # RETURNS "eq"
348           PL_eq("index","indexes")    # RETURNS "s:p"
349           PL_eq("index","indices")    # RETURNS "s:p"
350           PL_eq("indexes","index")    # RETURNS "p:s"
351           PL_eq("indices","index")    # RETURNS "p:s"
352           PL_eq("indices","indexes")  # RETURNS "p:p"
353           PL_eq("indexes","indices")  # RETURNS "p:p"
354           PL_eq("indices","indices")  # RETURNS "eq"
355
356       As indicated by the comments in the previous example, the actual value
357       returned by the various "PL_eq" subroutines encodes which of the three
358       equality rules succeeded: "eq" is returned if the strings were
359       identical, "s:p" if the strings were singular and plural respectively,
360       "p:s" for plural and singular, and "p:p" for two distinct plurals.
361       Inequality is indicated by returning an empty string.
362
363       It should be noted that two distinct singular words which happen to
364       take the same plural form are not considered equal, nor are cases where
365       one (singular) word's plural is the other (plural) word's singular.
366       Hence all of the following return false:
367
368           PL_eq("base","basis")       # ALTHOUGH BOTH -> "bases"
369           PL_eq("syrinx","syringe")   # ALTHOUGH BOTH -> "syringes"
370           PL_eq("she","he")       # ALTHOUGH BOTH -> "they"
371
372           PL_eq("opus","operas")      # ALTHOUGH "opus" -> "opera" -> "operas"
373           PL_eq("taxi","taxes")       # ALTHOUGH "taxi" -> "taxis" -> "taxes"
374
375       Note too that, although the comparison is "number-insensitive" it is
376       not case-insensitive (that is, "PL_eq("time","Times")" returns false).
377       To obtain both number and case insensitivity, prefix both arguments
378       with "lc" (that is, "PL_eq(lc "time", lc "Times")" returns true).
379

OTHER VERB FORMS

381   Present participles
382       "Lingua::EN::Inflect" also provides the "PART_PRES" subroutine, which
383       can take a 3rd person singular verb and correctly inflect it to its
384       present participle:
385
386           PART_PRES("runs")   # "running"
387           PART_PRES("loves")  # "loving"
388           PART_PRES("eats")   # "eating"
389           PART_PRES("bats")   # "batting"
390           PART_PRES("spies")  # "spying"
391

PROVIDING INDEFINITE ARTICLES

393   Selecting indefinite articles
394       Lingua::EN::Inflect provides two exportable subroutines ("A($;$)" and
395       "AN($;$)") which will correctly prepend the appropriate indefinite
396       article to a word, depending on its pronunciation. For example:
397
398           A("cat")        # -> "a cat"
399           AN("cat")       # -> "a cat"
400           A("euphemism")      # -> "a euphemism"
401           A("Euler number")   # -> "an Euler number"
402           A("hour")       # -> "an hour"
403           A("houri")      # -> "a houri"
404
405       The two subroutines are identical in function and may be used
406       interchangeably. The only reason that two versions are provided is to
407       enhance the readability of code such as:
408
409           print "That is ", AN($errortype), " error\n";
410           print "That is ", A($fataltype), " fatal error\n";
411
412       Note that in both cases the actual article provided depends only on the
413       pronunciation of the first argument, not on the name of the subroutine.
414
415       "A()" and "AN()" will ignore any indefinite article that already exists
416       at the start of the string. Thus:
417
418           @half_arked = (
419               "a elephant",
420               "a giraffe",
421               "an ewe",
422               "a orangutan",
423           );
424
425           print A($_), "\n" for @half_arked;
426
427           # prints:
428           #     an elephant
429           #     a giraffe
430           #     a ewe
431           #     an orangutan
432
433       "A()" and "AN()" both take an optional second argument. As with the
434       "PL_..." subroutines, this second argument is a "number" specifier. If
435       its value is 1 (or some other value implying singularity), "A()" and
436       "AN()" insert "a" or "an" as appropriate. If the number specifier
437       implies plurality, "A()" and "AN()" insert the actual second argument
438       instead.  For example:
439
440           A("cat",1)      # -> "a cat"
441           A("cat",2)      # -> "2 cat"
442           A("cat","one")      # -> "one cat"
443           A("cat","no")       # -> "no cat"
444
445       Note that, as implied by the previous examples, "A()" and "AN()" both
446       assume that their job is merely to provide the correct qualifier for a
447       word (that is: "a", "an", or the specified count).  In other words,
448       they assume that the word they are given has already been correctly
449       inflected for plurality. Hence, if $N has the value 2, then:
450
451             print A("cat",$N);
452
453       prints "2 cat", instead of "2 cats". The correct approach is to use:
454
455             print A(PL("cat",$N),$N);
456
457       or, better still:
458
459             print NO("cat",$N);
460
461       Note too that, like the various "PL_..." subroutines, whenever "A()"
462       and "AN()" are called with only one argument they are subject to the
463       effects of any preceding call to "NUM()". Hence, another possible
464       solution is:
465
466             NUM($N);
467             print A(PL("cat"));
468
469   Indefinite articles and initialisms
470       "Initialisms" (sometimes inaccurately called "acronyms") are terms
471       which have been formed from the initial letters of words in a phrase
472       (for example, "NATO", "NBL", "S.O.S.", "SCUBA", etc.)
473
474       Such terms present a particular challenge when selecting between "a"
475       and "an", since they are sometimes pronounced as if they were a single
476       word ("nay-tow", "sku-ba") and sometimes as a series of letter names
477       ("en-eff-ell", "ess-oh-ess").
478
479       "A()" and "AN()" cope with this dichotomy using a series of inbuilt
480       rules, which may be summarized as:
481
482       1.      If the word starts with a single letter, followed by a period
483               or dash (for example, "R.I.P.", "C.O.D.", "e-mail", "X-ray",
484               "T-square"), then choose the appropriate article for the sound
485               of the first letter ("an R.I.P.", "a C.O.D.", "an e-mail", "an
486               X-ray", "a T-square").
487
488       2.      If the first two letters of the word are capitals, consonants,
489               and do not appear at the start of any known English word, (for
490               example, "LCD", "XML", "YWCA"), then once again choose "a" or
491               "an" depending on the sound of the first letter ("an LCD", "an
492               XML", "a YWCA").
493
494       3.      Otherwise, assume the string is a capitalized word or a
495               pronounceable initialism (for example, "LED", "OPEC", "FAQ",
496               "UNESCO"), and therefore takes "a" or "an" according to the
497               (apparent) pronunciation of the entire word ("a LED", "an
498               OPEC", "a FAQ", "a UNESCO").
499
500       Note that rules 1 and 3 together imply that the presence or absence of
501       punctuation may change the selection of indefinite article for a
502       particular initialism (for example, "a FAQ" but "an F.A.Q.").
503
504   Indefinite articles and "soft H's"
505       Words beginning in the letter 'H' present another type of difficulty
506       when selecting a suitable indefinite article. In a few such words (for
507       example, "hour", "honour", "heir") the 'H' is not voiced at all, and so
508       such words inflect with "an". The remaining cases ("voiced H's") may be
509       divided into two categories: "hard H's" (such as "hangman",
510       "holograph", "hat", etc.) and "soft H's" (such as "hysterical",
511       "horrendous", "holy", etc.)
512
513       Hard H's always take "a" as their indefinite article, and soft H's
514       normally do so as well. But some English speakers prefer "an" for soft
515       H's (although the practice is now generally considered an affectation,
516       rather than a legitimate grammatical alternative).
517
518       At present, the "A()" and "AN()" subroutines ignore soft H's and use
519       "a" for any voiced 'H'. The author would, however, welcome feedback on
520       this decision (envisaging a possible future "soft H" mode).
521

INFLECTING ORDINALS

523       Occasionally it is useful to present an integer value as an ordinal
524       rather than as a numeral. For example:
525
526           Enter password (1st attempt): ********
527           Enter password (2nd attempt): *********
528           Enter password (3rd attempt): *********
529           No 4th attempt. Access denied.
530
531       To this end, Lingua::EN::Inflect provides the "ORD()" subroutine.
532       "ORD()" takes a single argument and forms its ordinal equivalent.  If
533       the argument isn't a numerical integer, it just adds "-th".
534

CONVERTING NUMBERS TO WORDS

536       The exportable subroutine "NUMWORDS" takes a number (cardinal or
537       ordinal) and returns an English representation of that number. In a
538       scalar context a string is returned. Hence:
539
540           use Lingua::EN::Inflect qw( NUMWORDS );
541
542           $words = NUMWORDS(1234567);
543
544       puts the string:
545
546           "one million, two hundred and thirty-four thousand, five hundred and sixty-seven"
547
548       into $words.
549
550       In a list context each comma-separated chunk is returned as a separate
551       element.  Hence:
552
553           @words = NUMWORDS(1234567);
554
555       puts the list:
556
557           ("one million",
558            "two hundred and thirty-four thousand",
559            "five hundred and sixty-seven")
560
561       into @words.
562
563       Non-digits (apart from an optional leading plus or minus sign, any
564       decimal points, and ordinal suffixes -- see below) are silently
565       ignored, so the following all produce identical results:
566
567               NUMWORDS(5551202);
568               NUMWORDS(5_551_202);
569               NUMWORDS("5,551,202");
570               NUMWORDS("555-1202");
571
572       That last case is a little awkward since it's almost certainly a phone
573       number, and "five million, five hundred and fifty-one thousand, two
574       hundred and two" probably isn't what's wanted.
575
576       To overcome this, "NUMWORDS()" takes an optional named argument,
577       'group', which changes how numbers are translated. The argument must be
578       a positive integer less than four, which indicates how the digits of
579       the number are to be grouped. If the argument is 1, then each digit is
580       translated separately. If the argument is 2, pairs of digits (starting
581       from the left) are grouped together. If the argument is 3, triples of
582       digits (again, from the left) are grouped. Hence:
583
584               NUMWORDS("555-1202", group=>1)
585
586       returns "five, five, five, one, two, zero, two", whilst:
587
588               NUMWORDS("555-1202", group=>2)
589
590       returns "fifty-five, fifty-one, twenty, two", and:
591
592               NUMWORDS("555-1202", group=>3)
593
594       returns "five fifty-five, one twenty, two".
595
596       Phone numbers are often written in words as
597       "five..five..five..one..two..zero..two", which is also easy to achieve:
598
599               join '..', NUMWORDS("555-1202", group=>1)
600
601       "NUMWORDS" also handles decimal fractions. Hence:
602
603               NUMWORDS("1.2345")
604
605       returns "one point two three four five" in a scalar context and
606       "("one","point","two","three","four","five")" in a list context.
607       Exponent form ("1.234e56") is not yet handled.
608
609       Multiple decimal points are only translated in one of the "grouping"
610       modes.  Hence:
611
612               NUMWORDS("101.202.303")
613
614       returns "one hundred and one point two zero two three zero three",
615       whereas:
616
617               NUMWORDS("101.202.303", group=>1)
618
619       returns "one zero one point two zero two point three zero three".
620
621       The digit '0' is unusual in that it may be translated to English as
622       "zero", "oh", or "nought". To cater for this diversity, "NUMWORDS" may
623       be passed a named argument, 'zero', which may be set to the desired
624       translation of '0'. For example:
625
626               print join "..", NUMWORDS("555-1202", group=>1, zero=>'oh')
627
628       prints "five..five..five..one..two..oh..two".  By default, zero is
629       rendered as "zero".
630
631       Likewise, the digit '1' may be rendered as "one" or "a/an" (or very
632       occasionally other variants), depending on the context. So there is a
633       'one' argument as well:
634
635               print NUMWORDS($_, one=>'a solitary', zero=>'no more'),
636                     PL(" bottle of beer on the wall\n", $_)
637                          for (3,2,1,0);
638
639               # prints:
640               #     three bottles of beer on the wall
641               #     two bottles of beer on the wall
642               #     a solitary bottle of beer on the wall
643               #     no more bottles of beer on the wall
644
645       Care is needed if the word "a/an" is to be used as a 'one' value.
646       Unless the next word is known in advance, it's almost always necessary
647       to use the "A" function as well:
648
649               print A( NUMWORDS(1, one=>'a') . " $_\n")
650                for qw(cat aardvark ewe hour);
651
652           # prints:
653           #     a cat
654           #     an aardvark
655           #     a ewe
656           #     an hour
657
658       Another major regional variation in number translation is the use of
659       "and" in certain contexts. The named argument 'and' allows the
660       programmer to specify how "and" should be handled. Hence:
661
662               print scalar NUMWORDS("765", 'and'=>'')
663
664       prints "seven hundred sixty-five", instead of "seven hundred and sixty-
665       five".  By default, the "and" is included.
666
667       The translation of the decimal point is also subject to variation (with
668       "point", "dot", and "decimal" being the favorites).  The named argument
669       'decimal' allows the programmer to specify how the decimal point should be
670       rendered. Hence:
671
672               print scalar NUMWORDS("666.124.64.101", group=>3, decimal=>'dot')
673
674       prints "six sixty-six, dot, one twenty-four, dot, sixty-four, dot, one
675       zero one". By default, the decimal point is rendered as "point".
676
677       "NUMWORDS" also handles the ordinal forms of numbers. So:
678
679               print scalar NUMWORDS('1st');
680               print scalar NUMWORDS('3rd');
681               print scalar NUMWORDS('202nd');
682               print scalar NUMWORDS('1000000th');
683
684       print:
685
686               first
687               third
688               two hundred and second
689               one millionth
690
691       Two common idioms in this regard are:
692
693               print scalar NUMWORDS(ORD($number));
694
695       and:
696
697               print scalar ORD(NUMWORDS($number));
698
699       These are identical in effect, except when $number contains a decimal:
700
701               $number = 99.09;
702               print scalar NUMWORDS(ORD($number));    # ninety-ninth point zero nine
703               print scalar ORD(NUMWORDS($number));    # ninety-nine point zero ninth
704
705       Use whichever you feel is most appropriate.
706

CONVERTING LISTS OF WORDS TO PHRASES

708       When creating a list of words, commas are used between adjacent items,
709       except if the items contain commas, in which case semicolons are used.
710       But if there are fewer than three items, the commas/semicolons are omitted
711       entirely. The final item also has a conjunction (usually "and" or "or")
712       before it. And although it's technically incorrect (and sometimes
713       misleading), some people prefer to omit the comma before that final
714       conjunction, even when there are more than two items.
715
716       That's complicated enough to warrant its own subroutine: "WORDLIST()".
717       This subroutine expects a list of words, possibly with one or more hash
718       references containing options. It returns a string that joins the list
719       together in the normal English usage. For example:
720
721           print "You chose ", WORDLIST(@selected_items), "\n";
722           # You chose barley soup, roast beef, and Yorkshire pudding
723
724           print "You chose ", WORDLIST(@selected_items, {final_sep=>""}), "\n";
725           # You chose barley soup, roast beef and Yorkshire pudding
726
727           print "Please choose ", WORDLIST(@side_orders, {conj=>"or"}), "\n";
728           # Please choose salad, vegetables, or ice-cream
729
730       The available options are:
731
732           Option named    Specifies                Default value
733
734           conj            Final conjunction        "and"
735           sep             Inter-item separator     ","
736           final_sep       Final separator          value of 'sep' option
737

INTERPOLATING INFLECTIONS IN STRINGS

739       By far the commonest use of the inflection subroutines is to produce
740       message strings for various purposes. For example:
741
742               print NUM($errors), PL_N(" error"), PL_V(" was"), " detected.\n";
743               print PL_ADJ("This"), PL_N(" error"), PL_V(" was"), " fatal.\n"
744                       if $severity > 1;
745
746       Unfortunately the need to separate each subroutine call detracts
747       significantly from the readability of the resulting code. To ameliorate
748       this problem, Lingua::EN::Inflect provides an exportable string-
749       interpolating subroutine (inflect($)), which recognizes calls to the
750       various inflection subroutines within a string and interpolates them
751       appropriately.
752
753       Using "inflect" the previous example could be rewritten:
754
755               print inflect "NUM($errors) PL_N(error) PL_V(was) detected.\n";
756               print inflect "PL_ADJ(This) PL_N(error) PL_V(was) fatal.\n"
757                       if $severity > 1;
758
759       Note that "inflect" also correctly handles calls to the "NUM()"
760       subroutine (whether interpolated or antecedent). The "inflect()"
761       subroutine has a related extra feature, in that it automatically
762       cancels any "default number" value before it returns its interpolated
763       string. This means that calls to "NUM()" which are embedded in an
764       "inflect()"-interpolated string do not "escape" and interfere with
765       subsequent inflections.
766

MODERN VS CLASSICAL INFLECTIONS

768       Certain words, mainly of Latin or Ancient Greek origin, can form
769       plurals either using the standard English "-s" suffix, or with their
770       original Latin or Greek inflections. For example:
771
772               PL("stigma")            # -> "stigmas" or "stigmata"
773               PL("torus")             # -> "toruses" or "tori"
774               PL("index")             # -> "indexes" or "indices"
775               PL("millennium")        # -> "millenniums" or "millennia"
776               PL("ganglion")          # -> "ganglions" or "ganglia"
777               PL("octopus")           # -> "octopuses" or "octopodes"
778
779       Lingua::EN::Inflect caters to such words by providing an "alternate
780       state" of inflection known as "classical mode".  By default, words are
781       inflected using their contemporary English plurals, but if classical
782       mode is invoked, the more traditional plural forms are returned
783       instead.
784
785       The exportable subroutine "classical()" controls this feature.  If
786       "classical()" is called with no arguments, it unconditionally invokes
787       classical mode. If it is called with a single argument, it turns all
788       classical inflections on or off (depending on whether the argument is true
789       or false). If called with two or more arguments, those arguments
790       specify which aspects of classical behaviour are to be used.
791
792       Thus:
793
794               classical;                  # SWITCH ON CLASSICAL MODE
795               print PL("formula");        # -> "formulae"
796
797               classical 0;                # SWITCH OFF CLASSICAL MODE
798               print PL("formula");        # -> "formulas"
799
800               classical $cmode;           # CLASSICAL MODE IFF $cmode
801               print PL("formula");        # -> "formulae" (IF $cmode)
802                                           # -> "formulas" (OTHERWISE)
803
804               classical herd=>1;          # SWITCH ON CLASSICAL MODE FOR "HERD" NOUNS
805               print PL("wildebeest");     # -> "wildebeest"
806
807               classical names=>1;         # SWITCH ON CLASSICAL MODE FOR NAMES
808               print PL("sally");          # -> "sallies"
809               print PL("Sally");          # -> "Sallys"
810
811       Note however that "classical()" has no effect on the inflection of
812       words which are now fully assimilated. Hence:
813
814               PL("forum")             # ALWAYS -> "forums"
815               PL("criterion")         # ALWAYS -> "criteria"
816
817       LEI assumes that a capitalized word is a person's name. So it forms the
818       plural according to the rules for names (which is that you don't
819       inflect, you just add -s or -es). You can choose to turn that behaviour
820       off (it's on by default, even when the module isn't in classical
821       mode) by calling "classical(names=>0)".
822

USER-DEFINED INFLECTIONS

824   Adding plurals at run-time
825       Lingua::EN::Inflect provides five exportable subroutines which allow
826       the programmer to override the module's behaviour for specific cases:
827
828       "def_noun($$)"
829               The "def_noun" subroutine takes a pair of string arguments: the
830               singular and plural forms of the noun being specified. The
831               singular form specifies a pattern to be interpolated (as
832               "m/^(?:$first_arg)$/i").  Any noun matching this pattern is
833               then replaced by the string in the second argument. The second
834               argument specifies a string which is interpolated after the
835               match succeeds, and is then used as the plural form. For
836               example:
837
838                     def_noun  'cow'        => 'kine';
839                     def_noun  '(.+i)o'     => '$1i';
840                     def_noun  'spam(mer)?' => '\\$\\%\\@#\\$\\@#!!';
841
842               Note that both arguments should usually be specified in single
843               quotes, so that they are not interpolated when they are
844               specified, but later (when words are compared to them). As
845               indicated by the last example, care also needs to be taken with
846               certain characters in the second argument, to ensure that they
847               are not unintentionally interpolated during comparison.
848
849               The second argument string may also specify a second variant of
850               the plural form, to be used when "classical" plurals have been
851               requested. The beginning of the second variant is marked by a
852               '|' character:
853
854                     def_noun  'cow'        => 'cows|kine';
855                     def_noun  '(.+i)o'     => '$1os|$1i';
856                     def_noun  'spam(mer)?' => '\\$\\%\\@#\\$\\@#!!|varmints';
857
858               If no classical variant is given, the specified plural form is
859               used in both normal and "classical" modes.
860
861               If the second argument is "undef" instead of a string, then the
862               current user definition for the first argument is removed, and
863               the standard plural inflection(s) restored.
864
865               Note that in all cases, later plural definitions for a
866               particular singular form replace earlier definitions of the
867               same form. For example:
868
869                     # FIRST, HIDE THE MODERN FORM....
870                     def_noun  'aviatrix' => 'aviatrices';
871
872                     # LATER, HIDE THE CLASSICAL FORM...
873                     def_noun  'aviatrix' => 'aviatrixes';
874
875                     # FINALLY, RESTORE THE DEFAULT BEHAVIOUR...
876                     def_noun  'aviatrix' => undef;
877
878               Special care is also required when defining general patterns
879               and associated specific exceptions: put the more specific cases
880               after the general pattern. For example:
881
882                     def_noun  '(.+)us' => '$1i';      # EVERY "-us" TO "-i"
883                     def_noun  'bus'    => 'buses';    # EXCEPT FOR "bus"
884
885               This "try-most-recently-defined-first" approach to matching
886               user-defined words is also used by "def_verb", "def_a" and
887               "def_an".
888
889       "def_verb($$$$$$)"
890               The "def_verb" subroutine takes three pairs of string arguments
891               (that is, six arguments in total), specifying the singular and
892               plural forms of the three "persons" of verb. As with
893               "def_noun", the singular forms are specifications of run-time-
894               interpolated patterns, whilst the plural forms are
895               specifications of (up to two) run-time-interpolated strings:
896
897                      def_verb 'am'       => 'are',
898                               'are'      => 'are|art',
899                               'is'       => 'are';
900
901                      def_verb 'have'     => 'have',
902                               'have'     => 'have',
903                               'ha(s|th)' => 'have';
904
905               Note that as with "def_noun", modern/classical variants of
906               plurals may be separately specified, subsequent definitions
907               replace previous ones, and "undef"'ed plural forms revert to
908               the standard behaviour.
909
910       "def_adj($$)"
911               The "def_adj" subroutine takes a pair of string arguments,
912               which specify the singular and plural forms of the adjective
913               being defined.  As with "def_noun" and "def_verb", the singular
914               forms are specifications of run-time-interpolated patterns,
915               whilst the plural forms are specifications of (up to two) run-
916               time-interpolated strings:
917
918                      def_adj  'this'     => 'these';
919                      def_adj  'red'      => 'red|gules';
920
921               As previously, modern/classical variants of plurals may be
922               separately specified, subsequent definitions replace previous
923               ones, and "undef"'ed plural forms revert to the standard
924               behaviour.
925
926       def_a($) and def_an($)
927               The "def_a" and "def_an" subroutines each take a single
928               argument, which specifies a pattern. If a word passed to "A()"
929               or "AN()" matches this pattern, it will be prefixed
930               (unconditionally) with the corresponding indefinite article.
931               For example:
932
933                     def_a  'error';
934                     def_a  'in.+';
935
936                     def_an 'mistake';
937                     def_an 'error';
938
939               As with the other "def_..." subroutines, such redefinitions are
940               sequential in effect so that, after the above example, "error"
941               will be inflected with "an".
942
943   The $HOME/.inflectrc file
944       When it is imported, Lingua::EN::Inflect executes (as Perl code) the
945       contents of any file named .inflectrc which it finds in the
946       directory where Lingua/EN/Inflect.pm is installed, or in the current
947       home directory ($ENV{HOME}), or in both.  Note that the code is
948       executed within the Lingua::EN::Inflect namespace.
949
950       Hence the user or the local Perl guru can make appropriate calls to
951       "def_noun", "def_verb", etc. in one of these .inflectrc files, to
952       permanently and universally modify the behaviour of the module. For
953       example:
954
955             > cat /usr/local/lib/perl5/Lingua/EN/.inflectrc
956
957             def_noun  "UNIX"  => "UN*X|UNICES";
958
959             def_verb  "teco"  => "teco",      # LITERALLY: "to edit with TECO"
960                       "teco"  => "teco",
961                       "tecos" => "teco";
962
963             def_a     "Euler.*";              # "Yewler" TURNS IN HIS GRAVE
964
965       Note that calls to the "def_..." subroutines from within a program will
966       take precedence over the contents of the home directory .inflectrc
967       file, which in turn takes precedence over the system-wide .inflectrc
968       file.
969

DIAGNOSTICS

971       On loading, if the Perl code in a .inflectrc file is invalid
972       (syntactically or otherwise), an appropriate fatal error is issued.  A
973       common problem is not ending the file with something that evaluates to
974       true (as the five "def_..." subroutines do).
975
976       Using the five "def_..." subroutines directly in a program may also
977       result in fatal diagnostics, if a (singular) pattern or an interpolated
978       (plural) string is somehow invalid.
979
980       Specific diagnostics related to user-defined inflections are:
981
982       "Bad user-defined singular pattern:\n\t %s"
983               The singular form of a user-defined noun or verb (as defined by
984               a call to "def_noun", "def_verb", "def_adj", "def_a" or
985               "def_an") is not a valid Perl regular expression. The actual
986               Perl error message is also given.
987
988       "Bad user-defined plural string: '%s'"
989               The plural form(s) of a user-defined noun or verb (as defined
990               by a call to "def_noun", "def_verb" or "def_adj") is not a
991               valid Perl interpolated string (usually because it interpolates
992               some undefined variable).
993
994       "Bad .inflectrc file (%s):\n %s"
995               Some other problem occurred in loading the named local or
996               global .inflectrc file. The Perl error message (including the
997               line number) is also given.
998
999       There are no diagnosable run-time error conditions for the actual
1000       inflection subroutines, except "NUMWORDS" and hence no run-time
1001       diagnostics. If the inflection subroutines are unable to form a plural
1002       via a user-definition or an inbuilt rule, they just "guess" the
1003       commonest English inflection: adding "-s" for nouns, removing "-s" for
1004       verbs, and no inflection for adjectives.
1005
1006       "Lingua::EN::Inflect::NUMWORDS()" can "die" with the following
1007       messages:
1008
1009       "Bad grouping option: %s"
1010               The optional argument to "NUMWORDS()" wasn't 1, 2 or 3.
1011
1012       "Number out of range"
1013               "NUMWORDS()" was passed a number larger than
1014               999,999,999,999,999,999,999,999,999,999,999,999 (that is: nine
1015               hundred and ninety-nine decillion, nine hundred and ninety-nine
1016               nonillion, nine hundred and ninety-nine octillion, nine hundred
1017               and ninety-nine septillion, nine hundred and ninety-nine
1018               sextillion, nine hundred and ninety-nine quintillion, nine
1019               hundred and ninety-nine quadrillion, nine hundred and ninety-
1020               nine trillion, nine hundred and ninety-nine billion, nine
1021               hundred and ninety-nine million, nine hundred and ninety-nine
1022               thousand, nine hundred and ninety-nine :-)
1023
1024               The problem is that "NUMWORDS" doesn't know any words for
1025               number components bigger than "decillion".
1026

OTHER ISSUES

1028   2nd Person precedence
1029       If a verb has identical 1st and 2nd person singular forms, but
1030       different 1st and 2nd person plural forms, then when its plural is
1031       constructed, the 2nd person plural form is always preferred.
1032
1033       The author is not currently aware of any such verbs in English, but is
1034       not quite arrogant enough to assume ipso facto that none exist.
1035
1036   Nominative precedence
1037       The singular pronoun "it" presents a special problem because its plural
1038       form can vary, depending on its "case". For example:
1039
1040               It ate my homework       ->  They ate my homework
1041               It ate it                ->  They ate them
1042               I fed my homework to it  ->  I fed my homework to them
1043
1044       As a consequence of this ambiguity, "PL()" and "PL_N()" have been
1045       implemented so that they always return the nominative plural (that is,
1046       "they").
1047
1048       However, when asked for the plural of an unambiguously accusative "it"
1049       (namely, "PL("to it")", "PL_N("from it")", "PL("with it")", etc.), both
1050       subroutines will correctly return the accusative plural ("to them",
1051       "from them", "with them", etc.)
1052
1053   The plurality of zero
1054       The rules governing the choice between:
1055
1056             There were no errors.
1057
1058       and
1059
1060             There was no error.
1061
1062       are complex and often depend more on intent than on content.  Hence
1063       it is infeasible to specify such rules algorithmically.
1064
1065       Therefore, Lingua::EN::Inflect contents itself with the following
1066       compromise: If the governing number is zero, inflections always return
1067       the plural form unless the appropriate "classical" inflection is in
1068       effect, in which case the singular form is always returned.
1069
1070       Thus, the sequence:
1071
1072             NUM(0);
1073             print inflect "There PL(was) NO(choice)";
1074
1075       produces "There were no choices", whereas:
1076
1077             classical 'zero';     # or: classical(zero=>1);
1078             NUM(0);
1079             print inflect "There PL(was) NO(choice)";
1080
1081       will print "There was no choice".
1082
1083   Homographs with heterogeneous plurals
1084       Another context in which intent (and not content) sometimes determines
1085       plurality is where two distinct meanings of a word require different
1086       plurals. For example:
1087
1088             Three basses were stolen from the band's equipment trailer.
1089             Three bass were stolen from the band's aquarium.
1090
1091             I put the mice next to the cheese.
1092             I put the mouses next to the computers.
1093
1094             Several thoughts about leaving crossed my mind.
1095             Several thought about leaving across my lawn.
1096
1097       Lingua::EN::Inflect handles such words in two ways:
1098
1099       ·       If both meanings of the word are the same part of speech (for
1100               example, "bass" is a noun in both sentences above), then one
1101               meaning is chosen as the "usual" meaning, and only that
1102               meaning's plural is ever returned by any of the inflection
1103               subroutines.
1104
1105       ·       If each meaning of the word is a different part of speech (for
1106               example, "thought" is both a noun and a verb), then the noun's
1107               plural is returned by "PL()" and "PL_N()" and the verb's plural
1108               is returned only by "PL_V()".
1109
1110       Such contexts are, fortunately, uncommon (particularly "same-part-of-
1111       speech" examples). An informal study of nearly 600 "difficult plurals"
1112       indicates that "PL()" can be relied upon to "get it right" about 98% of
1113       the time (although, of course, ichthyophilic guitarists or cyber-
1114       behaviouralists may experience higher rates of confusion).
1115
1116       If the choice of a particular "usual inflection" is considered
1117       inappropriate, it can always be reversed with a preliminary call to the
1118       corresponding "def_..." subroutine.
1119

NOTE

1121       I'm not taking any further correspondence on:
1122
1123       "octopi".
1124           Despite the populist pandering of certain New World dictionaries,
1125           the plural is "octopuses" or (for the pedantic classicist)
1126           "octopodes". The suffix "-pus" is Greek, not Latin, so the plural
1127           is "-podes", not "-pi".
1128
1129       "virus".
1130           Had no plural in Latin (possibly because it was a mass noun).  The
1131           only plural is the Anglicized "viruses".
1132

AUTHORS

1134       Damian Conway (damian@conway.org), Matthew Persico (ORD inflection)
1135

BUGS AND IRRITATIONS

1137       The endless inconsistencies of English.
1138
1139       (Please report words for which the correct plural or indefinite article
1140       is not formed, so that the reliability of Lingua::EN::Inflect can be
1141       improved.)
1142
1144        Copyright (c) 1997-2009, Damian Conway. All Rights Reserved.
1145        This module is free software. It may be used, redistributed
1146            and/or modified under the same terms as Perl itself.
1147

POD ERRORS

1149       Hey! The above document had some coding errors, which are explained
1150       below:
1151
1152       Around line 2744:
1153           You forgot a '=back' before '=head1'
1154
1155
1156
1157perl v5.12.1                      2010-06-28            Lingua::EN::Inflect(3)
Impressum