1Lingua::EN::Inflect(3)User Contributed Perl DocumentationLingua::EN::Inflect(3)
2
3
4

NAME

6       Lingua::EN::Inflect - Convert singular to plural. Select "a" or "an".
7

VERSION

9       This document describes version 1.86 of Lingua::EN::Inflect, released
10       October 20, 2000.
11

SYNOPSIS

13        use Lingua::EN::Inflect qw ( PL PL_N PL_V PL_ADJ NO NUM
14                                     PL_eq PL_N_eq PL_V_eq PL_ADJ_eq
15                                     A AN
16                                     PART_PRES
17                                     ORD NUMWORDS
18                                     inflect classical
19                                     def_noun def_verb def_adj def_a def_an );
20
21        # UNCONDITIONALLY FORM THE PLURAL
22
23             print "The plural of ", $word, " is ", PL($word), "\n";
24
25        # CONDITIONALLY FORM THE PLURAL
26
27             print "I saw $cat_count ", PL("cat",$cat_count), "\n";
28
29        # FORM PLURALS FOR SPECIFIC PARTS OF SPEECH
30
31             print PL_N("I",$N1), PL_V("saw",$N1),
32                   PL_ADJ("my",$N2), PL_N("saw",$N2), "\n";
33
34        # DEAL WITH "0/1/N" -> "no/1/N" TRANSLATION:
35
36             print "There ", PL_V("was",$errors), NO(" error",$errors), "\n";
37
38        # USE DEFAULT COUNTS:
39
40             print NUM($N1,""), PL("I"), PL_V(" saw"), NUM($N2), PL_N(" saw");
41             print "There ", NUM($errors,''), PL_V("was"), NO(" error"), "\n";
42
43        # COMPARE TWO WORDS "NUMBER-INSENSITIVELY":
44
45             print "same\n"      if PL_eq($word1, $word2);
46             print "same noun\n" if PL_N_eq($word1, $word2);
47             print "same verb\n" if PL_V_eq($word1, $word2);
48             print "same adj.\n" if PL_ADJ_eq($word1, $word2);
49
50        # ADD CORRECT "a" OR "an" FOR A GIVEN WORD:
51
52             print "Did you want ", A($thing), " or ", AN($idea), "\n";
53
54        # CONVERT NUMERALS INTO ORDINALS (i.e. 1->1st, 2->2nd, 3->3rd, etc.)
55
56             print "It was ", ORD($position), " from the left\n";
57
58        # CONVERT NUMERALS TO WORDS (i.e. 1->"one", 101->"one hundred and one", etc.)
59        # IN A SCALAR CONTEXT: GET BACK A SINGLE STRING...
60
61           $words = NUMWORDS(1234);      # "one thousand, two hundred and thirty-four"
62           $words = NUMWORDS(ORD(1234)); # "one thousand, two hundred and thirty-fourth"
63
64        # IN A LIST CONTEXT: GET BACK A LIST OF STRINGS, ONE FOR EACH "CHUNK"...
65
66           @words = NUMWORDS(1234);    # ("one thousand","two hundred and thirty-four")
67
68        # OPTIONAL PARAMETERS CHANGE TRANSLATION:
69
70           $words = NUMWORDS(12345, group=>1);
71                                       # "one, two, three, four, five"
72
73           $words = NUMWORDS(12345, group=>2);
74                                       # "twelve, thirty-four, five"
75
76           $words = NUMWORDS(12345, group=>3);
77                                       # "one twenty-three, forty-five"
78
79           $words = NUMWORDS(1234, 'and'=>'');
80                                       # "one thousand, two hundred thirty-four"
81
82           $words = NUMWORDS(1234, 'and'=>', plus');
83                                       # "one thousand, two hundred, plus thirty-four"
84
85           $words = NUMWORDS(555_1202, group=>1, zero=>'oh');
86                                       # "five, five, five, one, two, oh, two"
87
88           $words = NUMWORDS(555_1202, group=>1, one=>'unity');
89                                       # "five, five, five, unity, two, zero, two"
90
91           $words = NUMWORDS(123.456, group=>1, decimal=>'mark');
92                                       # "one two three mark four five six"
93
94        # REQUIRE "CLASSICAL" PLURALS (EG: "focus"->"foci", "cherub"->"cherubim")
95
96             classical;              # USE ALL CLASSICAL PLURALS
97
98             classical 1;               #  USE ALL CLASSICAL PLURALS
99             classical 0;               #  USE ALL MODERN PLURALS (DEFAULT)
100
101             classical 'zero';      #  "no error" INSTEAD OF "no errors"
102             classical zero=>1;     #  "no error" INSTEAD OF "no errors"
103             classical zero=>0;     #  "no errors" INSTEAD OF "no error"
104
105             classical 'herd';      #  "2 buffalo" INSTEAD OF "2 buffalos"
106             classical herd=>1;     #  "2 buffalo" INSTEAD OF "2 buffalos"
107             classical herd=>0;     #  "2 buffalos" INSTEAD OF "2 buffalo"
108
109             classical 'persons';   # "2 chairpersons" INSTEAD OF "2 chairpeople"
110             classical persons=>1;  # "2 chairpersons" INSTEAD OF "2 chairpeople"
111             classical persons=>0;  # "2 chairpeople" INSTEAD OF "2 chairpersons"
112
113             classical 'ancient';   # "2 formulae" INSTEAD OF "2 formulas"
114             classical ancient=>1;  # "2 formulae" INSTEAD OF "2 formulas"
115             classical ancient=>0;  # "2 formulas" INSTEAD OF "2 formulae"
116
117        # INTERPOLATE "PL()", "PL_N()", "PL_V()", "PL_ADJ()", "A()", "AN()"
118        # "NUM()" AND "ORD()" WITHIN STRINGS:
119
120             print inflect("The plural of $word is PL($word)\n");
121             print inflect("I saw $cat_count PL(cat,$cat_count)\n");
122             print inflect("PL(I,$N1) PL_V(saw,$N1) PL(a,$N2) PL_N(saw,$N2)");
123             print inflect("NUM($N1,)PL(I) PL_V(saw) NUM($N2,)PL(a) PL_N(saw)");
124             print inflect("I saw NUM($cat_count) PL(cat)\nNUM()");
125             print inflect("There PL_V(was,$errors) NO(error,$errors)\n");
126             print inflect("There NUM($errors,) PL_V(was) NO(error)\n");
127             print inflect("Did you want A($thing) or AN($idea)\n");
128             print inflect("It was ORD($position) from the left\n");
129
130        # ADD USER-DEFINED INFLECTIONS (OVERRIDING INBUILT RULES):
131
132             def_noun  "VAX"  => "VAXen";      # SINGULAR => PLURAL
133
134             def_verb  "will" => "shall",      # 1ST PERSON SINGULAR => PLURAL
135                       "will" => "will",       # 2ND PERSON SINGULAR => PLURAL
136                       "will" => "will";       # 3RD PERSON SINGULAR => PLURAL
137
138             def_adj   "hir"  => "their";      # SINGULAR => PLURAL
139
140             def_a     "h";                    # "AY HALWAYS SEZ 'HAITCH'!"
141
142             def_an    "horrendous.*";         # "AN HORRENDOUS AFFECTATION"
143

DESCRIPTION

145       The exportable subroutines of Lingua::EN::Inflect provide plural
146       inflections, "a"/"an" selection for English words, and manipulation of
147       numbers as words.
148
149       Plural forms of all nouns, most verbs, and some adjectives are pro‐
150       vided. Where appropriate, "classical" variants (for example: "brother"
151       -> "brethren", "dogma" -> "dogmata", etc.) are also provided.
152
153       Pronunciation-based "a"/"an" selection is provided for all English
154       words, and most initialisms.
155
156       It is also possible to inflect numerals (1,2,3) to ordinals (1st, 2nd,
157       3rd) and to English words ("one", "two", "three").
158
159       In generating these inflections, Lingua::EN::Inflect follows the Oxford
160       English Dictionary and the guidelines in Fowler's Modern English Usage,
161       preferring the former where the two disagree.
162
163       The module is built around standard British spelling, but is designed
164       to cope with common American variants as well. Slang, jargon, and other
165       English dialects are not explicitly catered for.
166
167       Where two or more inflected forms exist for a single word (typically a
168       "classical" form and a "modern" form), Lingua::EN::Inflect prefers the
169       more common form (typically the "modern" one), unless "classical" pro‐
170       cessing has been specified (see "MODERN VS CLASSICAL INFLECTIONS").
171

FORMING PLURALS

173       Inflecting Plurals
174
175       All of the "PL_..." plural inflection subroutines take the word to be
176       inflected as their first argument and return the corresponding inflec‐
177       tion.  Note that all such subroutines expect the singular form of the
178       word. The results of passing a plural form are undefined (and unlikely
179       to be correct).
180
181       The "PL_..." subroutines also take an optional second argument, which
182       indicates the grammatical "number" of the word (or of another word with
183       which the word being inflected must agree). If the "number" argument is
184       supplied and is not 1 (or "one" or "a", or some other adjective that
185       implies the singular), the plural form of the word is returned. If the
186       "number" argument does indicate singularity, the (uninflected) word
187       itself is returned. If the number argument is omitted, the plural form
188       is returned unconditionally.
189
190       The various subroutines are:
191
192       "PL_N($;$)"
193               The exportable subroutine "PL_N()" takes a singular English
194               noun or pronoun and returns its plural. Pronouns in the nomina‐
195               tive ("I" -> "we") and accusative ("me" -> "us") cases are han‐
196               dled, as are possessive pronouns ("mine" -> "ours").
197
198       "PL_V($;$)"
199               The exportable subroutine "PL_V()" takes the singular form of a
200               conjugated verb (that is, one which is already in the correct
201               "person" and "mood") and returns the corresponding plural con‐
202               jugation.
203
204       "PL_ADJ($;$)"
205               The exportable subroutine "PL_ADJ()" takes the singular form of
206               certain types of adjectives and returns the corresponding plu‐
207               ral form.  Adjectives that are correctly handled include:
208               "numerical" adjectives ("a" -> "some"), demonstrative adjec‐
209               tives ("this" -> "these", "that" -> "those"), and possessives
210               ("my" -> "our", "cat's" -> "cats'", "child's" -> "children's",
211               etc.)
212
213       "PL($;$)"
214               The exportable subroutine "PL()" takes a singular English noun,
215               pronoun, verb, or adjective and returns its plural form. Where
216               a word has more than one inflection depending on its part of
217               speech (for example, the noun "thought" inflects to "thoughts",
218               the verb "thought" to "thought"), the (singular) noun sense is
219               preferred to the (singular) verb sense.
220
221               Hence "PL("knife")" will return "knives" ("knife" having been
222               treated as a singular noun), whereas "PL("knifes")" will return
223               "knife" ("knifes" having been treated as a 3rd person singular
224               verb).
225
226               The inherent ambiguity of such cases suggests that, where the
227               part of speech is known, "PL_N", "PL_V", and "PL_ADJ" should be
228               used in preference to "PL".
229
230       Note that all these subroutines ignore any whitespace surrounding the
231       word being inflected, but preserve that whitespace when the result is
232       returned. For example, "PL(" cat ")" returns " cats ".
233
234       Numbered plurals
235
236       The "PL_..." subroutines return only the inflected word, not the count
237       that was used to inflect it. Thus, in order to produce "I saw 3 ducks",
238       it is necessary to use:
239
240               print "I saw $N ", PL_N($animal,$N), "\n";
241
242       Since the usual purpose of producing a plural is to make it agree with
243       a preceding count, Lingua::EN::Inflect provides an exportable subrou‐
244       tine ("NO($;$)") which, given a word and a(n optional) count, returns
245       the count followed by the correctly inflected word. Hence the previous
246       example can be rewritten:
247
248               print "I saw ", NO($animal,$N), "\n";
249
250       In addition, if the count is zero (or some other term which implies
251       zero, such as "zero", "nil", etc.) the count is replaced by the word
252       "no". Hence, if $N had the value zero, the previous example would print
253       the somewhat more elegant:
254
255               I saw no animals
256
257       rather than:
258
259               I saw 0 animals
260
261       Note that the name of the subroutine is a pun: the subroutine returns
262       either a number (a No.) or a "no", in front of the inflected word.
263
264       Reducing the number of counts required
265
266       In some contexts, the need to supply an explicit count to the various
267       "PL_..." subroutines makes for tiresome repetition. For example:
268
269               print PL_ADJ("This",$errors), PL_N(" error",$errors),
270                     PL_V(" was",$errors), " fatal.\n";
271
272       Lingua::EN::Inflect therefore provides an exportable subroutine
273       ("NUM($;$)") which may be used to set a persistent "default number"
274       value. If such a value is set, it is subsequently used whenever an
275       optional second "number" argument is omitted. The default value thus
276       set can subsequently be removed by calling "NUM()" with no arguments.
277       Hence we could rewrite the previous example:
278
279               NUM($errors);
280               print PL_ADJ("This"), PL_N(" error"), PL_V(" was"), " fatal.\n";
281               NUM();
282
283       Normally, "NUM()" returns its first argument, so that it may also be
284       "inlined" in contexts like:
285
286               print NUM($errors), PL_N(" error"), PL_V(" was"), " detected.\n";
287               print PL_ADJ("This"), PL_N(" error"), PL_V(" was"), " fatal.\n"
288                       if $severity > 1;
289
290       However, in certain contexts (see "INTERPOLATING INFLECTIONS IN
291       STRINGS") it is preferable that "NUM()" return an empty string. Hence
292       "NUM()" provides an optional second argument. If that argument is sup‐
293       plied (that is, if it is defined) and evaluates to false, "NUM" returns
294       an empty string instead of its first argument. For example:
295
296               print NUM($errors,0), NO("error"), PL_V(" was"), " detected.\n";
297               print PL_ADJ("This"), PL_N(" error"), PL_V(" was"), " fatal.\n"
298                       if $severity > 1;
299
300       Number-insensitive equality
301
302       Lingua::EN::Inflect also provides a solution to the problem of compar‐
303       ing words of differing plurality through the exportable subroutines
304       "PL_eq($$)", "PL_N_eq($$)", "PL_V_eq($$)", and "PL_ADJ_eq($$)".  Each
305       of these subroutines takes two strings, and  compares them using the
306       corresponding plural-inflection subroutine ("PL()", "PL_N()", "PL_V()",
307       and "PL_ADJ()" respectively).
308
309       The comparison returns true if:
310
311       ·       the strings are "eq"-equal, or
312
313       ·       one string is "eq"-equal to a plural form of the other, or
314
315       ·       the strings are two different plural forms of the one word.
316
317       Hence all of the following return true:
318
319               PL_eq("index","index")          # RETURNS "eq"
320               PL_eq("index","indexes")        # RETURNS "s:p"
321               PL_eq("index","indices")        # RETURNS "s:p"
322               PL_eq("indexes","index")        # RETURNS "p:s"
323               PL_eq("indices","index")        # RETURNS "p:s"
324               PL_eq("indices","indexes")      # RETURNS "p:p"
325               PL_eq("indexes","indices")      # RETURNS "p:p"
326               PL_eq("indices","indices")      # RETURNS "eq"
327
328       As indicated by the comments in the previous example, the actual value
329       returned by the various "PL_..._eq" subroutines encodes which of the
330       three equality rules succeeded: "eq" is returned if the strings were
331       identical, "s:p" if the strings were singular and plural respectively,
332       "p:s" for plural and singular, and "p:p" for two distinct plurals.
333       Inequality is indicated by returning an empty string.
334
335       It should be noted that two distinct singular words which happen to
336       take the same plural form are not considered equal, nor are cases where
337       one (singular) word's plural is the other (plural) word's singular.
338       Hence all of the following return false:
339
340               PL_eq("base","basis")       # ALTHOUGH BOTH -> "bases"
341               PL_eq("syrinx","syringe")   # ALTHOUGH BOTH -> "syringes"
342               PL_eq("she","he")           # ALTHOUGH BOTH -> "they"
343
344               PL_eq("opus","operas")      # ALTHOUGH "opus" -> "opera" -> "operas"
345               PL_eq("taxi","taxes")       # ALTHOUGH "taxi" -> "taxis" -> "taxes"
346
347       Note too that, although the comparison is "number-insensitive" it is
348       not case-insensitive (that is, "PL_eq("time","Times")" returns false).
349       To obtain both number and case insensitivity, prefix both arguments
350       with "lc" (that is, "PL_eq(lc "time", lc "Times")" returns true).
351

OTHER VERB FORMS

353       Present participles
354
355       "Lingua::EN::Inflect" also provides the "PART_PRES" subroutine, which
356       can take a 3rd person singular verb and correctly inflect it to its
357       present participle:
358
359               PART_PRES("runs")       # "running"
360               PART_PRES("loves")      # "loving"
361               PART_PRES("eats")       # "eating"
362               PART_PRES("bats")       # "batting"
363               PART_PRES("spies")      # "spying"
364

PROVIDING INDEFINITE ARTICLES

366       Selecting indefinite articles
367
368       Lingua::EN::Inflect provides two exportable subroutines ("A($;$)" and
369       "AN($;$)") which will correctly prepend the appropriate indefinite
370       article to a word, depending on its pronunciation. For example:
371
372               A("cat")                # -> "a cat"
373               AN("cat")               # -> "a cat"
374               A("euphemism")          # -> "a euphemism"
375               A("Euler number")       # -> "an Euler number"
376               A("hour")               # -> "an hour"
377               A("houri")              # -> "a houri"
378
379       The two subroutines are identical in function and may be used inter‐
380       changeably. The only reason that two versions are provided is to
381       enhance the readability of code such as:
382
383               print "That is ", AN($errortype), " error\n";
384               print "That is ", A($fataltype), " fatal error\n";
385
386       Note that in both cases the actual article provided depends only on the
387       pronunciation of the first argument, not on the name of the subroutine.
388
389       "A()" and "AN()" will ignore any indefinite article that already exists
390       at the start of the string. Thus:
391
392               @half_arked = (
393                       "a elephant",
394                       "a giraffe",
395                       "an ewe",
396                       "a orangutan",
397               );
398
399               print A($_), "\n" for @half_arked;
400
401               # prints:
402               #     an elephant
403               #     a giraffe
404               #     a ewe
405               #     an orangutan
406
407       "A()" and "AN()" both take an optional second argument. As with the
408       "PL_..." subroutines, this second argument is a "number" specifier. If
409       its value is 1 (or some other value implying singularity), "A()" and
410       "AN()" insert "a" or "an" as appropriate. If the number specifier
411       implies plurality, "A()" and "AN()" insert the actual second argument
412       instead.  For example:
413
414               A("cat",1)              # -> "a cat"
415               A("cat",2)              # -> "2 cat"
416               A("cat","one")          # -> "one cat"
417               A("cat","no")           # -> "no cat"
418
419       Note that, as implied by the previous examples, "A()" and "AN()" both
420       assume that their job is merely to provide the correct qualifier for a
421       word (that is: "a", "an", or the specified count).  In other words,
422       they assume that the word they are given has already been correctly
423       inflected for plurality. Hence, if $N has the value 2, then:
424
425             print A("cat",$N);
426
427       prints "2 cat", instead of "2 cats". The correct approach is to use:
428
429             print A(PL("cat",$N),$N);
430
431       or, better still:
432
433             print NO("cat",$N);
434
435       Note too that, like the various "PL_..." subroutines, whenever "A()"
436       and "AN()" are called with only one argument they are subject to the
437       effects of any preceding call to "NUM()". Hence, another possible solu‐
438       tion is:
439
440             NUM($N);
441             print A(PL("cat"));
442
443       Indefinite articles and initialisms
444
445       "Initialisms" (sometimes inaccurately called "acronyms") are terms
446       which have been formed from the initial letters of words in a phrase
447       (for example, "NATO", "NBL", "S.O.S.", "SCUBA", etc.)
448
449       Such terms present a particular challenge when selecting between "a"
450       and "an", since they are sometimes pronounced as if they were a single
451       word ("nay-tow", "sku-ba") and sometimes as a series of letter names
452       ("en-eff-ell", "ess-oh-ess").
453
454       "A()" and "AN()" cope with this dichotomy using a series of inbuilt
455       rules, which may be summarized as:
456
457       1.      If the word starts with a single letter, followed by a period
458               or dash (for example, "R.I.P.", "C.O.D.", "e-mail", "X-ray",
459               "T-square"), then choose the appropriate article for the sound
460               of the first letter ("an R.I.P.", "a C.O.D.", "an e-mail", "an
461               X-ray", "a T-square").
462
463       2.      If the first two letters of the word are capitals, consonants,
464               and do not appear at the start of any known English word, (for
465               example, "LCD", "XML", "YWCA"), then once again choose "a" or
466               "an" depending on the sound of the first letter ("an LCD", "an
467               XML", "a YWCA").
468
469       3.      Otherwise, assume the string is a capitalized word or a pro‐
470               nounceable initialism (for example, "LED", "OPEC", "FAQ",
471               "UNESCO"), and therefore takes "a" or "an" according to the
472               (apparent) pronunciation of the entire word ("a LED", "an
473               OPEC", "a FAQ", "a UNESCO").
474
475       Note that rules 1 and 3 together imply that the presence or absence of
476       punctuation may change the selection of indefinite article for a par‐
477       ticular initialism (for example, "a FAQ" but "an F.A.Q.").
478
479       Indefinite articles and "soft H's"
480
481       Words beginning in the letter 'H' present another type of difficulty
482       when selecting a suitable indefinite article. In a few such words (for
483       example, "hour", "honour", "heir") the 'H' is not voiced at all, and so
484       such words inflect with "an". The remaining cases ("voiced H's") may be
485       divided into two categories: "hard H's" (such as "hangman", "holo‐
486       graph", "hat", etc.) and "soft H's" (such as "hysterical", "horren‐
487       dous", "holy", etc.)
488
489       Hard H's always take "a" as their indefinite article, and soft H's nor‐
490       mally do so as well. But some English speakers prefer "an" for soft H's
491       (although the practice is now generally considered an affectation,
492       rather than a legitimate grammatical alternative).
493
494       At present, the "A()" and "AN()" subroutines ignore soft H's and use
495       "a" for any voiced 'H'. The author would, however, welcome feedback on
496       this decision (envisaging a possible future "soft H" mode).
497

INFLECTING ORDINALS

499       Occasionally it is useful to present an integer value as an ordinal
500       rather than as a numeral. For example:
501
502               Enter password (1st attempt): ********
503               Enter password (2nd attempt): *********
504               Enter password (3rd attempt): *********
505               No 4th attempt. Access denied.
506
507       To this end, Lingua::EN::Inflect provides the "ORD()" subroutine.
508       "ORD()" takes a single argument and forms its ordinal equivalent.  If
509       the argument isn't a numerical integer, it just adds "-th".
510

CONVERTING NUMBERS TO WORDS

512       The exportable subroutine "NUMWORDS" takes a number (cardinal or ordi‐
513       nal) and returns an English representation of that number. In a scalar
514       context a string is returned. Hence:
515
516               use Lingua::EN::Inflect qw( NUMWORDS );
517
518               $words = NUMWORDS(1234567);
519
520       puts the string:
521
522               "one million, two hundred and thirty-four thousand, five hundred and sixty-seven"
523
524       into $words.
525
526       In a list context each comma-separated chunk is returned as a separate
527       element.  Hence:
528
529               @words = NUMWORDS(1234567);
530
531       puts the list:
532
533               ("one million",
534                "two hundred and thirty-four thousand",
535                "five hundred and sixty-seven")
536
537       into @words.
538
539       Non-digits (apart from an optional leading plus or minus sign, any dec‐
540       imal points, and ordinal suffixes -- see below) are silently ignored,
541       so the following all produce identical results:
542
543               NUMWORDS(5551202);
544               NUMWORDS(5_551_202);
545               NUMWORDS("5,551,202");
546               NUMWORDS("555-1202");
547
548       That last case is a little awkward since it's almost certainly a phone
549       number, and "five million, five hundred and fifty-one thousand, two
550       hundred and two" probably isn't what's wanted.
551
552       To overcome this, "NUMWORDS()" takes an optional named argument,
553       'group', which changes how numbers are translated. The argument must be
554       a positive integer less than four, which indicates how the digits of
555       the number are to be grouped. If the argument is 1, then each digit is
556       translated separately. If the argument is 2, pairs of digits (starting
557       from the left) are grouped together. If the argument is 3, triples of
558       numbers (again, from the left) are grouped. Hence:
559
560               NUMWORDS("555-1202", group=>1)
561
562       returns "five, five, five, one, two, zero, two", whilst:
563
564               NUMWORDS("555-1202", group=>2)
565
566       returns "fifty-five, fifty-one, twenty, two", and:
567
568               NUMWORDS("555-1202", group=>3)
569
570       returns "five fifty-five, one twenty, two".
571
572       Phone numbers are often written in words as
573       "five..five..five..one..two..zero..two", which is also easy to achieve:
574
575               join '..', NUMWORDS("555-1202", group=>1)
576
577       "NUMWORDS" also handles decimal fractions. Hence:
578
579               NUMWORDS("1.2345")
580
581       returns "one point two three four five" in a scalar context and
582       "("one","point","two","three","four","five")" in a list context.
583       Exponent form ("1.234e56") is not yet handled.
584
585       Multiple decimal points are only translated in one of the "grouping"
586       modes.  Hence:
587
588               NUMWORDS("101.202.303")
589
590       returns "one hundred and one point two zero two three zero three",
591       whereas:
592
593               NUMWORDS("101.202.303", group=>1)
594
595       returns "one zero one point two zero two point three zero three".
596
597       The digit '0' is unusual in that it may be translated to English as
598       "zero", "oh", or "nought". To cater for this diversity, "NUMWORDS" may
599       be passed a named argument, 'zero', which may be set to the desired
600       translation of '0'. For example:
601
602               print join "..", NUMWORDS("555-1202", group=>1, zero=>'oh')
603
604       prints "five..five..five..one..two..oh..two".  By default, zero is ren‐
605       dered as "zero".
606
607       Likewise, the digit '1' may be rendered as "one" or "a/an" (or very
608       occasionally other variants), depending on the context. So there is a
609       'one' argument as well:
610
611               print NUMWORDS($_, one=>'a solitary', zero=>'no more'),
612                     PL(" bottle of beer on the wall\n", $_)
613                          for (3,2,1,0);
614
615               # prints:
616               #     three bottles of beer on the wall
617               #     two bottles of beer on the wall
618               #     a solitary bottle of beer on the wall
619               #     no more bottles of beer on the wall
620
621       Care is needed if the word "a/an" is to be used as a 'one' value.
622       Unless the next word is known in advance, it's almost always necessary
623       to use the "A" function as well:
624
625               print A( NUMWORDS(1, one=>'a') . " $_\n")
626                    for qw(cat aardvark ewe hour);
627
628               # prints:
629               #     a cat
630               #     an aardvark
631               #     a ewe
632               #     an hour
633
634       Another major regional variation in number translation is the use of
635       "and" in certain contexts. The named argument 'and' allows the program‐
636       mer to specify how "and" should be handled. Hence:
637
638               print scalar NUMWORDS("765", 'and'=>'')
639
640       prints "seven hundred sixty-five", instead of "seven hundred and
641       sixty-five".  By default, the "and" is included.
642
643       The translation of the decimal point is also subject to variation (with
644       "point", "dot", and "decimal" being the favorites).  The named argument
645       'decimal' allows the programmer to specify how the decimal point should
646       be rendered. Hence:
647
648               print scalar NUMWORDS("666.124.64.101", group=>3, decimal=>'dot')
649
650       prints "six sixty-six, dot, one twenty-four, dot, sixty-four, dot, one
651       zero one". By default, the decimal point is rendered as "point".
652
653       "NUMWORDS" also handles the ordinal forms of numbers. So:
654
655               print scalar NUMWORDS('1st');
656               print scalar NUMWORDS('3rd');
657               print scalar NUMWORDS('202nd');
658               print scalar NUMWORDS('1000000th');
659
660       print:
661
662               first
663               third
664               two hundred and second
665               one millionth
666
667       Two common idioms in this regard are:
668
669               print scalar NUMWORDS(ORD($number));
670
671       and:
672
673               print scalar ORD(NUMWORDS($number));
674
675       These are identical in effect, except when $number contains a decimal:
676
677               $number = 99.09;
678               print scalar NUMWORDS(ORD($number));    # ninety-ninth point zero nine
679               print scalar ORD(NUMWORDS($number));    # ninety-nine point zero ninth
680
681       Use whichever you feel is most appropriate.
682

INTERPOLATING INFLECTIONS IN STRINGS

684       By far the commonest use of the inflection subroutines is to produce
685       message strings for various purposes. For example:
686
687               print NUM($errors), PL_N(" error"), PL_V(" was"), " detected.\n";
688               print PL_ADJ("This"), PL_N(" error"), PL_V(" was"), " fatal.\n"
689                       if $severity > 1;
690
691       Unfortunately the need to separate each subroutine call detracts sig‐
692       nificantly from the readability of the resulting code. To ameliorate
693       this problem, Lingua::EN::Inflect provides an exportable string-inter‐
694       polating subroutine (inflect($)), which recognizes calls to the various
695       inflection subroutines within a string and interpolates them appropri‐
696       ately.
697
698       Using "inflect" the previous example could be rewritten:
699
700               print inflect "NUM($errors) PL_N(error) PL_V(was) detected.\n";
701               print inflect "PL_ADJ(This) PL_N(error) PL_V(was) fatal.\n"
702                       if $severity > 1;
703
704       Note that "inflect" also correctly handles calls to the "NUM()" subrou‐
705       tine (whether interpolated or antecedent). The "inflect()" subroutine
706       has a related extra feature, in that it automatically cancels any
707       "default number" value before it returns its interpolated string. This
708       means that calls to "NUM()" which are embedded in an "inflect()"-inter‐
709       polated string do not "escape" and interfere with subsequent inflec‐
710       tions.
711

MODERN VS CLASSICAL INFLECTIONS

713       Certain words, mainly of Latin or Ancient Greek origin, can form plu‐
714       rals either using the standard English "-s" suffix, or with their orig‐
715       inal Latin or Greek inflections. For example:
716
717               PL("stigma")            # -> "stigmas" or "stigmata"
718               PL("torus")             # -> "toruses" or "tori"
719               PL("index")             # -> "indexes" or "indices"
720               PL("millennium")        # -> "millenniums" or "millennia"
721               PL("ganglion")          # -> "ganglions" or "ganglia"
722               PL("octopus")           # -> "octopuses" or "octopodes"
723
724       Lingua::EN::Inflect caters to such words by providing an "alternate
725       state" of inflection known as "classical mode".  By default, words are
726       inflected using their contemporary English plurals, but if classical
727       mode is invoked, the more traditional plural forms are returned
728       instead.
729
730       The exportable subroutine "classical()" controls this feature.  If
731       "classical()" is called with no arguments, it unconditionally invokes
732       classical mode. If it is called with a single argument, it turns all
733       classical inflections on or off (depending on whether the argument is true
734       or false). If called with two or more arguments, those arguments spec‐
735       ify which aspects of classical behaviour are to be used.
736
737       Thus:
738
739               classical;                  # SWITCH ON CLASSICAL MODE
740               print PL("formula");        # -> "formulae"
741
742               classical 0;                # SWITCH OFF CLASSICAL MODE
743               print PL("formula");        # -> "formulas"
744
745               classical $cmode;           # CLASSICAL MODE IFF $cmode
746               print PL("formula");        # -> "formulae" (IF $cmode)
747                                           # -> "formulas" (OTHERWISE)
748
749               classical herd=>1;          # SWITCH ON CLASSICAL MODE FOR "HERD" NOUNS
750               print PL("wildebeest");     # -> "wildebeest"
751
752               classical names=>1;         # SWITCH ON CLASSICAL MODE FOR NAMES
753               print PL("sally");          # -> "sallies"
754               print PL("Sally");          # -> "Sallys"
755
756       Note however that "classical()" has no effect on the inflection of
757       words which are now fully assimilated. Hence:
758
759               PL("forum")             # ALWAYS -> "forums"
760               PL("criterion")         # ALWAYS -> "criteria"
761
762       Lingua::EN::Inflect assumes that a capitalized word is a person's name.
763       So it forms the plural according to the rules for names (which is that
764       you don't inflect, you just add -s or -es). You can choose to turn that
765       behaviour off (it's on by default, even when the module isn't in
766       classical mode) by calling "classical(names=>0)".
767

USER-DEFINED INFLECTIONS

769       Adding plurals at run-time
770
771       Lingua::EN::Inflect provides five exportable subroutines which allow
772       the programmer to override the module's behaviour for specific cases:
773
774       "def_noun($$)"
775               The "def_noun" subroutine takes a pair of string arguments: the
776               singular and plural forms of the noun being specified. The sin‐
777               gular form specifies a pattern to be interpolated (as
778               "m/^(?:$first_arg)$/i").  Any noun matching this pattern is
779               then replaced by the string in the second argument. The second
780               argument specifies a string which is interpolated after the
781               match succeeds, and is then used as the plural form. For exam‐
782               ple:
783
784                     def_noun  'cow'        => 'kine';
785                     def_noun  '(.+i)o'     => '$1i';
786                     def_noun  'spam(mer)?' => '\\$\\%\\@#\\$\\@#!!';
787
788               Note that both arguments should usually be specified in single
789               quotes, so that they are not interpolated when they are speci‐
790               fied, but later (when words are compared to them). As indicated
791               by the last example, care also needs to be taken with certain
792               characters in the second argument, to ensure that they are not
793               unintentionally interpolated during comparison.
794
795               The second argument string may also specify a second variant of
796               the plural form, to be used when "classical" plurals have been
797               requested. The beginning of the second variant is marked by a
798               '|' character:
799
800                     def_noun  'cow'        => 'cows|kine';
801                     def_noun  '(.+i)o'     => '$1os|$1i';
802                     def_noun  'spam(mer)?' => '\\$\\%\\@#\\$\\@#!!|varmints';
803
804               If no classical variant is given, the specified plural form is
805               used in both normal and "classical" modes.
806
807               If the second argument is "undef" instead of a string, then the
808               current user definition for the first argument is removed, and
809               the standard plural inflection(s) restored.
810
811               Note that in all cases, later plural definitions for a particu‐
812               lar singular form replace earlier definitions of the same form.
813               For example:
814
815                     # FIRST, HIDE THE MODERN FORM....
816                     def_noun  'aviatrix' => 'aviatrices';
817
818                     # LATER, HIDE THE CLASSICAL FORM...
819                     def_noun  'aviatrix' => 'aviatrixes';
820
821                     # FINALLY, RESTORE THE DEFAULT BEHAVIOUR...
822                     def_noun  'aviatrix' => undef;
823
824               Special care is also required when defining general patterns
825               and associated specific exceptions: put the more specific cases
826               after the general pattern. For example:
827
828                     def_noun  '(.+)us' => '$1i';      # EVERY "-us" TO "-i"
829                     def_noun  'bus'    => 'buses';    # EXCEPT FOR "bus"
830
831               This "try-most-recently-defined-first" approach to matching
832               user-defined words is also used by "def_verb", "def_a" and
833               "def_an".
834
835       "def_verb($$$$$$)"
836               The "def_verb" subroutine takes three pairs of string arguments
837               (that is, six arguments in total), specifying the singular and
838               plural forms of the three "persons" of verb. As with
839               "def_noun", the singular forms are specifications of run-time-
840               interpolated patterns, whilst the plural forms are specifica‐
841               tions of (up to two) run-time-interpolated strings:
842
843                      def_verb 'am'       => 'are',
844                               'are'      => 'are|art',
845                               'is'       => 'are';
846
847                      def_verb 'have'     => 'have',
848                               'have'     => 'have',
849                               'ha(s|th)' => 'have';
850
851               Note that as with "def_noun", modern/classical variants of plu‐
852               rals may be separately specified, subsequent definitions
853               replace previous ones, and "undef"'ed plural forms revert to
854               the standard behaviour.
855
856       "def_adj($$)"
857               The "def_adj" subroutine takes a pair of string arguments,
858               which specify the singular and plural forms of the adjective
859               being defined.  As with "def_noun" and "def_verb", the singular
860               forms are specifications of run-time-interpolated patterns,
861               whilst the plural forms are specifications of (up to two) run-
862               time-interpolated strings:
863
864                      def_adj  'this'     => 'these';
865                      def_adj  'red'      => 'red|gules';
866
867               As previously, modern/classical variants of plurals may be sep‐
868               arately specified, subsequent definitions replace previous
869               ones, and "undef"'ed plural forms revert to the standard behav‐
870               iour.
871
872       def_a($) and def_an($)
873               The "def_a" and "def_an" subroutines each take a single argu‐
874               ment, which specifies a pattern. If a word passed to "A()" or
875               "AN()" matches this pattern, it will be prefixed (uncondition‐
876               ally) with the corresponding indefinite article. For example:
877
878                     def_a  'error';
879                     def_a  'in.+';
880
881                     def_an 'mistake';
882                     def_an 'error';
883
884               As with the other "def_..." subroutines, such redefinitions are
885               sequential in effect so that, after the above example, "error"
886               will be inflected with "an".
887
888       The $HOME/.inflectrc file
889
890       When it is imported, Lingua::EN::Inflect executes (as Perl code) the
891       contents of any file named .inflectrc which it finds in the
892       directory where Lingua/EN/Inflect.pm is installed, or in the current
893       home directory ($ENV{HOME}), or in both.  Note that the code is exe‐
894       cuted within the Lingua::EN::Inflect namespace.
895
896       Hence the user or the local Perl guru can make appropriate calls to
897       "def_noun", "def_verb", etc. in one of these .inflectrc files, to per‐
898       manently and universally modify the behaviour of the module. For exam‐
899       ple
900
901             > cat /usr/local/lib/perl5/Lingua/EN/.inflectrc
902
903             def_noun  "UNIX"  => "UN*X|UNICES";
904
905             def_verb  "teco"  => "teco",      # LITERALLY: "to edit with TECO"
906                       "teco"  => "teco",
907                       "tecos" => "teco";
908
909             def_a     "Euler.*";              # "Yewler" TURNS IN HIS GRAVE
910
911       Note that calls to the "def_..." subroutines from within a program will
912       take precedence over the contents of the home directory .inflectrc
913       file, which in turn takes precedence over the system-wide .inflectrc
914       file.
915

DIAGNOSTICS

917       On loading, if the Perl code in a .inflectrc file is invalid (syntacti‐
918       cally or otherwise), an appropriate fatal error is issued.  A common
919       problem is not ending the file with something that evaluates to true
920       (as the five "def_..." subroutines do).
921
922       Using the five "def_..." subroutines directly in a program may also
923       result in fatal diagnostics, if a (singular) pattern or an interpolated
924       (plural) string is somehow invalid.
925
926       Specific diagnostics related to user-defined inflections are:
927
928       "Bad user-defined singular pattern:\n\t %s"
929               The singular form of a user-defined noun or verb (as defined by
930               a call to "def_noun", "def_verb", "def_adj", "def_a" or
931               "def_an") is not a valid Perl regular expression. The actual
932               Perl error message is also given.
933
934       "Bad user-defined plural string: '%s'"
935               The plural form(s) of a user-defined noun or verb (as defined
936               by a call to "def_noun", "def_verb" or "def_adj") is not a
937               valid Perl interpolated string (usually because it interpolates
938               some undefined variable).
939
940       "Bad .inflectrc file (%s):\n %s"
941               Some other problem occurred in loading the named local or
942               global .inflectrc file. The Perl error message (including the
943               line number) is also given.
944
945       There are no diagnosable run-time error conditions for the actual
946       inflection subroutines, except "NUMWORDS" and hence no run-time diag‐
947       nostics. If the inflection subroutines are unable to form a plural via
948       a user-definition or an inbuilt rule, they just "guess" the commonest
949       English inflection: adding "-s" for nouns, removing "-s" for verbs, and
950       no inflection for adjectives.
951
952       "Lingua::EN::Inflect::NUMWORDS()" can "die" with the following mes‐
953       sages:
954
955       "Bad grouping option: %s"
956               The optional argument to "NUMWORDS()" wasn't 1, 2 or 3.
957
958       "Number out of range"
959               "NUMWORDS()" was passed a number larger than
960               999,999,999,999,999,999,999,999,999,999,999,999 (that is: nine
961               hundred and ninety-nine decillion, nine hundred and ninety-nine
962               nonillion, nine hundred and ninety-nine octillion, nine hundred
963               and ninety-nine septillion, nine hundred and ninety-nine sex‐
964               tillion, nine hundred and ninety-nine quintillion, nine hundred
965               and ninety-nine quadrillion, nine hundred and ninety-nine tril‐
966               lion, nine hundred and ninety-nine billion, nine hundred and
967               ninety-nine million, nine hundred and ninety-nine thousand,
968               nine hundred and ninety-nine :-)
969
970               The problem is that "NUMWORDS" doesn't know any words for num‐
971               ber components bigger than "decillion".
972

OTHER ISSUES

974       2nd Person precedence
975
976       If a verb has identical 1st and 2nd person singular forms, but differ‐
977       ent 1st and 2nd person plural forms, then when its plural is con‐
978       structed, the 2nd person plural form is always preferred.
979
980       The author is not currently aware of any such verbs in English, but is
981       not quite arrogant enough to assume ipso facto that none exist.
982
983       Nominative precedence
984
985       The singular pronoun "it" presents a special problem because its plural
986       form can vary, depending on its "case". For example:
987
988               It ate my homework       ->  They ate my homework
989               It ate it                ->  They ate them
990               I fed my homework to it  ->  I fed my homework to them
991
992       As a consequence of this ambiguity, "PL()" and "PL_N()" have been imple‐
993       mented so that they always return the nominative plural (that is,
994       "they").
995
996       However, when asked for the plural of an unambiguously accusative "it"
997       (namely, "PL("to it")", "PL_N("from it")", "PL("with it")", etc.), both
998       subroutines will correctly return the accusative plural ("to them",
999       "from them", "with them", etc.)
1000
1001       The plurality of zero
1002
1003       The rules governing the choice between:
1004
1005             There were no errors.
1006
1007       and
1008
1009             There was no error.
1010
1011       are complex and often depend more on intent than on content.  Hence
1012       it is infeasible to specify such rules algorithmically.
1013
1014       Therefore, Lingua::EN::Inflect contents itself with the following compro‐
1015       mise: If the governing number is zero, inflections always return the
1016       plural form unless the appropriate "classical" inflection is in effect,
1017       in which case the singular form is always returned.
1018
1019       Thus, the sequence:
1020
1021             NUM(0);
1022             print inflect "There PL(was) NO(choice)";
1023
1024       produces "There were no choices", whereas:
1025
1026             classical 'zero';     # or: classical(zero=>1);
1027             NUM(0);
1028             print inflect "There PL(was) NO(choice)";
1029
1030       will print "There was no choice".
1031
1032       Homographs with heterogeneous plurals
1033
1034       Another context in which intent (and not content) sometimes determines
1035       plurality is where two distinct meanings of a word require different
1036       plurals. For example:
1037
1038             Three basses were stolen from the band's equipment trailer.
1039             Three bass were stolen from the band's aquarium.
1040
1041             I put the mice next to the cheese.
1042             I put the mouses next to the computers.
1043
1044             Several thoughts about leaving crossed my mind.
1045             Several thought about leaving across my lawn.
1046
1047       Lingua::EN::Inflect handles such words in two ways:
1048
1049               *       If both meanings of the word are the same part of
1050                       speech (for example, "bass" is a noun in both sentences
1051                       above), then one meaning is chosen as the "usual" mean‐
1052                       ing, and only that meaning's plural is ever returned by
1053                       any of the inflection subroutines.
1054
1055               *       If each meaning of the word is a different part of
1056                       speech (for example, "thought" is both a noun and a
1057                       verb), then the noun's plural is returned by "PL()" and
1058                       "PL_N()" and the verb's plural is returned only by
1059                       "PL_V()".
1060
1061               Such contexts are, fortunately, uncommon (particularly
1062               "same-part-of-speech" examples). An informal study of nearly
1063               600 "difficult plurals" indicates that "PL()" can be relied
1064               upon to "get it right" about 98% of the time (although, of
1065               course, ichthyophilic guitarists or cyber-behaviouralists may
1066               experience higher rates of confusion).
1067
1068               If the choice of a particular "usual inflection" is considered
1069               inappropriate, it can always be reversed with a preliminary
1070               call to the corresponding "def_..." subroutine.
1071

NOTE

1073       I'm not taking any further correspondence on:
1074
1075       "octopi".
1076           Despite the populist pandering of certain New World dictionaries,
1077           the plural is "octopuses" or (for the pedantic classicist)
1078           "octopodes". The suffix "-pus" is Greek, not Latin, so the plural
1079           is "-podes", not "-pi".
1080
1081       "virus".
1082           Had no plural in Latin (possibly because it was a mass noun).  The
1083           only plural is the Anglicized "viruses".
1084

AUTHORS

1086       Damian Conway (damian@conway.org), Matthew Persico (ORD inflection)
1087

BUGS AND IRRITATIONS

1089       The endless inconsistencies of English.
1090
1091       (Please report words for which the correct plural or indefinite article
1092       is not formed, so that the reliability of Lingua::EN::Inflect can be
1093       improved.)
1094
1096        Copyright (c) 1997-2000, Damian Conway. All Rights Reserved.
1097        This module is free software. It may be used, redistributed
1098            and/or modified under the same terms as Perl itself.
1099
1100
1101
1102perl v5.8.8                       2005-05-19            Lingua::EN::Inflect(3)
Impressum