1Lingua::EN::Inflect(3)User Contributed Perl DocumentationLingua::EN::Inflect(3)
2
3
4
6 Lingua::EN::Inflect - Convert singular to plural. Select "a" or "an".
7
9 This document describes version 1.905 of Lingua::EN::Inflect
10
12 use Lingua::EN::Inflect qw ( PL PL_N PL_V PL_ADJ NO NUM
13 PL_eq PL_N_eq PL_V_eq PL_ADJ_eq
14 A AN
15 PART_PRES
16 ORD NUMWORDS
17 WORDLIST
18 inflect classical
19 def_noun def_verb def_adj def_a def_an );
20
21
22 # UNCONDITIONALLY FORM THE PLURAL
23
24 print "The plural of ", $word, " is ", PL($word), "\n";
25
26
27 # CONDITIONALLY FORM THE PLURAL
28
29 print "I saw $cat_count ", PL("cat",$cat_count), "\n";
30
31
32 # FORM PLURALS FOR SPECIFIC PARTS OF SPEECH
33
34 print PL_N("I",$N1), PL_V("saw",$N1),
35 PL_ADJ("my",$N2), PL_N("saw",$N2), "\n";
36
37
38 # DEAL WITH "0/1/N" -> "no/1/N" TRANSLATION:
39
40 print "There ", PL_V("was",$errors), NO(" error",$errors), "\n";
41
42
43 # USE DEFAULT COUNTS:
44
45 print NUM($N1,""), PL("I"), PL_V(" saw"), NUM($N2), PL_N(" saw");
46 print "There ", NUM($errors,''), PL_V("was"), NO(" error"), "\n";
47
48
49 # COMPARE TWO WORDS "NUMBER-INSENSITIVELY":
50
51 print "same\n" if PL_eq($word1, $word2);
52 print "same noun\n" if PL_N_eq($word1, $word2);
53 print "same verb\n" if PL_V_eq($word1, $word2);
54 print "same adj.\n" if PL_ADJ_eq($word1, $word2);
55
56
57 # ADD CORRECT "a" OR "an" FOR A GIVEN WORD:
58
59 print "Did you want ", A($thing), " or ", AN($idea), "\n";
60
61
62 # CONVERT NUMERALS INTO ORDINALS (i.e. 1->1st, 2->2nd, 3->3rd, etc.)
63
64 print "It was ", ORD($position), " from the left\n";
65
66 # CONVERT NUMERALS TO WORDS (i.e. 1->"one", 101->"one hundred and one", etc.)
67 # IN A SCALAR CONTEXT: GET BACK A SINGLE STRING...
68
69 $words = NUMWORDS(1234); # "one thousand, two hundred and thirty-four"
70 $words = NUMWORDS(ORD(1234)); # "one thousand, two hundred and thirty-fourth"
71
72
73 # IN A LIST CONTEXT: GET BACK A LIST OF STRINGS, ONE FOR EACH "CHUNK"...
74
75 @words = NUMWORDS(1234); # ("one thousand","two hundred and thirty-four")
76
77
78 # OPTIONAL PARAMETERS CHANGE TRANSLATION:
79
80 $words = NUMWORDS(12345, group=>1);
81 # "one, two, three, four, five"
82
83 $words = NUMWORDS(12345, group=>2);
84 # "twelve, thirty-four, five"
85
86 $words = NUMWORDS(12345, group=>3);
87 # "one twenty-three, forty-five"
88
89 $words = NUMWORDS(1234, 'and'=>'');
90 # "one thousand, two hundred thirty-four"
91
92 $words = NUMWORDS(1234, 'and'=>', plus');
93 # "one thousand, two hundred, plus thirty-four"
94
95 $words = NUMWORDS(555_1202, group=>1, zero=>'oh');
96 # "five, five, five, one, two, oh, two"
97
98 $words = NUMWORDS(555_1202, group=>1, one=>'unity');
99 # "five, five, five, unity, two, zero, two"
100
101 $words = NUMWORDS(123.456, group=>1, decimal=>'mark');
102 # "one, two, three, mark, four, five, six"
103
104 # LITERAL STYLE ONLY NAMES NUMBERS LESS THAN A CERTAIN THRESHOLD...
105
106 $words = NUMWORDS( 9, threshold=>10); # "nine"
107 $words = NUMWORDS( 10, threshold=>10); # "ten"
108 $words = NUMWORDS( 11, threshold=>10); # "11"
109 $words = NUMWORDS(1000, threshold=>10); # "1,000"
110
111 # JOIN WORDS INTO A LIST:
112
113 $list = WORDLIST("apple", "banana", "carrot");
114 # "apple, banana, and carrot"
115
116 $list = WORDLIST("apple", "banana");
117 # "apple and banana"
118
119 $list = WORDLIST("apple", "banana", "carrot", {final_sep=>""});
120 # "apple, banana and carrot"
121
122
123 # REQUIRE "CLASSICAL" PLURALS (EG: "focus"->"foci", "cherub"->"cherubim")
124
125 classical; # USE ALL CLASSICAL PLURALS
126
127 classical 1; # USE ALL CLASSICAL PLURALS
128 classical 0; # USE ALL MODERN PLURALS (DEFAULT)
129
130 classical 'zero'; # "no error" INSTEAD OF "no errors"
131 classical zero=>1; # "no error" INSTEAD OF "no errors"
132 classical zero=>0; # "no errors" INSTEAD OF "no error"
133
134 classical 'herd'; # "2 buffalo" INSTEAD OF "2 buffalos"
135 classical herd=>1; # "2 buffalo" INSTEAD OF "2 buffalos"
136 classical herd=>0; # "2 buffalos" INSTEAD OF "2 buffalo"
137
138 classical 'persons'; # "2 chairpersons" INSTEAD OF "2 chairpeople"
139 classical persons=>1; # "2 chairpersons" INSTEAD OF "2 chairpeople"
140 classical persons=>0; # "2 chairpeople" INSTEAD OF "2 chairpersons"
141
142 classical 'ancient'; # "2 formulae" INSTEAD OF "2 formulas"
143 classical ancient=>1; # "2 formulae" INSTEAD OF "2 formulas"
144 classical ancient=>0; # "2 formulas" INSTEAD OF "2 formulae"
145
146
147
148 # INTERPOLATE "PL()", "PL_N()", "PL_V()", "PL_ADJ()", "A()", "AN()"
149 # "NUM()" AND "ORD()" WITHIN STRINGS:
150
151 print inflect("The plural of $word is PL($word)\n");
152 print inflect("I saw $cat_count PL(cat,$cat_count)\n");
153 print inflect("PL(I,$N1) PL_V(saw,$N1) PL(a,$N2) PL_N(saw,$N2)\n");
154 print inflect("NUM($N1,)PL(I) PL_V(saw) NUM($N2,)PL(a) PL_N(saw)\n");
155 print inflect("I saw NUM($cat_count) PL(cat)\n");
156 print inflect("There PL_V(was,$errors) NO(error,$errors)\n");
157 print inflect("There NUM($errors,)PL_V(was) NO(error)\n");
158 print inflect("Did you want A($thing) or AN($idea)\n");
159 print inflect("It was ORD($position) from the left\n");
160
161
162 # ADD USER-DEFINED INFLECTIONS (OVERRIDING INBUILT RULES):
163
164 def_noun "VAX" => "VAXen"; # SINGULAR => PLURAL
165
166 def_verb "will" => "shall", # 1ST PERSON SINGULAR => PLURAL
167 "will" => "will", # 2ND PERSON SINGULAR => PLURAL
168 "will" => "will"; # 3RD PERSON SINGULAR => PLURAL
169
170 def_adj "hir" => "their"; # SINGULAR => PLURAL
171
172 def_a "h"; # "AY HALWAYS SEZ 'HAITCH'!"
173
174 def_an "horrendous.*"; # "AN HORRENDOUS AFFECTATION"
175
177 [Note: This module is strictly in maintenance mode now. Take a look at
178 the newer Lingua::EN::Inflexion module, which offers a cleaner and more
179 convenient interface, has many more features (including
180 plural->singular inflexions), and is also much better tested. If you
181 have existing code that relies on Lingua::EN::Inflect, see the section
182 of the documentation entitled "CONVERTING FROM LINGUA::EN::INFLECT". ]
183
184 The exportable subroutines of Lingua::EN::Inflect provide plural
185 inflections, "a"/"an" selection for English words, and manipulation of
186 numbers as words.
187
188 Plural forms of all nouns, most verbs, and some adjectives are
189 provided. Where appropriate, "classical" variants (for example:
190 "brother" -> "brethren", "dogma" -> "dogmata", etc.) are also provided.
191
192 Pronunciation-based "a"/"an" selection is provided for all English
193 words, and most initialisms.
194
195 It is also possible to inflect numerals (1,2,3) to ordinals (1st, 2nd,
196 3rd) and to English words ("one", "two", "three").
197
198 In generating these inflections, Lingua::EN::Inflect follows the Oxford
199 English Dictionary and the guidelines in Fowler's Modern English Usage,
200 preferring the former where the two disagree.
201
202 The module is built around standard British spelling, but is designed
203 to cope with common American variants as well. Slang, jargon, and other
204 English dialects are not explicitly catered for.
205
206 Where two or more inflected forms exist for a single word (typically a
207 "classical" form and a "modern" form), Lingua::EN::Inflect prefers the
208 more common form (typically the "modern" one), unless "classical"
209 processing has been specified (see "MODERN VS CLASSICAL INFLECTIONS").
210
212 Inflecting Plurals
213 All of the "PL_..." plural inflection subroutines take the word to be
214 inflected as their first argument and return the corresponding
215 inflection. Note that all such subroutines expect the singular form of
216 the word. The results of passing a plural form are undefined (and
217 unlikely to be correct).
218
219 The "PL_..." subroutines also take an optional second argument, which
220 indicates the grammatical "number" of the word (or of another word with
221 which the word being inflected must agree). If the "number" argument is
222 supplied and is not 1 (or "one" or "a", or some other adjective that
223 implies the singular), the plural form of the word is returned. If the
224 "number" argument does indicate singularity, the (uninflected) word
225 itself is returned. If the number argument is omitted, the plural form
226 is returned unconditionally.
227
228 The various subroutines are:
229
230 PL_N($;$)
231 The exportable subroutine PL_N() takes a singular English noun
232 or pronoun and returns its plural. Pronouns in the nominative
233 ("I" -> "we") and accusative ("me" -> "us") cases are handled,
234 as are possessive pronouns ("mine" -> "ours").
235
236 PL_V($;$)
237 The exportable subroutine PL_V() takes the singular form of a
238 conjugated verb (that is, one which is already in the correct
239 "person" and "mood") and returns the corresponding plural
240 conjugation.
241
242 PL_ADJ($;$)
243 The exportable subroutine PL_ADJ() takes the singular form of
244 certain types of adjectives and returns the corresponding
245 plural form. Adjectives that are correctly handled include:
246 "numerical" adjectives ("a" -> "some"), demonstrative
247 adjectives ("this" -> "these", "that" -> "those"), and
248 possessives ("my" -> "our", "cat's" -> "cats'", "child's" ->
249 "childrens'", etc.)
250
251 PL($;$) The exportable subroutine PL() takes a singular English noun,
252 pronoun, verb, or adjective and returns its plural form. Where
253 a word has more than one inflection depending on its part of
254 speech (for example, the noun "thought" inflects to "thoughts",
255 the verb "thought" to "thought"), the (singular) noun sense is
256 preferred to the (singular) verb sense.
257
258 Hence PL("knife") will return "knives" ("knife" having been
259 treated as a singular noun), whereas PL("knifes") will return
260 "knife" ("knifes" having been treated as a 3rd person singular
261 verb).
262
263 The inherent ambiguity of such cases suggests that, where the
264 part of speech is known, "PL_N", "PL_V", and "PL_ADJ" should be
265 used in preference to "PL".
266
267 Note that all these subroutines ignore any whitespace surrounding the
268 word being inflected, but preserve that whitespace when the result is
269 returned. For example, PL(" cat ") returns " cats ".
270
271 Numbered plurals
272 The "PL_..." subroutines return only the inflected word, not the count
273 that was used to inflect it. Thus, in order to produce "I saw 3 ducks",
274 it is necessary to use:
275
276 print "I saw $N ", PL_N($animal,$N), "\n";
277
278 Since the usual purpose of producing a plural is to make it agree with
279 a preceding count, Lingua::EN::Inflect provides an exportable
280 subroutine (NO($;$)) which, given a word and a(n optional) count,
281 returns the count followed by the correctly inflected word. Hence the
282 previous example can be rewritten:
283
284 print "I saw ", NO($animal,$N), "\n";
285
286 In addition, if the count is zero (or some other term which implies
287 zero, such as "zero", "nil", etc.) the count is replaced by the word
288 "no". Hence, if $N had the value zero, the previous example would print
289 the somewhat more elegant:
290
291 I saw no animals
292
293 rather than:
294
295 I saw 0 animals
296
297 Note that the name of the subroutine is a pun: the subroutine returns
298 either a number (a No.) or a "no", in front of the inflected word.
299
300 Wordy and comma'd plurals
301
302 The NO() subroutine takes an optional third argument: a hash of named
303 options that configure its behaviour.
304
305 The 'words_below' option informs NO() what other numbers (i.e. apart
306 from zero) it should convert to words. For example:
307
308 for my $count (0..12) {
309 print NO('cat', $count, {words_below => 10}), "\n";
310 }
311
312 would print:
313
314 no cats
315 one cat
316 two cats
317 three cats
318 four cats
319 five cats
320 six cats
321 seven cats
322 eight cats
323 nine cats
324 10 cats
325 11 cats
326 12 cats
327
328 The 'comma' and 'comma_every' options determine whether or not the
329 numbers produced by NO() have commas in them. That is:
330
331 2001 space odysseys
332
333 versus:
334
335 2,001 space odysseys
336
337 Normally, numbers are produced without commas, but if 'comma' or
338 'comma_every' is specified, then commas are added as requested.
339
340 The 'comma' option specifies which character to use as a comma. It
341 defaults to ',', but may be set to anything convenient:
342
343 print NO('Euro', $amount, {comma=>'.'});
344
345 # prints: 1.000.000 Euros
346
347 The 'comma_every' option specifies how many characters between commas.
348 It defaults to 3, but may be set to any positive number:
349
350 print NO('Euro', $amount, {comma_every=>4});
351
352 # prints: 100,0000 Euros
353
354 Note that you can set both options at once, if you wish:
355
356 print NO('Euro', $amount, {comma_every=>2, comma=>'_'});
357
358 # prints: 1_00_00_00 Euros
359
360 Reducing the number of counts required
361 In some contexts, the need to supply an explicit count to the various
362 "PL_..." subroutines makes for tiresome repetition. For example:
363
364 print PL_ADJ("This",$errors), PL_N(" error",$errors),
365 PL_V(" was",$errors), " fatal.\n";
366
367 Lingua::EN::Inflect therefore provides an exportable subroutine
368 (NUM($;$)) that may be used to set a persistent "default number" value.
369 If such a value is set, it is subsequently used whenever an optional
370 second "number" argument is omitted. The default value thus set can
371 subsequently be removed by calling NUM() with no arguments. Hence we
372 could rewrite the previous example:
373
374 NUM($errors);
375 print PL_ADJ("This"), PL_N(" error"), PL_V(" was"), " fatal.\n";
376 NUM();
377
378 Normally, NUM() returns its first argument, so that it may also be
379 "inlined" in contexts like:
380
381 print NUM($errors), PL_N(" error"), PL_V(" was"), " detected.\n";
382 print PL_ADJ("This"), PL_N(" error"), PL_V(" was"), " fatal.\n"
383 if $severity > 1;
384
385 However, in certain contexts (see "INTERPOLATING INFLECTIONS IN
386 STRINGS") it is preferable that NUM() return an empty string. Hence
387 NUM() provides an optional second argument. If that argument is
388 supplied (that is, if it is defined) and evaluates to false, "NUM"
389 returns an empty string instead of its first argument. For example:
390
391 print NUM($errors,0), NO("error"), PL_V(" was"), " detected.\n";
392 print PL_ADJ("This"), PL_N(" error"), PL_V(" was"), " fatal.\n"
393 if $severity > 1;
394
395 Number-insensitive equality
396 Lingua::EN::Inflect also provides a solution to the problem of
397 comparing words of differing plurality through the exportable
398 subroutines PL_eq($$), PL_N_eq($$), PL_V_eq($$), and PL_ADJ_eq($$).
399 Each of these subroutines takes two strings, and compares them using
400 the corresponding plural-inflection subroutine (PL(), PL_N(), PL_V(),
401 and PL_ADJ() respectively).
402
403 The comparison returns true if:
404
405 • the strings are "eq"-equal, or
406
407 • one string is "eq"-equal to a plural form of the other, or
408
409 • the strings are two different plural forms of the one word.
410
411 Hence all of the following return true:
412
413 PL_eq("index","index") # RETURNS "eq"
414 PL_eq("index","indexes") # RETURNS "s:p"
415 PL_eq("index","indices") # RETURNS "s:p"
416 PL_eq("indexes","index") # RETURNS "p:s"
417 PL_eq("indices","index") # RETURNS "p:s"
418 PL_eq("indices","indexes") # RETURNS "p:p"
419 PL_eq("indexes","indices") # RETURNS "p:p"
420 PL_eq("indices","indices") # RETURNS "eq"
421
422 As indicated by the comments in the previous example, the actual value
423 returned by the various "PL_eq" subroutines encodes which of the three
424 equality rules succeeded: "eq" is returned if the strings were
425 identical, "s:p" if the strings were singular and plural respectively,
426 "p:s" for plural and singular, and "p:p" for two distinct plurals.
427 Inequality is indicated by returning an empty string.
428
429 It should be noted that two distinct singular words which happen to
430 take the same plural form are not considered equal, nor are cases where
431 one (singular) word's plural is the other (plural) word's singular.
432 Hence all of the following return false:
433
434 PL_eq("base","basis") # ALTHOUGH BOTH -> "bases"
435 PL_eq("syrinx","syringe") # ALTHOUGH BOTH -> "syringes"
436 PL_eq("she","he") # ALTHOUGH BOTH -> "they"
437
438 PL_eq("opus","operas") # ALTHOUGH "opus" -> "opera" -> "operas"
439 PL_eq("taxi","taxes") # ALTHOUGH "taxi" -> "taxis" -> "taxes"
440
441 Note too that, although the comparison is "number-insensitive" it is
442 not case-insensitive (that is, "PL_eq("time","Times")" returns false). To
443 obtain both number and case insensitivity, prefix both arguments with
444 "lc" (that is, "PL_eq(lc "time", lc "Times")" returns true).
445
447 Present participles
448 "Lingua::EN::Inflect" also provides the "PART_PRES" subroutine, which
449 can take a 3rd person singular verb and correctly inflect it to its
450 present participle:
451
452 PART_PRES("runs") # "running"
453 PART_PRES("loves") # "loving"
454 PART_PRES("eats") # "eating"
455 PART_PRES("bats") # "batting"
456 PART_PRES("spies") # "spying"
457
459 Selecting indefinite articles
460 Lingua::EN::Inflect provides two exportable subroutines (A($;$) and
461 AN($;$)) which will correctly prepend the appropriate indefinite
462 article to a word, depending on its pronunciation. For example:
463
464 A("cat") # -> "a cat"
465 AN("cat") # -> "a cat"
466 A("euphemism") # -> "a euphemism"
467 A("Euler number") # -> "an Euler number"
468 A("hour") # -> "an hour"
469 A("houri") # -> "a houri"
470
471 The two subroutines are identical in function and may be used
472 interchangeably. The only reason that two versions are provided is to
473 enhance the readability of code such as:
474
475 print "That is ", AN($errortype), " error\n";
476 print "That is ", A($fataltype), " fatal error\n";
477
478 Note that in both cases the actual article provided depends only on the
479 pronunciation of the first argument, not on the name of the subroutine.
480
481 A() and AN() will ignore any indefinite article that already exists at
482 the start of the string. Thus:
483
484 @half_arked = (
485 "a elephant",
486 "a giraffe",
487 "an ewe",
488 "a orangutan",
489 );
490
491 print A($_), "\n" for @half_arked;
492
493 # prints:
494 # an elephant
495 # a giraffe
496 # a ewe
497 # an orangutan
498
499 A() and AN() both take an optional second argument. As with the
500 "PL_..." subroutines, this second argument is a "number" specifier. If
501 its value is 1 (or some other value implying singularity), A() and AN()
502 insert "a" or "an" as appropriate. If the number specifier implies
503 plurality, A() and AN() insert the actual second argument instead.
504 For example:
505
506 A("cat",1) # -> "a cat"
507 A("cat",2) # -> "2 cat"
508 A("cat","one") # -> "one cat"
509 A("cat","no") # -> "no cat"
510
511 Note that, as implied by the previous examples, A() and AN() both
512 assume that their job is merely to provide the correct qualifier for a
513 word (that is: "a", "an", or the specified count). In other words,
514 they assume that the word they are given has already been correctly
515 inflected for plurality. Hence, if $N has the value 2, then:
516
517 print A("cat",$N);
518
519 prints "2 cat", instead of "2 cats". The correct approach is to use:
520
521 print A(PL("cat",$N),$N);
522
523 or, better still:
524
525 print NO("cat",$N);
526
527 Note too that, like the various "PL_..." subroutines, whenever A() and
528 AN() are called with only one argument they are subject to the effects
529 of any preceding call to NUM(). Hence, another possible solution is:
530
531 NUM($N);
532 print A(PL("cat"));
533
534 Indefinite articles and initialisms
535 "Initialisms" (sometimes inaccurately called "acronyms") are terms
536 which have been formed from the initial letters of words in a phrase
537 (for example, "NATO", "NBL", "S.O.S.", "SCUBA", etc.)
538
539 Such terms present a particular challenge when selecting between "a"
540 and "an", since they are sometimes pronounced as if they were a single
541 word ("nay-tow", "sku-ba") and sometimes as a series of letter names
542 ("en-eff-ell", "ess-oh-ess").
543
544 A() and AN() cope with this dichotomy using a series of inbuilt rules,
545 which may be summarized as:
546
547 1. If the word starts with a single letter, followed by a period
548 or dash (for example, "R.I.P.", "C.O.D.", "e-mail", "X-ray",
549 "T-square"), then choose the appropriate article for the sound
550 of the first letter ("an R.I.P.", "a C.O.D.", "an e-mail", "an
551 X-ray", "a T-square").
552
553 2. If the first two letters of the word are capitals, consonants,
554 and do not appear at the start of any known English word, (for
555 example, "LCD", "XML", "YWCA"), then once again choose "a" or
556 "an" depending on the sound of the first letter ("an LCD", "an
557 XML", "a YWCA").
558
559 3. Otherwise, assume the string is a capitalized word or a
560 pronounceable initialism (for example, "LED", "OPEC", "FAQ",
561 "UNESCO"), and therefore takes "a" or "an" according to the
562 (apparent) pronunciation of the entire word ("a LED", "an
563 OPEC", "a FAQ", "a UNESCO").
564
565 Note that rules 1 and 3 together imply that the presence or absence of
566 punctuation may change the selection of indefinite article for a
567 particular initialism (for example, "a FAQ" but "an F.A.Q.").
568
569 Indefinite articles and "soft H's"
570 Words beginning in the letter 'H' present another type of difficulty
571 when selecting a suitable indefinite article. In a few such words (for
572 example, "hour", "honour", "heir") the 'H' is not voiced at all, and so
573 such words inflect with "an". The remaining cases ("voiced H's") may be
574 divided into two categories: "hard H's" (such as "hangman",
575 "holograph", "hat", etc.) and "soft H's" (such as "hysterical",
576 "horrendous", "holy", etc.)
577
578 Hard H's always take "a" as their indefinite article, and soft H's
579 normally do so as well. But some English speakers prefer "an" for soft
580 H's (although the practice is now generally considered an affectation,
581 rather than a legitimate grammatical alternative).
582
583 At present, the A() and AN() subroutines ignore soft H's and use "a"
584 for any voiced 'H'. The author would, however, welcome feedback on this
585 decision (envisaging a possible future "soft H" mode).
586
588 Occasionally it is useful to present an integer value as an ordinal
589 rather than as a numeral. For example:
590
591 Enter password (1st attempt): ********
592 Enter password (2nd attempt): *********
593 Enter password (3rd attempt): *********
594 No 4th attempt. Access denied.
595
596 To this end, Lingua::EN::Inflect provides the ORD() subroutine.
597 ORD() takes a single argument and forms its ordinal equivalent. If
598 the argument isn't a numerical integer, it just adds "-th".
599
601 The exportable subroutine "NUMWORDS" takes a number (cardinal or
602 ordinal) and returns an English representation of that number. In a
603 scalar context a string is returned. Hence:
604
605 use Lingua::EN::Inflect qw( NUMWORDS );
606
607 $words = NUMWORDS(1234567);
608
609 puts the string:
610
611 "one million, two hundred and thirty-four thousand, five hundred and sixty-seven"
612
613 into $words.
614
615 In a list context each comma-separated chunk is returned as a separate
616 element. Hence:
617
618 @words = NUMWORDS(1234567);
619
620 puts the list:
621
622 ("one million",
623 "two hundred and thirty-four thousand",
624 "five hundred and sixty-seven")
625
626 into @words.
627
628 Note that this also means that:
629
630 print NUMWORDS(1234567);
631
632 will (misprint) print:
633
634 one milliontwo hundred and thirty-four thousandfive hundred and sixty-seven
635
636 To get readable output, make sure the call is in scalar context:
637
638 print scalar NUMWORDS(1234567);
639
640 Non-digits (apart from an optional leading plus or minus sign, any
641 decimal points, and ordinal suffixes -- see below) are silently
642 ignored, so the following all produce identical results:
643
644 NUMWORDS(5551202);
645 NUMWORDS(5_551_202);
646 NUMWORDS("5,551,202");
647 NUMWORDS("555-1202");
648
649 That last case is a little awkward since it's almost certainly a phone
650 number, and "five million, five hundred and fifty-one thousand, two
651 hundred and two" probably isn't what's wanted.
652
653 To overcome this, NUMWORDS() takes an optional named argument, 'group',
654 which changes how numbers are translated. The argument must be a
655 positive integer less than four, which indicates how the digits of the
656 number are to be grouped. If the argument is 1, then each digit is
657 translated separately. If the argument is 2, pairs of digits (starting
658 from the left) are grouped together. If the argument is 3, triples of
659 numbers (again, from the left) are grouped. Hence:
660
661 NUMWORDS("555-1202", group=>1)
662
663 returns "five, five, five, one, two, zero, two", whilst:
664
665 NUMWORDS("555-1202", group=>2)
666
667 returns "fifty-five, fifty-one, twenty, two", and:
668
669 NUMWORDS("555-1202", group=>3)
670
671 returns "five fifty-five, one twenty, two".
672
673 Phone numbers are often written in words as
674 "five..five..five..one..two..zero..two", which is also easy to achieve:
675
676 join '..', NUMWORDS("555-1202", group=>1)
677
678 "NUMWORDS" also handles decimal fractions. Hence:
679
680 NUMWORDS("1.2345")
681
682 returns "one point two three four five" in a scalar context and
683 "("one","point","two","three","four","five")" in an array context.
684 Exponent form ("1.234e56") is not yet handled.
685
686 Multiple decimal points are only translated in one of the "grouping"
687 modes. Hence:
688
689 NUMWORDS("101.202.303")
690
691 returns "one hundred and one point two zero two three zero three",
692 whereas:
693
694 NUMWORDS("101.202.303", group=>1)
695
696 returns "one zero one point two zero two point three zero three".
697
698 The digit '0' is unusual in that it may be translated to English as
699 "zero", "oh", or "nought". To cater for this diversity, "NUMWORDS" may
700 be passed a named argument, 'zero', which may be set to the desired
701 translation of '0'. For example:
702
703 print join "..", NUMWORDS("555-1202", group=>1, zero=>'oh')
704
705 prints "five..five..five..one..two..oh..two". By default, zero is
706 rendered as "zero".
707
708 Likewise, the digit '1' may be rendered as "one" or "a/an" (or very
709 occasionally other variants), depending on the context. So there is a
710 'one' argument as well:
711
712 print NUMWORDS($_, one=>'a solitary', zero=>'no more'),
713 PL(" bottle of beer on the wall\n", $_)
714 for (3,2,1,0);
715
716 # prints:
717 # three bottles of beer on the wall
718 # two bottles of beer on the wall
719 # a solitary bottle of beer on the wall
720 # no more bottles of beer on the wall
721
722 Care is needed if the word "a/an" is to be used as a 'one' value.
723 Unless the next word is known in advance, it's almost always necessary
724 to use the "A" function as well:
725
726 print A( NUMWORDS(1, one=>'a') . " $_\n")
727 for qw(cat aardvark ewe hour);
728
729 # prints:
730 # a cat
731 # an aardvark
732 # a ewe
733 # an hour
734
735 Another major regional variation in number translation is the use of
736 "and" in certain contexts. The named argument 'and' allows the
737 programmer to specify how "and" should be handled. Hence:
738
739 print scalar NUMWORDS("765", 'and'=>'')
740
741 prints "seven hundred sixty-five", instead of "seven hundred and sixty-
742 five". By default, the "and" is included.
743
744 The translation of the decimal point is also subject to variation (with
745 "point", "dot", and "decimal" being the favorites). The named argument
746 'decimal' allows the programmer to specify how the decimal point should be
747 rendered. Hence:
748
749 print scalar NUMWORDS("666.124.64.101", group=>3, decimal=>'dot')
750
751 prints "six sixty-six, dot, one twenty-four, dot, sixty-four, dot, one
752 zero one". By default, the decimal point is rendered as "point".
753
754 "NUMWORDS" also handles the ordinal forms of numbers. So:
755
756 print scalar NUMWORDS('1st');
757 print scalar NUMWORDS('3rd');
758 print scalar NUMWORDS('202nd');
759 print scalar NUMWORDS('1000000th');
760
761 print:
762
763 first
764 third
765 two hundred and second
766 one millionth
767
768 Two common idioms in this regard are:
769
770 print scalar NUMWORDS(ORD($number));
771
772 and:
773
774 print scalar ORD(NUMWORDS($number));
775
776 These are identical in effect, except when $number contains a decimal:
777
778 $number = 99.09;
779 print scalar NUMWORDS(ORD($number)); # ninety-ninth point zero nine
780 print scalar ORD(NUMWORDS($number)); # ninety-nine point zero ninth
781
782 Use whichever you feel is most appropriate.
783
785 When creating a list of words, commas are used between adjacent items,
786 except if the items contain commas, in which case semicolons are used.
787 But if there are fewer than three items, the commas/semicolons are omitted
788 entirely. The final item also has a conjunction (usually "and" or "or")
789 before it. And although it's technically incorrect (and sometimes
790 misleading), some people prefer to omit the comma before that final
791 conjunction, even when there are more than two items.
792
793 That's complicated enough to warrant its own subroutine: WORDLIST().
794 This subroutine expects a list of words, possibly with one or more hash
795 references containing options. It returns a string that joins the list
796 together in the normal English usage. For example:
797
798 print "You chose ", WORDLIST(@selected_items), "\n";
799 # You chose barley soup, roast beef, and Yorkshire pudding
800
801 print "You chose ", WORDLIST(@selected_items, {final_sep=>""}), "\n";
802 # You chose barley soup, roast beef and Yorkshire pudding
803
804 print "Please choose ", WORDLIST(@side_orders, {conj=>"or"}), "\n";
805 # Please choose salad, vegetables, or ice-cream
806
807 The available options are:
808
809 Option named Specifies Default value
810
811 conj Final conjunction "and"
812 sep Inter-item separator ","
813 final_sep Final separator value of 'sep' option
814
816 By far the commonest use of the inflection subroutines is to produce
817 message strings for various purposes. For example:
818
819 print NUM($errors), PL_N(" error"), PL_V(" was"), " detected.\n";
820 print PL_ADJ("This"), PL_N(" error"), PL_V(" was"), " fatal.\n"
821 if $severity > 1;
822
823 Unfortunately the need to separate each subroutine call detracts
824 significantly from the readability of the resulting code. To ameliorate
825 this problem, Lingua::EN::Inflect provides an exportable string-
826 interpolating subroutine (inflect($)), which recognizes calls to the
827 various inflection subroutines within a string and interpolates them
828 appropriately.
829
830 Using "inflect" the previous example could be rewritten:
831
832 print inflect "NUM($errors) PL_N(error) PL_V(was) detected.\n";
833 print inflect "PL_ADJ(This) PL_N(error) PL_V(was) fatal.\n"
834 if $severity > 1;
835
836 Note that "inflect" also correctly handles calls to the NUM()
837 subroutine (whether interpolated or antecedent). The inflect()
838 subroutine has a related extra feature, in that it automatically
839 cancels any "default number" value before it returns its interpolated
840 string. This means that calls to NUM() which are embedded in an
841 inflect()-interpolated string do not "escape" and interfere with
842 subsequent inflections.
843
845 Certain words, mainly of Latin or Ancient Greek origin, can form
846 plurals either using the standard English "-s" suffix, or with their
847 original Latin or Greek inflections. For example:
848
849 PL("stigma") # -> "stigmas" or "stigmata"
850 PL("torus") # -> "toruses" or "tori"
851 PL("index") # -> "indexes" or "indices"
852 PL("millennium") # -> "millenniums" or "millennia"
853 PL("ganglion") # -> "ganglions" or "ganglia"
854 PL("octopus") # -> "octopuses" or "octopodes"
855
856 Lingua::EN::Inflect caters to such words by providing an "alternate
857 state" of inflection known as "classical mode". By default, words are
858 inflected using their contemporary English plurals, but if classical
859 mode is invoked, the more traditional plural forms are returned
860 instead.
861
862 The exportable subroutine classical() controls this feature. If
863 classical() is called with no arguments, it unconditionally invokes
864 classical mode. If it is called with a single argument, it turns all
865 classical inflects on or off (depending on whether the argument is true
866 or false). If called with two or more arguments, those arguments
867 specify which aspects of classical behaviour are to be used.
868
869 Thus:
870
871 classical; # SWITCH ON CLASSICAL MODE
872 print PL("formula"); # -> "formulae"
873
874 classical 0; # SWITCH OFF CLASSICAL MODE
875 print PL("formula"); # -> "formulas"
876
877 classical $cmode; # CLASSICAL MODE IFF $cmode
878 print PL("formula"); # -> "formulae" (IF $cmode)
879 # -> "formulas" (OTHERWISE)
880
881 classical herd=>1; # SWITCH ON CLASSICAL MODE FOR "HERD" NOUNS
882 print PL("wildebeest"); # -> "wildebeest"
883
884 classical names=>1; # SWITCH ON CLASSICAL MODE FOR NAMES
885 print PL("sally"); # -> "sallies"
886 print PL("Sally"); # -> "Sallys"
887
888 Note however that classical() has no effect on the inflection of words
889 which are now fully assimilated. Hence:
890
891 PL("forum") # ALWAYS -> "forums"
892 PL("criterion") # ALWAYS -> "criteria"
893
894 LEI assumes that a capitalized word is a person's name. So it forms the
895 plural according to the rules for names (which is that you don't
896 inflect, you just add -s or -es). You can choose to turn that behaviour
897 off (it's on by default, even when the module isn't in classical
898 mode) by calling classical(names=>0).
899
901 Adding plurals at run-time
902 Lingua::EN::Inflect provides five exportable subroutines which allow
903 the programmer to override the module's behaviour for specific cases:
904
905 def_noun($$)
906 The "def_noun" subroutine takes a pair of string arguments: the
907 singular and plural forms of the noun being specified. The
908 singular form specifies a pattern to be interpolated (as
909 "m/^(?:$first_arg)$/i"). Any noun matching this pattern is
910 then replaced by the string in the second argument. The second
911 argument specifies a string which is interpolated after the
912 match succeeds, and is then used as the plural form. For
913 example:
914
915 def_noun 'cow' => 'kine';
916 def_noun '(.+i)o' => '$1i';
917 def_noun 'spam(mer)?' => '\\$\\%\\@#\\$\\@#!!';
918
919 Note that both arguments should usually be specified in single
920 quotes, so that they are not interpolated when they are
921 specified, but later (when words are compared to them). As
922 indicated by the last example, care also needs to be taken with
923 certain characters in the second argument, to ensure that they
924 are not unintentionally interpolated during comparison.
925
926 The second argument string may also specify a second variant of
927 the plural form, to be used when "classical" plurals have been
928 requested. The beginning of the second variant is marked by a
929 '|' character:
930
931 def_noun 'cow' => 'cows|kine';
932 def_noun '(.+i)o' => '$1os|$1i';
933 def_noun 'spam(mer)?' => '\\$\\%\\@#\\$\\@#!!|varmints';
934
935 If no classical variant is given, the specified plural form is
936 used in both normal and "classical" modes.
937
938 If the second argument is "undef" instead of a string, then the
939 current user definition for the first argument is removed, and
940 the standard plural inflection(s) restored.
941
942 Note that in all cases, later plural definitions for a
943 particular singular form replace earlier definitions of the
944 same form. For example:
945
946 # FIRST, HIDE THE MODERN FORM....
947 def_noun 'aviatrix' => 'aviatrices';
948
949 # LATER, HIDE THE CLASSICAL FORM...
950 def_noun 'aviatrix' => 'aviatrixes';
951
952 # FINALLY, RESTORE THE DEFAULT BEHAVIOUR...
953 def_noun 'aviatrix' => undef;
954
955 Special care is also required when defining general patterns
956 and associated specific exceptions: put the more specific cases
957 after the general pattern. For example:
958
959 def_noun '(.+)us' => '$1i'; # EVERY "-us" TO "-i"
960 def_noun 'bus' => 'buses'; # EXCEPT FOR "bus"
961
962 This "try-most-recently-defined-first" approach to matching
963 user-defined words is also used by "def_verb", "def_adj", "def_a" and
964 "def_an".
965
966 def_verb($$$$$$)
967 The "def_verb" subroutine takes three pairs of string arguments
968 (that is, six arguments in total), specifying the singular and
969 plural forms of the three "persons" of verb. As with
970 "def_noun", the singular forms are specifications of run-time-
971 interpolated patterns, whilst the plural forms are
972 specifications of (up to two) run-time-interpolated strings:
973
974 def_verb 'am' => 'are',
975 'are' => 'are|art',
976 'is' => 'are';
977
978 def_verb 'have' => 'have',
979 'have' => 'have',
980 'ha(s|th)' => 'have';
981
982 Note that as with "def_noun", modern/classical variants of
983 plurals may be separately specified, subsequent definitions
984 replace previous ones, and "undef"'ed plural forms revert to
985 the standard behaviour.
986
987 def_adj($$)
988 The "def_adj" subroutine takes a pair of string arguments,
989 which specify the singular and plural forms of the adjective
990 being defined. As with "def_noun" and "def_verb", the singular
991 forms are specifications of run-time-interpolated patterns,
992 whilst the plural forms are specifications of (up to two) run-
993 time-interpolated strings:
994
995 def_adj 'this' => 'these';
996 def_adj 'red' => 'red|gules';
997
998 As previously, modern/classical variants of plurals may be
999 separately specified, subsequent definitions replace previous
1000 ones, and "undef"'ed plural forms revert to the standard
1001 behaviour.
1002
1003 def_a($) and def_an($)
1004 The "def_a" and "def_an" subroutines each take a single
1005 argument, which specifies a pattern. If a word passed to A() or
1006 AN() matches this pattern, it will be prefixed
1007 (unconditionally) with the corresponding indefinite article.
1008 For example:
1009
1010 def_a 'error';
1011 def_a 'in.+';
1012
1013 def_an 'mistake';
1014 def_an 'error';
1015
1016 As with the other "def_..." subroutines, such redefinitions are
1017 sequential in effect so that, after the above example, "error"
1018 will be inflected with "an".
1019
1020 The $HOME/.inflectrc file
1021 When it is imported, Lingua::EN::Inflect executes (as Perl code) the
1022 contents of any file named .inflectrc which it finds in the
1023 directory where Lingua/EN/Inflect.pm is installed, or in the current
1024 home directory ($ENV{HOME}), or in both. Note that the code is
1025 executed within the Lingua::EN::Inflect namespace.
1026
1027 Hence the user or the local Perl guru can make appropriate calls to
1028 "def_noun", "def_verb", etc. in one of these .inflectrc files, to
1029 permanently and universally modify the behaviour of the module. For
1030 example
1031
1032 > cat /usr/local/lib/perl5/Text/Inflect/.inflectrc
1033
1034 def_noun "UNIX" => "UN*X|UNICES";
1035
1036 def_verb "teco" => "teco", # LITERALLY: "to edit with TECO"
1037 "teco" => "teco",
1038 "tecos" => "teco";
1039
1040 def_a "Euler.*"; # "Yewler" TURNS IN HIS GRAVE
1041
1042 Note that calls to the "def_..." subroutines from within a program will
1043 take precedence over the contents of the home directory .inflectrc
1044 file, which in turn takes precedence over the system-wide .inflectrc
1045 file.
1046
1048 On loading, if the Perl code in a .inflectrc file is invalid
1049 (syntactically or otherwise), an appropriate fatal error is issued. A
1050 common problem is not ending the file with something that evaluates to
1051 true (as the five "def_..." subroutines do).
1052
1053 Using the five "def_..." subroutines directly in a program may also
1054 result in fatal diagnostics, if a (singular) pattern or an interpolated
1055 (plural) string is somehow invalid.
1056
1057 Specific diagnostics related to user-defined inflections are:
1058
1059 "Bad user-defined singular pattern:\n\t %s"
1060 The singular form of a user-defined noun or verb (as defined by
1061 a call to "def_noun", "def_verb", "def_adj", "def_a" or
1062 "def_an") is not a valid Perl regular expression. The actual
1063 Perl error message is also given.
1064
1065 "Bad user-defined plural string: '%s'"
1066 The plural form(s) of a user-defined noun or verb (as defined
1067 by a call to "def_noun", "def_verb" or "def_adj") is not a
1068 valid Perl interpolated string (usually because it interpolates
1069 some undefined variable).
1070
1071 "Bad .inflectrc file (%s):\n %s"
1072 Some other problem occurred in loading the named local or
1073 global .inflectrc file. The Perl error message (including the
1074 line number) is also given.
1075
1076 There are no diagnosable run-time error conditions for the actual
1077 inflection subroutines, except "NUMWORDS" and hence no run-time
1078 diagnostics. If the inflection subroutines are unable to form a plural
1079 via a user-definition or an inbuilt rule, they just "guess" the
1080 commonest English inflection: adding "-s" for nouns, removing "-s" for
1081 verbs, and no inflection for adjectives.
1082
1083 Lingua::EN::Inflect::NUMWORDS() can "die" with the following messages:
1084
1085 "Bad grouping option: %s"
1086 The optional argument to NUMWORDS() wasn't 1, 2 or 3.
1087
1088 "Number out of range"
1089 NUMWORDS() was passed a number larger than the number
1090 represented by 3006 consecutive nines. The words representing
1091 that number are 63,681 characters long, including commas and
1092 spaces. If you're interested in the actual value, see
1093 t/numwords.t.
1094
1095 The reference for the names is
1096 http://en.wikipedia.org/wiki/Names_of_large_numbers
1097
1098 There are no names for any higher numbers.
1099
1101 2nd Person precedence
1102 If a verb has identical 1st and 2nd person singular forms, but
1103 different 1st and 2nd person plural forms, then when its plural is
1104 constructed, the 2nd person plural form is always preferred.
1105
1106 The author is not currently aware of any such verbs in English, but is
1107 not quite arrogant enough to assume ipso facto that none exist.
1108
1109 Nominative precedence
1110 The singular pronoun "it" presents a special problem because its plural
1111 form can vary, depending on its "case". For example:
1112
1113 It ate my homework -> They ate my homework
1114 It ate it -> They ate them
1115 I fed my homework to it -> I fed my homework to them
1116
1117 As a consequence of this ambiguity, PL() and "PL_N" have been
1118 implemented so that they always return the nominative plural (that is,
1119 "they").
1120
1121 However, when asked for the plural of an unambiguously accusative "it"
1122 (namely, "PL("to it")", "PL_N("from it")", "PL("with it")", etc.), both
1123 subroutines will correctly return the accusative plural ("to them",
1124 "from them", "with them", etc.)
1125
1126 The plurality of zero
1127 The rules governing the choice between:
1128
1129 There were no errors.
1130
1131 and
1132
1133 There was no error.
1134
1135 are complex and often depend more on intent than on content. Hence
1136 it is infeasible to specify such rules algorithmically.
1137
1138 Therefore, Lingua::EN::Inflect contents itself with the following
1139 compromise: If the governing number is zero, inflections always return
1140 the plural form unless the appropriate "classical" inflection is in
1141 effect, in which case the singular form is always returned.
1142
1143 Thus, the sequence:
1144
1145 NUM(0);
1146 print inflect "There PL(was) NO(choice)";
1147
1148 produces "There were no choices", whereas:
1149
1150 classical 'zero'; # or: classical(zero=>1);
1151 NUM(0);
1152 print inflect "There PL(was) NO(choice)";
1153
1154 will print "There was no choice".
1155
1156 Homographs with heterogeneous plurals
1157 Another context in which intent (and not content) sometimes determines
1158 plurality is where two distinct meanings of a word require different
1159 plurals. For example:
1160
1161 Three basses were stolen from the band's equipment trailer.
1162 Three bass were stolen from the band's aquarium.
1163
1164 I put the mice next to the cheese.
1165 I put the mouses next to the computers.
1166
1167 Several thoughts about leaving crossed my mind.
1168 Several thought about leaving across my lawn.
1169
1170 Lingua::EN::Inflect handles such words in two ways:
1171
1172 • If both meanings of the word are the same part of speech (for
1173 example, "bass" is a noun in both sentences above), then one
1174 meaning is chosen as the "usual" meaning, and only that
1175 meaning's plural is ever returned by any of the inflection
1176 subroutines.
1177
1178 • If each meaning of the word is a different part of speech (for
1179 example, "thought" is both a noun and a verb), then the noun's
1180 plural is returned by PL() and PL_N() and the verb's plural is
1181 returned only by PL_V().
1182
1183 Such contexts are, fortunately, uncommon (particularly "same-part-of-
1184 speech" examples). An informal study of nearly 600 "difficult plurals"
1185 indicates that PL() can be relied upon to "get it right" about 98% of
1186 the time (although, of course, ichthyophilic guitarists or cyber-
1187 behaviouralists may experience higher rates of confusion).
1188
1189 If the choice of a particular "usual inflection" is considered
1190 inappropriate, it can always be reversed with a preliminary call to the
1191 corresponding "def_..." subroutine.
1192
1194 I'm not taking any further correspondence on:
1195
1196 "octopi".
1197 Despite the populist pandering of certain New World dictionaries,
1198 the plural is "octopuses" or (for the pedantic classicist)
1199 "octopodes". The suffix "-pus" is Greek, not Latin, so the plural
1200 is "-podes", not "-pi".
1201
1202 "virus".
1203 Had no plural in Latin (possibly because it was a mass noun). The
1204 only plural is the Anglicized "viruses".
1205
1207 Damian Conway (damian@conway.org)
1208
1210 The endless inconsistencies of English.
1211
1212 (Please report words for which the correct plural or indefinite article
1213 is not formed, so that the reliability of Lingua::EN::Inflect can be
1214 improved.)
1215
1217 Copyright (c) 1997-2009, Damian Conway. All Rights Reserved.
1218 This module is free software. It may be used, redistributed
1219 and/or modified under the same terms as Perl itself.
1220
1221
1222
1223perl v5.36.0 2023-01-20 Lingua::EN::Inflect(3)