1Lingua::EN::Inflect(3)User Contributed Perl DocumentationLingua::EN::Inflect(3)
2
3
4
6 Lingua::EN::Inflect - Convert singular to plural. Select "a" or "an".
7
9 This document describes version 1.905 of Lingua::EN::Inflect
10
12 use Lingua::EN::Inflect qw ( PL PL_N PL_V PL_ADJ NO NUM
13 PL_eq PL_N_eq PL_V_eq PL_ADJ_eq
14 A AN
15 PART_PRES
16 ORD NUMWORDS
17 WORDLIST
18 inflect classical
19 def_noun def_verb def_adj def_a def_an );
20
21
22 # UNCONDITIONALLY FORM THE PLURAL
23
24 print "The plural of ", $word, " is ", PL($word), "\n";
25
26
27 # CONDITIONALLY FORM THE PLURAL
28
29 print "I saw $cat_count ", PL("cat",$cat_count), "\n";
30
31
32 # FORM PLURALS FOR SPECIFIC PARTS OF SPEECH
33
34 print PL_N("I",$N1), PL_V("saw",$N1),
35 PL_ADJ("my",$N2), PL_N("saw",$N2), "\n";
36
37
38 # DEAL WITH "0/1/N" -> "no/1/N" TRANSLATION:
39
40 print "There ", PL_V("was",$errors), NO(" error",$errors), "\n";
41
42
43 # USE DEFAULT COUNTS:
44
45 print NUM($N1,""), PL("I"), PL_V(" saw"), NUM($N2), PL_N(" saw");
46 print "There ", NUM($errors,''), PL_V("was"), NO(" error"), "\n";
47
48
49 # COMPARE TWO WORDS "NUMBER-INSENSITIVELY":
50
51 print "same\n" if PL_eq($word1, $word2);
52 print "same noun\n" if PL_N_eq($word1, $word2);
53 print "same verb\n" if PL_V_eq($word1, $word2);
54 print "same adj.\n" if PL_ADJ_eq($word1, $word2);
55
56
57 # ADD CORRECT "a" OR "an" FOR A GIVEN WORD:
58
59 print "Did you want ", A($thing), " or ", AN($idea), "\n";
60
61
62 # CONVERT NUMERALS INTO ORDINALS (i.e. 1->1st, 2->2nd, 3->3rd, etc.)
63
64 print "It was ", ORD($position), " from the left\n";
65
66 # CONVERT NUMERALS TO WORDS (i.e. 1->"one", 101->"one hundred and one", etc.)
67 # IN A SCALAR CONTEXT: GET BACK A SINGLE STRING...
68
69 $words = NUMWORDS(1234); # "one thousand, two hundred and thirty-four"
70 $words = NUMWORDS(ORD(1234)); # "one thousand, two hundred and thirty-fourth"
71
72
73 # IN A LIST CONTEXT: GET BACK A LIST OF STRINGS, ONE FOR EACH "CHUNK"...
74
75 @words = NUMWORDS(1234); # ("one thousand","two hundred and thirty-four")
76
77
78 # OPTIONAL PARAMETERS CHANGE TRANSLATION:
79
80 $words = NUMWORDS(12345, group=>1);
81 # "one, two, three, four, five"
82
83 $words = NUMWORDS(12345, group=>2);
84 # "twelve, thirty-four, five"
85
86 $words = NUMWORDS(12345, group=>3);
87 # "one twenty-three, forty-five"
88
89 $words = NUMWORDS(1234, 'and'=>'');
90 # "one thousand, two hundred thirty-four"
91
92 $words = NUMWORDS(1234, 'and'=>', plus');
93 # "one thousand, two hundred, plus thirty-four"
94
95 $words = NUMWORDS(555_1202, group=>1, zero=>'oh');
96 # "five, five, five, one, two, oh, two"
97
98 $words = NUMWORDS(555_1202, group=>1, one=>'unity');
99 # "five, five, five, unity, two, zero, two"
100
101 $words = NUMWORDS(123.456, group=>1, decimal=>'mark');
102 # "one two three mark four five six"
103
104 # LITERAL STYLE ONLY NAMES NUMBERS LESS THAN A CERTAIN THRESHOLD...
105
106 $words = NUMWORDS( 9, threshold=>10); # "nine"
107 $words = NUMWORDS( 10, threshold=>10); # "ten"
108 $words = NUMWORDS( 11, threshold=>10); # "11"
109 $words = NUMWORDS(1000, threshold=>10); # "1,000"
110
111 # JOIN WORDS INTO A LIST:
112
113 $list = WORDLIST("apple", "banana", "carrot");
114 # "apple, banana, and carrot"
115
116 $list = WORDLIST("apple", "banana");
117 # "apple and banana"
118
119 $list = WORDLIST("apple", "banana", "carrot", {final_sep=>""});
120 # "apple, banana and carrot"
121
122
123 # REQUIRE "CLASSICAL" PLURALS (EG: "focus"->"foci", "cherub"->"cherubim")
124
125 classical; # USE ALL CLASSICAL PLURALS
126
127 classical 1; # USE ALL CLASSICAL PLURALS
128 classical 0; # USE ALL MODERN PLURALS (DEFAULT)
129
130 classical 'zero'; # "no error" INSTEAD OF "no errors"
131 classical zero=>1; # "no error" INSTEAD OF "no errors"
132 classical zero=>0; # "no errors" INSTEAD OF "no error"
133
134 classical 'herd'; # "2 buffalo" INSTEAD OF "2 buffalos"
135 classical herd=>1; # "2 buffalo" INSTEAD OF "2 buffalos"
136 classical herd=>0; # "2 buffalos" INSTEAD OF "2 buffalo"
137
138 classical 'persons'; # "2 chairpersons" INSTEAD OF "2 chairpeople"
139 classical persons=>1; # "2 chairpersons" INSTEAD OF "2 chairpeople"
140 classical persons=>0; # "2 chairpeople" INSTEAD OF "2 chairpersons"
141
142 classical 'ancient'; # "2 formulae" INSTEAD OF "2 formulas"
143 classical ancient=>1; # "2 formulae" INSTEAD OF "2 formulas"
144 classical ancient=>0; # "2 formulas" INSTEAD OF "2 formulae"
145
146
147
148 # INTERPOLATE "PL()", "PL_N()", "PL_V()", "PL_ADJ()", "A()", "AN()"
149 # "NUM()" AND "ORD()" WITHIN STRINGS:
150
151 print inflect("The plural of $word is PL($word)\n");
152 print inflect("I saw $cat_count PL(cat,$cat_count)\n");
153 print inflect("PL(I,$N1) PL_V(saw,$N1) PL(a,$N2) PL_N(saw,$N2)\n");
154 print inflect("NUM($N1,)PL(I) PL_V(saw) NUM($N2,)PL(a) PL_N(saw)\n");
155 print inflect("I saw NUM($cat_count) PL(cat)\n");
156 print inflect("There PL_V(was,$errors) NO(error,$errors)\n");
157 print inflect("There NUM($errors,)PL_V(was) NO(error)\n");
158 print inflect("Did you want A($thing) or AN($idea)\n");
159 print inflect("It was ORD($position) from the left\n");
160
161
162 # ADD USER-DEFINED INFLECTIONS (OVERRIDING INBUILT RULES):
163
164 def_noun "VAX" => "VAXen"; # SINGULAR => PLURAL
165
166 def_verb "will" => "shall", # 1ST PERSON SINGULAR => PLURAL
167 "will" => "will", # 2ND PERSON SINGULAR => PLURAL
168 "will" => "will"; # 3RD PERSON SINGULAR => PLURAL
169
170 def_adj "hir" => "their"; # SINGULAR => PLURAL
171
172 def_a "h"; # "AY HALWAYS SEZ 'HAITCH'!"
173
174 def_an "horrendous.*"; # "AN HORRENDOUS AFFECTATION"
175
177 [Note: This module is strictly in maintenance mode now. Take a look at
178 the newer Lingua::EN::Inflexion module, which offers a cleaner and more
179 convenient interface, has many more features (including
180 plural->singular inflexions), and is also much better tested. If you
181 have existing code that relies on Lingua::EN::Inflect, see the section
182 of the documentation entitled "CONVERTING FROM LINGUA::EN::INFLECT". ]
183
184 The exportable subroutines of Lingua::EN::Inflect provide plural
185 inflections, "a"/"an" selection for English words, and manipulation of
186 numbers as words.
187
188 Plural forms of all nouns, most verbs, and some adjectives are
189 provided. Where appropriate, "classical" variants (for example:
190 "brother" -> "brethren", "dogma" -> "dogmata", etc.) are also provided.
191
192 Pronunciation-based "a"/"an" selection is provided for all English
193 words, and most initialisms.
194
195 It is also possible to inflect numerals (1,2,3) to ordinals (1st, 2nd,
196 3rd) and to English words ("one", "two", "three").
197
198 In generating these inflections, Lingua::EN::Inflect follows the Oxford
199 English Dictionary and the guidelines in Fowler's Modern English Usage,
200 preferring the former where the two disagree.
201
202 The module is built around standard British spelling, but is designed
203 to cope with common American variants as well. Slang, jargon, and other
204 English dialects are not explicitly catered for.
205
206 Where two or more inflected forms exist for a single word (typically a
207 "classical" form and a "modern" form), Lingua::EN::Inflect prefers the
208 more common form (typically the "modern" one), unless "classical"
209 processing has been specified (see "MODERN VS CLASSICAL INFLECTIONS").
210
212 Inflecting Plurals
213 All of the "PL_..." plural inflection subroutines take the word to be
214 inflected as their first argument and return the corresponding
215 inflection. Note that all such subroutines expect the singular form of
216 the word. The results of passing a plural form are undefined (and
217 unlikely to be correct).
218
219 The "PL_..." subroutines also take an optional second argument, which
220 indicates the grammatical "number" of the word (or of another word with
221 which the word being inflected must agree). If the "number" argument is
222 supplied and is not 1 (or "one" or "a", or some other adjective that
223 implies the singular), the plural form of the word is returned. If the
224 "number" argument does indicate singularity, the (uninflected) word
225 itself is returned. If the number argument is omitted, the plural form
226 is returned unconditionally.
227
228 The various subroutines are:
229
230 "PL_N($;$)"
231 The exportable subroutine "PL_N()" takes a singular English
232 noun or pronoun and returns its plural. Pronouns in the
233 nominative ("I" -> "we") and accusative ("me" -> "us") cases
234 are handled, as are possessive pronouns ("mine" -> "ours").
235
236 "PL_V($;$)"
237 The exportable subroutine "PL_V()" takes the singular form of a
238 conjugated verb (that is, one which is already in the correct
239 "person" and "mood") and returns the corresponding plural
240 conjugation.
241
242 "PL_ADJ($;$)"
243 The exportable subroutine "PL_ADJ()" takes the singular form of
244 certain types of adjectives and returns the corresponding
245 plural form. Adjectives that are correctly handled include:
246 "numerical" adjectives ("a" -> "some"), demonstrative
247 adjectives ("this" -> "these", "that" -> "those"), and
248 possessives ("my" -> "our", "cat's" -> "cats'", "child's" ->
249 "children's", etc.)
250
251 "PL($;$)"
252 The exportable subroutine "PL()" takes a singular English noun,
253 pronoun, verb, or adjective and returns its plural form. Where
254 a word has more than one inflection depending on its part of
255 speech (for example, the noun "thought" inflects to "thoughts",
256 the verb "thought" to "thought"), the (singular) noun sense is
257 preferred to the (singular) verb sense.
258
259 Hence "PL("knife")" will return "knives" ("knife" having been
260 treated as a singular noun), whereas "PL("knifes")" will return
261 "knife" ("knifes" having been treated as a 3rd person singular
262 verb).
263
264 The inherent ambiguity of such cases suggests that, where the
265 part of speech is known, "PL_N", "PL_V", and "PL_ADJ" should be
266 used in preference to "PL".
267
268 Note that all these subroutines ignore any whitespace surrounding the
269 word being inflected, but preserve that whitespace when the result is
270 returned. For example, "PL(" cat ")" returns " cats ".
271
272 Numbered plurals
273 The "PL_..." subroutines return only the inflected word, not the count
274 that was used to inflect it. Thus, in order to produce "I saw 3 ducks",
275 it is necessary to use:
276
277 print "I saw $N ", PL_N($animal,$N), "\n";
278
279 Since the usual purpose of producing a plural is to make it agree with
280 a preceding count, Lingua::EN::Inflect provides an exportable
281 subroutine ("NO($;$)") which, given a word and a(n optional) count,
282 returns the count followed by the correctly inflected word. Hence the
283 previous example can be rewritten:
284
285 print "I saw ", NO($animal,$N), "\n";
286
287 In addition, if the count is zero (or some other term which implies
288 zero, such as "zero", "nil", etc.) the count is replaced by the word
289 "no". Hence, if $N had the value zero, the previous example would print
290 the somewhat more elegant:
291
292 I saw no animals
293
294 rather than:
295
296 I saw 0 animals
297
298 Note that the name of the subroutine is a pun: the subroutine returns
299 either a number (a No.) or a "no", in front of the inflected word.
300
301 Wordy and comma'd plurals
302
303 The "NO()" subroutine takes an optional third argument: a hash of named
304 options that configure its behaviour.
305
306 The 'words_below' option informs "NO()" what other numbers (i.e. apart
307 from zero) it should convert to words. For example:
308
309 for my $count (0..12) {
310 print NO('cat', $count, {words_below => 10}), "\n";
311 }
312
313 would print:
314
315 no cats
316 one cat
317 two cats
318 three cats
319 four cats
320 five cats
321 six cats
322 seven cats
323 eight cats
324 nine cats
325 10 cats
326 11 cats
327 12 cats
328
329 The 'comma' and 'comma_every' options determine whether or not the
330 numbers produced by "NO()" have commas in them. That is:
331
332 2001 space odysseys
333
334 versus:
335
336 2,001 space odysseys
337
338 Normally, numbers are produced without commas, but if 'comma' or
339 'comma_every' is specified, then commas are added as requested.
340
341 The 'comma' option specifies which character to use as a comma. It
342 defaults to ',', but may be set to anything convenient:
343
344 print NO('Euro', $amount, {comma=>'.'});
345
346 # prints: 1.000.000 Euros
347
348 The 'comma_every' option specifies how many characters between commas.
349 It defaults to 3, but may be set to any positive number:
350
351 print NO('Euro', $amount, {comma_every=>4});
352
353 # prints: 100,0000 Euros
354
355 Note that you can set both options at once, if you wish:
356
357 print NO('Euro', $amount, {comma_every=>2, comma=>'_'});
358
359 # prints: 1_00_00_00 Euros
360
361 Reducing the number of counts required
362 In some contexts, the need to supply an explicit count to the various
363 "PL_..." subroutines makes for tiresome repetition. For example:
364
365 print PL_ADJ("This",$errors), PL_N(" error",$errors),
366 PL_V(" was",$errors), " fatal.\n";
367
368 Lingua::EN::Inflect therefore provides an exportable subroutine
369 ("NUM($;$)") that may be used to set a persistent "default number"
370 value. If such a value is set, it is subsequently used whenever an
371 optional second "number" argument is omitted. The default value thus
372 set can subsequently be removed by calling "NUM()" with no arguments.
373 Hence we could rewrite the previous example:
374
375 NUM($errors);
376 print PL_ADJ("This"), PL_N(" error"), PL_V(" was"), " fatal.\n";
377 NUM();
378
379 Normally, "NUM()" returns its first argument, so that it may also be
380 "inlined" in contexts like:
381
382 print NUM($errors), PL_N(" error"), PL_V(" was"), " detected.\n";
383 print PL_ADJ("This"), PL_N(" error"), PL_V(" was"), " fatal.\n"
384 if $severity > 1;
385
386 However, in certain contexts (see "INTERPOLATING INFLECTIONS IN
387 STRINGS") it is preferable that "NUM()" return an empty string. Hence
388 "NUM()" provides an optional second argument. If that argument is
389 supplied (that is, if it is defined) and evaluates to false, "NUM"
390 returns an empty string instead of its first argument. For example:
391
392 print NUM($errors,0), NO("error"), PL_V(" was"), " detected.\n";
393 print PL_ADJ("This"), PL_N(" error"), PL_V(" was"), " fatal.\n"
394 if $severity > 1;
395
396 Number-insensitive equality
397 Lingua::EN::Inflect also provides a solution to the problem of
398 comparing words of differing plurality through the exportable
399 subroutines "PL_eq($$)", "PL_N_eq($$)", "PL_V_eq($$)", and
400 "PL_ADJ_eq($$)". Each of these subroutines takes two strings, and
401 compares them using the corresponding plural-inflection subroutine
402 ("PL()", "PL_N()", "PL_V()", and "PL_ADJ()" respectively).
403
404 The comparison returns true if:
405
406 • the strings are "eq"-equal, or
407
408 • one string is "eq"-equal to a plural form of the other, or
409
410 • the strings are two different plural forms of the one word.
411
412 Hence all of the following return true:
413
414 PL_eq("index","index") # RETURNS "eq"
415 PL_eq("index","indexes") # RETURNS "s:p"
416 PL_eq("index","indices") # RETURNS "s:p"
417 PL_eq("indexes","index") # RETURNS "p:s"
418 PL_eq("indices","index") # RETURNS "p:s"
419 PL_eq("indices","indexes") # RETURNS "p:p"
420 PL_eq("indexes","indices") # RETURNS "p:p"
421 PL_eq("indices","indices") # RETURNS "eq"
422
423 As indicated by the comments in the previous example, the actual value
424 returned by the various "PL_eq" subroutines encodes which of the three
425 equality rules succeeded: "eq" is returned if the strings were
426 identical, "s:p" if the strings were singular and plural respectively,
427 "p:s" for plural and singular, and "p:p" for two distinct plurals.
428 Inequality is indicated by returning an empty string.
429
430 It should be noted that two distinct singular words which happen to
431 take the same plural form are not considered equal, nor are cases where
432 one (singular) word's plural is the other (plural) word's singular.
433 Hence all of the following return false:
434
435 PL_eq("base","basis") # ALTHOUGH BOTH -> "bases"
436 PL_eq("syrinx","syringe") # ALTHOUGH BOTH -> "syringes"
437 PL_eq("she","he") # ALTHOUGH BOTH -> "they"
438
439 PL_eq("opus","operas") # ALTHOUGH "opus" -> "opera" -> "operas"
440 PL_eq("taxi","taxes") # ALTHOUGH "taxi" -> "taxis" -> "taxes"
441
442 Note too that, although the comparison is "number-insensitive" it is
443 not case-insensitive (that is, "PL_eq("time","Times")" returns false). To
444 obtain both number and case insensitivity, prefix both arguments with
445 "lc" (that is, "PL_eq(lc "time", lc "Times")" returns true).
446
448 Present participles
449 "Lingua::EN::Inflect" also provides the "PART_PRES" subroutine, which
450 can take a 3rd person singular verb and correctly inflect it to its
451 present participle:
452
453 PART_PRES("runs") # "running"
454 PART_PRES("loves") # "loving"
455 PART_PRES("eats") # "eating"
456 PART_PRES("bats") # "batting"
457 PART_PRES("spies") # "spying"
458
460 Selecting indefinite articles
461 Lingua::EN::Inflect provides two exportable subroutines ("A($;$)" and
462 "AN($;$)") which will correctly prepend the appropriate indefinite
463 article to a word, depending on its pronunciation. For example:
464
465 A("cat") # -> "a cat"
466 AN("cat") # -> "a cat"
467 A("euphemism") # -> "a euphemism"
468 A("Euler number") # -> "an Euler number"
469 A("hour") # -> "an hour"
470 A("houri") # -> "a houri"
471
472 The two subroutines are identical in function and may be used
473 interchangeably. The only reason that two versions are provided is to
474 enhance the readability of code such as:
475
476 print "That is ", AN($errortype), " error\n";
477 print "That is ", A($fataltype), " fatal error\n";
478
479 Note that in both cases the actual article provided depends only on the
480 pronunciation of the first argument, not on the name of the subroutine.
481
482 "A()" and "AN()" will ignore any indefinite article that already exists
483 at the start of the string. Thus:
484
485 @half_arked = (
486 "a elephant",
487 "a giraffe",
488 "an ewe",
489 "a orangutan",
490 );
491
492 print A($_), "\n" for @half_arked;
493
494 # prints:
495 # an elephant
496 # a giraffe
497 # a ewe
498 # an orangutan
499
500 "A()" and "AN()" both take an optional second argument. As with the
501 "PL_..." subroutines, this second argument is a "number" specifier. If
502 its value is 1 (or some other value implying singularity), "A()" and
503 "AN()" insert "a" or "an" as appropriate. If the number specifier
504 implies plurality, "A()" and "AN()" insert the actual second argument
505 instead. For example:
506
507 A("cat",1) # -> "a cat"
508 A("cat",2) # -> "2 cat"
509 A("cat","one") # -> "one cat"
510 A("cat","no") # -> "no cat"
511
512 Note that, as implied by the previous examples, "A()" and "AN()" both
513 assume that their job is merely to provide the correct qualifier for a
514 word (that is: "a", "an", or the specified count). In other words,
515 they assume that the word they are given has already been correctly
516 inflected for plurality. Hence, if $N has the value 2, then:
517
518 print A("cat",$N);
519
520 prints "2 cat", instead of "2 cats". The correct approach is to use:
521
522 print A(PL("cat",$N),$N);
523
524 or, better still:
525
526 print NO("cat",$N);
527
528 Note too that, like the various "PL_..." subroutines, whenever "A()"
529 and "AN()" are called with only one argument they are subject to the
530 effects of any preceding call to "NUM()". Hence, another possible
531 solution is:
532
533 NUM($N);
534 print A(PL("cat"));
535
536 Indefinite articles and initialisms
537 "Initialisms" (sometimes inaccurately called "acronyms") are terms
538 which have been formed from the initial letters of words in a phrase
539 (for example, "NATO", "NBL", "S.O.S.", "SCUBA", etc.)
540
541 Such terms present a particular challenge when selecting between "a"
542 and "an", since they are sometimes pronounced as if they were a single
543 word ("nay-tow", "sku-ba") and sometimes as a series of letter names
544 ("en-eff-ell", "ess-oh-ess").
545
546 "A()" and "AN()" cope with this dichotomy using a series of inbuilt
547 rules, which may be summarized as:
548
549 1. If the word starts with a single letter, followed by a period
550 or dash (for example, "R.I.P.", "C.O.D.", "e-mail", "X-ray",
551 "T-square"), then choose the appropriate article for the sound
552 of the first letter ("an R.I.P.", "a C.O.D.", "an e-mail", "an
553 X-ray", "a T-square").
554
555 2. If the first two letters of the word are capitals, consonants,
556 and do not appear at the start of any known English word, (for
557 example, "LCD", "XML", "YWCA"), then once again choose "a" or
558 "an" depending on the sound of the first letter ("an LCD", "an
559 XML", "a YWCA").
560
561 3. Otherwise, assume the string is a capitalized word or a
562 pronounceable initialism (for example, "LED", "OPEC", "FAQ",
563 "UNESCO"), and therefore takes "a" or "an" according to the
564 (apparent) pronunciation of the entire word ("a LED", "an
565 OPEC", "a FAQ", "a UNESCO").
566
567 Note that rules 1 and 3 together imply that the presence or absence of
568 punctuation may change the selection of indefinite article for a
569 particular initialism (for example, "a FAQ" but "an F.A.Q.").
570
571 Indefinite articles and "soft H's"
572 Words beginning in the letter 'H' present another type of difficulty
573 when selecting a suitable indefinite article. In a few such words (for
574 example, "hour", "honour", "heir") the 'H' is not voiced at all, and so
575 such words inflect with "an". The remaining cases ("voiced H's") may be
576 divided into two categories: "hard H's" (such as "hangman",
577 "holograph", "hat", etc.) and "soft H's" (such as "hysterical",
578 "horrendous", "holy", etc.)
579
580 Hard H's always take "a" as their indefinite article, and soft H's
581 normally do so as well. But some English speakers prefer "an" for soft
582 H's (although the practice is now generally considered an affectation,
583 rather than a legitimate grammatical alternative).
584
585 At present, the "A()" and "AN()" subroutines ignore soft H's and use
586 "a" for any voiced 'H'. The author would, however, welcome feedback on
587 this decision (envisaging a possible future "soft H" mode).
588
590 Occasionally it is useful to present an integer value as an ordinal
591 rather than as a numeral. For example:
592
593 Enter password (1st attempt): ********
594 Enter password (2nd attempt): *********
595 Enter password (3rd attempt): *********
596 No 4th attempt. Access denied.
597
598 To this end, Lingua::EN::Inflect provides the "ORD()" subroutine.
599 "ORD()" takes a single argument and forms its ordinal equivalent. If
600 the argument isn't a numerical integer, it just adds "-th".
601
603 The exportable subroutine "NUMWORDS" takes a number (cardinal or
604 ordinal) and returns an English representation of that number. In a
605 scalar context a string is returned. Hence:
606
607 use Lingua::EN::Inflect qw( NUMWORDS );
608
609 $words = NUMWORDS(1234567);
610
611 puts the string:
612
613 "one million, two hundred and thirty-four thousand, five hundred and sixty-seven"
614
615 into $words.
616
617 In a list context each comma-separated chunk is returned as a separate
618 element. Hence:
619
620 @words = NUMWORDS(1234567);
621
622 puts the list:
623
624 ("one million",
625 "two hundred and thirty-four thousand",
626 "five hundred and sixty-seven")
627
628 into @words.
629
630 Note that this also means that:
631
632 print NUMWORDS(1234567);
633
634 will (misprint) print:
635
636 one milliontwo hundred and thirty-four thousandfive hundred and sixty-seven
637
638 To get readable output, make sure the call is in scalar context:
639
640 print scalar NUMWORDS(1234567);
641
642 Non-digits (apart from an optional leading plus or minus sign, any
643 decimal points, and ordinal suffixes -- see below) are silently
644 ignored, so the following all produce identical results:
645
646 NUMWORDS(5551202);
647 NUMWORDS(5_551_202);
648 NUMWORDS("5,551,202");
649 NUMWORDS("555-1202");
650
651 That last case is a little awkward since it's almost certainly a phone
652 number, and "five million, five hundred and fifty-one thousand, two
653 hundred and two" probably isn't what's wanted.
654
655 To overcome this, "NUMWORDS()" takes an optional named argument,
656 'group', which changes how numbers are translated. The argument must be
657 a positive integer less than four, which indicates how the digits of
658 the number are to be grouped. If the argument is 1, then each digit is
659 translated separately. If the argument is 2, pairs of digits (starting
660 from the left) are grouped together. If the argument is 3, triples of
661 numbers (again, from the left) are grouped. Hence:
662
663 NUMWORDS("555-1202", group=>1)
664
665 returns "five, five, five, one, two, zero, two", whilst:
666
667 NUMWORDS("555-1202", group=>2)
668
669 returns "fifty-five, fifty-one, twenty, two", and:
670
671 NUMWORDS("555-1202", group=>3)
672
673 returns "five fifty-five, one twenty, two".
674
675 Phone numbers are often written in words as
676 "five..five..five..one..two..zero..two", which is also easy to achieve:
677
678 join '..', NUMWORDS("555-1202", group=>1)
679
680 "NUMWORDS" also handles decimal fractions. Hence:
681
682 NUMWORDS("1.2345")
683
684 returns "one point two three four five" in a scalar context and
685 "("one","point","two","three","four","five")" in an array context.
686 Exponent form ("1.234e56") is not yet handled.
687
688 Multiple decimal points are only translated in one of the "grouping"
689 modes. Hence:
690
691 NUMWORDS(101.202.303)
692
693 returns "one hundred and one point two zero two three zero three",
694 whereas:
695
696 NUMWORDS(101.202.303, group=>1)
697
698 returns "one zero one point two zero two point three zero three".
699
700 The digit '0' is unusual in that it may be translated to English as
701 "zero", "oh", or "nought". To cater for this diversity, "NUMWORDS" may
702 be passed a named argument, 'zero', which may be set to the desired
703 translation of '0'. For example:
704
705 print join "..", NUMWORDS("555-1202", group=>1, zero=>'oh')
706
707 prints "five..five..five..one..two..oh..two". By default, zero is
708 rendered as "zero".
709
710 Likewise, the digit '1' may be rendered as "one" or "a/an" (or very
711 occasionally other variants), depending on the context. So there is a
712 'one' argument as well:
713
714 print NUMWORDS($_, one=>'a solitary', zero=>'no more'),
715 PL(" bottle of beer on the wall\n", $_)
716 for (3,2,1,0);
717
718 # prints:
719 # three bottles of beer on the wall
720 # two bottles of beer on the wall
721 # a solitary bottle of beer on the wall
722 # no more bottles of beer on the wall
723
724 Care is needed if the word "a/an" is to be used as a 'one' value.
725 Unless the next word is known in advance, it's almost always necessary
726 to use the "A" function as well:
727
728 print A( NUMWORDS(1, one=>'a') . " $_\n")
729 for qw(cat aardvark ewe hour);
730
731 # prints:
732 # a cat
733 # an aardvark
734 # a ewe
735 # an hour
736
737 Another major regional variation in number translation is the use of
738 "and" in certain contexts. The named argument 'and' allows the
739 programmer to specify how "and" should be handled. Hence:
740
741 print scalar NUMWORDS("765", 'and'=>'')
742
743 prints "seven hundred sixty-five", instead of "seven hundred and sixty-
744 five". By default, the "and" is included.
745
746 The translation of the decimal point is also subject to variation (with
747 "point", "dot", and "decimal" being the favorites). The named argument
748 'decimal' allows the programmer to specify how the decimal point should be
749 rendered. Hence:
750
751 print scalar NUMWORDS("666.124.64.101", group=>3, decimal=>'dot')
752
753 prints "six sixty-six, dot, one twenty-four, dot, sixty-four, dot, one
754 zero one". By default, the decimal point is rendered as "point".
755
756 "NUMWORDS" also handles the ordinal forms of numbers. So:
757
758 print scalar NUMWORDS('1st');
759 print scalar NUMWORDS('3rd');
760 print scalar NUMWORDS('202nd');
761 print scalar NUMWORDS('1000000th');
762
763 print:
764
765 first
766 third
767 two hundred and second
768 one millionth
769
770 Two common idioms in this regard are:
771
772 print scalar NUMWORDS(ORD($number));
773
774 and:
775
776 print scalar ORD(NUMWORDS($number));
777
778 These are identical in effect, except when $number contains a decimal:
779
780 $number = 99.09;
781 print scalar NUMWORDS(ORD($number)); # ninety-ninth point zero nine
782 print scalar ORD(NUMWORDS($number)); # ninety-nine point zero ninth
783
784 Use whichever you feel is most appropriate.
785
787 When creating a list of words, commas are used between adjacent items,
788 except if the items contain commas, in which case semicolons are used.
789 But if there are fewer than three items, the commas/semicolons are omitted
790 entirely. The final item also has a conjunction (usually "and" or "or")
791 before it. And although it's technically incorrect (and sometimes
792 misleading), some people prefer to omit the comma before that final
793 conjunction, even when there are more than two items.
794
795 That's complicated enough to warrant its own subroutine: "WORDLIST()".
796 This subroutine expects a list of words, possibly with one or more hash
797 references containing options. It returns a string that joins the list
798 together in the normal English usage. For example:
799
800 print "You chose ", WORDLIST(@selected_items), "\n";
801 # You chose barley soup, roast beef, and Yorkshire pudding
802
803 print "You chose ", WORDLIST(@selected_items, {final_sep=>""}), "\n";
804 # You chose barley soup, roast beef and Yorkshire pudding
805
806 print "Please choose ", WORDLIST(@side_orders, {conj=>"or"}), "\n";
807 # Please choose salad, vegetables, or ice-cream
808
809 The available options are:
810
811 Option named Specifies Default value
812
813 conj Final conjunction "and"
814 sep Inter-item separator ","
815 final_sep Final separator value of 'sep' option
816
818 By far the commonest use of the inflection subroutines is to produce
819 message strings for various purposes. For example:
820
821 print NUM($errors), PL_N(" error"), PL_V(" was"), " detected.\n";
822 print PL_ADJ("This"), PL_N(" error"), PL_V(" was"), " fatal.\n"
823 if $severity > 1;
824
825 Unfortunately the need to separate each subroutine call detracts
826 significantly from the readability of the resulting code. To ameliorate
827 this problem, Lingua::EN::Inflect provides an exportable string-
828 interpolating subroutine (inflect($)), which recognizes calls to the
829 various inflection subroutines within a string and interpolates them
830 appropriately.
831
832 Using "inflect" the previous example could be rewritten:
833
834 print inflect "NUM($errors) PL_N(error) PL_V(was) detected.\n";
835 print inflect "PL_ADJ(This) PL_N(error) PL_V(was) fatal.\n"
836 if $severity > 1;
837
838 Note that "inflect" also correctly handles calls to the "NUM()"
839 subroutine (whether interpolated or antecedent). The "inflect()"
840 subroutine has a related extra feature, in that it automatically
841 cancels any "default number" value before it returns its interpolated
842 string. This means that calls to "NUM()" which are embedded in an
843 "inflect()"-interpolated string do not "escape" and interfere with
844 subsequent inflections.
845
847 Certain words, mainly of Latin or Ancient Greek origin, can form
848 plurals either using the standard English "-s" suffix, or with their
849 original Latin or Greek inflections. For example:
850
851 PL("stigma") # -> "stigmas" or "stigmata"
852 PL("torus") # -> "toruses" or "tori"
853 PL("index") # -> "indexes" or "indices"
854 PL("millennium") # -> "millenniums" or "millennia"
855 PL("ganglion") # -> "ganglions" or "ganglia"
856 PL("octopus") # -> "octopuses" or "octopodes"
857
858 Lingua::EN::Inflect caters to such words by providing an "alternate
859 state" of inflection known as "classical mode". By default, words are
860 inflected using their contemporary English plurals, but if classical
861 mode is invoked, the more traditional plural forms are returned
862 instead.
863
864 The exportable subroutine "classical()" controls this feature. If
865 "classical()" is called with no arguments, it unconditionally invokes
866 classical mode. If it is called with a single argument, it turns all
867 classical inflections on or off (depending on whether the argument is true
868 or false). If called with two or more arguments, those arguments
869 specify which aspects of classical behaviour are to be used.
870
871 Thus:
872
873 classical; # SWITCH ON CLASSICAL MODE
874 print PL("formula"); # -> "formulae"
875
876 classical 0; # SWITCH OFF CLASSICAL MODE
877 print PL("formula"); # -> "formulas"
878
879 classical $cmode; # CLASSICAL MODE IFF $cmode
880 print PL("formula"); # -> "formulae" (IF $cmode)
881 # -> "formulas" (OTHERWISE)
882
883 classical herd=>1; # SWITCH ON CLASSICAL MODE FOR "HERD" NOUNS
884 print PL("wildebeest"); # -> "wildebeest"
885
886 classical names=>1; # SWITCH ON CLASSICAL MODE FOR NAMES
887 print PL("sally"); # -> "sallies"
888 print PL("Sally"); # -> "Sallys"
889
890 Note however that "classical()" has no effect on the inflection of
891 words which are now fully assimilated. Hence:
892
893 PL("forum") # ALWAYS -> "forums"
894 PL("criterion") # ALWAYS -> "criteria"
895
896 Lingua::EN::Inflect assumes that a capitalized word is a person's name,
897 so it forms the plural according to the rules for names (which is that
898 you don't inflect, you just add -s or -es). You can choose to turn that
899 behaviour off (it's on by default, even when the module isn't in
900 classical mode) by calling "classical(names=>0)".
901
903 Adding plurals at run-time
904 Lingua::EN::Inflect provides five exportable subroutines which allow
905 the programmer to override the module's behaviour for specific cases:
906
907 "def_noun($$)"
908 The "def_noun" subroutine takes a pair of string arguments: the
909 singular and plural forms of the noun being specified. The
910 singular form specifies a pattern to be interpolated (as
911 "m/^(?:$first_arg)$/i"). Any noun matching this pattern is
912 then replaced by the string in the second argument. The second
913 argument specifies a string which is interpolated after the
914 match succeeds, and is then used as the plural form. For
915 example:
916
917 def_noun 'cow' => 'kine';
918 def_noun '(.+i)o' => '$1i';
919 def_noun 'spam(mer)?' => '\\$\\%\\@#\\$\\@#!!';
920
921 Note that both arguments should usually be specified in single
922 quotes, so that they are not interpolated when they are
923 specified, but later (when words are compared to them). As
924 indicated by the last example, care also needs to be taken with
925 certain characters in the second argument, to ensure that they
926 are not unintentionally interpolated during comparison.
927
928 The second argument string may also specify a second variant of
929 the plural form, to be used when "classical" plurals have been
930 requested. The beginning of the second variant is marked by a
931 '|' character:
932
933 def_noun 'cow' => 'cows|kine';
934 def_noun '(.+i)o' => '$1os|$1i';
935 def_noun 'spam(mer)?' => '\\$\\%\\@#\\$\\@#!!|varmints';
936
937 If no classical variant is given, the specified plural form is
938 used in both normal and "classical" modes.
939
940 If the second argument is "undef" instead of a string, then the
941 current user definition for the first argument is removed, and
942 the standard plural inflection(s) restored.
943
944 Note that in all cases, later plural definitions for a
945 particular singular form replace earlier definitions of the
946 same form. For example:
947
948 # FIRST, HIDE THE MODERN FORM....
949 def_noun 'aviatrix' => 'aviatrices';
950
951 # LATER, HIDE THE CLASSICAL FORM...
952 def_noun 'aviatrix' => 'aviatrixes';
953
954 # FINALLY, RESTORE THE DEFAULT BEHAVIOUR...
955 def_noun 'aviatrix' => undef;
956
957 Special care is also required when defining general patterns
958 and associated specific exceptions: put the more specific cases
959 after the general pattern. For example:
960
961 def_noun '(.+)us' => '$1i'; # EVERY "-us" TO "-i"
962 def_noun 'bus' => 'buses'; # EXCEPT FOR "bus"
963
964 This "try-most-recently-defined-first" approach to matching
965 user-defined words is also used by "def_verb", "def_a" and
966 "def_an".
967
968 "def_verb($$$$$$)"
969 The "def_verb" subroutine takes three pairs of string arguments
970 (that is, six arguments in total), specifying the singular and
971 plural forms of the three "persons" of verb. As with
972 "def_noun", the singular forms are specifications of run-time-
973 interpolated patterns, whilst the plural forms are
974 specifications of (up to two) run-time-interpolated strings:
975
976 def_verb 'am' => 'are',
977 'are' => 'are|art',
978 'is' => 'are';
979
980 def_verb 'have' => 'have',
981 'have' => 'have',
982 'ha(s|th)' => 'have';
983
984 Note that as with "def_noun", modern/classical variants of
985 plurals may be separately specified, subsequent definitions
986 replace previous ones, and "undef"'ed plural forms revert to
987 the standard behaviour.
988
989 "def_adj($$)"
990 The "def_adj" subroutine takes a pair of string arguments,
991 which specify the singular and plural forms of the adjective
992 being defined. As with "def_noun" and "def_verb", the singular
993 forms are specifications of run-time-interpolated patterns,
994 whilst the plural forms are specifications of (up to two) run-
995 time-interpolated strings:
996
997 def_adj 'this' => 'these';
998 def_adj 'red' => 'red|gules';
999
1000 As previously, modern/classical variants of plurals may be
1001 separately specified, subsequent definitions replace previous
1002 ones, and "undef"'ed plural forms revert to the standard
1003 behaviour.
1004
1005 def_a($) and def_an($)
1006 The "def_a" and "def_an" subroutines each take a single
1007 argument, which specifies a pattern. If a word passed to "A()"
1008 or "AN()" matches this pattern, it will be prefixed
1009 (unconditionally) with the corresponding indefinite article.
1010 For example:
1011
1012 def_a 'error';
1013 def_a 'in.+';
1014
1015 def_an 'mistake';
1016 def_an 'error';
1017
1018 As with the other "def_..." subroutines, such redefinitions are
1019 sequential in effect so that, after the above example, "error"
1020 will be inflected with "an".
1021
1022 The $HOME/.inflectrc file
1023 When it is imported, Lingua::EN::Inflect executes (as Perl code) the
1024 contents of any file named .inflectrc which it finds in the
1025 directory where Lingua/EN/Inflect.pm is installed, or in the current
1026 home directory ($ENV{HOME}), or in both. Note that the code is
1027 executed within the Lingua::EN::Inflect namespace.
1028
1029 Hence the user or the local Perl guru can make appropriate calls to
1030 "def_noun", "def_verb", etc. in one of these .inflectrc files, to
1031 permanently and universally modify the behaviour of the module. For
1032 example
1033
1034 > cat /usr/local/lib/perl5/Lingua/EN/.inflectrc
1035
1036 def_noun "UNIX" => "UN*X|UNICES";
1037
1038 def_verb "teco" => "teco", # LITERALLY: "to edit with TECO"
1039 "teco" => "teco",
1040 "tecos" => "teco";
1041
1042 def_a "Euler.*"; # "Yewler" TURNS IN HIS GRAVE
1043
1044 Note that calls to the "def_..." subroutines from within a program will
1045 take precedence over the contents of the home directory .inflectrc
1046 file, which in turn takes precedence over the system-wide .inflectrc
1047 file.
1048
1050 On loading, if the Perl code in a .inflectrc file is invalid
1051 (syntactically or otherwise), an appropriate fatal error is issued. A
1052 common problem is not ending the file with something that evaluates to
1053 true (as the five "def_..." subroutines do).
1054
1055 Using the five "def_..." subroutines directly in a program may also
1056 result in fatal diagnostics, if a (singular) pattern or an interpolated
1057 (plural) string is somehow invalid.
1058
1059 Specific diagnostics related to user-defined inflections are:
1060
1061 "Bad user-defined singular pattern:\n\t %s"
1062 The singular form of a user-defined noun or verb (as defined by
1063 a call to "def_noun", "def_verb", "def_adj", "def_a" or
1064 "def_an") is not a valid Perl regular expression. The actual
1065 Perl error message is also given.
1066
1067 "Bad user-defined plural string: '%s'"
1068 The plural form(s) of a user-defined noun or verb (as defined
1069 by a call to "def_noun", "def_verb" or "def_adj") is not a
1070 valid Perl interpolated string (usually because it interpolates
1071 some undefined variable).
1072
1073 "Bad .inflectrc file (%s):\n %s"
1074 Some other problem occurred in loading the named local or
1075 global .inflectrc file. The Perl error message (including the
1076 line number) is also given.
1077
1078 There are no diagnosable run-time error conditions for the actual
1079 inflection subroutines (except "NUMWORDS()"), and hence no run-time
1080 diagnostics. If the inflection subroutines are unable to form a plural
1081 via a user-definition or an inbuilt rule, they just "guess" the
1082 commonest English inflection: adding "-s" for nouns, removing "-s" for
1083 verbs, and no inflection for adjectives.
1084
1085 "Lingua::EN::Inflect::NUMWORDS()" can "die" with the following
1086 messages:
1087
1088 "Bad grouping option: %s"
1089 The optional argument to "NUMWORDS()" wasn't 1, 2 or 3.
1090
1091 "Number out of range"
1092 "NUMWORDS()" was passed a number larger than the number
1093 represented by 3006 consecutive nines. The words representing
1094 that number are 63,681 characters long, including commas and
1095 spaces. If you're interested in the actual value, see
1096 t/numwords.t.
1097
1098 The reference for the names is
1099 http://en.wikipedia.org/wiki/Names_of_large_numbers
1100
1101 There are no names for any higher numbers.
1102
1104 2nd Person precedence
1105 If a verb has identical 1st and 2nd person singular forms, but
1106 different 1st and 2nd person plural forms, then when its plural is
1107 constructed, the 2nd person plural form is always preferred.
1108
1109 The author is not currently aware of any such verbs in English, but is
1110 not quite arrogant enough to assume ipso facto that none exist.
1111
1112 Nominative precedence
1113 The singular pronoun "it" presents a special problem because its plural
1114 form can vary, depending on its "case". For example:
1115
1116 It ate my homework -> They ate my homework
1117 It ate it -> They ate them
1118 I fed my homework to it -> I fed my homework to them
1119
1120 As a consequence of this ambiguity, "PL()" and "PL_N()" have been
1121 implemented so that they always return the nominative plural (that is,
1122 "they").
1123
1124 However, when asked for the plural of an unambiguously accusative "it"
1125 (namely, "PL("to it")", "PL_N("from it")", "PL("with it")", etc.), both
1126 subroutines will correctly return the accusative plural ("to them",
1127 "from them", "with them", etc.)
1128
1129 The plurality of zero
1130 The rules governing the choice between:
1131
1132 There were no errors.
1133
1134 and
1135
1136 There was no error.
1137
1138 are complex and often depend more on intent than on content. Hence
1139 it is infeasible to specify such rules algorithmically.
1140
1141 Therefore, Lingua::EN::Inflect contents itself with the following
1142 compromise: If the governing number is zero, inflections always return
1143 the plural form unless the appropriate "classical" inflection is in
1144 effect, in which case the singular form is always returned.
1145
1146 Thus, the sequence:
1147
1148 NUM(0);
1149 print inflect "There PL(was) NO(choice)";
1150
1151 produces "There were no choices", whereas:
1152
1153 classical 'zero'; # or: classical(zero=>1);
1154 NUM(0);
1155 print inflect "There PL(was) NO(choice)";
1156
1157 will print "There was no choice".
1158
1159 Homographs with heterogeneous plurals
1160 Another context in which intent (and not content) sometimes determines
1161 plurality is where two distinct meanings of a word require different
1162 plurals. For example:
1163
1164 Three basses were stolen from the band's equipment trailer.
1165 Three bass were stolen from the band's aquarium.
1166
1167 I put the mice next to the cheese.
1168 I put the mouses next to the computers.
1169
1170 Several thoughts about leaving crossed my mind.
1171 Several thought about leaving across my lawn.
1172
1173 Lingua::EN::Inflect handles such words in two ways:
1174
1175 • If both meanings of the word are the same part of speech (for
1176 example, "bass" is a noun in both sentences above), then one
1177 meaning is chosen as the "usual" meaning, and only that
1178 meaning's plural is ever returned by any of the inflection
1179 subroutines.
1180
1181 • If each meaning of the word is a different part of speech (for
1182 example, "thought" is both a noun and a verb), then the noun's
1183 plural is returned by "PL()" and "PL_N()" and the verb's plural
1184 is returned only by "PL_V()".
1185
1186 Such contexts are, fortunately, uncommon (particularly "same-part-of-
1187 speech" examples). An informal study of nearly 600 "difficult plurals"
1188 indicates that "PL()" can be relied upon to "get it right" about 98% of
1189 the time (although, of course, ichthyophilic guitarists or cyber-
1190 behaviouralists may experience higher rates of confusion).
1191
1192 If the choice of a particular "usual inflection" is considered
1193 inappropriate, it can always be reversed with a preliminary call to the
1194 corresponding "def_..." subroutine.
1195
1197 I'm not taking any further correspondence on:
1198
1199 "octopi".
1200 Despite the populist pandering of certain New World dictionaries,
1201 the plural is "octopuses" or (for the pedantic classicist)
1202 "octopodes". The suffix "-pus" is Greek, not Latin, so the plural
1203 is "-podes", not "-pi".
1204
1205 "virus".
1206 Had no plural in Latin (possibly because it was a mass noun). The
1207 only plural is the Anglicized "viruses".
1208
1210 Damian Conway (damian@conway.org)
1211
1213 The endless inconsistencies of English.
1214
1215 (Please report words for which the correct plural or indefinite article
1216 is not formed, so that the reliability of Lingua::EN::Inflect can be
1217 improved.)
1218
1220 Copyright (c) 1997-2009, Damian Conway. All Rights Reserved.
1221 This module is free software. It may be used, redistributed
1222 and/or modified under the same terms as Perl itself.
1223
1224
1225
1226perl v5.32.1 2021-01-27 Lingua::EN::Inflect(3)