/lib/DDG/Goodie/MolarMass.pm

http://github.com/duckduckgo/zeroclickinfo-goodies · Perl · 235 lines · 163 code · 35 blank · 37 comment · 37 complexity · 8bdfb2570b3aa9aec92915b637d1083e MD5 · raw file

  1. package DDG::Goodie::MolarMass;
  2. # ABSTRACT: Calculates the molar mass of a chemical compound from its formula
  3. use DDG::Goodie;
  4. use strict;
  5. use warnings;
  6. use YAML::XS 'LoadFile';
  7. use Math::Round 'nearest';
  8. use Text::Trim;
  # Goodie configuration: the answer type tag, result caching, and the
  # phrase that routes a query to this Goodie.
  zci answer_type => 'molar_mass';
  zci is_cached   => 1;
  triggers any    => 'molar mass';

  # Lookup tables, loaded once from the share directory when the module is
  # compiled.
  #   %masses    - looked up by element symbol; the values are used as
  #                numeric masses when a formula is weighed.
  #   %compounds - looked up by lower-cased compound name; each value is a
  #                record whose name, formula and weight fields are reported.
  my %masses    = %{ LoadFile(share('elements.yml')) };
  my %compounds = %{ LoadFile(share('compounds.yml')) };
  # Handle statement: answers queries for the molar mass of a compound given
  # either by common name or by chemical formula.
  # The text to interpret is read from $_ (presumably the query with the
  # trigger phrase removed - confirm against the DDG::Goodie documentation).
  # Returns nothing (no answer) when that text is empty after clean-up, or is
  # neither a known compound name nor a valid formula; otherwise returns the
  # plain-text answer followed by the structured_answer payload.
  handle remainder => sub {
      my $remainder = $_;

      # Strip question filler. The \b anchors keep the filler words from being
      # cut out of the middle of compound names: without them "methane"
      # became "mane" and "chloroform" became "chloorm".
      $remainder =~ s/\b(?:what is|whats|what's|the|of|for)\b|\?//g;
      $remainder = trim $remainder;
      return unless $remainder;

      # Common compounds are looked up by lower-cased name first.
      my $name_key = lc $remainder;
      if (exists $compounds{$name_key}) {
          return build_answer_with_compound($compounds{$name_key});
      }

      # Otherwise treat the text as a chemical formula.
      my $mass = molar_mass($remainder);
      return if $mass == -1;    # molar_mass signals invalid input with -1

      return "The molar mass of $remainder is $mass g/mol.",
          structured_answer => {
              data => {
                  title    => "$mass g/mol",
                  subtitle => "$remainder"
              },
              templates => {
                  group => 'text'
              }
          };
  };
  # build_answer_with_compound formats the answer for a known compound.
  # Takes a hash reference with name, formula and weight fields and returns
  # the plain-text answer followed by the structured_answer key and payload.
  sub build_answer_with_compound {
      my ($record) = @_;
      my %info = %{$record};

      my $text = sprintf(
          'The molar mass of %s (%s) is %s g/mol.',
          $info{name}, $info{formula}, $info{weight}
      );
      my %payload = (
          data => {
              title    => $info{weight} . ' g/mol',
              subtitle => join(', ', $info{name}, $info{formula}),
          },
          templates => {
              group => 'text',
          },
      );

      return ($text, structured_answer => \%payload);
  }
  # is_int reports whether its argument consists solely of digits.
  sub is_int {
      my $candidate = shift;
      return $candidate =~ m/^\d+$/;
  }
  # is_compound reports whether its argument consists solely of letters
  # (upper or lower case), i.e. whether it looks like an element symbol.
  sub is_compound {
      my $candidate = shift;
      return $candidate =~ /^([a-z]+)$/i;
  }
  # sanatize verifies that a formula string is suitable for processing.
  # Sanitization strategy:
  # - The formula is non-empty and made up only of ASCII letters, digits and
  #   parentheses.
  # - A right paren never appears without an open left paren, and every left
  #   paren is closed by the end of the string.
  # - A group is never empty: ")" may not directly follow "(".
  # - Each digit is preceded by a letter, a right paren, or another digit.
  # - Each lowercase letter is preceded by a letter.
  # Returns 0 if all checks pass, -1 if any of them fails.
  sub sanatize {
      my ($string) = @_;
      return -1 unless defined $string && $string =~ /\A[A-Za-z0-9()]+\z/;

      my $open_groups = 0;
      my $prev        = '';    # empty string marks the start of the formula
      for my $char (split //, $string) {
          if ($char eq '(') {
              $open_groups++;
          }
          elsif ($char eq ')') {
              return -1 if $prev eq '(';          # empty group
              return -1 if --$open_groups < 0;    # nothing left to close
          }
          elsif ($char =~ /[a-z]/) {
              return -1 unless $prev =~ /[A-Za-z]/;
          }
          elsif ($char =~ /[0-9]/) {
              return -1 unless $prev =~ /[A-Za-z0-9)]/;
          }
          $prev = $char;
      }

      # A group left open would otherwise slip through and be weighed on its
      # own, silently dropping the rest of the formula.
      return -1 if $open_groups != 0;
      return 0;
  }
  # verify_compounds walks a nested array produced by parse and confirms that
  # every element symbol in it has an entry in the table of masses.
  # Returns 0 when all symbols are known, -1 otherwise.
  sub verify_compounds {
      my @items = @{$_[0]};

      foreach my $item (@items) {
          if (ref($item) eq 'ARRAY') {
              # Nested group: one unknown symbol inside fails the whole formula.
              my $nested = verify_compounds($item);
              return -1 if $nested == -1;
              next;
          }

          # Counts (integers) need no lookup; only letter-only entries do.
          next unless is_compound($item);
          return -1 unless exists $masses{$item};
      }

      return 0;
  }
  # parse turns a string such as "Al2(SO4)3" into a nested array reference
  # that looks like ["Al",2,["S","O",4],3].
  # Element symbols become strings, counts become integers, and each
  # parenthesised group becomes a nested array reference.
  # Malformed input is tolerated rather than fatal: a ")" with no open group
  # is ignored, a group left open is closed at the end of the string, and a
  # lowercase letter with no symbol before it becomes a token of its own.
  # Always returns the array reference for the whole formula.
  sub parse {
      my ($string) = @_;

      # $stack[0] is the top-level formula; each "(" pushes a new open group.
      my @stack = ([]);

      for my $c (split //, $string) {
          my $current = $stack[-1];
          my $has_scalar_tail = @{$current} && !ref $current->[-1];

          if ($c eq '(') {
              push @stack, [];
          }
          elsif ($c eq ')') {
              # Close the innermost group and attach it to its parent; never
              # pop the root frame.
              next if @stack == 1;
              my $group = pop @stack;
              push @{ $stack[-1] }, $group;
          }
          elsif ($c =~ /^\d$/) {
              if ($has_scalar_tail && $current->[-1] =~ /^\d+$/) {
                  # $c is a further digit of a larger integer.
                  $current->[-1] = $current->[-1] * 10 + $c;
              }
              else {
                  push @{$current}, $c;
              }
          }
          elsif ($c =~ /[a-z]/ && $has_scalar_tail) {
              # Lowercase letters extend the symbol before them ("C" . "l").
              $current->[-1] .= $c;
          }
          else {
              # Capital letters start a new symbol. Anything unexpected also
              # lands here as its own token.
              push @{$current}, $c;
          }
      }

      # Fold groups that were never closed into their parents so the caller
      # always receives the complete formula, not just the innermost group.
      while (@stack > 1) {
          my $group = pop @stack;
          push @{ $stack[-1] }, $group;
      }

      return $stack[0];
  }
  # calc_mass calculates the molar mass of a nested array produced by parse.
  #
  # Every entry of the array is one of:
  #   - an array reference: a parenthesised group, weighed recursively;
  #   - a string: an element symbol looked up in %masses;
  #   - an integer: the count for the entry just before it.
  # Integers are consumed while handling the entry they follow, so they add
  # nothing on their own. Symbols missing from %masses are ignored.
  sub calc_mass {
      my @items = @{$_[0]};
      my $final = $#items;
      my $total = 0;

      for my $pos (0 .. $final) {
          my $item = $items[$pos];

          # The last entry has no successor, so it can never carry a count.
          if ($pos == $final) {
              $total = $total + calc_mass($item) if ref($item) eq 'ARRAY';
              $total = $total + $masses{$item}   if exists $masses{$item};
              next;
          }

          my $next = $items[$pos + 1];
          if (ref($item) eq 'ARRAY') {
              # Group, multiplied by the count that follows it (if any).
              my $group_mass = calc_mass($item);
              $total += is_int($next) ? $group_mass * $next : $group_mass;
          }
          elsif (is_compound($item) && is_int($next)) {
              # Symbol with an explicit count.
              $total += $masses{$item} * $next if exists $masses{$item};
          }
          elsif (exists $masses{$item}) {
              # Symbol on its own.
              $total += $masses{$item};
          }
          # Anything else (a count, an unknown symbol) contributes nothing.
      }

      return $total;
  }
  # molar_mass returns the molar mass of the formula string passed to it,
  # rounded to the nearest 0.0001.
  # Returns -1 if the input is invalid or if some element's mass is not found.
  sub molar_mass {
      my $formula = shift;

      # sanatize and verify_compounds both signal failure with -1.
      return -1 if sanatize($formula) == -1;

      # parse hands back a single array reference for the whole formula.
      my ($tree) = parse($formula);
      return -1 if verify_compounds($tree) == -1;

      return nearest(0.0001, calc_mass($tree));
  }
  200. 1;