PageRenderTime 43ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 0ms

/src/node_modules/nlp_compromise/src/term/noun/organization/is_organization.js

https://gitlab.com/skmexyz/SortableCodingChallenge
JavaScript | 77 lines | 58 code | 7 blank | 12 comment | 12 complexity | b856c03675878deb34b44dab03ab1799 MD5 | raw file
  1. 'use strict';
  2. const abbreviations = require('../../../data/abbreviations');
  3. const org_data = require('../../../data/organizations');
  // Capitalised acronyms that show up frequently in text but are not
  // organizations; is_organization() rejects these before any other check.
  // (these should maybe be somewhere else)
  const blacklist = [
    'url',
    'http',
    'wtf',
    'irl',
    'ie',
    'eg',
    'gps',
    'dna',
    'sms',
  ].reduce(function(table, word) {
    table[word] = true;
    return table;
  }, {});
  // Lookup table of organization suffix words (like 'co' and 'ltd').
  // Filled from the abbreviation data first, then from the extra suffixes
  // listed in the organizations data; every listed word maps to true.
  let org_suffix = {};
  [abbreviations.orgs, org_data.suffixes].forEach(function(list) {
    list.forEach(function(word) {
      org_suffix[word] = true;
    });
  });
  // Lookup table of named organizations (like google and nestle):
  // every entry of the organizations data list maps to true.
  let org_names = {};
  org_data.organizations.forEach(function(name) {
    org_names[name] = true;
  });
  // True only when `key` was set directly on `table` with a truthy value.
  // A bare `table[key]` also finds members inherited from Object.prototype
  // ('constructor', 'toString', ...), which made e.g. 'the constructor'
  // look like a name ending in an organization suffix.
  const has_own_key = function(table, key) {
    return Object.prototype.hasOwnProperty.call(table, key) && Boolean(table[key]);
  };

  /**
   * Heuristically decide whether a term looks like an organization.
   *
   * Checks run in order and the first hit wins: blacklist (false), known
   * organization name, short all-caps acronym, period acronym, ' & ',
   * '..s of ', a simple domain-name pattern, and finally a trailing
   * organization suffix on a multi-word term.
   *
   * @param {string} str - term used for the table lookups and the lowercase
   *   patterns (presumably the normalised form - confirm with the caller).
   *   A missing value is treated as ''.
   * @param {string} [text] - term used for the capital-letter acronym checks
   *   (presumably the original, un-normalised text - confirm with the caller).
   *   A missing value is treated as ''.
   * @returns {boolean} true when one of the organization heuristics matches.
   */
  const is_organization = function(str, text) {
    str = str || '';
    text = text || '';
    //blacklist some boring ones
    if (has_own_key(blacklist, str)) {
      return false;
    }
    //some known organizations, like microsoft
    if (has_own_key(org_names, str)) {
      return true;
    }
    //no period acronyms: two to five capital letters, eg 'IBM'
    if (text.length <= 5 && /^[A-Z][A-Z]+$/.test(text)) {
      return true;
    }
    //period acronyms: the length check rules out '' and a single 'A.'
    if (text.length >= 4 && /^([A-Z]\.)*$/.test(text)) {
      return true;
    }
    // eg 'Smith & Co'
    if (/ & /.test(str)) {
      return true;
    }
    // Girlscouts of Canada
    if (/..s of /.test(str)) {
      return true;
    }
    // eg pets.com - not a perfect url regex, but a "org.com"
    if (/[a-z]{3}\.(com|net|org|biz)/.test(str)) {
      return true;
    }
    // "foobar inc." - only multi-word terms are checked for a suffix
    const words = str.split(' ');
    if (words.length > 1 && has_own_key(org_suffix, words[words.length - 1])) {
      return true;
    }
    return false;
  };
  // The module's export is the is_organization function itself.
  69. module.exports = is_organization;
  70. // console.log(is_organization('Captain of Jamaica'));