PageRenderTime 48ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/web/libraries/php-iban/utils/convert-registry.php

https://gitlab.com/Paulmicha/drupacompta
PHP | 319 lines | 249 code | 11 blank | 59 comment | 39 complexity | d24ddd92b3702dc147f3a11682e3e050 MD5 | raw file
  1. <?php
  2. # this script converts the IBAN_registry.txt file's entries to registry.txt format (php-iban's required internal format).
  3. # init
  4. require_once(dirname(dirname(__FILE__)) . '/php-iban.php');
  5. date_default_timezone_set('UTC'); # mutes a warning
  6. # read registry
  7. $data = `iconv -f utf8 -t ascii --byte-subst="<0x%x>" --unicode-subst="<U+%04X>" 'IBAN_Registry.txt'`;
  8. if($data == '') { die("Couldn't read IBAN_Registry.txt - try downloading from the location described in the REGISTRY-URL file."); }
  9. # print header line
  10. print "country_code|country_name|domestic_example|bban_example|bban_format_swift|bban_format_regex|bban_length|iban_example|iban_format_swift|iban_format_regex|iban_length|bban_bankid_start_offset|bban_bankid_stop_offset|bban_branchid_start_offset|bban_branchid_stop_offset|registry_edition|country_sepa\n";
  11. # break in to lines
  12. $lines = preg_split('/[\r\n]+/',$data);
  13. # display
  14. foreach($lines as $line) {
  15. # if it's not a blank line, and it's not the header row
  16. if($line != '' && !preg_match('/SEPA Country/',$line)) {
  17. # extract individual tab-separated fields
  18. $bits = explode("\t",$line);
  19. # remove quotes and superfluous whitespace on fields that have them.
  20. for($i=0;$i<count($bits);$i++) {
  21. $bits[$i] = preg_replace('/^"(.*)"$/','$1',$bits[$i]);
  22. $bits[$i] = preg_replace('/^ */','',$bits[$i]);
  23. $bits[$i] = preg_replace('/ *$/','',$bits[$i]);
  24. }
  25. # assigned fields to named variables
  26. # print "-------\n";
  27. # print $line;
  28. # print "-------\n";
  29. list($country_name,$country_code,$domestic_example,$bban,$bban_structure,$bban_length,$bban_bi_position,$bban_bi_length,$bban_bi_example,$bban_example,$iban,$iban_structure,$iban_length,$iban_electronic_example,$iban_print_example,$country_sepa,$contact_details) = $bits;
  30. # sanitise
  31. $country_code = strtoupper(substr($country_code,0,2)); # sanitise comments away
  32. $bban_structure = preg_replace('/[:;]/','',$bban_structure); # errors seen in Germany, Hungary entries
  33. $iban_structure = preg_replace('/, .*$/','',$iban_structure); # duplicates for FO, GL seen in DK
  34. $iban_electronic_example = preg_replace('/, .*$/','',$iban_electronic_example); # duplicates for FO, GL seen in DK
  35. if($country_code=='MU') {
  36. $iban_electronic_example = str_replace(' ','',$iban_electronic_example); # MU example has a spurious space
  37. }
  38. if($country_code=='CZ') {
  39. $iban_electronic_example = preg_replace('/ \w{10,}+$/','',$iban_electronic_example); # extra example for CZ
  40. $iban_print_example = preg_replace('/^(CZ.. .... .... .... .... ....).*$/','$1',$iban_print_example); # extra example
  41. }
  42. if($country_code=='FI') {
  43. # remove additional example
  44. $iban_electronic_example = preg_replace('/ or .*$/','',$iban_electronic_example);
  45. # fix bban example to remove verbosity and match domestic example
  46. $bban = '12345600000785';
  47. }
  48. if($country_code=='KZ') {
  49. # fix presence of multiline free-text in KZ IBAN structure field
  50. $iban_structure = '2!a2!n3!n13!c';
  51. }
  52. if($country_code=='QA') {
  53. # fix the lack BBAN structure provision in the TXT format registry
  54. $bban_structure = '4!a4!n17!c';
  55. # fix broken IBAN structure provision
  56. $iban_structure = 'QA2!n4!a4!n17!c';
  57. }
  58. if($country_code=='JO') {
  59. $bban_bi_length=4; # not '4!a' as suggested
  60. }
  61. $iban_print_example = preg_replace('/, .*$/','',$iban_print_example); # DK includes FO and GL examples in one record
  62. # drop leading 2!a in iban structure.
  63. # .. should actually be the country code in question
  64. if(substr($iban_structure,0,3) == '2!a') {
  65. $iban_structure = $country_code . substr($iban_structure,3);
  66. }
  67. # calculate $bban_regex from $bban_structure
  68. $bban_regex = swift_to_regex($bban_structure);
  69. # calculate $iban_regex from $iban_structure
  70. $iban_regex = swift_to_regex($iban_structure);
  71. print "[DEBUG] got $iban_regex from $iban_structure\n";
  72. # debugging
  73. if(true) {
  74. print "[$country_name ($country_code)]\n";
  75. print "Domestic account number example: $domestic_example\n";
  76. print "BBAN structure: $bban_structure\n";
  77. print "BBAN length: $bban_length\n";
  78. print "BBAN bank identifier position: $bban_bi_position\n";
  79. print "BBAN bank identifier length: $bban_bi_length\n";
  80. print "BBAN bank identifier example: $bban_bi_example\n";
  81. print "BBAN example: $bban_example\n";
  82. print "BBAN regex (calculated): $bban_regex\n";
  83. print "IBAN structure: $iban_structure\n";
  84. print "IBAN length: $iban_length\n";
  85. print "IBAN electronic format example: $iban_electronic_example\n";
  86. print "IBAN print format example: $iban_print_example\n";
  87. print "IBAN Regex (calculated): $iban_regex\n";
  88. print "SEPA country: $country_sepa\n";
  89. print "Contact details: $contact_details\n\n";
  90. }
  91. # calculate numeric $bban_length
  92. $bban_length = preg_replace('/[^\d]/','',$bban_length);
  93. # calculate numeric $iban_length
  94. $iban_length = preg_replace('/[^\d]/','',$iban_length);
  95. # calculate bban_bankid_<start|stop>_offset
  96. # .... First we have to parse the freetext $bban_bi_position, eg:
  97. # Bank Identifier 1-3, Branch Identifier
  98. # Position 1-2
  99. # Positions 1-2
  100. # Positions 1-3
  101. # Positions 1-3 ;Branch is not available
  102. # Positions 1-3, Branch identifier
  103. # Positions 1-3, Branch identifier positions
  104. # Positions 1-4
  105. # Positions 1-4, Branch identifier
  106. # Positions 1-4, Branch identifier positions
  107. # Positions 1-5
  108. # Positions 1-5 (positions 1-2 bank identifier; positions 3-5 branch identifier). In case of payment institutions Positions 1-5, Branch identifier positions
  109. # Positions 1-6, Branch identifier positions
  110. # Positions 1-6. First two digits of bank identifier indicate the bank or banking group (For example, 1 or 2 for Nordea, 31 for Handelsbanken, 5 for cooperative banks etc)
  111. # Positions 1-7
  112. # Positions 1-8
  113. # Positions 2-6, Branch identifier positions
  114. # positions 1-3, Branch identifier positions
  115. #
  116. # ... our algorithm is as follows:
  117. # - find all <digit>-<digit> tokens
  118. preg_match_all('/(\d)-(\d\d?)/',$bban_bi_position,$matches);
  119. # - discard overlaps ({1-5,1-2,3-5} becomes {1-2,3-5})
  120. $tmptokens = array();
  121. for($j=0;$j<count($matches[0]);$j++) {
  122. #print "tmptokens was... " . print_r($tmptokens,1) . "\n";
  123. $from = $matches[1][$j];
  124. $to = $matches[2][$j];
  125. # (if we don't yet have a match starting here, or it goes further,
  126. # overwrite the match-from-this-position record)
  127. if(!isset($tmptokens[$from]) || $to < $tmptokens[$from]) {
  128. $tmptokens[$from] = $to;
  129. }
  130. }
  131. unset($matches); # done
  132. # - assume the token starting from position 1 is the bank identifier
  133. # (or, if it does not exist, the token starting from position 2)
  134. $bban_bankid_start_offset = 0; # decrement 1 on assignment
  135. if(isset($tmptokens[1])) {
  136. $bban_bankid_stop_offset = $tmptokens[1]-1; # decrement 1 on assignment
  137. unset($tmptokens[1]);
  138. }
  139. else {
  140. $bban_bankid_stop_offset = $tmptokens[2]-1; # decrement 1 on assignment
  141. unset($tmptokens[2]);
  142. }
  143. # - assume any subsequent token, if present, is the branch identifier.
  144. $tmpkeys = array_keys($tmptokens);
  145. $start = array_shift($tmpkeys);
  146. unset($tmpkeys); # done
  147. $bban_branchid_start_offset='';
  148. $bban_branchid_stop_offset='';
  149. if($start!= '') {
  150. # we have a branch identifier!
  151. $bban_branchid_start_offset=$start-1;
  152. $bban_branchid_stop_offset=$tmptokens[$start]-1;
  153. }
  154. else {
  155. # (note: this codepath occurs for around two thirds of all records)
  156. # we have not yet found a branch identifier. HOWEVER, we can analyse the
  157. # structure of the BBAN to determine whether there is more than one
  158. # remaining non-tiny field (tiny fields on the end of a BBAN typically
  159. # being checksums) and, if so, assume that the first/shorter one is the
  160. # branch identifier.
  161. $reduced_bban_structure = preg_replace('/^\d+![nac]/','',$bban_structure);
  162. # print "[DEBUG] reduced BBAN structure = $reduced_bban_structure\n";
  163. $tokens = swift_tokenize($reduced_bban_structure,1);
  164. # print "[DEBUG] tokens = " + json_encode($tokens,1);
  165. # discard any tokens of length 1 or 2
  166. for($t=0;$t<count($tokens[0]);$t++) {
  167. if($tokens[1][$t] < 3) {
  168. $tokens['discarded'][$t] = 1;
  169. }
  170. }
  171. # interesting fields are those that are not discarded...
  172. if(!isset($tokens['discarded'])) {
  173. $interesting_field_count = count($tokens[0]); }
  174. else {
  175. $interesting_field_count = (count($tokens[0])-count($tokens['discarded']));
  176. }
  177. # print "[DEBUG] interesting field count = $interesting_field_count\n";
  178. # ...if we have at least two of them, there's a branchid-type field
  179. if($interesting_field_count >= 2) {
  180. # now loop through until we assign the branchid start offset
  181. # (this occurs just after the first non-discarded field)
  182. $found=0;
  183. for($f=0; (($found==0) && ($f<count($tokens[0]))); $f++) {
  184. # if this is a non-discarded token, of >2 length...
  185. if((!isset($tokens['discarded'][$f]) || $tokens['discarded'][$f] != 1) && $tokens[1][$f]>2) {
  186. # ... then assign.
  187. $pre_offset = $bban_bankid_stop_offset+1; # this is the offset before we reduced the structure to remove the bankid field
  188. $bban_branchid_start_offset = $pre_offset + $tokens['offset'][$f];
  189. $bban_branchid_stop_offset = $pre_offset + $tokens['offset'][$f] + $tokens[1][$f] - 1; # decrement by one on assignment
  190. $found=1;
  191. }
  192. }
  193. }
  194. }
  195. # fix for Jordan
  196. if($country_code == 'JO') {
  197. $bban_bankid_start_offset = 0;
  198. $bban_bankid_stop_offset = 3;
  199. $bban_branchid_start_offset = 4;
  200. $bban_branchid_stop_offset = 7;
  201. }
  202. # calculate 1=Yes, 0=No for $country_sepa
  203. # NOTE: This is buggy due to the free inclusion of random text by the registry publishers.
  204. # Notably it requires modification for places like Finland and Portugal where these
  205. # comments are known to exist.
  206. if(strtolower($country_sepa)=='yes') { $country_sepa=1; } else { $country_sepa = 0; }
  207. # set registry edition
  208. $registry_edition = date('Y-m-d');
  209. # now prepare generate our registry lines...
  210. $to_generate = array($country_code=>$country_name);
  211. if($country_code == 'DK') {
  212. $to_generate = array('DK'=>$country_name,'FO'=>'Faroe Islands','GL'=>'Greenland');
  213. }
  214. elseif($country_code == 'FR') {
  215. $to_generate = array('FR'=>$country_name,'BL'=>'Saint Barthelemy','GF'=>'French Guyana','GP'=>'Guadelope','MF'=>'Saint Martin (French Part)','MQ'=>'Martinique','RE'=>'Reunion','PF'=>'French Polynesia','TF'=>'French Southern Territories','YT'=>'Mayotte','NC'=>'New Caledonia','PM'=>'Saint Pierre et Miquelon','WF'=>'Wallis and Futuna Islands');
  216. }
  217. # output loop
  218. foreach($to_generate as $country_code=>$country_name) {
  219. # fixes for fields duplicating country code
  220. #print "CHECKSUM-BEFORE[$country_code] = $iban_electronic_example\n";
  221. $iban_electronic_example = iban_set_checksum($country_code . substr($iban_electronic_example,2));
  222. #print "CHECKSUM-AFTER[$country_code] = $iban_electronic_example\n";
  223. $iban_structure = $country_code . substr($iban_structure,2);
  224. # step 1
  225. $iban_regex_fixed = '^' . $country_code;
  226. $tmp_country_code = substr($iban_regex,1,2);
  227. #print "[DEBUG] $tmp_country_code\n";
  228. # route #1 ... here we are dealing with a country code in the string already
  229. if(preg_match('/^[A-Z][A-Z]$/',$tmp_country_code)) {
  230. #print "[DEBUG] route #1\n";
  231. $iban_regex_fixed = $iban_regex_fixed . substr($iban_regex,3);
  232. }
  233. # route #2 ... here there is no country code yet present
  234. else {
  235. #print "[DEBUG] route #2\n";
  236. $iban_regex_fixed = $iban_regex_fixed . substr($iban_regex,1);
  237. }
  238. #print "[DEBUG] substited '$iban_regex_fixed' for '$iban_regex'\n";
  239. # output
  240. print "$country_code|$country_name|$domestic_example|$bban_example|$bban_structure|$bban_regex|$bban_length|$iban_electronic_example|$iban_structure|$iban_regex_fixed|$iban_length|$bban_bankid_start_offset|$bban_bankid_stop_offset|$bban_branchid_start_offset|$bban_branchid_stop_offset|$registry_edition|$country_sepa\n";
  241. }
  242. }
  243. }
  244. # swift_to_regex()
  245. # converts the SWIFT IBAN format specifications to regular expressions
  246. # eg: 4!n6!n1!n -> ^(\d{4})(\d{6})(\d{1})$
  247. function swift_to_regex($swift) {
  248. # first find tokens
  249. $matches = swift_tokenize($swift);
  250. # now replace bits
  251. $tr = '^' . $swift . '$';
  252. # loop through each matched token
  253. for($i=0;$i<count($matches[0]);$i++) {
  254. # calculate replacement
  255. $replacement = '(TOKEN)';
  256. # type 'n'
  257. if($matches[3][$i] == 'n') {
  258. $replacement = '(\d{length})';
  259. }
  260. # type 'c'
  261. elseif($matches[3][$i] == 'c') {
  262. $replacement = '([A-Za-z0-9]{length})';
  263. }
  264. # type 'a'
  265. elseif($matches[3][$i] == 'a') {
  266. $replacement = '([A-Z]{length})';
  267. #' . $matches[1][$i] . '})';
  268. }
  269. else {
  270. print "unknown type: $matches[3][$i]\n";
  271. exit(1);
  272. }
  273. # now add length indicator to the token
  274. $length = '(LENGTH)';
  275. if($matches[2][$i] == '!') {
  276. $length = $matches[1][$i];
  277. }
  278. else {
  279. $length = '1,' . $matches[1][$i];
  280. }
  281. $replacement = preg_replace('/length/',$length,$replacement,1);
  282. # finally, replace the entire token with the replacement
  283. $tr = preg_replace('/' . $matches[0][$i] . '/',$replacement,$tr,1);
  284. }
  285. return $tr;
  286. }
  287. # swift_tokenize()
  288. # fetch individual tokens in a swift structural string
  289. function swift_tokenize($string,$calculate_offsets=0) {
  290. preg_match_all('/((?:\d*?[1-2])?\d)(!)?([anc])/',$string,$matches);
  291. if($calculate_offsets) {
  292. $current_offset=0;
  293. for($i=0;$i<count($matches[0]);$i++) {
  294. $matches['offset'][$i] = $current_offset;
  295. $current_offset+=$matches[1][$i];
  296. }
  297. #print "ANALYSE[raw]: " . join(',',$matches['offset']);
  298. }
  299. return $matches;
  300. }
  301. ?>