PageRenderTime 48ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/i18npool/source/localedata/data/currency-check.awk

https://bitbucket.org/markjenkins/libreoffice_ubuntu-debian-fixes
AWK | 335 lines | 276 code | 13 blank | 46 comment | 0 complexity | 2819b3a0fe64d58e1cf86fe3bf51e988 MD5 | raw file
Possible License(s): GPL-3.0, LGPL-3.0, MPL-2.0-no-copyleft-exception, LGPL-2.1, BSD-3-Clause-No-Nuclear-License-2014
  1. #!/usr/bin/gawk -f
  2. #
  3. # This file is part of the LibreOffice project.
  4. #
  5. # This Source Code Form is subject to the terms of the Mozilla Public
  6. # License, v. 2.0. If a copy of the MPL was not distributed with this
  7. # file, You can obtain one at http://mozilla.org/MPL/2.0/.
  8. #
  9. # This file incorporates work covered by the following license notice:
  10. #
  11. # Licensed to the Apache Software Foundation (ASF) under one or more
  12. # contributor license agreements. See the NOTICE file distributed
  13. # with this work for additional information regarding copyright
  14. # ownership. The ASF licenses this file to you under the Apache
  15. # License, Version 2.0 (the "License"); you may not use this file
  16. # except in compliance with the License. You may obtain a copy of
  17. # the License at http://www.apache.org/licenses/LICENSE-2.0 .
  18. #
  19. # Usage: gawk -f currency-check.awk *.xml
  20. # Check any
  21. # <FormatCode>...[$xxx-...]...</FormatCode>
  22. # against every
  23. # <CurrencySymbol>xxx</CurrencySymbol>
  24. # definition of the same XML file and output symbols if no match was found.
  25. # For formatindex="12" to formatindex="15" and for formatindex="17" it is
  26. # checked if the used currency symbol is the usedInCompatibleFormatCodes
  27. # currency symbol as it is needed by the number formatter.
  28. # Also generates output if the generic currency symbol (UTF8 string 0xC2A4)
  29. # is used instead of a real currency symbol.
  30. # Author: Eike Rathke <er@openoffice.org>
  # Start-up: no input file has been seen yet. The empty name makes the
  # "file != FILENAME" rule fire on the very first record without running a
  # check for a file that was never read.
  BEGIN { file = "" }
  # Fires on the first record of every input file: finishes the checks for the
  # file processed so far (if any) and resets all per-file state.
  file != FILENAME {
      if ( file )
          checkIt()
      file = FILENAME
      line = 0
      nFormats = 0
      nCurrencies = 0
      bFormatAuto = 0
      sReplaceFrom = ""
      sReplaceTo = ""
      sMatchReplace = ""
      sRefCurrencyFromLocale = ""
      crlf = 0
      # Resetting the counters alone is not enough: a <Currency> element that
      # lacks one of its child elements would otherwise silently be checked
      # against the value the previous file left at the same array index.
      delete Formats
      delete FormatLine
      delete FormatAuto
      delete SymbolDefault
      delete SymbolCompati
      delete IDs
      delete IDLine
      delete Symbols
      delete SymbolLine
      delete BankSymbols
      delete BankSymbolLine
  }
  # Return the value of an XML attribute held in a single whitespace-separated
  # field such as  name="value"  optionally followed by ">" or "/>".
  #   field  the field text
  #   start  1-based position of the first character of the value, i.e. the
  #          length of  name="  plus one
  # (trim is a local variable.)
  function attributeValue( field, start,    trim ) {
      # Characters that do not belong to the value: the name=" prefix
      # (start-1), the closing quote (1) and an optional ">" or "/>".
      trim = start
      if ( field ~ />$/ )
          ++trim
      if ( field ~ /\/>$/ )
          ++trim
      return substr( field, start, length(field)-trim )
  }

  # Read the currency definitions of a referenced locale from <locale>.xml in
  # the current directory, reporting positions as "referenced from" the file
  # currently being checked. Prints a message if the file yields no lines.
  # (savedRecord is a local variable.)
  function readRefLocaleCurrencies( locale,    savedRecord ) {
      # "getline <file" overwrites $0 and NF; remember the current record so
      # the caller's loop over its fields keeps working afterwards.
      savedRecord = $0
      sRefCurrencyFromLocale = file
      oldfile = file
      oldline = line
      file = locale ".xml"
      line = 0
      while ( (getline <file) > 0 )
      {
          ++line
          getCurrencyParams()
      }
      close( file )
      if ( !line )
          print "ref locale not available: " file \
              " (from " oldfile " line " oldline ")"
      file = oldfile
      line = oldline
      sRefCurrencyFromLocale = ""
      $0 = savedRecord
  }

  # Per-record rule: counts lines, rejects CR/LF line endings, tracks the
  # LC_FORMAT replaceFrom/replaceTo attributes and the CURRENCY format index,
  # collects the currency symbols used in format codes, follows
  # <LC_CURRENCY ref="..."> to another locale file and hands every other line
  # to getCurrencyParams().
  {
      ++line
      # If run under Unix a CrLf spoils ...$ line end checks, so give up at
      # the first line that ends in a carriage return.
      if ( /\x0D$/ )
      {
          print "Error: not Unix line ending in line " line
          crlf = 1
          exit(1)
      }
      if ( $1 ~ /^<LC_FORMAT(>|$)/ )
      {
          if ( $0 ~ /replaceFrom="\[CURRENCY\]"/ )
          {
              sReplaceFrom = "\\[CURRENCY\\]"
              # Allow leading blanks like every other element pattern here;
              # anchoring directly at "<FormatCode>" never matches an
              # indented line.
              sMatchReplace = "^[[:blank:]]*<FormatCode>.*" sReplaceFrom
          }
          for ( j=2; j<=NF; ++j )
          {
              if ( $j ~ /^replaceTo="/ )
                  sReplaceTo = attributeValue( $j, 12 )
          }
      }
      else if ( $1 ~ /^<FormatElement(>|$)/ )
      {
          # Only currency format elements change the flag; it stays set until
          # the next <FormatCode> line consumes it.
          if ( $0 ~ /usage="CURRENCY"/ )
          {
              if ( $0 ~ /formatindex="1[23457]"/ )
                  bFormatAuto = 1
              else
                  bFormatAuto = 0
          }
      }
      else if ( $0 ~ /^[[:blank:]]*<FormatCode>.*\[\$.*-[0-9a-fA-F]+\]/ ||
              (sMatchReplace && $0 ~ sMatchReplace ) )
      {
          if ( sReplaceFrom )
              gsub( sReplaceFrom, sReplaceTo )
          split( $0, arr, /<|>/ )
          # arr[3] is the text between <FormatCode> and </FormatCode>.
          # Splitting it at "[$" and "-hex]" leaves the currency symbols
          # alongside the pieces of the numeric pattern.
          nCode = split( arr[3], code, /(\[\$)|(-[0-9a-fA-F]+\])/ )
          # Walk the pieces by index so symbols are recorded, and later
          # reported, in the order they appear in the format code.
          for ( j=1; j<=nCode; ++j )
          {
              # Pieces containing '#', '0' or "[NatNum" belong to the number
              # pattern, not to a currency symbol.
              if ( code[j] && code[j] !~ /#|0|\[NatNum/ )
              {
                  FormatLine[nFormats] = file " line " line
                  FormatAuto[nFormats] = bFormatAuto
                  Formats[nFormats++] = code[j]
              }
          }
          bFormatAuto = 0
      }
      else if ( $1 ~ /^<LC_CURRENCY(>|$)/ )
      {
          for ( j=2; j<=NF; ++j )
          {
              if ( $j ~ /^ref="/ )
                  readRefLocaleCurrencies( attributeValue( $j, 6 ) )
          }
      }
      else
          getCurrencyParams()
  }
  # After the last record: check the final input file, unless processing was
  # aborted because of a CR/LF line ending or no file was read at all.
  END {
      if ( !crlf && file )
          checkIt()
  }
  # Build the "<file> line <n>" position text for the current record, with a
  # "(referenced from ...)" suffix while a referenced locale is being read.
  function currencyPosition() {
      if ( sRefCurrencyFromLocale )
          return file " line " line \
              " (referenced from " sRefCurrencyFromLocale ")"
      return file " line " line
  }

  # Record the currency data found on the current record ($0) under index
  # nCurrencies: the default / usedInCompatibleFormatCodes flags of
  # <Currency>, and text plus position of <CurrencyID>, <CurrencySymbol> and
  # <BankSymbol>. </Currency> completes the entry by advancing nCurrencies.
  # Assumes that each element is on a line on its own!
  function getCurrencyParams() {
      if ( $1 ~ /^<Currency(>|$)/ )
      {
          # A regexp match evaluates to 1 or 0, which is exactly the flag.
          SymbolDefault[nCurrencies] = ( $0 ~ /default="true"/ )
          SymbolCompati[nCurrencies] = ( $0 ~ /usedInCompatibleFormatCodes="true"/ )
          return
      }
      if ( $0 ~ /^[[:blank:]]*<CurrencyID>/ )
      {
          # Splitting at '<' and '>' puts the element text into arr[3].
          split( $0, arr, /<|>/ )
          IDLine[nCurrencies] = currencyPosition()
          IDs[nCurrencies] = arr[3]
          return
      }
      if ( $0 ~ /^[[:blank:]]*<CurrencySymbol>/ )
      {
          split( $0, arr, /<|>/ )
          SymbolLine[nCurrencies] = currencyPosition()
          Symbols[nCurrencies] = arr[3]
          return
      }
      if ( $0 ~ /^[[:blank:]]*<BankSymbol>/ )
      {
          split( $0, arr, /<|>/ )
          BankSymbolLine[nCurrencies] = currencyPosition()
          BankSymbols[nCurrencies] = arr[3]
          return
      }
      if ( $1 ~ /^<\/Currency>/ )
          ++nCurrencies
  }
  # Print one report line about collected format symbol number idx.
  function reportFormat( prefix, idx ) {
      print prefix Formats[idx] "' (" FormatLine[idx] ")"
  }

  # Print one report line about defined currency symbol number idx.
  function reportSymbol( prefix, idx ) {
      print prefix Symbols[idx] "' (" SymbolLine[idx] ")"
  }

  # Evaluate everything collected for the current file and print the findings:
  #  - format code symbols that are the generic currency sign, match no
  #    defined currency symbol, or (for the automatic currency formats) do not
  #    match the usedInCompatibleFormatCodes symbol;
  #  - if any such format was found, the list of defined symbols for
  #    reference; otherwise missing or duplicated default / compatible flags;
  #  - CurrencyIDs that do not look like an ISO 4217 code or differ from the
  #    BankSymbol.
  # An empty line is printed after the findings of a file that had any.
  function checkIt() {
      bad = 0
      for ( j=0; j<nFormats; ++j )
      {
          # state: 0 = unknown symbol, 1 = defined, 2 = the compatible symbol.
          state = FormatInSymbol( Formats[j] )
          if ( Formats[j] == "\xc2\xa4" )
          {
              reportFormat( " bad: `", j )
              bad = 1
          }
          else if ( state == 0 )
          {
              reportFormat( "unknown: `", j )
              bad = 1
          }
          else if ( FormatAuto[j] && state < 2 )
          {
              reportFormat( "badauto: `", j )
              bad = 1
          }
      }
      if ( !bad )
      {
          # All format codes are fine: make sure exactly one currency is
          # flagged as default and exactly one as compatible.
          bHasDefault = 0
          bHasCompati = 0
          for ( j=0; j<nCurrencies; ++j )
          {
              if ( Symbols[j] == "\xc2\xa4" )
              {
                  reportSymbol( "def bad: `", j )
                  bad = 1
              }
              if ( SymbolDefault[j] )
              {
                  if ( bHasDefault )
                  {
                      reportSymbol( "dupe default: `", j )
                      bad = 1
                  }
                  else
                      bHasDefault = 1
              }
              if ( SymbolCompati[j] )
              {
                  if ( bHasCompati )
                  {
                      reportSymbol( "dupe compati: `", j )
                      bad = 1
                  }
                  else
                      bHasCompati = 1
              }
          }
          if ( !bHasDefault )
          {
              print " no default: (" file ")"
              bad = 1
          }
          if ( !bHasCompati )
          {
              print " no compati: (" file ")"
              bad = 1
          }
      }
      else
      {
          # Some format code was rejected: list every defined symbol with its
          # role so the offending code can be compared against them.
          for ( j=0; j<nCurrencies; ++j )
          {
              if ( Symbols[j] == "\xc2\xa4" )
                  reportSymbol( "def bad: `", j )
              bDef = ( SymbolDefault[j] || SymbolCompati[j] )
              if ( SymbolDefault[j] )
                  reportSymbol( "default: `", j )
              if ( SymbolCompati[j] )
                  reportSymbol( "compati: `", j )
              if ( !bDef )
                  reportSymbol( "defined: `", j )
          }
      }
      for ( j=0; j<nCurrencies; ++j )
      {
          # The CurrencyID has to at least resemble an ISO 4217 code, i.e.
          # three ASCII capitals. zh_MO is the single exception: its original
          # data set erroneously had BankSymbol="P", which was stored as ISO
          # code in documents and therefore lives on as CurrencyID for legacy
          # documents.
          # The letters are spelled out because gawk 3.1.4 matches [A-Z]
          # against lower case letters except 'a' regardless of IGNORECASE,
          # and [[:upper:]] would accept more than ASCII.
          if ( !( IDs[j] ~ /^[ABCDEFGHIJKLMNOPQRSTUVWXYZ][ABCDEFGHIJKLMNOPQRSTUVWXYZ][ABCDEFGHIJKLMNOPQRSTUVWXYZ]$/ \
                  || (file == "zh_MO.xml" && IDs[j] == "P") ) )
          {
              print "no ISO 4217 code: `" IDs[j] "' (" IDLine[j] ")"
              bad = 1
          }
          # CurrencyID should equal BankSymbol for now.
          if ( BankSymbols[j] != IDs[j] )
          {
              print "not equal: CurrencyID `" IDs[j] "' != BankSymbol `" BankSymbols[j] \
                  "' (" IDLine[j] " and " BankSymbolLine[j] ")"
              bad = 1
          }
      }
      if ( bad )
          print ""
  }
  # Look up a symbol used in a format code among the defined currency symbols.
  #   format  the symbol text taken from a format code
  # Returns 2 if it equals the symbol of a currency flagged
  # usedInCompatibleFormatCodes, 1 if it only equals symbols of other
  # currencies, 0 if it equals none.
  function FormatInSymbol( format ) {
      state = 0
      nSym = 0
      while ( nSym < nCurrencies )
      {
          if ( Symbols[nSym] == format )
          {
              # The compatible currency wins at once. Any other hit is only
              # remembered, because two currencies can share a symbol (e.g.
              # az_AZ.xml 'man.' for AZM and AZN) and the compatible one may
              # still follow.
              if ( SymbolCompati[nSym] )
                  return 2
              state = 1
          }
          ++nSym
      }
      return state
  }
  322. # vim: ts=4 sw=4 expandtab