PageRenderTime 2ms CodeModel.GetById 40ms app.highlight 11ms RepoModel.GetById 19ms app.codeStats 0ms

/open-dm-dq/standardizer/other/scripts/generateClueTableFR.awk

https://bitbucket.org/openesb/mdm-legacy
AWK | 164 lines | 147 code | 17 blank | 0 comment | 0 complexity | 6dbf16982e3cc4458ae221b73c1aaed3 MD5 | raw file
  1BEGIN {
  2    inputType[""] = "UNSPECIFIED"
  3    inputType["1P"] = "UNKNOWN_1P"
  4    inputType["A1"] = "ALPHA_ONE"
  5    inputType["A2"] = "ALPHA_TWO"
  6    inputType["A3"] = "UNKNOWN_A3"
  7    inputType["AM"] = "AMPERSAND"
  8    inputType["AN"] = "ALPHA_NUM"
  9    inputType["AU"] = "GENERIC_WORD"
 10    inputType["B*"] = "UNKNOWN_B_STAR"
 11    inputType["B+"] = "UNKNOWN_B_PLUS"
 12    inputType["BI"] = "UNKNOWN_BI"
 13    inputType["BN"] = "UNKNOWN_BN"
 14    inputType["BP"] = "BUILDING_PROPERTY"
 15    inputType["BS"] = "UNKNOWN_BS"
 16    inputType["BT"] = "UNKNOWN_BT"
 17    inputType["BU"] = "BUILDING_UNIT"
 18    inputType["BX"] = "POST_OFFICE_BOX"
 19    inputType["CN"] = "UNKNOWN_CN"
 20    inputType["D1"] = "DIGIT"
 21    inputType["DA"] = "LEADING_DASH"
 22    inputType["DB"] = "UNKNOWN_DB"
 23    inputType["DM"] = "UNKNOWN_DM"
 24    inputType["DR"] = "STREET_DIRECTION"
 25    inputType["EI"] = "EXTRA_INFORMATION"
 26    inputType["EN"] = "UNKNOWN_EN"
 27    inputType["EX"] = "EXTENSION"
 28    inputType["FC"] = "NUMERIC_FRACTION"
 29    inputType["H*"] = "UNKNOWN_H_STAR"
 30    inputType["H+"] = "UNKNOWN_H_PLUS"
 31    inputType["HN"] = "UNKNOWN_HN"
 32    inputType["HR"] = "HIGHWAY_ROUTE"
 33    inputType["HS"] = "UNKNOWN_HS"
 34    inputType["MP"] = "MILE_POST"
 35    inputType["N*"] = "UNKNOWN_N_STAR"
 36    inputType["N+"] = "UNKNOWN_N_PLUS"
 37    inputType["NA"] = "UNKNOWN_NA"
 38    inputType["NB"] = "UNKNOWN_NB"
 39    inputType["NL"] = "COMMON_WORD"
 40    inputType["NU"] = "NUMERIC_VALUE"
 41    inputType["OT"] = "ORDINAL_TYPE"
 42    inputType["P*"] = "UNKNOWN_P_STAR"
 43    inputType["P+"] = "UNKNOWN_P_PLUS"
 44    inputType["PD"] = "UNKNOWN_PD"
 45    inputType["PT"] = "PREFIX_TYPE"
 46    inputType["R*"] = "UNKNOWN_R_STAR"
 47    inputType["R+"] = "UNKNOWN_R_PLUS"
 48    inputType["RR"] = "RURAL_ROUTE"
 49    inputType["SA"] = "STATE_ABBREVIATION"
 50    inputType["SD"] = "UNKNOWN_SD"
 51    inputType["ST"] = "UNKNOWN_ST"
 52    inputType["T*"] = "UNKNOWN_T_STAR"
 53    inputType["T+"] = "UNKNOWN_T_PLUS"
 54    inputType["TB"] = "UNKNOWN_TB"
 55    inputType["TY"] = "STREET_TYPE"
 56    inputType["W*"] = "UNKNOWN_W_STAR"
 57    inputType["W+"] = "UNKNOWN_W_PLUS"
 58    inputType["WD"] = "STRUCTURE_DESCRIPTOR"
 59    inputType["WI"] = "STRUCTURE_IDENTIFIER"
 60    inputType["XN"] = "UNKNOWN_XN"
 61
 62    print "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>"
 63    print "<clues>"
 64}
 65{
 66    line = sprintf("%-99s", $0)
 67
 68    name = substr(line, 1, 34)
 69    gsub(/\&/, "&amp;", name)
 70    gsub(/^ */, "", name)
 71    gsub(/ *$/, "", name)
 72
 73    translation = substr(line, 35, 13)
 74    gsub(/\&/, "&amp;", translation)
 75    gsub(/^ */, "", translation)
 76    gsub(/ *$/, "", translation)
 77
 78    translationExpanded = "false"
 79    if (substr(line, 98, 1) == "*")
 80	translationExpanded = "true"
 81
 82    clueWordId1 = substr(line, 51, 4)
 83    gsub(/\&/, "&amp;", clueWordId1)
 84    gsub(/^ */, "", clueWordId1)
 85    gsub(/ *$/, "", clueWordId1)
 86
 87    clueType1 = substr(line, 55, 2)
 88    gsub(/\&/, "&amp;", clueType1)
 89
 90    clueWordId2 = substr(line, 57, 4)
 91    gsub(/\&/, "&amp;", clueWordId2)
 92    gsub(/^ */, "", clueWordId2)
 93    gsub(/ *$/, "", clueWordId2)
 94
 95    clueType2 = substr(line, 61, 2)
 96    gsub(/\&/, "&amp;", clueType2)
 97
 98    clueWordId3 = substr(line, 63, 4)
 99    gsub(/\&/, "&amp;", clueWordId3)
100    gsub(/^ */, "", clueWordId3)
101    gsub(/ *$/, "", clueWordId3)
102
103    clueType3 = substr(line, 67, 2)
104    gsub(/\&/, "&amp;", clueType3)
105
106    clueWordId4 = substr(line, 69, 4)
107    gsub(/\&/, "&amp;", clueWordId4)
108    gsub(/^ */, "", clueWordId4)
109    gsub(/ *$/, "", clueWordId4)
110
111    clueType4 = substr(line, 73, 2)
112    gsub(/\&/, "&amp;", clueType4)
113
114    clueWordId5 = substr(line, 75, 4)
115    gsub(/\&/, "&amp;", clueWordId5)
116    gsub(/^ */, "", clueWordId5)
117    gsub(/ *$/, "", clueWordId5)
118
119    clueType5 = substr(line, 79, 2)
120    gsub(/\&/, "&amp;", clueType5)
121
122    print "    <clue>"
123    print "        <name>"  name "</name>"
124    print "        <translation>"  translation "</translation>"
125    print "        <translationExpanded>"  translationExpanded "</translationExpanded>"
126
127    print "        <words>"
128    if (clueWordId1 != "") {
129        print "            <word>"
130        print "                <id>" clueWordId1 "</id>"
131        print "                <type>" inputType[clueType1] "</type>"
132        print "            </word>"
133    }
134    if (clueWordId2 != "") {
135        print "            <word>"
136        print "                <id>" clueWordId2 "</id>"
137        print "                <type>" inputType[clueType2] "</type>"
138        print "            </word>"
139    }
140    if (clueWordId3 != "") {
141        print "            <word>"
142        print "                <id>" clueWordId3 "</id>"
143        print "                <type>" inputType[clueType3] "</type>"
144        print "            </word>"
145    }
146    if (clueWordId4 != "") {
147        print "            <word>"
148        print "                <id>" clueWordId4 "</id>"
149        print "                <type>" inputType[clueType4] "</type>"
150        print "            </word>"
151    }
152    if (clueWordId5 != "") {
153        print "            <word>"
154        print "                <id>" clueWordId5 "</id>"
155        print "                <type>" inputType[clueType5] "</type>"
156        print "            </word>"
157    }
158    print "            </words>"
159
160    print "    </clue>"
161}
# After the last record: close the <clues> root element opened by BEGIN.
END { print "</clues>" }