/open-dm-dq/standardizer/other/scripts/generateClueTableFR.awk
AWK | 164 lines | 147 code | 17 blank | 0 comment | 0 complexity | 6dbf16982e3cc4458ae221b73c1aaed3 MD5 | raw file
# Fill the global inputType array, which maps a two-character clue type code
# to the type name written into each <type> element of the output.
# The mapping is kept as "CODE=NAME" pairs in one whitespace-separated spec
# string; none of the codes or names contains a blank or an "=" sign, so the
# split below is unambiguous.  The extra parameters are local scratch variables.
function loadInputTypes(    spec, pairs, count, i, cut) {
    spec = "1P=UNKNOWN_1P A1=ALPHA_ONE A2=ALPHA_TWO A3=UNKNOWN_A3"
    spec = spec " AM=AMPERSAND AN=ALPHA_NUM AU=GENERIC_WORD"
    spec = spec " B*=UNKNOWN_B_STAR B+=UNKNOWN_B_PLUS BI=UNKNOWN_BI BN=UNKNOWN_BN"
    spec = spec " BP=BUILDING_PROPERTY BS=UNKNOWN_BS BT=UNKNOWN_BT"
    spec = spec " BU=BUILDING_UNIT BX=POST_OFFICE_BOX CN=UNKNOWN_CN"
    spec = spec " D1=DIGIT DA=LEADING_DASH DB=UNKNOWN_DB DM=UNKNOWN_DM"
    spec = spec " DR=STREET_DIRECTION EI=EXTRA_INFORMATION EN=UNKNOWN_EN"
    spec = spec " EX=EXTENSION FC=NUMERIC_FRACTION"
    spec = spec " H*=UNKNOWN_H_STAR H+=UNKNOWN_H_PLUS HN=UNKNOWN_HN"
    spec = spec " HR=HIGHWAY_ROUTE HS=UNKNOWN_HS MP=MILE_POST"
    spec = spec " N*=UNKNOWN_N_STAR N+=UNKNOWN_N_PLUS NA=UNKNOWN_NA NB=UNKNOWN_NB"
    spec = spec " NL=COMMON_WORD NU=NUMERIC_VALUE OT=ORDINAL_TYPE"
    spec = spec " P*=UNKNOWN_P_STAR P+=UNKNOWN_P_PLUS PD=UNKNOWN_PD PT=PREFIX_TYPE"
    spec = spec " R*=UNKNOWN_R_STAR R+=UNKNOWN_R_PLUS RR=RURAL_ROUTE"
    spec = spec " SA=STATE_ABBREVIATION SD=UNKNOWN_SD ST=UNKNOWN_ST"
    spec = spec " T*=UNKNOWN_T_STAR T+=UNKNOWN_T_PLUS TB=UNKNOWN_TB TY=STREET_TYPE"
    spec = spec " W*=UNKNOWN_W_STAR W+=UNKNOWN_W_PLUS"
    spec = spec " WD=STRUCTURE_DESCRIPTOR WI=STRUCTURE_IDENTIFIER XN=UNKNOWN_XN"

    count = split(spec, pairs, " ")
    for (i = 1; i <= count; i++) {
        cut = index(pairs[i], "=")
        inputType[substr(pairs[i], 1, cut - 1)] = substr(pairs[i], cut + 1)
    }

    # The empty code cannot be written as a pair in the spec string.
    inputType[""] = "UNSPECIFIED"
}

# Runs once before any input is read: builds the type table, then writes the
# XML declaration and opens the <clues> root element.
BEGIN {
    loadInputTypes()

    print "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>"
    print "<clues>"
}
# Return text with the XML markup characters &, < and > replaced by entity
# references so it can be embedded as element content.
function xmlEscape(text) {
    # "&" must be handled first, otherwise the ampersands introduced by the
    # other two substitutions would be escaped again.  In a gsub replacement a
    # bare "&" stands for the matched text, so a literal ampersand has to be
    # written as "\\&".
    gsub(/&/, "\\&amp;", text)
    gsub(/</, "\\&lt;", text)
    gsub(/>/, "\\&gt;", text)
    return text
}

# Return text without leading and trailing space characters.
function trimSpaces(text) {
    sub(/^ +/, "", text)
    sub(/ +$/, "", text)
    return text
}

# Extract the fixed-width column [start, start + width) from line, trimmed
# and XML-escaped.
function clueField(line, start, width) {
    return xmlEscape(trimSpaces(substr(line, start, width)))
}

# Map a raw two-character type column to its name in the global inputType
# table.  The column is trimmed first so that a blank column selects the
# table's empty-string entry, which an untrimmed two-character value could
# never match.  Codes missing from the table yield "" and, thanks to the
# "in" test, do not add entries to the table.
function clueTypeName(rawCode,    code) {
    code = trimSpaces(rawCode)
    return (code in inputType) ? inputType[code] : ""
}

# Write one <clue> element for a fixed-width input record.
#
# Column layout (1-based), as read by the substr calls below:
#    1-34  name
#   35-47  translation
#   51-80  five word slots of 6 columns each: 4-column id + 2-column type
#   98     "*" marks the translation as expanded
#
# The extra parameters are local scratch variables.
function emitClue(record,    line, expanded, slot, start, wordId) {
    # Pad short records to 99 columns so every substr below is in range.
    line = sprintf("%-99s", record)

    expanded = "false"
    if (substr(line, 98, 1) == "*")
        expanded = "true"

    print " <clue>"
    print " <name>" clueField(line, 1, 34) "</name>"
    print " <translation>" clueField(line, 35, 13) "</translation>"
    print " <translationExpanded>" expanded "</translationExpanded>"

    print " <words>"
    for (slot = 0; slot < 5; slot++) {
        start = 51 + 6 * slot
        wordId = clueField(line, start, 4)
        # A slot whose id column is blank is omitted from the output.
        if (wordId == "")
            continue
        print " <word>"
        print " <id>" wordId "</id>"
        print " <type>" clueTypeName(substr(line, start + 4, 2)) "</type>"
        print " </word>"
    }
    print " </words>"

    print " </clue>"
}

# Every input record becomes one <clue> element.
{
    emitClue($0)
}
# After the last input record, close the <clues> root element.
END { print "</clues>" }