mosaic /open-dm-dq/standardizer/other/scripts/generateClueTableFR.awk

Language awk Lines 165
MD5 Hash 6dbf16982e3cc4458ae221b73c1aaed3 Estimated Cost $3,608 (why?)
Repository https://bitbucket.org/pymma/mosaic.git View Raw File View Project SPDX
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
# Runs once before any input is read: fills the inputType lookup table
# (clue type code -> symbolic type name) and opens the XML document.
BEGIN {
    # The empty code has no two-character spelling, so it is set directly.
    inputType[""] = "UNSPECIFIED"

    # Every remaining code is exactly two characters wide, which lets the
    # table be written as blank-separated "CC=NAME" pairs and unpacked with
    # fixed offsets: code = columns 1-2, name = column 4 onward.
    typeSpec = "1P=UNKNOWN_1P"
    typeSpec = typeSpec " A1=ALPHA_ONE A2=ALPHA_TWO A3=UNKNOWN_A3 AM=AMPERSAND AN=ALPHA_NUM AU=GENERIC_WORD"
    typeSpec = typeSpec " B*=UNKNOWN_B_STAR B+=UNKNOWN_B_PLUS BI=UNKNOWN_BI BN=UNKNOWN_BN BP=BUILDING_PROPERTY"
    typeSpec = typeSpec " BS=UNKNOWN_BS BT=UNKNOWN_BT BU=BUILDING_UNIT BX=POST_OFFICE_BOX"
    typeSpec = typeSpec " CN=UNKNOWN_CN"
    typeSpec = typeSpec " D1=DIGIT DA=LEADING_DASH DB=UNKNOWN_DB DM=UNKNOWN_DM DR=STREET_DIRECTION"
    typeSpec = typeSpec " EI=EXTRA_INFORMATION EN=UNKNOWN_EN EX=EXTENSION"
    typeSpec = typeSpec " FC=NUMERIC_FRACTION"
    typeSpec = typeSpec " H*=UNKNOWN_H_STAR H+=UNKNOWN_H_PLUS HN=UNKNOWN_HN HR=HIGHWAY_ROUTE HS=UNKNOWN_HS"
    typeSpec = typeSpec " MP=MILE_POST"
    typeSpec = typeSpec " N*=UNKNOWN_N_STAR N+=UNKNOWN_N_PLUS NA=UNKNOWN_NA NB=UNKNOWN_NB NL=COMMON_WORD NU=NUMERIC_VALUE"
    typeSpec = typeSpec " OT=ORDINAL_TYPE"
    typeSpec = typeSpec " P*=UNKNOWN_P_STAR P+=UNKNOWN_P_PLUS PD=UNKNOWN_PD PT=PREFIX_TYPE"
    typeSpec = typeSpec " R*=UNKNOWN_R_STAR R+=UNKNOWN_R_PLUS RR=RURAL_ROUTE"
    typeSpec = typeSpec " SA=STATE_ABBREVIATION SD=UNKNOWN_SD ST=UNKNOWN_ST"
    typeSpec = typeSpec " T*=UNKNOWN_T_STAR T+=UNKNOWN_T_PLUS TB=UNKNOWN_TB TY=STREET_TYPE"
    typeSpec = typeSpec " W*=UNKNOWN_W_STAR W+=UNKNOWN_W_PLUS WD=STRUCTURE_DESCRIPTOR WI=STRUCTURE_IDENTIFIER"
    typeSpec = typeSpec " XN=UNKNOWN_XN"

    pairCount = split(typeSpec, typePairs, " ")
    for (pairIndex = 1; pairIndex <= pairCount; pairIndex++)
        inputType[substr(typePairs[pairIndex], 1, 2)] = substr(typePairs[pairIndex], 4)

    # XML prolog and opening root element.
    print "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>"
    print "<clues>"
}
# Convert one fixed-width clue record into a <clue> XML element.
#
# Column layout read from each record (1-based, record padded to 99 columns):
#    1-34   clue name
#   35-47   translation
#   51-80   five consecutive slots, each a 4-column word id followed by a
#           2-column type code
#   98      "*" sets translationExpanded to "true", anything else "false"
#
# A slot whose word id is blank produces no <word> element.
{
    # Pad short records so every substr() below sees a full-width line.
    line = sprintf("%-99s", $0)

    expanded = (substr(line, 98, 1) == "*") ? "true" : "false"

    print "    <clue>"
    print "        <name>" xmlField(substr(line, 1, 34)) "</name>"
    print "        <translation>" xmlField(substr(line, 35, 13)) "</translation>"
    print "        <translationExpanded>" expanded "</translationExpanded>"

    print "        <words>"
    for (slot = 0; slot < 5; slot++) {
        # Slots are 6 columns apart, the first one starting at column 51.
        column = 51 + slot * 6

        wordId = xmlField(substr(line, column, 4))
        if (wordId == "")
            continue

        # Trim the code before the lookup so that a blank type field
        # resolves to the "" entry of inputType instead of missing the table.
        # NOTE(review): blank codes used to emit an empty <type>; confirm
        # consumers expect the "" mapping here.
        typeCode = trimSpaces(substr(line, column + 4, 2))

        # Codes absent from the table still yield an empty <type>; the "in"
        # test keeps the lookup from adding entries to inputType.
        typeName = (typeCode in inputType) ? inputType[typeCode] : ""

        print "            <word>"
        print "                <id>" wordId "</id>"
        print "                <type>" typeName "</type>"
        print "            </word>"
    }
    print "        </words>"

    print "    </clue>"
}

# Return text with leading and trailing spaces removed.
function trimSpaces(text) {
    sub(/^ +/, "", text)
    sub(/ +$/, "", text)
    return text
}

# Return text escaped for use as XML element content, with surrounding
# spaces removed.
function xmlField(text) {
    # "&" must be escaped first so the entities added below are not
    # escaped again. "\\&" is a literal ampersand in a gsub replacement;
    # a bare "&" there would stand for the matched text.
    gsub(/&/, "\\&amp;", text)
    gsub(/</, "\\&lt;", text)
    gsub(/>/, "\\&gt;", text)
    return trimSpaces(text)
}
# Runs once after the last record: closes the <clues> root element.
END {
    closingTag = "</clues>"
    print closingTag
}
Back to Top