/models/language/wsj/prep.awk

https://bitbucket.org/noelnv/csc575 · AWK · 9 lines · 8 code · 1 blank · 0 comment · 1 complexity · 8cede8dff5c18124db9a6175941f776c MD5 · raw file

  # Per-record markup clean-up filter.
  #
  # For every input record:
  #   * each "<s" tag whose name is followed by one or more digits, dots or
  #     hyphens (for example "<s12.3-4>") is rewritten to a bare "<s>";
  #   * each "<p" tag carrying the same kind of suffix is deleted;
  #   * each closing "</p>" tag is deleted.
  # Records that end up empty after these edits are dropped; all others are
  # written to standard output.
  {
      # gsub() with no third argument edits $0 in place.
      gsub(/<s[0-9\.\-]+>/, "<s>")
      gsub(/<p[0-9\.\-]+>/, "")
      gsub(/<\/p>/, "")

      # Emit only records that still have content.
      if ($0 != "")
          print $0
  }