/src/go/src/sp/xpath/functions.go

http://github.com/speedata/publisher · Go · 95 lines · 78 code · 8 blank · 9 comment · 18 complexity · 5394a5cc159fb494e32c2fde9f2bd71e MD5 · raw file

  1. package xpath
  2. import (
  3. "bytes"
  4. "encoding/xml"
  5. "fmt"
  6. "io"
  7. "regexp"
  8. "strings"
  9. )
  10. // replace() xpath function
  11. func Replace(text []byte, rexpr string, repl []byte) []byte {
  12. r, err := regexp.Compile(rexpr)
  13. if err != nil {
  14. return nil
  15. }
  16. // xpath uses $12 for $12 or $1, depending on the existence of $12 or $1.
  17. // go on the other hand uses $12 for $12 and never for $1, so you have to write
  18. // $1 as ${1} if there is text after the $1.
  19. // We escape the $n backwards to prevent expansion of $12 to ${1}2
  20. for i := r.NumSubexp(); i > 0; i-- {
  21. // first create rexepx that match "$i"
  22. x := fmt.Sprintf(`\$(%d)`, i)
  23. nummatcher := regexp.MustCompile(x)
  24. repl = nummatcher.ReplaceAll(repl, []byte(fmt.Sprintf(`$${%d}`, i)))
  25. }
  26. str := r.ReplaceAll(text, repl)
  27. return str
  28. }
  29. // contains() xpath function
  30. func Contains(text []byte, substring []byte) string {
  31. if bytes.Contains(text, substring) {
  32. return "true"
  33. } else {
  34. return "false"
  35. }
  36. }
  37. // tokenize() xpath function
  38. func Tokenize(text []byte, rexpr string) []string {
  39. r, err := regexp.Compile(rexpr)
  40. if err != nil {
  41. return nil
  42. }
  43. idx := r.FindAllIndex(text, -1)
  44. pos := 0
  45. var res []string
  46. for _, v := range idx {
  47. res = append(res, string(text[pos:v[0]]))
  48. pos = v[1]
  49. }
  50. res = append(res, string(text[pos:]))
  51. return res
  52. }
  53. func HtmlToXml(input string) (string, error) {
  54. input = "<toplevel路toplevel>" + input + "</toplevel路toplevel>"
  55. r := strings.NewReader(input)
  56. var w bytes.Buffer
  57. enc := xml.NewEncoder(&w)
  58. dec := xml.NewDecoder(r)
  59. dec.Strict = false
  60. dec.AutoClose = xml.HTMLAutoClose
  61. for {
  62. t, err := dec.Token()
  63. if err == io.EOF {
  64. break
  65. }
  66. if err != nil {
  67. enc.Flush()
  68. return w.String(), err
  69. }
  70. switch v := t.(type) {
  71. case xml.StartElement:
  72. if v.Name.Local != "toplevel路toplevel" {
  73. enc.EncodeToken(t)
  74. }
  75. case xml.EndElement:
  76. if v.Name.Local != "toplevel路toplevel" {
  77. enc.EncodeToken(t)
  78. }
  79. case xml.CharData:
  80. enc.EncodeToken(t)
  81. default:
  82. // fmt.Println(v)
  83. }
  84. }
  85. enc.Flush()
  86. return w.String(), nil
  87. }