PageRenderTime 46ms CodeModel.GetById 17ms app.highlight 23ms RepoModel.GetById 1ms app.codeStats 0ms

/scalate-tool/src/main/scala/org/fusesource/scalate/tool/commands/ToScaml.scala

http://github.com/scalate/scalate
Scala | 335 lines | 248 code | 53 blank | 34 comment | 30 complexity | 2e1dbecd4915308a8a4b2f1806ac42ab MD5 | raw file
  1/**
  2 * Copyright (C) 2009-2011 the original author or authors.
  3 * See the notice.md file distributed with this work for additional
  4 * information regarding copyright ownership.
  5 *
  6 * Licensed under the Apache License, Version 2.0 (the "License");
  7 * you may not use this file except in compliance with the License.
  8 * You may obtain a copy of the License at
  9 *
 10 *     http://www.apache.org/licenses/LICENSE-2.0
 11 *
 12 * Unless required by applicable law or agreed to in writing, software
 13 * distributed under the License is distributed on an "AS IS" BASIS,
 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15 * See the License for the specific language governing permissions and
 16 * limitations under the License.
 17 */
 18package org.fusesource.scalate.tool.commands
 19
 20import org.apache.felix.gogo.commands.{Action, Option => option, Argument => argument, Command => command}
 21import scala.xml._
 22import java.io._
 23import java.net.URL
 24import org.fusesource.scalate.util.IOUtil
 25import org.fusesource.scalate.InvalidSyntaxException
 26import util.parsing.input.CharSequenceReader
 27import org.fusesource.scalate.support.{Text=>SSPText, ScalaParseSupport}
 28import org.fusesource.scalate.ssp._
 29import org.w3c.tidy.{TidyMessage, TidyMessageListener, Tidy}
 30import org.apache.felix.service.command.CommandSession
 31
 32/* an even simpler ssp parser */
 33class SspParser extends ScalaParseSupport {
 34  var skipWhitespaceOn = false
 35
 36  override def skipWhitespace = skipWhitespaceOn
 37
 38  def skip_whitespace[T](p: => Parser[T]): Parser[T] = Parser[T] {
 39    in =>
 40      skipWhitespaceOn = true
 41      val result = p(in)
 42      skipWhitespaceOn = false
 43      result
 44  }
 45
 46  val anySpace = text("""[ \t]*""".r)
 47  val identifier = text("""[a-zA-Z0-9\$_]+""".r)
 48  val typeName = text(scalaType)
 49  val someText = text(""".+""".r)
 50
 51  val attribute = skip_whitespace(opt(text("import")) ~ text("var" | "val") ~ identifier ~ (":" ~> typeName)) ~ ("""\s*""".r ~> opt("""=\s*""".r ~> upto("""\s*%>""".r))) ^^ {
 52    case (p_import ~ p_kind ~ p_name ~ p_type) ~ p_default => ScriptletFragment(p_kind+" "+p_name+":"+p_type+" //attribute")
 53  }
 54
 55  val literalPart: Parser[SSPText] =
 56  upto("<%" | """\<%""" | """\\<%""" | "${" | """\${""" | """\\${""" | """\#""" | """\\#""" | directives) ~
 57          opt(
 58            """\<%""" ~ opt(literalPart) ^^ {case x ~ y => "<%" + y.getOrElse("")} |
 59                    """\${""" ~ opt(literalPart) ^^ {case x ~ y => "${" + y.getOrElse("")} |
 60                    """\#""" ~ opt(literalPart) ^^ {case x ~ y => "#" + y.getOrElse("")} |
 61                    """\\""" ^^ {s => """\"""}
 62            ) ^^ {
 63    case x ~ Some(y) => x + y
 64    case x ~ None => x
 65  }
 66
 67  val tagEnding = "+%>" | """%>[ \t]*\r?\n""".r | "%>"
 68  val commentFragment = wrapped("<%--", "--%>") ^^ {CommentFragment(_)}
 69  val altCommentFragment = wrapped("<%#", "%>") ^^ {CommentFragment(_)}
 70  val dollarExpressionFragment = wrapped("${", "}") ^^ {ExpressionFragment(_)}
 71  val expressionFragment = wrapped("<%=", "%>") ^^ {ExpressionFragment(_)}
 72  val attributeFragement = prefixed("<%@", attribute <~ anySpace ~ tagEnding)
 73  val scriptletFragment = wrapped("<%", tagEnding) ^^ {ScriptletFragment(_)}
 74  val textFragment = literalPart ^^ {TextFragment(_)}
 75
 76
 77  def directives = ("#" ~> identifier ~ anySpace ~ opt("(" ~> scalaExpression <~ ")")) ^^ {
 78    case a ~ b ~ c => ScriptletFragment(a+c.map("("+_+")").getOrElse(""))
 79  } | "#(" ~> identifier <~ ")" ^^ {ScriptletFragment(_)}
 80
 81  def scalaExpression: Parser[SSPText] = {
 82    text(
 83      (rep(nonParenText) ~ opt("(" ~> scalaExpression <~ ")") ~ rep(nonParenText)) ^^ {
 84        case a ~ b ~ c =>
 85          val mid = b match {
 86            case Some(tb) => "(" + tb + ")"
 87            case tb => ""
 88          }
 89          a.mkString("") + mid + c.mkString("")
 90      })
 91  }
 92
 93  val nonParenText = characterLiteral | stringLiteral | """[^\(\)\'\"]+""".r
 94
 95  val pageFragment: Parser[PageFragment] = directives | commentFragment | altCommentFragment | dollarExpressionFragment |
 96          attributeFragement | expressionFragment | scriptletFragment |
 97          textFragment
 98
 99  val pageFragments = rep(pageFragment)
100
101  private def phraseOrFail[T](p: Parser[T], in: String): T = {
102    var x = phrase(p)(new CharSequenceReader(in))
103    x match {
104      case Success(result, _) => result
105      case NoSuccess(message, next) => throw new InvalidSyntaxException(message, next.pos);
106    }
107  }
108
109  def getPageFragments(in: String): List[PageFragment] = {
110    phraseOrFail(pageFragments, in)
111  }
112
113}
114
/**
 * Shell command that converts an XML or HTML document (optionally containing
 * SSP tags) into the equivalent Scaml markup.
 *
 * @author <a href="http://hiramchirino.com">Hiram Chirino</a>
 */
121@command(scope = "scalate", name = "toscaml", description = "Converts an XML or HTML file to Scaml")
122class ToScaml extends Action {
123
  // When true, execute runs the markup through Tidy before parsing it as XML.
  @option(name = "--tidy", description = "Should html be tidied first?")
  var tidy = true

  // Input location: an http(s) URL or a file path. Left null when the
  // argument is not supplied, in which case execute reads the session keyboard.
  @argument(index = 0, name = "from", description = "The input file or http URL. If ommited, input is read from the console")
  var from: String = _

  // Output file. Left null when the argument is not supplied, in which case
  // execute writes to the session console.
  @argument(index = 1, name = "to", description = "The output file. If ommited, output is written to the console")
  var to: File = _

  // Destination of the generated Scaml; assigned by execute and used by process.
  var out:IndentPrintStream = _
134
135  def execute(session: CommandSession): AnyRef = {
136
137    def doit:Unit = {
138
139      var in = if( from==null ) {
140        session.getKeyboard
141      } else {
142        if( from.startsWith("http://") || from.startsWith("https://") ) {
143          new URL(from).openStream
144        } else {
145          new FileInputStream(from)
146        }
147      }
148
149      var data = IOUtil.loadBytes(in)
150      //println("original: "+new String(data, "UTF-8"))
151
152      // Parse out the code bits and wrap them in script tags so that
153      // we can tidy the document.
154      val fragments = (new SspParser).getPageFragments(new String(data, "UTF-8"))
155      data = ("<div>" + (fragments.map(_ match {
156        case ExpressionFragment(code) => "#{" + code.value + "}"
157        case ScriptletFragment(code) => """<scriptlet><![CDATA[""" + code.value + """]]></scriptlet>"""
158        case CommentFragment(comment) => """<!--""" + comment.value + """-->"""
159        case TextFragment(text) => text.value
160        case unexpected: PageFragment => 
161          System.err.println("Unexpected page fragment " + unexpected)
162          "" // skip it
163      }).mkString("")) + "</div>").getBytes("UTF-8")
164      // println("escaped: "+new String(data, "UTF-8"))
165
166      // try to tidy the html first before we try to parse it as XML
167      if (tidy) {
168        val tidy = new Tidy
169        tidy.setXHTML(true)
170        tidy.setXmlTags(true)
171        tidy.setIndentCdata(false)
172        tidy.setEscapeCdata(false)
173        tidy.setQuiet(true)
174
175        val out = new ByteArrayOutputStream()
176        tidy.parse(new ByteArrayInputStream(data), out);
177        data = out.toByteArray
178
179        // println("tidy: "+new String(data, "UTF-8"))
180      }
181
182      // Try to strip out the doc type... stuff..
183      {
184        val text = new String(data, "UTF-8").trim
185        if( text.startsWith("<!DOCTYPE") ) {
186          data = text.substring(text.indexOf('>')+1).getBytes("UTF-8")
187          // println("doctype: "+new String(data, "UTF-8"))
188        }
189      }
190
191      val doc = try {
192        XML.load(new ByteArrayInputStream(data))
193      } catch {
194        case e:SAXParseException =>
195          // save the tidy version...
196          System.err.println("Could not parse the html markup: "+e.getMessage+" at "+e.getLineNumber+":"+e.getColumnNumber)
197          out.write(data)
198          return
199        case e:Throwable =>
200          // save the tidy version...
201          System.err.println("Could not parse the html markup: "+e.getMessage)
202          out.write(data)
203          return
204      }
205      doc.child.foreach(process(_))
206    }
207
208    if( to!=null ) {
209      out = new IndentPrintStream(new FileOutputStream(to));
210      doit
211      out.close()
212    } else {
213      out = new IndentPrintStream(session.getConsole);
214      doit
215      out.flush()
216    }
217    null
218  }
219
220
221  def to_text(line: String): String = {
222    line
223  }
224
225  def to_element(tag: String): String = {
226    var rc = tag
227    if( rc.startsWith("div.") ||  tag.startsWith("div#") ) {
228      rc = rc.stripPrefix("div")
229    }
230    "%"+rc
231  }
232
233  def process(value:AnyRef):Unit = {
234
235    val t = out
236    import t._
237
238    def tag(name:String) = {
239      if( name.matches("""^[\w:_\-]+$""") ) {
240        name
241      } else {
242        "'"+name+"'"
243      }
244    }
245
246    value match {
247
248      case x:Elem =>
249
250        var id=""
251        var clazz=""
252        var atts=""
253
254        def add(key:String, value:String) = {
255          if( atts!="" ) {
256            atts += " "
257          }
258          atts += key+"=\""+value+"\""
259        }
260
261        x.attributes.foreach{ a=>
262          val key = a.key
263          val value = a.value.toString
264          if( key=="id" ) {
265            if( value.matches("""^[\w_\-]+$""") )
266              id = "#"+value
267            else
268              add(key,value)
269          } else if( key=="class" ) {
270            if( value.matches("""^[\w\s_\-]+$""") ) {
271              value.split("""\s""").foreach{ c=>
272                clazz += "."+c
273              }
274            } else {
275              add(key,value)
276            }
277          } else {
278            add(key,value)
279          }
280        }
281
282        if(x.label=="scriptlet") {
283          for( line <- x.child.text.trim().split("""\r?\n""").filter( _.length()!=0) ) {
284            pi.pl("- "+line)
285          }
286        } else {
287
288          pi.p(to_element(tag(x.label)+id+clazz))
289          if( atts!="" ) {
290            p("("+atts+")")
291          }
292
293          x.child match {
294            case Seq(x:Text) =>
295              val value = x.text.trim
296              if (value.contains("\n")) {
297                pl()
298                indent {
299                  process(x)
300                }
301              } else {
302                pl(" "+value)
303              }
304            case x =>
305              pl()
306              indent {
307                x.foreach{ process _ }
308              }
309          }
310        }
311
312
313      case x:Text =>
314        val value = x.text.trim
315        value.split("\r?\n").map(_.trim).foreach{ line =>
316          if(line != "" ) {
317            pi.pl(to_text(line))
318          }
319        }
320
321      case x:AnyRef =>
322        throw new Exception("Unhandled type: "+x.getClass);
323    }
324  }
325
326  class IndentPrintStream(out:OutputStream) extends PrintStream(out) {
327    var level=0
328    def indent[T](op: => T): T = {level += 1; val rc = op; level -= 1; rc}
329
330    def pi = { for (i <- 0 until level) { print("  ") }; this }
331    def p(line: String) = { print(line); this }
332    def pl(line: String="") = { println(line); this }
333  }
334
335}