PageRenderTime 56ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 0ms

/Utilities/Query.tcl

http://wub.googlecode.com/
TCL | 1034 lines | 795 code | 68 blank | 171 comment | 59 complexity | 08f85e1fc10fe169bb6201f7119cdaf1 MD5 | raw file
  1. # Query - handle URL/HTTP query strings
  2. if {[info exists argv0] && ([info script] eq $argv0)} {
  3. lappend auto_path [file dirname [file normalize [info script]]]
  4. }
  5. package require Debug
  6. Debug define query 10
  7. package require mime
  8. package require base64
  9. package provide Query 2.0
  10. set ::API(Utilities/Query) {
  11. {
  12. Query - parse and manipulate HTML query strings
  13. }
  14. }
  15. namespace eval ::Query {
  16. variable todisk 10000000
  17. variable utf8 [expr {[catch {package require utf8}] == 0}]
  18. variable map
  19. variable dmap
  20. # Support for x-www-urlencoded character mapping
  21. # The spec says: "non-alphanumeric characters are replaced by '%HH'"
  22. set dmap {%0D%0A \n %0d%0a \n %% %}
  23. lappend dmap + " "
  24. set map {% %% = = & & - -}
  25. # set up non-alpha map
  26. for {set i 0} {$i < 256} {incr i} {
  27. set c [format %c $i]
  28. if {![string match {[a-zA-Z0-9]} $c]} {
  29. if {![dict exists $map $c]} {
  30. lappend map $c %[format %.2X $i]
  31. }
  32. }
  33. # must be able to decode any %-form, however stupid
  34. lappend dmap %[format %.2X $i] [binary format c $i]
  35. lappend dmap %[format %.2x $i] [binary format c $i]
  36. }
  37. # These are handled specially
  38. lappend map " " + \n %0D%0A
  39. #puts stderr "QUERY dmap: $dmap"
  40. #puts stderr "QUERY map: $map"
  41. proc 2hex {str} {
  42. binary scan $str H* hex
  43. return $hex
  44. }
  45. # decode
  46. #
  47. # This decodes data in www-url-encoded format.
  48. #
  49. # Arguments:
  50. # An encoded value
  51. #
  52. # Results:
  53. # The decoded value
  54. proc decode {str} {
  55. Debug.query {decode '$str'} 10
  56. variable dmap
  57. set str [string map $dmap $str]
  58. # set str [encoding convertfrom utf-8 $str]
  59. Debug.query {decode dmap '$str' [2hex $str]} 10
  60. return $str
  61. }
  62. # encode
  63. #
  64. # This encodes data in www-url-encoded format.
  65. #
  66. # Arguments:
  67. # A string
  68. #
  69. # Results:
  70. # The encoded value
  71. proc encode {string} {
  72. variable map
  73. # map non-ascii characters away - note: % must be first
  74. Debug.query {encode '$string'}
  75. set string [string map $map $string]
  76. Debug.query {encode post '$string'}
  77. return $string
  78. }
  79. # encode args as a www-url-encoded entity
  80. proc encodeL {args} {
  81. if {[llength $args] == 1} {
  82. set args [lindex $args 0]
  83. }
  84. set entity {}
  85. foreach {n v} $args {
  86. lappend entity "$n=[Query encode $v]"
  87. }
  88. return [join $entity &]
  89. }
  90. # build
  91. #
  92. # This encodes a dict in www-url-encoded format.
  93. #
  94. # Arguments:
  95. # a list of name, value pairs
  96. #
  97. # Results:
  98. # The encoded value
  99. proc build {args} {
  100. if {[llength $args] == 1} {
  101. set args [lindex $args 0]
  102. }
  103. set pairs {}
  104. foreach {n v} $args {
  105. lappend pairs "[encode $n]=[encode $v]"
  106. }
  107. return [join $pairs &]
  108. }
  109. # qparse -- internal parser support
  110. #
  111. # decodes a query string
  112. #
  113. # Arguments:
  114. # qstring a string containing a query
  115. # ct the content type
  116. #
  117. # Results:
  118. # A dictionary of names and values from the query
  119. # The form of the dict is {name {{value {metadata}} ... }}
  120. #
  121. # Side Effects:
  122. proc qparse {qstring count {ct "NONE"}} {
  123. Debug.query {qparse $ct - [string range $qstring 0 80]...}
  124. switch -glob -- [lindex [split $ct \;] 0] {
  125. text/html -
  126. text/xml -
  127. application/xml -
  128. application/x-www-form-urlencoded -
  129. application/x-www-urlencoded -
  130. application/x-url-encoded -
  131. NONE {
  132. set query [dict create]
  133. foreach {x} [split [string trim $qstring] &] {
  134. # Turns out you might not get an = sign,
  135. # especially with <isindex> forms.
  136. set z [split $x =]
  137. if {[llength $z] == 1} {
  138. # var present without assignment
  139. set var [decode [lindex $z 0]]
  140. set val ""
  141. set meta [list -unassigned 1 -count [incr count]]
  142. } else {
  143. # var present with assignment
  144. set var [decode [lindex $z 0]]
  145. set val [decode [join [lrange $z 1 end] =]]
  146. set meta [list -count [incr count]]
  147. }
  148. dict lappend query $var $val $meta
  149. # todo - not quite right - this doesn't allow duplicate
  150. # var=val, but should
  151. }
  152. }
  153. default {
  154. error "Unknown Content-Type: $ct"
  155. }
  156. }
  157. set query [charset $query]
  158. return [list $query $count]
  159. }
  160. # cconvert - convert charset to appropriate encoding
  161. # - try to ensure the correctness of utf-8 input
  162. proc cconvert {query charset} {
  163. if {$charset eq ""} {
  164. set charset utf-8
  165. } else {
  166. set charset [string tolower $charset]
  167. }
  168. Debug.query {cconvert charset '$charset'} 6
  169. variable encodings
  170. if {$charset in [encoding names]} {
  171. # tcl knows of this encoding - so make the conversion
  172. variable utf8
  173. dict for {k v} $query {
  174. set vals {}
  175. foreach {val meta} $v {
  176. if {$utf8 && $charset eq "utf-8"} {
  177. # check the content for utf8 correctness
  178. set point [::utf8::findbad $v]
  179. if {$point < [string length $v] - 1} {
  180. if {$point >= 0} {
  181. incr point
  182. lappend meta -bad $point
  183. }
  184. lappend vals $val $meta
  185. Debug.query {cconvert charset '$charset' - bad at $point} 6
  186. continue
  187. }
  188. }
  189. set v1 [encoding convertfrom $charset $val]
  190. Debug.query {cconvert charset '$charset' '$val'->'$v1'} 6
  191. lappend vals $v1 $meta
  192. }
  193. Debug.query {cconverted $k -> ($vals)} 10
  194. dict set query $k $vals
  195. }
  196. }
  197. return $query
  198. }
  199. # charset - handle '_charset_' hack
  200. # see https://bugzilla.mozilla.org/show_bug.cgi?id=18643
  201. proc charset {query} {
  202. Debug.query {charset [dict get? $query _charset_]}
  203. if {![exists $query _charset_]} {
  204. # no conversion necessary
  205. return $query
  206. }
  207. set query [cconvert $query [value $query _charset_]]
  208. dict unset query _charset_
  209. return $query
  210. }
  211. proc components {token} {
  212. set components [list token $token]
  213. foreach p [::mime::getproperty $token -names] {
  214. dict set components $p [::mime::getproperty $token $p]
  215. }
  216. foreach p [::mime::getheader $token -names] {
  217. dict set components headers $p [::mime::getheader $token $p]
  218. }
  219. Debug.query {Components $token: ($components)}
  220. if {[dict exists $components size] && [dict get $components size] < 100000} {
  221. dict set components body [::mime::getbody $token -decode]
  222. } else {
  223. set body [file tempfile path]
  224. dict set components path $path
  225. dict set components fd $body
  226. dict set ::Httpd::files [info coroutine] $body ;# keep a copy
  227. Debug.query {Components BODY $token: $path}
  228. ::mime::copymessage $token $body ;# make a copy in the given file.
  229. }
  230. if {[dict exists $components parts]} {
  231. # recurse over parts
  232. set parts [dict get $components parts]; dict unset components parts
  233. foreach p [dict get $components $parts] {
  234. dict set components parts $p [components $token]
  235. dict set components parts $p token $token
  236. }
  237. }
  238. Debug.query {Components Full $token: ($components)}
  239. return $components
  240. }
  241. # parse -- parse an http dict's queries
  242. #
  243. # decodes the -query and -entity elements of an httpd dict
  244. #
  245. # Arguments:
  246. # http dict containing an HTTP request
  247. #
  248. # Results:
  249. # A dictionary of names associated with a list of
  250. # values from the request's query part and entity body
  251. proc parse {r} {
  252. if {[dict exists $r -Query]} {
  253. return [dict get $r -Query]
  254. }
  255. if {[dict exists $r -query]} {
  256. # parse the query part normally
  257. Debug.query {parsing query part ([dict get $r -query])}
  258. lassign [qparse [dict get $r -query] 0] query count
  259. } else {
  260. set query {}
  261. set count 0
  262. }
  263. if {[dict exists $r content-length]
  264. && ![dict get $r content-length]
  265. } {
  266. dict unset r content-length
  267. catch {dict unset r content-type}
  268. catch {dict unset r -entity}
  269. catch {dict unset r -entitypath}
  270. } elseif {[dict exists $r -entity]
  271. || [dict exists $r -entitypath]
  272. } {
  273. # there is an entity body
  274. set ct [dict get $r content-type]
  275. Debug.query {parsing entity part of type '$ct'}
  276. switch -glob -- [dict exists $r -entitypath],[dict exists $r -entity],[lindex [split $ct \;] 0] {
  277. 0,1,multipart/* {
  278. lassign [multipart $ct [dict get $r -entity] $count] query count
  279. }
  280. 1,1,multipart/* {
  281. lassign [multipartF $ct [dict get $r -entitypath] [dict get $r -entity] $count] query count
  282. }
  283. default {
  284. # this entity is in memory - use the quick stuff to parse it
  285. lassign [qparse [dict get $r -entity] $count $ct] query count
  286. Debug.query {qparsed [string range $query1 0 80]...}
  287. }
  288. }
  289. }
  290. return $query
  291. }
  292. # dump query dict
  293. proc dump {qd} {
  294. set result {}
  295. foreach name [dict keys $qd] {
  296. lappend result [list $name [string range [value $qd $name] 0 80] [metadata $qd $name]]
  297. }
  298. return $result
  299. }
  300. # numvalues -- how many values does a named element have?
  301. #
  302. # Arguments:
  303. # query query dict generated by parse
  304. # el element name
  305. #
  306. # Results:
  307. # number of values associated with query
  308. proc numvalues {query el} {
  309. return [expr {[llength [dict get $query $el]] / 2}]
  310. }
  311. # value -- return the nth value associated with a name in a query
  312. #
  313. # Arguments:
  314. # query query dict generated by parse
  315. # el name of element
  316. # num index of value
  317. #
  318. # Results:
  319. # num'th value associated with el by query
  320. proc value {query el {num 0}} {
  321. return [lindex [dict get $query $el] [expr {$num * 2}]]
  322. }
  323. proc qvars {query args} {
  324. foreach var $args {
  325. upvar 1 $var _$var
  326. set _$var [value $query $var]
  327. }
  328. }
  329. # add - add a simulated query element
  330. # query - query dict generated by parse
  331. # el - name of element
  332. # val - value of element
  333. # metadata - metadata
  334. proc add {query el val {metadata {}}} {
  335. dict lappend query $el $val $metadata
  336. return $query
  337. }
  338. # metadata -- return the nth metadata associated with a name in a query
  339. #
  340. # Arguments:
  341. # query query dict generated by parse
  342. # el name of element
  343. # num index of value
  344. #
  345. # Results:
  346. # num'th metadata associated with el by query
  347. proc metadata {query el {num 0}} {
  348. return [lindex [dict get $query $el] [expr {$num * 2 + 1}]]
  349. }
  350. # if this is a file, return its characteristics
  351. proc file? {query el {num 0}} {
  352. set md [metadata $query $el $num]
  353. if {![dict exists $md -path]} {
  354. return {}
  355. } else {
  356. return [dict in $md -path -fd -start -size]
  357. }
  358. }
  359. proc copydone {to md bytes {error ""}} {
  360. if {$bytes != [dict get $md -size]
  361. || $error ne ""
  362. } {
  363. if {$error eq ""} {
  364. set error "[$bytes bytes copied, [dict get $md -size] expected."
  365. }
  366. Debug.error {"Query copy error to '$to': $error ($md)"}
  367. }
  368. catch {file close $to}
  369. }
  370. # if this is a file, copy its content to a stream
  371. proc copy {fd callback query el {num 0}} {
  372. if {$callback eq ""} {
  373. set callback {Query copydone}
  374. }
  375. set md [metadata $query $el $num]
  376. if {![dict exists $md -path]} {
  377. return 0
  378. } else {
  379. set md [dict in $md -path -fd -start -size]
  380. dict with md {
  381. chan seek ${-fd} ${-start}
  382. chan copy ${-fd} $fd ${-size} -command [list {*}$callback $fd $md]
  383. }
  384. return 1
  385. }
  386. }
  387. proc copytmp {file callback query el {num 0}} {
  388. set fd [file tempfile path]
  389. set result [copy $fd $callback $query $el $num]
  390. if {$result} {
  391. return [list $fd $path]
  392. } else {
  393. return {}
  394. }
  395. }
  396. # exists -- does a value with the given name exist
  397. #
  398. # Arguments:
  399. # query query dict generated by parse
  400. # el name of element
  401. # num number of element's values
  402. #
  403. # Results:
  404. # true if el is in query
  405. proc exists {query el {num 0}} {
  406. if {$num == 0} {
  407. return [dict exists $query $el]
  408. } else {
  409. return [expr {
  410. [dict exists $query $el]
  411. && ([llength [dict get $query $el]] > ($num*3))
  412. }]
  413. }
  414. }
  415. # return a name, value, meta list from the query dict
  416. proc nvmlist {query} {
  417. set result {}
  418. dict for {n v} $query {
  419. foreach {val meta} $v {
  420. lappend result $n $val $meta
  421. }
  422. }
  423. return $result
  424. }
  425. # values -- return the list of values associated with a name in a query
  426. #
  427. # Arguments:
  428. # query query dict generated by parse
  429. # el name of element
  430. #
  431. # Results:
  432. # list of values associated with el by query
  433. proc values {query el} {
  434. set result {}
  435. foreach {v m} [dict get $query $el] {
  436. lappend result $v
  437. }
  438. return $result
  439. }
  440. proc query_encode {query} {
  441. set q {}
  442. foreach n [Query vars $query] {
  443. foreach v [Query values $query $n] {
  444. if {$v ne ""} {
  445. lappend q "$n=[Query encode $v]"
  446. } else {
  447. lappend q $n
  448. }
  449. }
  450. }
  451. return [join $q &]
  452. }
  453. # vars -- the list of names in the query
  454. #
  455. # Arguments:
  456. # query query dict generated by parse
  457. #
  458. # Results:
  459. # list of values associated with el by query
  460. proc vars {query} {
  461. return [dict keys $query]
  462. }
  463. # flatten -- flatten query ignoring multiple values and metadata
  464. #
  465. # construct a list able to be flattened into an array
  466. #
  467. # Arguments:
  468. # query query dict generated by parse
  469. #
  470. # Results:
  471. # list of values associated with el by query
  472. # multiple values are stored with ,$index as a name suffix
  473. proc flatten {query} {
  474. set result {}
  475. dict for {n v} $query {
  476. set count 0
  477. foreach {val meta} $v {
  478. if {$count} {
  479. lappend result $n,$count $val
  480. } else {
  481. lappend result $n $val
  482. }
  483. incr count
  484. }
  485. }
  486. return $result
  487. }
  488. # for compatibility with ncgi.
  489. proc nvlist {query} {
  490. return [flatten $query]
  491. }
  492. # replace - replace a simulated query element
  493. # query - query dict generated by parse
  494. # el - name of element
  495. # val - value of element
  496. # metadata - metadata
  497. proc replace {query el val {metadata {}}} {
  498. if {[dict exists $query $el]} {
  499. dict set query $el $val
  500. } else {
  501. dict lappend query $el $val $metadata
  502. }
  503. return $query
  504. }
  505. # parseMimeValue
  506. #
  507. # Parse a MIME header value, which has the form
  508. # value; param=value; param2="value2"; param3='value3'
  509. #
  510. # Arguments:
  511. # value The mime header value. This does not include the mime
  512. # header field name, but everything after it.
  513. #
  514. # Results:
  515. # A two-element list, the first is the primary value,
  516. # the second is in turn a name-value list corresponding to the
  517. # parameters. Given the above example, the return value is
  518. # {
  519. # value
  520. # {param value param2 value param3 value3}
  521. # }
  522. proc parseMimeValue {value} {
  523. set parts [split $value \;]
  524. set results [list [string trim [lindex $parts 0]]]
  525. set paramList {}
  526. foreach sub [lrange $parts 1 end] {
  527. if {[regexp -- {([^=]+)=(.+)} $sub match key val]} {
  528. set key [string trim [string tolower $key]]
  529. set val [string trim $val]
  530. # Allow single as well as double quotes
  531. if {[regexp -- {^[\"']} $val quote]} {
  532. # need a quote for balance
  533. if {[regexp -- ^${quote}(\[^$quote\]*)$quote $val x val2]} {
  534. # Trim quotes and any extra crap after close quote
  535. set val $val2
  536. }
  537. }
  538. lappend paramList $key $val
  539. Debug.query {parseMimeValue $key: '[string range $val 0 80]...'}
  540. }
  541. }
  542. if {[llength $paramList]} {
  543. lappend results $paramList
  544. }
  545. return $results
  546. }
  547. # pconvert - convert part's charset to appropriate encoding
  548. # - try to ensure the correctness of utf-8 input
  549. proc pconvert {charset content} {
  550. Debug.query {pconvert '$charset'} 6
  551. if {$charset eq ""} {
  552. return $content
  553. }
  554. if {$charset ni [encoding names]} {
  555. Debug.error {Query pconvert doesn't know how to convert '$charset'}
  556. return $content
  557. }
  558. # tcl knows of this encoding - so make the conversion
  559. variable utf8
  560. if {$utf8 && $charset eq "utf-8"} {
  561. # check the content for utf8 correctness
  562. set point [::utf8::findbad $content]
  563. if {$point < [string length $v] - 1} {
  564. if {$point >= 0} {
  565. incr point
  566. lappend meta -bad $point
  567. }
  568. lappend vals $val $meta
  569. continue
  570. }
  571. }
  572. Debug.query {pconvert '$charset'}
  573. return [encoding convertfrom $charset $content]
  574. }
  575. # multipart
  576. #
  577. # This parses multipart form data.
  578. # Based on work by Steve Ball for TclHttpd
  579. #
  580. # Arguments:
  581. # type The Content-Type, because we need boundary options
  582. # query The raw multipart query data
  583. #
  584. # Results:
  585. # An alternating list of names and values
  586. # In this case, the value is a two element list:
  587. # content, which is the main value of the element
  588. # headers, which in turn is a list names and values
  589. # The header name/value pairs come primarily from the MIME headers
  590. # like Content-Type that appear in each part. However, the
  591. # Content-Disposition header is handled specially. It has several
  592. # parameters like "name" and "filename" that are important, so they
  593. # are promoted to to the same level as Content-Type. Otherwise,
  594. # if a header like Content-Type has parameters, they appear as a list
  595. # after the primary value of the header. For example, if the
  596. # part has these two headers:
  597. #
  598. # Content-Disposition: form-data; name="Foo"; filename="/a/b/C.txt"
  599. # Content-Type: text/html; charset="iso-8859-1"; mumble='extra'
  600. #
  601. # Then the header list will have this structure:
  602. # {
  603. # content-disposition form-data
  604. # name Foo
  605. # filename /a/b/C.txt
  606. # content-type {text/html {charset iso-8859-1 mumble extra}}
  607. # }
  608. # Note that the header names are mapped to all lowercase. You can
  609. # use "array set" on the header list to easily find things like the
  610. # filename or content-type. You should always use [lindex $value 0]
  611. # to account for values that have parameters, like the content-type
  612. # example above. Finally, not that if the value has a second element,
  613. # which are the parameters, you can "array set" that as well.
  614. proc multipart {type query {count -1}} {
  615. set parsedType [parseMimeValue $type]
  616. if {![string match multipart/* [lindex $parsedType 0]]} {
  617. error "Not a multipart Content-Type: [lindex $parsedType 0]"
  618. }
  619. Debug.query {multipart parsed Mime Values $type '$parsedType'}
  620. #puts stderr "PARTS: $query"
  621. array set options [lindex $parsedType 1]
  622. if {![info exists options(boundary)]} {
  623. error "No boundary given for multipart document"
  624. }
  625. set boundary $options(boundary)
  626. Debug.query {multipart options $type '[array get options]'}
  627. # The query data is typically read in binary mode, which preserves
  628. # the \r\n sequence from a Windows-based browser.
  629. # Also, binary data may contain \r\n sequences.
  630. if {[string match "*$boundary\r\n*" $query]} {
  631. set lineDelim "\r\n"
  632. # puts "DELIM"
  633. } else {
  634. set lineDelim "\n"
  635. # puts "NO"
  636. }
  637. # Iterate over the boundary string and chop into parts
  638. set len [string length $query]
  639. # [string length $lineDelim]+2 is for "$lineDelim--"
  640. set blen [expr {[string length $lineDelim] + 2 + [string length $boundary]}]
  641. set first 1
  642. set results [dict create]
  643. # Ensuring the query data starts
  644. # with a newline makes the string first test simpler
  645. if {[string first $lineDelim $query 0] != 0} {
  646. set query $lineDelim$query
  647. }
  648. set offset 0
  649. set charset "" ;# charset encoding of part
  650. set te "" ;# transfer encoding of part
  651. while {[set offset [string first "$lineDelim--$boundary" $query $offset]] >= 0} {
  652. # offset is the position of the next boundary string
  653. # in $query after $offset
  654. Debug.query {multipart found offset:$offset/[string length $query]}
  655. if {$first} {
  656. set first 0 ;# this was the opening delimiter
  657. } else {
  658. # this was the delimiter bounding current element
  659. # generate a n,v element from parsed content
  660. set content [string range $query $off2 [expr {$offset -1}]]
  661. Debug.query {encodings te:$te charset:$charset}
  662. # decode transfer encoding
  663. switch -- $te {
  664. quoted-printable {
  665. set content [::mime::qp_decode $content]
  666. }
  667. base64 {
  668. set content [::base64::decode]
  669. }
  670. 7bit - 8bit - binary - "" {}
  671. default {
  672. Debug.error {Query multipart can't handle TE '$te'}
  673. }
  674. }
  675. # decode charset encoding
  676. if {$charset ne ""} {
  677. Debug.query {pconverting: $formName '$charset'}
  678. set content [pconvert $charset $content]
  679. }
  680. dict lappend results $formName $content $headers
  681. }
  682. incr offset $blen ;# skip boundary in stream
  683. # Check for the terminating entity boundary,
  684. # which is signaled by --$boundary--
  685. if {[string range $query $offset [expr {$offset + 1}]] eq "--"} {
  686. # end of parse
  687. Debug.query {multipart endparse offset:$offset/[string length $query]}
  688. break
  689. }
  690. # We have a new element. Split headers out from content.
  691. # The headers become a nested dict structure in result:
  692. # {header-name { value { paramname paramvalue ... } } }
  693. # find off2, the offset of the delimiter which terminates
  694. # the current element
  695. set off2 [string first "$lineDelim$lineDelim" $query $offset]
  696. Debug.query {multipart parsed between:$offset...$off2 /[string length $query]}
  697. # generate a dict called headers with element's headers and values
  698. set headers [dict create -count [incr count]]
  699. set formName "" ;# any header 'name' becomes the element name
  700. set charset ""
  701. set te ""
  702. foreach line [split [string range $query $offset $off2] $lineDelim] {
  703. if {[regexp -- {([^:\t ]+):(.*)$} $line x hdrname value]} {
  704. set hdrname [string tolower $hdrname]
  705. # RFC2388: Field names originally in non-ASCII character sets may be encoded
  706. # within the value of the "name" parameter using the standard method
  707. # described in RFC 2047.
  708. # encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
  709. # charset = token ; see section 3
  710. # encoding = token ; see section 4
  711. # We're not going to support that.
  712. set valueList [parseMimeValue $value]
  713. Debug.query {part header $hdrname: $valueList}
  714. switch -- $hdrname {
  715. content-disposition {
  716. # Promote Content-Disposition parameters up to headers,
  717. # and look for the "name" that identifies the form element
  718. dict set headers $hdrname [lindex $valueList 0]
  719. foreach {n v} [lindex $valueList 1] {
  720. set n [string tolower $n]
  721. Debug.query {multipart content-disposition: $n '$v'}
  722. dict set headers $n $v
  723. if {$n eq "name"} {
  724. set formName $v ;# the name of the element
  725. }
  726. }
  727. }
  728. content-type {
  729. # RFC2388: As with all multipart MIME types, each part has an optional
  730. # "Content-Type", which defaults to text/plain.
  731. set charset [string tolower [dict get? [lindex $valueList 1] charset]]
  732. dict set headers $hdrname $valueList
  733. }
  734. content-transfer-encoding {
  735. # RFC2388: The value supplied
  736. # for a part may need to be encoded and the "content-transfer-encoding"
  737. # header supplied if the value does not conform to the default
  738. # encoding. [See section 5 of RFC 2046 for more details.]
  739. set te $valueList
  740. dict set headers $hdrname $valueList
  741. }
  742. default {
  743. Debug.query {multipart header: $hdrname '$valueList'}
  744. set te $valuelist
  745. dict set headers $hdrname $valueList
  746. }
  747. }
  748. } elseif {$line ne ""} {
  749. error "bogus field: '$line'"
  750. } else {
  751. Debug.query {multipart headers last line}
  752. }
  753. }
  754. # we have now ingested the part's headers
  755. if {$off2 > 0} {
  756. # +[string length "$lineDelim$lineDelim"] for the
  757. # $lineDelim$lineDelim
  758. incr off2 [string length "$lineDelim$lineDelim"]
  759. set offset $off2
  760. } else {
  761. break
  762. }
  763. }
  764. Debug.query {headers: $results}
  765. return [list $results $count]
  766. }
  767. proc scanF {fd pattern} {
  768. chan seek $fd 0
  769. set bsize [chan configure $fd -buffersize]
  770. set psize [string length $pattern]
  771. if {$psize >= $bsize} {
  772. error "pattern is longer than buffer"
  773. }
  774. chan configure $fd -blocking 0
  775. set result {0}
  776. set prior "" ;# previous buffer
  777. while {1} {
  778. set next [chan read $fd $bsize]
  779. set found [string first $pattern $prior$next]
  780. if {$found < 0} {
  781. if {[chan eof $fd]} break
  782. #Debug.query {scanF not found at [chan tell $fd]}
  783. continue
  784. } else {
  785. # got a match in buffer
  786. Debug.query {scanF found at $found from [chan tell $fd]}
  787. set got [expr {[chan tell $fd]-[string length $next]-[string length $prior]+$found}]
  788. lappend result $got [expr {$got + [string length $pattern]}]
  789. set prior [string range $next end-$psize end]
  790. }
  791. }
  792. # compensate for 'terminating boundary' extra leading --
  793. lappend result [expr {[chan tell $fd]-[string length $pattern]-2}]
  794. return $result
  795. }
  796. proc multipartF {type path fd {count -1}} {
  797. set parsedType [parseMimeValue $type]
  798. if {![string match multipart/* [lindex $parsedType 0]]} {
  799. error "Not a multipart Content-Type: [lindex $parsedType 0]"
  800. }
  801. Debug.query {multipartF parsed Mime Values type:'$type' options:'$parsedType'}
  802. array set options [lindex $parsedType 1]
  803. if {![info exists options(boundary)]} {
  804. error "No boundary given for multipart document"
  805. }
  806. set boundary $options(boundary)
  807. Debug.query {multipartF options '[array get options]'}
  808. # Iterate over the file looking for boundary string and chop into parts
  809. set boundary --$boundary\r\n
  810. set boundaries [scanF $fd $boundary]
  811. Debug.query {boundaries: $boundaries}
  812. if {[llength $boundaries] < 2} {
  813. error "multipart improperly formed"
  814. }
  815. set i -1
  816. set q {}
  817. foreach {start end} $boundaries {
  818. set size [expr {$end-$start}]
  819. if {$size} {
  820. chan seek $fd $start
  821. incr i
  822. Debug.query {bounded $i ($start..$end $size)}
  823. chan configure $fd -translation crlf
  824. set headers [list -count [incr count]]
  825. set formName "Part$i" ;# any header 'name' becomes the element name
  826. while {[gets $fd line] > 0
  827. && [chan tell $fd] < $end
  828. } {
  829. Debug.query {LINE $i: '$line'}
  830. # generate a dict called headers with element's headers and values
  831. if {[regexp -- {([^:\t ]+):(.*)$} $line x hdrname value]} {
  832. set hdrname [string tolower $hdrname]
  833. # RFC2388: Field names originally in non-ASCII character sets may be encoded
  834. # within the value of the "name" parameter using the standard method
  835. # described in RFC 2047.
  836. # encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
  837. # charset = token ; see section 3
  838. # encoding = token ; see section 4
  839. # We're not going to support that.
  840. set valueList [parseMimeValue $value]
  841. Debug.query {hdr: $hdrname ($valueList)}
  842. switch -- $hdrname {
  843. content-disposition {
  844. # Promote Content-Disposition parameters up to headers,
  845. # and look for the "name" that identifies the form element
  846. dict lappend headers $hdrname [lindex $valueList 0]
  847. foreach {n v} [lindex $valueList 1] {
  848. set n [string tolower $n]
  849. Debug.query {multipart content-disposition: $n '$v'}
  850. lappend headers $n $v
  851. if {$n eq "name"} {
  852. set formName $v ;# the name of the element
  853. }
  854. }
  855. }
  856. content-type {
  857. # RFC2388: As with all multipart MIME types, each part has an optional
  858. # "Content-Type", which defaults to text/plain.
  859. set charset [string tolower [dict get? [lindex $valueList 1] charset]]
  860. dict lappend headers $hdrname $valueList
  861. }
  862. content-transfer-encoding {
  863. # RFC2388: The value supplied
  864. # for a part may need to be encoded and the "content-transfer-encoding"
  865. # header supplied if the value does not conform to the default
  866. # encoding. [See section 5 of RFC 2046 for more details.]
  867. set te $valueList
  868. dict lappend headers $hdrname $valueList
  869. }
  870. default {
  871. Debug.query {multipart header: $hdrname '$valueList'}
  872. set te $valuelist
  873. dict lappend headers $hdrname $valueList
  874. }
  875. }
  876. }
  877. }
  878. set csize [expr {$end - [chan tell $fd]-2}] ;# content size
  879. if {$csize > $::Httpd::todisk} {
  880. set content {}
  881. set headers [dict merge $headers [list -path $path -fd $fd -start $start -size $csize]]
  882. } else {
  883. set content [chan read $fd $csize]
  884. }
  885. dict lappend q $formName $content $headers
  886. chan configure $fd -translation binary
  887. }
  888. }
  889. Debug.query {multipartF Result: $q}
  890. return [list $q $count]
  891. }
  892. namespace export -clear *
  893. namespace ensemble create -subcommands {}
  894. }
  895. if {[info exists argv0] && ([info script] eq $argv0)} {
  896. foreach test {
  897. "error=User%20Doesn?error=User%20Doesn't%20Exist"
  898. "error=Passwords%20don't%20match"
  899. "error=first&error=second&error=third"
  900. } {
  901. lassign [Query qparse $test 0] query count
  902. puts stderr "'$test' -> ($query)"
  903. puts stderr "find error '[Query value $query error]'"
  904. set query [Query parse [dict create -query $test]]
  905. puts stderr "'$test' -> ($query)"
  906. puts stderr "find error '[Query value $query error]'"
  907. puts stderr "flattened: [Query flatten $query]"
  908. }
  909. # here's something I caught in the wild
  910. set q {N {8942 {}} cancel {Cancel {-count 4}} C {{This is a Work-in-progress translation (to Swedish) of the eleven syntactic rules of Tcl. (see [Endekalogue] for other translations). [Category Documentation] |} {-count 1 -bad 163}} O {{1182004521 lars_h@81.231.37.27} {-count 2}}}
  911. set metadata [Query metadata $q C]
  912. puts stderr "metadata: $metadata"
  913. }