PageRenderTime 25ms CodeModel.GetById 12ms RepoModel.GetById 0ms app.codeStats 0ms

/nltk_contrib/nltk_contrib/lpath/lpath/lpath.py

http://nltk.googlecode.com/
Python | 1043 lines | 1038 code | 5 blank | 0 comment | 0 complexity | 00731cc06cc8ab527204bdda9cfd05b7 MD5 | raw file
Possible License(s): Apache-2.0, AGPL-1.0
  1. import time
  2. T0 = time.time()
  3. import sys
  4. import re
  5. from nltk import parse_cfg
  6. from nltk import Tree
  7. from nltk import parse
# Public names of this module.  Note that print_profile() is defined in
# this file but is not listed here.
__all__ = ["translate", "translate2", "get_profile", "get_grammar", "get_base_grammar", "tokenize"]
# Grammar text used by the most recent translate2() call: the base grammar
# below plus one "Qname -> '...'" rule per free-string token of the query.
# It is overwritten on every call and returned by get_grammar().
GR = ""
# The following is the LPath+ grammar.
# Some rules of the original grammar, which appear in LPath papers
# until 2006, are commented out using a single pound sign (#).
# The nonterminal names matter: Trans._expand() builds handler method names
# from them (for example "P -> P S" is handled by Trans._P_P_S).
grammar_text = """
#P -> AP | AP '{' P '}'
P -> S | P S | P LCB P RCB
#AP -> | S AP
#S -> A T | A T '[' R ']'
S -> S2 | LRB S2 RRB STAR | LRB S2 RRB PLUS | LRB S2 RRB OPT
#
S2 -> S1 | S1 LSB R RSB
#
S1 -> A T
A -> "/" | "//" | "." | "\\" | "\\\\" | "<=" | "=>" | "<==" | "==>" | "<-" | "->" | "<--" | "-->"
#T -> Qname | "_" | "@" Qname C Qname
T -> Qname | "_"
ATT -> AT Qname C Qname
#R -> R "or" R | R "and" R | "not" R | "(" R ")" | P | P C Qname
R -> R OR R | R AND R | LRB R RRB | P | LCB R RCB | ATT | NOT P | NOT ATT | NOT LRB R RRB
C -> "=" | "<=" | ">=" | "<>" | "like"
LCB -> '{'
RCB -> '}'
LRB -> '('
RRB -> ')'
LSB -> '['
RSB -> ']'
PLUS -> '+'
STAR -> '*'
OPT -> '?'
AT -> '@'
OR -> 'or'
AND -> 'and'
NOT -> 'not'
"""
  44. def tokenize(q):
  45. p = re.compile(r"\s*({|}|\[|]|//|/|\.|\\\\|\\|<==|==>|<=|=>|<--|-->|<-|->|@|or|and|not|\(|\)|>=|=|<>|like|_|\*|\+|\?)\s*")
  46. tokens = []
  47. q = q.strip()
  48. N = len(q)
  49. d = 0 # scan position
  50. while d < N:
  51. # scan and add double-quoted string
  52. if q[d] == '"':
  53. for j in range(d+1,N):
  54. if q[j]=='"' and m!='\\':
  55. break
  56. tokens.append(('s',q[d+1:j]))
  57. d = j+1
  58. elif q[d] == '@':
  59. # If we have an attribute, scan ahead until we reach the end of the attribute name.
  60. for j in range(d + 1, N):
  61. if not q[j].isalnum():
  62. break
  63. tokens.append(('r', '@'))
  64. tokens.append(('s', q[d + 1: j]))
  65. d = j
  66. # find next reserved word while scanning free string before it
  67. d0 = d
  68. while d < N:
  69. m = p.match(q, d)
  70. if m:
  71. if m.group(1) == '_':
  72. if d == d0:
  73. break
  74. else:
  75. break
  76. d += 1
  77. qname = q[d0:d]
  78. # if there is a free string, split it and add them to the tokens list
  79. if qname:
  80. for s in qname.split():
  81. tokens.append(('s',s))
  82. # add the reserved word to the tokens list
  83. if m:
  84. tokens.append(('r',m.group(1)))
  85. d = m.span()[1]
  86. return tokens
  87. class SerialNumber:
  88. def __init__(self):
  89. self.n = 0
  90. def inc(self):
  91. self.n += 1
  92. def __int__(self):
  93. return self.n
  94. def __sub__(self, n):
  95. return self.n - n
  96. class AND(list):
  97. def __init__(self, *args):
  98. list.__init__(self)
  99. self.joiner = "and"
  100. for e in args:
  101. if isinstance(e, flatten):
  102. self += e
  103. else:
  104. self.append(e)
  105. def __str__(self):
  106. L = []
  107. for x in self:
  108. if isinstance(x, str):
  109. L.append(x)
  110. elif isinstance(x, AND) or isinstance(x, OR) or isinstance(x, NOT):
  111. L.append(str(x))
  112. elif isinstance(x, flatten):
  113. for e in x:
  114. L.append("%s%s%s" % tuple(e))
  115. elif isinstance(x, list):
  116. L.append("%s%s%s" % tuple(x))
  117. elif isinstance(x, Trans):
  118. L.append("exists (%s)" % x.getSql())
  119. else:
  120. L.append(str(x))
  121. L.append(self.joiner)
  122. return "(" + " ".join(L[:-1]) + ")"
  123. def __unicode__(self):
  124. L = []
  125. for x in self:
  126. if isinstance(x, str):
  127. L.append(unicode(x))
  128. elif isinstance(x, unicode):
  129. L.append(x)
  130. elif isinstance(x, AND) or isinstance(x, OR) or isinstance(x, NOT):
  131. L.append(unicode(x))
  132. elif isinstance(x, flatten):
  133. for e in x:
  134. L.append("%s%s%s" % tuple(e))
  135. elif isinstance(x, list):
  136. L.append("%s%s%s" % tuple(x))
  137. elif isinstance(x, Trans):
  138. L.append("exists (%s)" % x.getSql())
  139. else:
  140. L.append(unicode(x))
  141. L.append(self.joiner)
  142. return "(" + " ".join(L[:-1]) + ")"
  143. def __add__(self, lst):
  144. self += lst
  145. return self
  146. class OR(AND):
  147. def __init__(self, *args):
  148. AND.__init__(self, *args)
  149. self.joiner = "or"
  150. class GRP(AND):
  151. pass
  152. class NOT:
  153. def __init__(self, lst):
  154. self.lst = lst
  155. def __str__(self):
  156. return "not " + str(self.lst)
  157. def __unicode__(self):
  158. return "not " + unicode(self.lst)
  159. class flatten(list):
  160. pass
  161. class Step:
  162. FIELDMAP = {
  163. 'sid':'sid',
  164. 'tid':'tid',
  165. 'id':'id',
  166. 'pid':'pid',
  167. 'left':'l',
  168. 'right':'r',
  169. 'depth':'d',
  170. 'type':'type',
  171. 'name':'name',
  172. 'value':'value',
  173. }
  174. def __init__(self):
  175. self.conditional = None
  176. self.WHERE = []
  177. def getConstraints(self):
  178. C = []
  179. for c1,op,c2 in self.WHERE:
  180. C.append(["%s.%s" % (self.tab,c1), op, c2])
  181. return C
  182. def __getattr__(self, k):
  183. if k in self.FIELDMAP:
  184. return self.tab + "." + self.FIELDMAP[k]
  185. else:
  186. if hasattr(self, k):
  187. eval('self.' + k)
  188. else:
  189. raise(AttributeError("Step instance has no attribute '%s'" % k))
  190. class Trans:
  191. TR = {
  192. '_':'under',
  193. }
  194. def __init__(self, t, sn, pstep=None, tname='T', scope=None):
  195. assert(type(t) == Tree)
  196. assert(t.node == 'P' or t.node == 'ATT' or t.node == 'R')
  197. self.sn = sn
  198. self.pstep = pstep
  199. self.tname = tname
  200. if self.pstep:
  201. self.prefix = self.pstep.tab
  202. self.step = pstep
  203. else:
  204. self.prefix = ""
  205. self.WHERE = AND()
  206. self.WHEREs = [] # context stack
  207. #self.WHERE2 = [] # restrictions
  208. #self.WHERE3 = [] # inter-step constraints
  209. #self.WHERE3 = []
  210. #self.WHERE4 = [] # scopic constraints
  211. #self.WHERE5 = [] # alignment constraints
  212. #self.WHERE6 = [] # conditional axis
  213. self.steps = []
  214. self.scope = scope
  215. self._expand(t)
  216. if self.pstep:
  217. self._interpreteAxis(self.pstep, self.steps[0].axis, self.steps[0])
  218. def _getNewTableName(self):
  219. s = "%s%d" % (self.tname,self.sn,)
  220. self.sn.inc()
  221. return s
  222. def _beginGrp(self, cls=GRP):
  223. self.WHEREs.append(self.WHERE)
  224. self.WHERE = cls()
  225. def _finishGrp(self, cls=None):
  226. if cls is not None:
  227. self.WHEREs[-1].append(cls(self.WHERE))
  228. else:
  229. self.WHEREs[-1].append(self.WHERE)
  230. self.WHERE = self.WHEREs.pop()
  231. def getSql(self):
  232. if self.pstep:
  233. sql = "select 1 "
  234. else:
  235. sql = "select %s.* " % self.steps[-1].tab
  236. sql += "from %s " % ",".join([self.tname+" "+s.tab for s in self.steps])
  237. for s in self.steps:
  238. if not s.conditional:
  239. self.WHERE += s.getConstraints()
  240. for i,s in enumerate(self.steps[:-1]):
  241. s2 = self.steps[i+1]
  242. self._interpreteAxis(s, s2.axis, s2)
  243. w = unicode(self.WHERE).strip()
  244. if w: sql += "where %s" % w
  245. return sql
  246. def _expand(self, t):
  247. name = "_" + t.node
  248. for c in t:
  249. name += "_"
  250. if isinstance(c,str) or isinstance(c,unicode):
  251. name += self.TR[c]
  252. else:
  253. name += c.node
  254. return eval("self.%s" % name)(t)
  255. def _interpreteScope(self, scope, step):
  256. self.WHERE += [
  257. [scope.left, "<=", step.left],
  258. [scope.right, ">=", step.right]
  259. ]
  260. def _alignLeft(self, step1, step2):
  261. self.WHERE += [
  262. [step1.left, "=", step2.left],
  263. ]
  264. def _alignRight(self, step1, step2):
  265. self.WHERE += [
  266. [step1.right, "=", step2.right],
  267. ]
  268. def _interpreteAxis(self, step1, axis, step2):
  269. if step2.conditional is not None:
  270. if axis == '/':
  271. zWHERE = AND(
  272. ["z.sid", "=", step2.sid],
  273. ["z.tid", "=", step2.tid],
  274. ["z.left", ">=", step1.left],
  275. ["z.left", "<=", step2.left],
  276. ["z.right", "<=", step1.right],
  277. ["z.right", ">=", step2.right],
  278. ["z.depth", ">", step1.depth],
  279. ["z.depth", "<=", step2.depth],
  280. )
  281. if hasattr(step2, 'conditionalRestriction'):
  282. s = step2.conditionalRestriction.getSql()
  283. s = re.sub(" "+step2.tab+"\\.", " z.", s)
  284. s = "exists (%s)" % s
  285. zWHERE.append(NOT(GRP(flatten(step2.getConstraints()),s)))
  286. else:
  287. zWHERE.append(NOT(GRP(flatten(step2.getConstraints()))))
  288. self.WHERE += [
  289. [step1.sid, "=", step2.sid],
  290. [step1.tid, "=", step2.tid],
  291. ]
  292. if step2.conditional == '?':
  293. self.WHERE += [
  294. GRP(OR(
  295. [step1.id, "=", step2.id],
  296. AND([step1.id, "=", step2.pid]) +
  297. step2.getConstraints()
  298. ))
  299. ]
  300. elif step2.conditional == '+':
  301. self.WHERE += [
  302. [step1.left, "<=", step2.left],
  303. [step1.right, ">=", step2.right],
  304. [step1.depth, "<", step2.depth],
  305. "not exists (select 1 from %s z where %s)" % (self.tname,unicode(zWHERE))
  306. ]
  307. elif step2.conditional == '*':
  308. self.WHERE += [
  309. GRP(OR(
  310. [step1.id, "=", step2.id],
  311. AND([step1.left, "<=", step2.left],
  312. [step1.right, ">=", step2.right],
  313. [step1.depth, "<", step2.depth],
  314. "not exists (select 1 from %s z where %s)" % (self.tname,unicode(zWHERE)))
  315. ))
  316. ]
  317. elif axis == '\\':
  318. zWHERE = AND(
  319. ["z.sid", "=", step2.sid],
  320. ["z.tid", "=", step2.tid],
  321. ["z.left", "<=", step1.left],
  322. ["z.left", ">=", step2.left],
  323. ["z.right", ">=", step1.right],
  324. ["z.right", "<=", step2.right],
  325. ["z.depth", "<", step1.depth],
  326. ["z.depth", ">=", step2.depth],
  327. )
  328. if hasattr(step2, 'conditionalRestriction'):
  329. s = step2.conditionalRestriction.getSql()
  330. s = re.sub(" "+step2.tab+"\\.", " z.", s)
  331. s = "exists (%s)" % s
  332. zWHERE.append(NOT(GRP(flatten(step2.getConstraints()),s)))
  333. else:
  334. zWHERE.append(NOT(GRP(flatten(step2.getConstraints()))))
  335. self.WHERE += [
  336. [step1.sid, "=", step2.sid],
  337. [step1.tid, "=", step2.tid],
  338. ]
  339. if step2.conditional == '?':
  340. self.WHERE += [
  341. GRP(OR(
  342. [step1.id, "=", step2.id],
  343. AND([step1.pid, "=", step2.id]) +
  344. step2.getConstraints()
  345. ))
  346. ]
  347. elif step2.conditional == '+':
  348. self.WHERE += [
  349. [step1.left, ">=", step2.left],
  350. [step1.right, "<=", step2.right],
  351. [step1.depth, ">", step2.depth],
  352. "not exists (select 1 from %s z where %s)" % (self.tname,unicode(zWHERE))
  353. ]
  354. elif step2.conditional == '*':
  355. self.WHERE += [
  356. GRP(OR(
  357. [step1.id, "=", step2.id],
  358. AND([step1.left, ">=", step2.left],
  359. [step1.right, "<=", step2.right],
  360. [step1.depth, ">", step2.depth],
  361. "not exists (select 1 from %s z where %s)" % (self.tname,unicode(zWHERE)))
  362. ))
  363. ]
  364. elif axis == '->':
  365. cWHERE = AND(
  366. ["c.sid", "=", "z.sid"],
  367. ["c.tid", "=", "z.tid"],
  368. ["c.pid", "=", "z.id"]
  369. )
  370. wWHERE = AND(
  371. ["w.sid", "=", "z.sid"],
  372. ["w.tid", "=", "z.tid"],
  373. ["w.left", "<", "z.right"],
  374. ["w.right", ">", "z.left"],
  375. ["w.left", ">=", step1.right],
  376. ["w.right", "<=", step2.left],
  377. flatten(step2.getConstraints())
  378. )
  379. if hasattr(step2, 'conditionalRestriction'):
  380. s = step2.conditionalRestriction.getSql()
  381. s = re.sub(" "+step2.tab+"\\.", " w.", s)
  382. s = "exists (%s)" % s
  383. zWHERE.append(s)
  384. zWHERE = AND(
  385. ["z.sid", "=", step2.sid],
  386. ["z.tid", "=", step2.tid],
  387. ["z.left", ">=", step1.right],
  388. ["z.right", "<=", step2.left],
  389. NOT(GRP(flatten(step2.getConstraints()))),
  390. "not exists (select 1 from %s c where %s)" % (self.tname,unicode(cWHERE)),
  391. "not exists (select 1 from %s w where %s)" % (self.tname,unicode(wWHERE))
  392. )
  393. self.WHERE += [
  394. [step1.sid, "=", step2.sid],
  395. [step1.tid, "=", step2.tid],
  396. ]
  397. if step2.conditional == '?':
  398. self.WHERE += [
  399. GRP(OR(
  400. [step1.id, "=", step2.id],
  401. AND([step1.right, "=", step2.left],
  402. flatten(step2.getConstraints())
  403. )))
  404. ]
  405. elif step2.conditional == '+':
  406. self.WHERE += [
  407. [step1.right, "<=", step2.left],
  408. flatten(step2.getConstraints()),
  409. "not exists (select 1 from %s z where %s)" % (self.tname,unicode(zWHERE))
  410. ]
  411. elif step2.conditional == '*':
  412. self.WHERE += [
  413. GRP(OR(
  414. [step1.id, "=", step2.id],
  415. GRP(AND(
  416. [step1.right, "<=", step2.left],
  417. flatten(step2.getConstraints()),
  418. "not exists (select 1 from %s z where %s)" % (self.tname,unicode(zWHERE))
  419. ))))
  420. ]
  421. elif axis == '<-':
  422. cWHERE = AND(
  423. ["c.sid", "=", "z.sid"],
  424. ["c.tid", "=", "z.tid"],
  425. ["c.pid", "=", "z.id"]
  426. )
  427. wWHERE = AND(
  428. ["w.sid", "=", "z.sid"],
  429. ["w.tid", "=", "z.tid"],
  430. ["w.left", "<", "z.right"],
  431. ["w.right", ">", "z.left"],
  432. ["w.left", ">=", step2.right],
  433. ["w.right", "<=", step1.left],
  434. flatten(step2.getConstraints())
  435. )
  436. if hasattr(step2, 'conditionalRestriction'):
  437. s = step2.conditionalRestriction.getSql()
  438. s = re.sub(" "+step2.tab+"\\.", " w.", s)
  439. s = "exists (%s)" % s
  440. zWHERE.append(s)
  441. zWHERE = AND(
  442. ["z.sid", "=", step2.sid],
  443. ["z.tid", "=", step2.tid],
  444. ["z.left", ">=", step2.right],
  445. ["z.right", "<=", step1.left],
  446. NOT(GRP(flatten(step2.getConstraints()))),
  447. "not exists (select 1 from %s c where %s)" % (self.tname,unicode(cWHERE)),
  448. "not exists (select 1 from %s w where %s)" % (self.tname,unicode(wWHERE))
  449. )
  450. self.WHERE += [
  451. [step1.sid, "=", step2.sid],
  452. [step1.tid, "=", step2.tid],
  453. ]
  454. if step2.conditional == '?':
  455. self.WHERE += [
  456. GRP(OR(
  457. [step1.id, "=", step2.id],
  458. AND([step1.left, "=", step2.right],
  459. flatten(step2.getConstraints())
  460. )))
  461. ]
  462. elif step2.conditional == '+':
  463. self.WHERE += [
  464. [step1.left, ">=", step2.right],
  465. flatten(step2.getConstraints()),
  466. "not exists (select 1 from %s z where %s)" % (self.tname,unicode(zWHERE))
  467. ]
  468. elif step2.conditional == '*':
  469. self.WHERE += [
  470. GRP(OR(
  471. [step1.id, "=", step2.id],
  472. GRP(AND(
  473. [step1.left, ">=", step2.right],
  474. flatten(step2.getConstraints()),
  475. "not exists (select 1 from %s z where %s)" % (self.tname,unicode(zWHERE))
  476. ))))
  477. ]
  478. elif axis == '=>':
  479. cWHERE = AND(
  480. ["c.sid", "=", "z.sid"],
  481. ["c.tid", "=", "z.tid"],
  482. ["c.pid", "=", "z.id"]
  483. )
  484. wWHERE = AND(
  485. ["w.sid", "=", "z.sid"],
  486. ["w.tid", "=", "z.tid"],
  487. ["w.left", "<", "z.right"],
  488. ["w.right", ">", "z.left"],
  489. ["w.left", ">=", step1.right],
  490. ["w.right", "<=", step2.left],
  491. flatten(step2.getConstraints())
  492. )
  493. if hasattr(step2, 'conditionalRestriction'):
  494. s = step2.conditionalRestriction.getSql()
  495. s = re.sub(" "+step2.tab+"\\.", " w.", s)
  496. s = "exists (%s)" % s
  497. zWHERE.append(s)
  498. zWHERE = AND(
  499. ["z.sid", "=", step2.sid],
  500. ["z.tid", "=", step2.tid],
  501. ["z.left", ">=", step1.right],
  502. ["z.right", "<=", step2.left],
  503. NOT(GRP(flatten(step2.getConstraints()))),
  504. "not exists (select 1 from %s c where %s)" % (self.tname,unicode(cWHERE)),
  505. "not exists (select 1 from %s w where %s)" % (self.tname,unicode(wWHERE))
  506. )
  507. self.WHERE += [
  508. [step1.sid, "=", step2.sid],
  509. [step1.tid, "=", step2.tid],
  510. ]
  511. if step2.conditional == '?':
  512. self.WHERE += [
  513. GRP(OR(
  514. [step1.id, "=", step2.id],
  515. AND([step1.right, "=", step2.left],
  516. [step1.pid, "=", step2.pid],
  517. flatten(step2.getConstraints())
  518. )))
  519. ]
  520. elif step2.conditional == '+':
  521. self.WHERE += [
  522. [step1.right, "<=", step2.left],
  523. [step1.pid, "=", step2.pid],
  524. flatten(step2.getConstraints()),
  525. "not exists (select 1 from %s z where %s)" % (self.tname,unicode(zWHERE))
  526. ]
  527. elif step2.conditional == '*':
  528. self.WHERE += [
  529. GRP(OR(
  530. [step1.id, "=", step2.id],
  531. GRP(AND(
  532. [step1.right, "<=", step2.left],
  533. [step1.pid, "=", step2.pid],
  534. flatten(step2.getConstraints()),
  535. "not exists (select 1 from %s z where %s)" % (self.tname,unicode(zWHERE))
  536. ))))
  537. ]
  538. elif axis == '<=':
  539. cWHERE = AND(
  540. ["c.sid", "=", "z.sid"],
  541. ["c.tid", "=", "z.tid"],
  542. ["c.pid", "=", "z.id"]
  543. )
  544. wWHERE = AND(
  545. ["w.sid", "=", "z.sid"],
  546. ["w.tid", "=", "z.tid"],
  547. ["w.left", "<", "z.right"],
  548. ["w.right", ">", "z.left"],
  549. ["w.left", ">=", step2.right],
  550. ["w.right", "<=", step1.left],
  551. flatten(step2.getConstraints())
  552. )
  553. if hasattr(step2, 'conditionalRestriction'):
  554. s = step2.conditionalRestriction.getSql()
  555. s = re.sub(" "+step2.tab+"\\.", " w.", s)
  556. s = "exists (%s)" % s
  557. zWHERE.append(s)
  558. zWHERE = AND(
  559. ["z.sid", "=", step2.sid],
  560. ["z.tid", "=", step2.tid],
  561. ["z.left", ">=", step2.right],
  562. ["z.right", "<=", step1.left],
  563. NOT(GRP(flatten(step2.getConstraints()))),
  564. "not exists (select 1 from %s c where %s)" % (self.tname,unicode(cWHERE)),
  565. "not exists (select 1 from %s w where %s)" % (self.tname,unicode(wWHERE))
  566. )
  567. self.WHERE += [
  568. [step1.sid, "=", step2.sid],
  569. [step1.tid, "=", step2.tid],
  570. ]
  571. if step2.conditional == '?':
  572. self.WHERE += [
  573. GRP(OR(
  574. [step1.id, "=", step2.id],
  575. AND([step1.left, "=", step2.right],
  576. [step1.pid, "=", step2.pid],
  577. flatten(step2.getConstraints())
  578. )))
  579. ]
  580. elif step2.conditional == '+':
  581. self.WHERE += [
  582. [step1.left, ">=", step2.right],
  583. [step1.pid, "=", step2.pid],
  584. flatten(step2.getConstraints()),
  585. "not exists (select 1 from %s z where %s)" % (self.tname,unicode(zWHERE))
  586. ]
  587. elif step2.conditional == '*':
  588. self.WHERE += [
  589. GRP(OR(
  590. [step1.id, "=", step2.id],
  591. GRP(AND(
  592. [step1.left, ">=", step2.right],
  593. [step1.pid, "=", step2.pid],
  594. flatten(step2.getConstraints()),
  595. "not exists (select 1 from %s z where %s)" % (self.tname,unicode(zWHERE))
  596. ))))
  597. ]
  598. # normal (non-conditional) axis
  599. elif step2.conditional is None:
  600. if axis == '/':
  601. self.WHERE += [
  602. [step1.sid, "=", step2.sid],
  603. [step1.tid, "=", step2.tid],
  604. [step1.id, "=", step2.pid],
  605. ]
  606. elif axis == '//':
  607. self.WHERE += [
  608. [step1.sid, "=", step2.sid],
  609. [step1.tid, "=", step2.tid],
  610. [step1.left, "<=", step2.left],
  611. [step1.right, ">=", step2.right],
  612. [step1.depth, "<", step2.depth],
  613. ]
  614. elif axis == '\\':
  615. self.WHERE += [
  616. [step1.sid, "=", step2.sid],
  617. [step1.tid, "=", step2.tid],
  618. [step1.pid, "=", step2.id]
  619. ]
  620. elif axis == '\\\\':
  621. self.WHERE += [
  622. [step1.sid, "=", step2.sid],
  623. [step1.tid, "=", step2.tid],
  624. [step1.depth, ">", step2.depth],
  625. [step1.left, ">=", step2.left],
  626. [step1.right, "<=", step2.right]
  627. ]
  628. elif axis == '->':
  629. self.WHERE += [
  630. [step1.sid, "=", step2.sid],
  631. [step1.tid, "=", step2.tid],
  632. [step1.right, "=", step2.left],
  633. ]
  634. elif axis == '-->':
  635. self.WHERE += [
  636. [step1.sid, "=", step2.sid],
  637. [step1.tid, "=", step2.tid],
  638. [step1.right, "<=", step2.left],
  639. ]
  640. elif axis == '<-':
  641. self.WHERE += [
  642. [step1.sid, "=", step2.sid],
  643. [step1.tid, "=", step2.tid],
  644. [step1.left, "=", step2.right],
  645. ]
  646. elif axis == '<--':
  647. self.WHERE += [
  648. [step1.sid, "=", step2.sid],
  649. [step1.tid, "=", step2.tid],
  650. [step1.left, ">=", step2.right],
  651. ]
  652. elif axis == '=>':
  653. self.WHERE += [
  654. [step1.sid, "=", step2.sid],
  655. [step1.tid, "=", step2.tid],
  656. [step1.right, "=", step2.left],
  657. [step1.pid, "=", step2.pid]
  658. ]
  659. elif axis == '==>':
  660. self.WHERE += [
  661. [step1.sid, "=", step2.sid],
  662. [step1.tid, "=", step2.tid],
  663. [step1.right, "<=", step2.left],
  664. [step1.pid, "=", step2.pid]
  665. ]
  666. elif axis == '<=':
  667. self.WHERE += [
  668. [step1.sid, "=", step2.sid],
  669. [step1.tid, "=", step2.tid],
  670. [step1.left, "=", step2.right],
  671. [step1.pid, "=", step2.pid]
  672. ]
  673. elif axis == '<==':
  674. self.WHERE += [
  675. [step1.sid, "=", step2.sid],
  676. [step1.tid, "=", step2.tid],
  677. [step1.left, ">=", step2.right],
  678. [step1.pid, "=", step2.pid]
  679. ]
  680. elif axis == '.' or axis == '@':
  681. self.WHERE += [
  682. [step1.sid, "=", step2.sid],
  683. [step1.tid, "=", step2.tid],
  684. [step1.id, "=", step2.id]
  685. ]
  686. def _P_S(self, tree):
  687. self._expand(tree[0])
  688. def _P_P_S(self, tree):
  689. p = tree[0]
  690. s = tree[1]
  691. self._expand(p)
  692. self._expand(s)
  693. def _P_P_LCB_P_RCB(self, tree):
  694. p1 = tree[0]
  695. p2 = tree[2]
  696. self._expand(p1)
  697. oldscope = self.scope
  698. self.scope = self.step
  699. self._expand(p2)
  700. self.scope = oldscope
  701. def _S_S2(self, tree):
  702. self._expand(tree[0])
  703. def _S_LRB_S2_RRB_STAR(self, tree):
  704. s2 = tree[1]
  705. self.step.conditional = '*'
  706. self._expand(s2)
  707. def _S_LRB_S2_RRB_PLUS(self, tree):
  708. s2 = tree[1]
  709. self.step.conditional = '+'
  710. self._expand(s2)
  711. def _S_LRB_S2_RRB_OPT(self, tree):
  712. s2 = tree[1]
  713. self.step.conditional = '?'
  714. self._expand(s2)
  715. def _S2_S1(self, tree):
  716. self._expand(tree[0])
  717. def _S2_S1_LSB_R_RSB(self, tree):
  718. s1 = tree[0]
  719. r = tree[2]
  720. self._expand(s1)
  721. self._expand(r)
  722. def _S1_A_T(self, tree):
  723. a = tree[0]
  724. t = tree[1]
  725. self.step = Step()
  726. if self.steps:
  727. self.step.sn = self.steps[-1].sn + 1
  728. else:
  729. self.step.sn = 0
  730. self.step.axis = a[0]
  731. #self.step.tab = self.prefix + self.tname + str(self.step.sn)
  732. self.step.tab = self._getNewTableName()
  733. self.steps.append(self.step)
  734. self._expand(t)
  735. if self.scope:
  736. self._interpreteScope(self.scope, self.step)
  737. def _T_Qname(self, t):
  738. tag = t[0][0]
  739. if tag[0] == '^':
  740. tag = tag[1:]
  741. if len(self.steps) > 1:
  742. if self.scope:
  743. self._alignLeft(self.scope, self.step)
  744. if tag[-1] == '$':
  745. tag = tag[:-1]
  746. if len(self.steps) > 1:
  747. if self.scope:
  748. self._alignRight(self.scope, self.step)
  749. self.step.WHERE = [
  750. ['type','=',"'syn'"],
  751. ['name','=',"'%s'" % tag],
  752. ]
  753. def _T_under(self, t):
  754. self.step.WHERE = [
  755. ['type','=',"'syn'"]
  756. ]
  757. def _ATT_AT_Qname_C_Qname(self, t):
  758. self.step = Step()
  759. if self.steps:
  760. self.step.sn = self.steps[-1].sn + 1
  761. else:
  762. self.step.sn = 0
  763. self.step.axis = '@'
  764. #self.step.tab = self.prefix + self.tname + str(self.step.sn)
  765. self.step.tab = self._getNewTableName()
  766. self.steps.append(self.step)
  767. self.step.WHERE = [
  768. ['type','=',"'att'"],
  769. ['name','=',"'@%s'" % t[1][0]],
  770. ['value', " %s " % t[2][0],"'%s'" % t[3][0]],
  771. ]
  772. def _R_R_OR_R(self, t):
  773. self._beginGrp(OR)
  774. self._expand(t[0])
  775. self._expand(t[2])
  776. self._finishGrp()
  777. def _R_R_AND_R(self, t):
  778. self._beginGrp(AND)
  779. self._expand(t[0])
  780. self._expand(t[2])
  781. self._finishGrp()
  782. def _R_LRB_R_RRB(self, t):
  783. self._beginGrp(GRP)
  784. self._expand(t[1])
  785. self._finishGrp()
  786. def _R_P(self, t):
  787. tr = Trans(t[0], self.sn, self.step, self.tname, self.scope)
  788. self.WHERE.append(tr)
  789. def _R_LCB_R_RCB(self, t):
  790. oldscope = self.scope
  791. self.scope = self.step
  792. self._beginGrp()
  793. self._expand(t[1])
  794. self._finishGrp()
  795. self.scope = oldscope
  796. def _R_ATT(self, t):
  797. tr = Trans(t[0], self.sn, self.step, self.tname, self.scope)
  798. self.WHERE.append(tr)
  799. def _R_NOT_P(self, t):
  800. tr = Trans(t[1], self.sn, self.step, self.tname, self.scope)
  801. self.WHERE.append(NOT(GRP(tr)))
  802. def _R_NOT_ATT(self, t):
  803. tr = Trans(t[1], self.sn, self.step, self.tname, self.scope)
  804. self.WHERE.append(NOT(GRP(tr)))
  805. def _R_NOT_LRB_R_RRB(self, t):
  806. self._beginGrp(GRP)
  807. self._expand(t[2])
  808. self._finishGrp(NOT)
  809. class TransFlat(Trans):
  810. def getSql(self):
  811. if not hasattr(self,"steps2"):
  812. self.steps2 = []
  813. if self.pstep:
  814. sql = "select 1 "
  815. else:
  816. s = ",".join([x.tab+".*" for x in self.steps+self.steps2])
  817. sql = "select %s " % s
  818. sql += "from %s " % ",".join([self.tname+" "+s.tab for s in self.steps+self.steps2])
  819. for s in self.steps:
  820. if not s.conditional:
  821. self.WHERE += s.getConstraints()
  822. for i,s in enumerate(self.steps[:-1]):
  823. s2 = self.steps[i+1]
  824. self._interpreteAxis(s, s2.axis, s2)
  825. w = unicode(self.WHERE).strip()
  826. if w: sql += "where %s" % w
  827. return sql
  828. def _R_P(self, t):
  829. if not hasattr(self,'steps2'): self.steps2 = []
  830. tr = TransFlat(t[0], self.sn, self.step, self.tname, self.scope)
  831. self.steps2 += tr.steps
  832. for s in tr.steps:
  833. if not s.conditional:
  834. tr.WHERE += s.getConstraints()
  835. for i,s in enumerate(tr.steps[:-1]):
  836. s2 = tr.steps[i+1]
  837. tr._interpreteAxis(s, s2.axis, s2)
  838. self.WHERE.append(unicode(tr.WHERE).strip())
  839. def translate2(q,tname='T'):
  840. global T2, T3, T4, T5, T6, GR
  841. T2 = time.time()
  842. # tokenization
  843. l = tokenize(q)
  844. tokens = [a[1] for a in l]
  845. assert(tokens[0] == '//')
  846. T3 = time.time()
  847. # build grammar
  848. GR = grammar_text
  849. for typ, t in l:
  850. if typ == 's':
  851. GR += "Qname -> '" + t + "'\n"
  852. grammar = parse_cfg(GR)
  853. parser = parse.ChartParser(grammar, parse.TD_STRATEGY)
  854. T4 = time.time()
  855. # chart-parse the query
  856. trees = parser.nbest_parse(tokens)
  857. if not trees:
  858. T5 = T6 = time.time()
  859. return None, None
  860. tree = trees[0]
  861. T5 = time.time()
  862. # translate the parse tree
  863. r = Trans(tree,SerialNumber(),tname=tname).getSql()
  864. T6 = time.time()
  865. try:
  866. r1 = TransFlat(tree,SerialNumber(),tname=tname).getSql()
  867. except:
  868. r1 = None
  869. r1 = TransFlat(tree,SerialNumber(),tname=tname).getSql()
  870. return r, r1
  871. def translate(q,tname='T'):
  872. return translate2(q,tname)[0]
  873. def print_profile():
  874. print
  875. print " python startup: %6.3fs" % (T1-T0)
  876. print " query tokenization: %6.3fs" % (T3-T2)
  877. print " grammar parsing: %6.3fs" % (T4-T3)
  878. print " chart parsing: %6.3fs" % (T5-T4)
  879. print " translation: %6.3fs" % (T6-T5)
  880. print
  881. def get_profile():
  882. # tok/grammar/parsing/trans times
  883. return (T3-T2,T4-T3,T5-T4,T6-T5)
  884. def get_grammar():
  885. """
  886. Returns the CFG grammar that has recently been used.
  887. """
  888. return GR
  889. def get_base_grammar():
  890. """
  891. Returns the base LPath+ CFG grammar.
  892. """
  893. return grammar_text
  894. T1 = time.time()
  895. T2 = T3 = T4 = T5 = T6 = 0.0
  896. if __name__ == "__main__":
  897. import sys
  898. #l = tokenize('//A//B')
  899. q = '//A(/B[{//C->D$}])+'
  900. #l = tokenize('//A[{//B-->C}]')
  901. #l = tokenize('//A[//B or //C]')
  902. #l = tokenize('//S[//@lex="saw"]')
  903. #l = tokenize('//VP[//NP$]')
  904. #l = tokenize('//VP[{//^V->NP->PP$}]')
  905. #l = tokenize('//A//B//C')
  906. print translate2(sys.argv[1])[1]
  907. print_profile()
  908. #print get_grammar()