PageRenderTime 64ms CodeModel.GetById 27ms RepoModel.GetById 0ms app.codeStats 0ms

/parser4confluence.py

https://bitbucket.org/atlassian/gdpr
Python | 802 lines | 793 code | 2 blank | 7 comment | 0 complexity | 1319db0825c45daabb6fc20e4e2ccea3 MD5 | raw file
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Usage:
  5. parser4confluence.py -u <user> -f <filename> -d <database>
  6. """
  7. """
  8. __author__ = "Tomasz Dudzisz <tdudzisz@atlassian.com>"
  9. __author__ = "Rudy Slaiby <rslaiby@atlassian.com>"
  10. __author__ = "Tomasz Kanafa <tkanafa@atlassian.com>"
  11. __copyright__ = "Copyright 2018, Atlassian.com"
  12. __credits__ = []
  13. __license__ = "BSD"
  14. __version__ = "1.0.8"
  15. __status__ = "Development"
  16. __created__ = 11/04/2018
  17. __updated__ = 20/04/2018
  18. __project__ = gdpr-framework
  19. __file__ = parser4confluence.py
  20. __description__ =
  21. """
  22. from docopt import docopt
  23. from schema import Schema, And
  24. import sys, os, json, traceback
  25. class Opts(object):
  26. def __init__(self, __doc__):
  27. self.username = None
  28. self.filename = None
  29. self.database = None
  30. self.valid = False
  31. arguments = docopt(__doc__)
  32. self._validate(arguments)
  33. def _validate(self, arguments):
  34. schema = Schema({
  35. '-u': True,
  36. '<user>': And(str, len),
  37. '-f': True,
  38. '<filename>': And(str, len),
  39. '-d': True,
  40. '<database>': And(str, len),
  41. })
  42. args = schema.validate(arguments)
  43. self.username = args['<user>']
  44. self.username = self.username.strip('\'')
  45. self.filename = args['<filename>']
  46. self.database = args['<database>']
  47. self.valid = self.username and self.filename and self.database
  48. class SearchData(object):
  49. def __init__(self):
  50. self.username = None
  51. self.data = None
  52. self.where = "where"
  53. self.action = "action"
  54. self.origin = "origin"
  55. self.type = "type"
  56. self.description = "description"
  57. self.active = "active"
  58. self.db_mysql = "mysql"
  59. self.db_mssql = "mssql"
  60. self.db_oracle = "oracle"
  61. self.db_postgresql = "postgresql"
  62. self.user_order = True
  63. self.order = 1
  64. def execute(self):
  65. self.clear_output()
  66. for key, value in self.data.items():
  67. # self.show_metadata(key, value)
  68. self.save_query(key, value)
  69. self.order += 1
  70. continue
  71. def save_query(self, table, metadata):
  72. type = None
  73. comments = None
  74. sql = None
  75. origin = metadata[self.origin]
  76. description = metadata[self.description]
  77. if not metadata[self.active]:
  78. return
  79. for action in metadata[self.action]:
  80. type = action[self.type]
  81. comments = []
  82. comments.append("-- Type : {}".format(type))
  83. comments.append("-- Origin : {}".format(origin))
  84. comments.append("-- Description: {}".format(description))
  85. sql = self.get_sql_table(table, metadata)
  86. print(sql)
  87. self.save_to_file(self.database, self.add_comments(comments, sql), type, table)
  88. def add_comments(self, comment, sql):
  89. """
  90. add comments to sql
  91. """
  92. data = comment[:]
  93. data.append(sql)
  94. data = "\n".join(data)
  95. return data
  96. def get_order(self):
  97. if self.user_order:
  98. if self.order < 10:
  99. order = "0{}_".format(self.order)
  100. else:
  101. order = "{}_".format(self.order)
  102. return order
  103. else:
  104. return ""
  105. def save_to_file(self, database, data, type, table):
  106. try:
  107. file_name = "{}{}_{}.sql".format(self.get_order(), type, table.strip())
  108. path = "{}/{}/{}".format(self.output_path, database, file_name)
  109. f = open(path, "w")
  110. f.write(data)
  111. f.close()
  112. except Exception as e:
  113. print("Error saving file: {}".format(file_name))
  114. raise Exception(e)
  115. def show_metadata(self, key, value):
  116. print("\n{}".format(key))
  117. print(self.get_value("origin", value))
  118. print(self.get_value("description", value))
  119. print(self.get_value("action", value))
  120. for cname, cvalue in value[self.action].items():
  121. print("\t\t{0:<25}: {0:<30}".format(cname, cvalue))
  122. print("\t{}:".format(self.where))
  123. for cname, cvalue in value[self.where].items():
  124. print("\t\t{0:<25}: {0:<30}".format(cname, cvalue))
  125. def clear_output(self):
  126. """
  127. 1. create output dir if doesn't exist
  128. 2. remove a dir content in exists
  129. """
  130. path = "{}/{}".format(self.output_path, self.database)
  131. self.recreate_dir(path)
  132. def recreate_dir(self, path):
  133. if os.path.exists(path):
  134. print("Output directory found, cleaning: {}".format(path))
  135. # for the_file in os.listdir(path):
  136. # file_path = os.path.join(path, the_file)
  137. # try:
  138. # if os.path.isfile(file_path):
  139. # print("Removing: {}".format(file_path))
  140. # os.unlink(file_path)
  141. # except Exception as e:
  142. # print(e)
  143. else:
  144. print("Creating output dir: {}".format(self.output_path))
  145. os.makedirs(path, exist_ok=True)
  146. def parse_args(self):
  147. args = Opts(__doc__)
  148. if not args.valid:
  149. sys.exit(1)
  150. self.username = args.username
  151. self.data = self.read_json(args.filename)
  152. self.database = args.database
  153. base = os.path.basename(args.filename)
  154. self.output_path = "{}_{}".format(base.split('.')[0], "queries")
  155. def read_json(self, filename):
  156. if os.path.isfile(filename):
  157. content = open(filename, 'r').read()
  158. data = json.loads(content)
  159. return data
  160. print("No such file: {}".format(filename))
  161. sys.exit(1)
  162. def get_value(self, keyword, entry):
  163. if keyword in entry:
  164. return "\t{0:<33}: {0:<20}".format(keyword, entry[keyword])
  165. def get_column(self, entry):
  166. return entry
  167. def get_sql_clause(self, type, left_column, right_column):
  168. def exact_match_handler(left_column, right_column):
  169. return "{} = '{}' ".format(left_column, right_column)
  170. def regex_match_handler(left_column, right_column):
  171. return "{} like '{}' ".format(left_column, right_column)
  172. def none_handler(left_column, right_column): return "1 = 0".format(left_column, right_column)
  173. def exact_value_handler(left_column, right_column): return "{} = {} ".format(left_column, right_column)
  174. def free_text_handler(left_column, right_column): return "{} {} ".format(left_column, right_column)
  175. ret = {
  176. 'user_key': exact_match_handler,
  177. 'email': exact_match_handler,
  178. 'text': regex_match_handler,
  179. 'url': regex_match_handler,
  180. 'jql': regex_match_handler,
  181. 'audit_changed_value': regex_match_handler,
  182. 'audit_data': regex_match_handler,
  183. 'changeitem_string': regex_match_handler,
  184. 'changeitem_value': regex_match_handler,
  185. 'exact_value': exact_value_handler,
  186. 'free_text': free_text_handler,
  187. }.get(type, none_handler)(left_column, right_column)
  188. return ret
  189. def get_sql_where_clause(self, where_details):
  190. sql_where_clause = ''
  191. if where_details:
  192. for current_column, current_value in where_details.items():
  193. current_type = current_value['type']
  194. if current_type == 'nested':
  195. if "db_specific" in current_value and self.should_apply_db_specific(current_value):
  196. # apply the database specific query
  197. sql_where_clause = self.apply_db_specific(current_column, current_value)
  198. else:
  199. sql_where_clause += "and {} IN (select {} from {} where {})".format(current_column, current_value['column_select'], current_value['table'], self.get_sql_where_clause(current_value['value']))
  200. elif current_type == 'exact_value':
  201. # in case a we need to compare 2 columns
  202. sql_where_clause += "and {}".format(self.get_sql_clause(current_value['type'], current_column, current_value['value']))
  203. elif current_type == 'function':
  204. sql_where_clause += self.apply_function(current_column, current_value)
  205. else:
  206. sql_where_clause += 'and {}'.format(self.get_sql_clause(current_value['type'], current_column, self.username))
  207. else:
  208. sql_where_clause = "1 = 1"
  209. # Slice up the starting and
  210. sql_where_clause = self.slice_string(sql_where_clause, "and ")
  211. return sql_where_clause
  212. def slice_string(self, text, text_to_slice):
  213. if (len(text) > len(text_to_slice)) and (text.startswith(text_to_slice)):
  214. text = text[len(text_to_slice):]
  215. return text
  216. def should_apply_db_specific(self, current_value):
  217. if current_value['db_specific'] == self.database:
  218. return True
  219. return False
  220. def apply_function(self, current_column, current_value):
  221. function_name = current_value['function_name']
  222. function_params_text = ''
  223. all_params = current_value['parameters']
  224. for current_parameter in all_params:
  225. current_param_type = current_parameter['type']
  226. if current_param_type == 'nested':
  227. function_params_text += ", (select {} from {} where {})".format(current_parameter['column_select'], current_parameter['table'], self.get_sql_where_clause(current_parameter['value']))
  228. else:
  229. function_params_text += ", {}".format(current_parameter['value'])
  230. # Slice up the starting comma
  231. function_params_text = self.slice_string(function_params_text, ", ")
  232. sql_where_clause = "and {} = {}({})".format(current_column, function_name, function_params_text)
  233. return sql_where_clause
  234. def apply_db_specific(self, current_column, current_value):
  235. sql_where_clause = ""
  236. # construct the inner query
  237. sql_where_clause += "select {} from {} where {}".format(current_value['column_select'], current_value['table'], self.get_sql_where_clause(current_value['value']))
  238. # wrap it with the database specific statement
  239. sql_where_clause = self.replace_db_specific(current_value, sql_where_clause)
  240. # glue the first part of the where with the wrapped inner query
  241. sql_where_clause = "and {} IN {}".format(current_column, sql_where_clause)
  242. return sql_where_clause
  243. def replace_db_specific(self, current_value, sql_where_clause):
  244. # Slice up the starting and before wrapping it
  245. sql_where_clause = self.slice_string(sql_where_clause, "and ")
  246. wrapped_query = sql_where_clause
  247. # apply any database specific thing here
  248. if current_value['db_specific'] == self.db_postgresql:
  249. wrapped_query = current_value['wrap'].replace("$1", sql_where_clause)
  250. elif current_value['db_specific'] == self.db_mysql:
  251. wrapped_query = current_value['wrap'].replace("$1", sql_where_clause)
  252. elif current_value['db_specific'] == self.db_mssql:
  253. wrapped_query = current_value['wrap'].replace("$1", sql_where_clause)
  254. elif current_value['db_specific'] == self.db_oracle:
  255. wrapped_query = current_value['wrap'].replace("$1", sql_where_clause)
  256. return wrapped_query
  257. def build_insert_statements(self):
  258. if self.database == "oracle":
  259. return self.build_insert_statements_for_oracle()
  260. elif self.database == "mssql":
  261. return self.build_insert_statements_for_mssql()
  262. elif self.database == "mysql":
  263. return self.build_insert_statements_for_mysql()
  264. elif self.database == "postgresql":
  265. return self.build_insert_statements_for_postgres()
  266. def build_insert_statements_for_mssql(self):
  267. insert1 = """INSERT INTO journalentry (journal_name, creationdate, type, message) VALUES
  268. ('main_index', CURRENT_TIMESTAMP - 2,
  269. 'DELETE_CHANGE_DOCUMENTS', concat('com.atlassian.confluence.pages.Attachment-', (
  270. SELECT CONTENTID
  271. FROM CONTENT
  272. WHERE PAGEID IN (
  273. SELECT CONTENTID
  274. FROM CONTENT
  275. WHERE CONTENTTYPE = 'USERINFO' AND CONTENT.USERNAME IN (
  276. SELECT user_key
  277. FROM user_mapping
  278. WHERE username = '{}')))));""".format(self.username)
  279. insert2 = """INSERT INTO journalentry (journal_name, creationdate, type, message) VALUES
  280. ('main_index', CURRENT_TIMESTAMP - 2, 'DELETE_DOCUMENT',
  281. concat('com.atlassian.confluence.pages.Attachment-', (
  282. SELECT CONTENTID
  283. FROM CONTENT
  284. WHERE PAGEID IN (
  285. SELECT CONTENTID
  286. FROM CONTENT
  287. WHERE CONTENTTYPE = 'USERINFO' AND CONTENT.USERNAME IN (
  288. SELECT user_key
  289. FROM user_mapping
  290. WHERE username = '{}')))));""".format(self.username)
  291. insert3 = """INSERT INTO journalentry
  292. SELECT 'main_index' AS journal_name
  293. , CURRENT_TIMESTAMP - 2 AS creationdate
  294. , 'DELETE_DOCUMENT' AS type
  295. , concat('com.atlassian.confluence.user.PersonalInformation-', C2.CONTENTID) AS message
  296. FROM (
  297. SELECT CONTENTID
  298. FROM CONTENT
  299. WHERE CONTENTTYPE = 'USERINFO' AND CONTENT.USERNAME IN (
  300. SELECT user_key
  301. FROM user_mapping
  302. WHERE username = '{}')) AS C2;""".format(self.username)
  303. insert4 = """INSERT INTO journalentry
  304. SELECT 'main_index' AS journal_name
  305. , CURRENT_TIMESTAMP - 2 AS creationdate
  306. , 'DELETE_CHANGE_DOCUMENTS' AS type
  307. , concat('com.atlassian.confluence.user.PersonalInformation-', C2.CONTENTID) AS message
  308. FROM (
  309. SELECT CONTENTID
  310. FROM CONTENT
  311. WHERE CONTENTTYPE = 'USERINFO' AND CONTENT.USERNAME IN (
  312. SELECT user_key
  313. FROM user_mapping
  314. WHERE username = '{}')) AS C2;""".format(self.username)
  315. insert5 = """INSERT INTO journalentry
  316. SELECT 'main_index' AS journal_name
  317. , CURRENT_TIMESTAMP - 2 AS creationdate
  318. , type_name AS type
  319. , CASE
  320. WHEN (C2.CONTENTTYPE = 'PAGE')
  321. THEN concat('com.atlassian.confluence.pages.Page-', C2.CONTENTID)
  322. WHEN (C2.CONTENTTYPE = 'BLOGPOST')
  323. THEN concat('com.atlassian.confluence.pages.BlogPost-', C2.CONTENTID)
  324. WHEN (C2.CONTENTTYPE = 'COMMENT')
  325. THEN concat('com.atlassian.confluence.pages.Comment-', C2.CONTENTID)
  326. END AS message
  327. FROM (
  328. SELECT CONTENT.CONTENTID
  329. , CONTENT.CONTENTTYPE
  330. , BODYCONTENT.BODY
  331. FROM CONTENT
  332. JOIN BODYCONTENT
  333. ON CONTENT.CONTENTID = BODYCONTENT.CONTENTID
  334. WHERE CONTENTTYPE IN ('PAGE', 'BLOGPOST', 'COMMENT')
  335. AND CONTENT.PREVVER IS NULL
  336. AND CONTENT.CONTENT_STATUS = 'current'
  337. AND BODYCONTENT.BODY LIKE CONCAT('%<ri:user ri:userkey="', (
  338. SELECT user_key
  339. FROM user_mapping
  340. WHERE user_mapping.username = '{}'), '"%')
  341. ) AS C2
  342. JOIN (SELECT 'ADD_CHANGE_DOCUMENT' AS type_name
  343. UNION SELECT 'UPDATE_DOCUMENT' AS type_name) AS tmp_type
  344. ON (1 = 1);""".format(self.username)
  345. # return "{}\n\n{}\n\n{}\n\n{}".format(insert1, insert2, insert3, insert4)
  346. # remove the attachment journal entry for now, since it breaks when there are more than 1 attachment (and also breaks for 0 attachment)
  347. return "{}\n\n{}\n\n{}".format(insert3, insert4, insert5)
  348. def build_insert_statements_for_oracle(self):
  349. insert1 = """INSERT INTO journalentry (entry_id, journal_name, creationdate, type, message) VALUES
  350. (SEQ_JOURNAL_ENTRY_ID.nextval, 'main_index', CURRENT_TIMESTAMP - 2,
  351. 'DELETE_CHANGE_DOCUMENTS', concat('com.atlassian.confluence.pages.Attachment-', (
  352. SELECT CONTENTID
  353. FROM CONTENT
  354. WHERE PAGEID IN (
  355. SELECT CONTENTID
  356. FROM CONTENT
  357. WHERE CONTENTTYPE = 'USERINFO' AND CONTENT.USERNAME IN (
  358. SELECT user_key
  359. FROM user_mapping
  360. WHERE username = '{}')))));""".format(self.username)
  361. insert2 = """INSERT INTO journalentry (entry_id, journal_name, creationdate, type, message) VALUES
  362. (SEQ_JOURNAL_ENTRY_ID.nextval, 'main_index', CURRENT_TIMESTAMP - 2, 'DELETE_DOCUMENT',
  363. concat('com.atlassian.confluence.pages.Attachment-', (
  364. SELECT CONTENTID
  365. FROM CONTENT
  366. WHERE PAGEID IN (
  367. SELECT CONTENTID
  368. FROM CONTENT
  369. WHERE CONTENTTYPE = 'USERINFO' AND CONTENT.USERNAME IN (
  370. SELECT user_key
  371. FROM user_mapping
  372. WHERE username = '{}')))));""".format(self.username)
  373. insert3 = """INSERT INTO journalentry
  374. SELECT SEQ_JOURNAL_ENTRY_ID.nextval AS entry_id
  375. , 'main_index' AS journal_name
  376. , CURRENT_TIMESTAMP - 2 AS creationdate
  377. , 'DELETE_DOCUMENT' AS type
  378. , concat('com.atlassian.confluence.user.PersonalInformation-', CONTENTID) AS message
  379. FROM (
  380. SELECT CONTENTID
  381. FROM CONTENT
  382. WHERE CONTENTTYPE = 'USERINFO' AND CONTENT.USERNAME IN (
  383. SELECT user_key
  384. FROM user_mapping
  385. WHERE username = '{}'));""".format(self.username)
  386. insert4 = """INSERT INTO journalentry
  387. SELECT SEQ_JOURNAL_ENTRY_ID.nextval AS entry_id
  388. , 'main_index' AS journal_name
  389. , CURRENT_TIMESTAMP - 2 AS creationdate
  390. , 'DELETE_CHANGE_DOCUMENTS' AS type
  391. , concat('com.atlassian.confluence.user.PersonalInformation-', CONTENTID) AS message
  392. FROM (
  393. SELECT CONTENTID
  394. FROM CONTENT
  395. WHERE CONTENTTYPE = 'USERINFO' AND CONTENT.USERNAME IN (
  396. SELECT user_key
  397. FROM user_mapping
  398. WHERE username = '{}'));""".format(self.username)
  399. insert5 = """INSERT INTO journalentry
  400. SELECT SEQ_JOURNAL_ENTRY_ID.nextval AS entry_id
  401. , 'main_index' AS journal_name
  402. , CURRENT_TIMESTAMP - 2 AS creationdate
  403. , type_name AS type
  404. , CASE
  405. WHEN (CONTENTTYPE = 'PAGE')
  406. THEN concat('com.atlassian.confluence.pages.Page-', CONTENTID)
  407. WHEN (CONTENTTYPE = 'BLOGPOST')
  408. THEN concat('com.atlassian.confluence.pages.BlogPost-', CONTENTID)
  409. WHEN (CONTENTTYPE = 'COMMENT')
  410. THEN concat('com.atlassian.confluence.pages.Comment-', CONTENTID)
  411. END AS message
  412. FROM (
  413. SELECT CONTENT.CONTENTID
  414. , CONTENT.CONTENTTYPE
  415. FROM CONTENT
  416. JOIN BODYCONTENT
  417. ON CONTENT.CONTENTID = BODYCONTENT.CONTENTID
  418. WHERE CONTENTTYPE IN ('PAGE', 'BLOGPOST', 'COMMENT')
  419. AND CONTENT.PREVVER IS NULL
  420. AND CONTENT.CONTENT_STATUS = 'current'
  421. AND BODYCONTENT.BODY LIKE '%<ri:user ri:userkey="' || (
  422. SELECT user_key
  423. FROM user_mapping
  424. WHERE user_mapping.username = '{}') || '"%'
  425. )
  426. JOIN (SELECT 'ADD_CHANGE_DOCUMENT' type_name FROM dual
  427. UNION
  428. SELECT 'UPDATE_DOCUMENT' type_name FROM dual) tmp_type
  429. ON (1 = 1);""".format(self.username)
  430. # return "{}\n\n{}\n\n{}\n\n{}".format(insert1, insert2, insert3, insert4)
  431. # remove the attachment journal entry for now, since it breaks when there are more than 1 attachment (and also breaks for 0 attachment)
  432. return "{}\n\n{}\n\n{}".format(insert3, insert4, insert5)
  433. def build_insert_statements_for_mysql(self):
  434. insert1 = """INSERT INTO journalentry (JOURNAL_NAME, CREATIONDATE, TYPE, MESSAGE)
  435. VALUES ('main_index', CURRENT_TIMESTAMP() - INTERVAL 2 DAY, 'DELETE_CHANGE_DOCUMENTS', concat('com.atlassian.confluence.pages.Attachment-', (
  436. SELECT CONTENTID
  437. FROM CONTENT
  438. WHERE PAGEID IN (
  439. SELECT CONTENTID
  440. FROM CONTENT
  441. WHERE CONTENTTYPE = 'USERINFO' AND CONTENT.USERNAME IN (
  442. SELECT user_key
  443. FROM user_mapping
  444. WHERE username = '{}')))));""".format(self.username)
  445. insert2 = """INSERT INTO journalentry (JOURNAL_NAME, CREATIONDATE, TYPE, MESSAGE)
  446. VALUES ('main_index', CURRENT_TIMESTAMP() - INTERVAL 2 DAY, 'DELETE_DOCUMENT', concat('com.atlassian.confluence.pages.Attachment-', (
  447. SELECT CONTENTID
  448. FROM CONTENT
  449. WHERE PAGEID IN (
  450. SELECT CONTENTID
  451. FROM CONTENT
  452. WHERE CONTENTTYPE = 'USERINFO' AND CONTENT.USERNAME IN (
  453. SELECT user_key
  454. FROM user_mapping
  455. WHERE username = '{}')))));""".format(self.username)
  456. insert3 = """INSERT INTO journalentry
  457. SELECT @n := @n + 1 AS id
  458. , 'main_index' AS JOURNAL_NAME
  459. , CURRENT_TIMESTAMP() - INTERVAL 2 DAY AS CREATIONDATE
  460. , 'DELETE_DOCUMENT' AS TYPE
  461. , concat('com.atlassian.confluence.user.PersonalInformation-', tmp.CONTENTID) AS MESSAGE
  462. FROM (
  463. SELECT CONTENTID
  464. FROM CONTENT
  465. WHERE CONTENTTYPE = 'USERINFO' AND CONTENT.USERNAME IN (
  466. SELECT user_key
  467. FROM user_mapping
  468. WHERE username =
  469. '{}')) AS tmp,
  470. (
  471. SELECT @n := (
  472. SELECT max(ENTRY_ID)
  473. FROM journalentry)) AS parameter;""".format(self.username)
  474. insert4 = """INSERT INTO journalentry
  475. SELECT @n := @n + 1 AS id
  476. , 'main_index' AS JOURNAL_NAME
  477. , CURRENT_TIMESTAMP() - INTERVAL 2 DAY AS CREATIONDATE
  478. , 'DELETE_CHANGE_DOCUMENTS' AS TYPE
  479. , concat('com.atlassian.confluence.user.PersonalInformation-', tmp.CONTENTID) AS MESSAGE
  480. FROM (
  481. SELECT CONTENTID
  482. FROM CONTENT
  483. WHERE CONTENTTYPE = 'USERINFO' AND CONTENT.USERNAME IN (
  484. SELECT user_key
  485. FROM user_mapping
  486. WHERE username =
  487. '{}')) AS tmp,
  488. (
  489. SELECT @n := (
  490. SELECT max(ENTRY_ID)
  491. FROM journalentry)) AS parameter;""".format(self.username)
  492. insert5 = """INSERT INTO journalentry (
  493. SELECT @n := @n + 1 AS id
  494. , 'main_index' AS JOURNAL_NAME
  495. , CURRENT_TIMESTAMP() - INTERVAL 2 DAY AS CREATIONDATE
  496. , type_name AS TYPE
  497. , CASE tmp.CONTENTTYPE
  498. WHEN 'PAGE'
  499. THEN concat('com.atlassian.confluence.pages.Page-', tmp.CONTENTID)
  500. WHEN 'BLOGPOST'
  501. THEN concat('com.atlassian.confluence.pages.BlogPost-', tmp.CONTENTID)
  502. WHEN 'COMMENT'
  503. THEN concat('com.atlassian.confluence.pages.Comment-', tmp.CONTENTID)
  504. END AS message
  505. FROM (
  506. SELECT CONTENT.CONTENTID
  507. , CONTENT.CONTENTTYPE
  508. FROM CONTENT
  509. JOIN BODYCONTENT
  510. ON CONTENT.CONTENTID = BODYCONTENT.CONTENTID
  511. WHERE CONTENT.PREVVER IS NULL
  512. AND CONTENT.CONTENTTYPE IN ('PAGE', 'BLOGPOST', 'COMMENT')
  513. AND CONTENT.CONTENT_STATUS = 'current'
  514. AND BODYCONTENT.BODY LIKE concat('%<ri:user ri:userkey="', (
  515. SELECT user_key
  516. FROM user_mapping
  517. WHERE user_mapping.username = '{}'), '"%')) AS tmp
  518. JOIN (SELECT 'ADD_CHANGE_DOCUMENT' AS type_name
  519. UNION SELECT 'UPDATE_DOCUMENT' AS type_name) AS tmp_type
  520. ON (1 = 1)
  521. ,
  522. (
  523. SELECT @n := (
  524. SELECT max(ENTRY_ID)
  525. FROM journalentry)) AS parameter);""".format(self.username)
  526. # return "{}\n\n{}\n\n{}\n\n{}".format(insert1, insert2, insert3, insert4)
  527. # remove the attachment journal entry for now, since it breaks when there are more than 1 attachment (and also breaks for 0 attachment)
  528. return "{}\n\n{}\n\n{}".format(insert3, insert4, insert5)
  529. def build_insert_statements_for_postgres(self):
  530. insert1 = """INSERT INTO journalentry (entry_id, journal_name, creationdate, type, message) VALUES
  531. (nextval('seq_journal_entry_id'), 'main_index', (current_timestamp(0) - INTERVAL '2 day') :: TIMESTAMP,
  532. 'DELETE_CHANGE_DOCUMENTS', concat('com.atlassian.confluence.pages.Attachment-', (
  533. SELECT CONTENTID
  534. FROM CONTENT
  535. WHERE PAGEID IN (
  536. SELECT CONTENTID
  537. FROM CONTENT
  538. WHERE CONTENTTYPE = 'USERINFO' AND CONTENT.USERNAME IN (
  539. SELECT user_key
  540. FROM user_mapping
  541. WHERE username = '{}')))));""".format(self.username)
  542. insert2 = """INSERT INTO journalentry (entry_id, journal_name, creationdate, type, message) VALUES
  543. (nextval('seq_journal_entry_id'), 'main_index', (current_timestamp(0) - INTERVAL '2 day') :: TIMESTAMP, 'DELETE_DOCUMENT',
  544. concat('com.atlassian.confluence.pages.Attachment-', (
  545. SELECT CONTENTID
  546. FROM CONTENT
  547. WHERE PAGEID IN (
  548. SELECT CONTENTID
  549. FROM CONTENT
  550. WHERE CONTENTTYPE = 'USERINFO' AND CONTENT.USERNAME IN (
  551. SELECT user_key
  552. FROM user_mapping
  553. WHERE username = '{}')))));""".format(self.username)
  554. insert3 = """INSERT INTO journalentry
  555. SELECT nextval('seq_journal_entry_id') AS entry_id
  556. , 'main_index' AS journal_name
  557. , (current_timestamp(0) - INTERVAL '2 day') :: TIMESTAMP AS creationdate
  558. , 'DELETE_DOCUMENT' AS type
  559. , concat('com.atlassian.confluence.user.PersonalInformation-', C2.CONTENTID) AS message
  560. FROM (
  561. SELECT CONTENTID
  562. FROM CONTENT
  563. WHERE CONTENTTYPE = 'USERINFO' AND CONTENT.USERNAME IN (
  564. SELECT user_key
  565. FROM user_mapping
  566. WHERE username = '{}')) AS C2;""".format(self.username)
  567. insert4 = """INSERT INTO journalentry
  568. SELECT nextval('seq_journal_entry_id') AS entry_id
  569. , 'main_index' AS journal_name
  570. , (current_timestamp(0) - INTERVAL '2 day') :: TIMESTAMP AS creationdate
  571. , 'DELETE_CHANGE_DOCUMENTS' AS type
  572. , concat('com.atlassian.confluence.user.PersonalInformation-', C2.CONTENTID) AS message
  573. FROM (
  574. SELECT CONTENTID
  575. FROM CONTENT
  576. WHERE CONTENTTYPE = 'USERINFO' AND CONTENT.USERNAME IN (
  577. SELECT user_key
  578. FROM user_mapping
  579. WHERE username = '{}')) AS C2;""".format(self.username)
  580. insert5 = """INSERT INTO journalentry
  581. SELECT nextval('seq_journal_entry_id') AS entry_id
  582. , 'main_index' AS journal_name
  583. , (current_timestamp(0) - INTERVAL '2 day') :: TIMESTAMP AS creationdate
  584. , type_name AS type
  585. , CASE
  586. WHEN (C2.CONTENTTYPE = 'PAGE')
  587. THEN concat('com.atlassian.confluence.pages.Page-', C2.CONTENTID)
  588. WHEN (C2.CONTENTTYPE = 'BLOGPOST')
  589. THEN concat('com.atlassian.confluence.pages.BlogPost-', C2.CONTENTID)
  590. WHEN (C2.CONTENTTYPE = 'COMMENT')
  591. THEN concat('com.atlassian.confluence.pages.Comment-', C2.CONTENTID)
  592. END AS message
  593. FROM (
  594. SELECT CONTENT.CONTENTID
  595. , CONTENT.CONTENTTYPE
  596. FROM CONTENT
  597. INNER JOIN BODYCONTENT
  598. ON CONTENT.CONTENTID = BODYCONTENT.CONTENTID
  599. WHERE CONTENTTYPE IN ('PAGE', 'BLOGPOST', 'COMMENT')
  600. AND CONTENT.PREVVER IS NULL
  601. AND CONTENT.CONTENT_STATUS = 'current'
  602. AND BODYCONTENT.BODY LIKE CONCAT('%<ri:user ri:userkey="', (
  603. SELECT user_key
  604. FROM user_mapping
  605. WHERE user_mapping.username = '{}'), '"%')
  606. ) AS C2
  607. JOIN (SELECT 'ADD_CHANGE_DOCUMENT' AS type_name
  608. UNION SELECT 'UPDATE_DOCUMENT' AS type_name) AS tmp_type
  609. ON (1 = 1);""".format(self.username)
  610. # return "{}\n\n{}\n\n{}\n\n{}".format(insert1, insert2, insert3, insert4)
  611. # remove the attachment journal entry for now, since it breaks when there are more than 1 attachment (and also breaks for 0 attachment)
  612. return "{}\n\n{}\n\n{}".format(insert3, insert4, insert5)
  613. def get_sql_action(self, table_name, action):
  614. sql_action = ""
  615. action_type = action['type']
  616. if action_type == 'select':
  617. action_where = action['columns']
  618. sql_action = "{} {} from {}".format(action_type, action_where, table_name.strip())
  619. if self.database == self.db_postgresql:
  620. sql_action = '{} {} from "{}"'.format(action_type, action_where, table_name.strip())
  621. elif action_type == 'update':
  622. action_where = action['where']
  623. sql_action = "{} {} set {}".format(action_type, table_name.strip(), action_where)
  624. elif action_type == 'delete':
  625. sql_action = "{} from {}".format(action_type, table_name.strip())
  626. return sql_action
  627. def get_sql_table(self, table_name, table_description):
  628. actions = table_description[self.action]
  629. result = []
  630. final = ""
  631. for action in actions:
  632. if action['type'] == 'insert':
  633. final = self.build_insert_statements()
  634. else:
  635. action = self.get_sql_action(table_name, action)
  636. where = self.get_sql_where_clause(table_description[self.where])
  637. final = "{} where {};".format(action, where)
  638. result.append(final)
  639. result = "\n".join(result)
  640. return result
  641. if __name__ == '__main__':
  642. try:
  643. search = SearchData()
  644. search.parse_args()
  645. search.execute()
  646. except Exception as e:
  647. print("ERROR: {}".format(e))
  648. traceback.print_exc()
  649. sys.exit(1)