/reparseprops.py

https://github.com/brmson/Sentence-selection
Python | 95 lines | 71 code | 8 blank | 16 comment | 19 complexity | 1e4b3958be6c68f874c689f1581c6b1b MD5 | raw file
  1. # -*- coding: utf-8 -*-
  2. """
  3. Usage: reparseprops.py DATAPATH
  4. input=yodaqa csv outputs (properties)
  5. output=jacana formatted files for use in save.py
  6. """
  7. import os
  8. import sys
  9. import glob
  10. QPATH="data/Qtrain.txt"
  11. PPATH="data/Ptrain.txt"
  12. NPATH="data/Ntrain.txt"
  13. CPATH1="data/Clues1train.txt"
  14. CPATH0="data/Clues0train.txt"
  15. #TPATH="data/curated-test"
  16. #TQPATH="data/Qtest.txt"
  17. #TPPATH="data/Ptest.txt"
  18. #TNPATH="data/Ntest.txt"
  19. #TCPATH1="data/Clues1test.txt"
  20. #TCPATH0="data/Clues0test.txt"
  21. def notNumber(s):
  22. try:
  23. float(s)
  24. return False
  25. except ValueError:
  26. return True
  27. def reparseProps(PATH,QPATH,PPATH,NPATH,CPATH1,CPATH0):
  28. q=open(QPATH,'w')
  29. p=open(PPATH,'w')
  30. n=open(NPATH,'w')
  31. cp=open(CPATH1,'w')
  32. cn=open(CPATH0,'w')
  33. qnum=0
  34. for path in glob.glob(PATH + '/*'):
  35. i=0
  36. p.write("<A "+str(qnum)+">\n")
  37. n.write("<A "+str(qnum)+">\n")
  38. propdict=dict()
  39. propset=set()
  40. with open(path,'r') as f:
  41. for line in f:
  42. s=line.split(" ")
  43. if(s[0]!="<Q>"):
  44. s=line.split(" ")
  45. text=" ".join(s[2:]).lower()
  46. if text in propdict:
  47. if(s[0]=='1'):
  48. propdict[text]='1'
  49. continue
  50. propdict[text]=s[0]
  51. with open(path,'r') as f:
  52. for line in f:
  53. s=line.split(" ")
  54. if(s[0]=="<Q>" and i==0):
  55. q.write("<Q "+str(qnum)+">\n")
  56. q.write(" ".join(s[1:]))
  57. q.write("</Q>\n")
  58. i+=1
  59. qnum+=1
  60. continue
  61. elif(s[0]=="<Q>" and i!=0):
  62. continue
  63. if notNumber(s[0]) or notNumber(s[1]):
  64. continue
  65. # print s
  66. text=" ".join(s[2:]).lower()
  67. if text not in propset:
  68. # print text
  69. if(propdict[text]=='1'):
  70. p.write(text)
  71. cp.write(" ".join(s[1:2])+"\n")
  72. else:
  73. n.write(text)
  74. cn.write(" ".join(s[1:2])+"\n")
  75. propset.add(text)
  76. p.write("</A>\n")
  77. n.write("</A>\n")
  78. print ".",
  79. q.close()
  80. p.close()
  81. n.close()
  82. cp.close()
  83. cn.close()
  84. PATH = sys.argv[1]
  85. reparseProps(PATH,QPATH,PPATH,NPATH,CPATH1,CPATH0)
  86. #reparseProps(TPATH,TQPATH,TPPATH,TNPATH,TCPATH1,TCPATH0)