PageRenderTime 97ms CodeModel.GetById 43ms app.highlight 47ms RepoModel.GetById 1ms app.codeStats 0ms

/share/sphinx/ExtractRstFromSource.py

Relevant Search: With Applications for Solr and Elasticsearch

For more in-depth reading about search, ranking, and generally everything you could ever want to know about how Lucene, Elasticsearch, or Solr work under the hood, I highly suggest this book. It is easily one of the most interesting technical books I have read in a long time. If you are tasked with solving search relevance problems, even if not in Solr or Elasticsearch, it should be your first reference. Amazon Affiliate Link
http://github.com/imageworks/OpenColorIO
Python | 324 lines | 305 code | 9 blank | 10 comment | 4 complexity | 4b13f4d0801a870f32d832622f99e2a3 MD5 | raw file
  1#!/usr/bin/python
  2
  3"""
  4Small Script to extract reStructuredText from OCIO headers
  5    - http://sphinx.pocoo.org/rest.html
  6    - http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html
  7"""
  8
  9# TODO: extract void foo() { blah = 0 }; signatures correctly
 10# TODO: handle typedef and enums better
 11# TODO: handle OCIOEXPORT macro better
  12# TODO: handle throw() funcs better
 13
 14RUNTEST = False
 15
 16import re, sys
 17
 18single_rst_comment = r"//(?P<single_comment>(!cpp:|!rst::).*\n)"
 19block_rst_comment = r"/\*(?P<block_comment>(!cpp:|!rst::)([^*]*\*+)+?/)"
 20rst_comment_regex = re.compile(r"(%s)|(%s)" % (single_rst_comment, block_rst_comment), re.MULTILINE)
 21func_signature_regex = re.compile(r"(?P<sig_name>[^ ]*\(.*\))")
 22
 23rst_types = ["!rst::", "!cpp:class::", "!cpp:function::", "!cpp:member::",
 24             "!cpp:type::"]
 25
 26def getRstType(string):
 27    for rtype in rst_types:
 28        if string[0 : len(rtype)] == rtype:
 29            return rtype[1:]
 30    return None
 31
 32def getNextCodeLine(string, rst_type, from_pos):
 33    
 34    end = from_pos
 35    signature = ""
 36    
 37    if rst_type == "rst::":
 38        return signature, end
 39    
 40    if rst_type == "cpp:class::":
 41        
 42        class_open = False
 43        
 44        # first non-blank line that starts with 'class'
 45        found_signature = False
 46        
 47        # loop till the end of the class '};'
 48        ## skip other open/close '{' '}'
 49        skip_close = False
 50        x = end
 51        while x < len(string):
 52            if string[x] != '\n' and not found_signature:
 53                signature += string[x]
 54            if string[x] == '\n' and not found_signature:
 55                signature = signature.strip()
 56                if signature != '':
 57                    found_signature = True
 58                    signature = signature.replace("class", "")
 59                    # TODO: this seem a bit dirty
 60                    signature = signature.replace("OCIOEXPORT ", "")
 61                    signature = signature.strip()
 62                    signature = signature.split(' ', 1)[0]
 63            if string[x] == '{' and not class_open:
 64                class_open = True
 65            elif string[x] == '{' and class_open:
 66                skip_close = True
 67            elif string[x] == '}' and skip_close:
 68                skip_close = False
 69            elif string[x] == '}':
 70                end = x
 71                break
 72            x += 1
 73        return signature, end
 74    
 75    # else
 76    skip = False
 77    while string[end] != ";":
 78        if string[end] != ' ' and skip:
 79            skip = False
 80            signature += ' '
 81        if string[end] == '\n':
 82            skip = True
 83        if not skip:
 84            signature += string[end]
 85        end += 1
 86    signature += string[end]
 87    # TODO: this seem a bit dirty
 88    signature = signature.replace("OCIOEXPORT ", "")
 89    signature = signature.replace(" throw()", "")
 90    signature = signature.strip()
 91    if signature[len(signature)-1] == ';':
 92        signature = signature[:len(signature)-1]
 93    
 94    # hack hack hack
 95    if rst_type == "cpp:type::":
 96        if signature[:7] == "typedef":
 97            bits = signature.split()
 98            signature = bits[len(bits)-1]
 99        if signature[:4] == "enum":
100            bits = signature.split()
101            signature = bits[1]
102    
103    return signature, end
104
def getNextCommentLine(string, from_pos, buffer = ""):
    """Collect the run of '//' comment lines starting at *from_pos*.

    Each consecutive line whose first non-whitespace characters are '//' is
    appended to *buffer* with the slashes removed, along with one space
    directly after them if present.  The line's newline is kept.  Scanning
    stops at the first line that is not such a comment, or at the end of
    *string*.

    Parameters:
        string   -- the full source text being scanned.
        from_pos -- index of the start of the first line to examine.
        buffer   -- text to which the collected comment lines are appended.

    Returns ``(buffer, end)`` where *end* is the index of the start of the
    first line that was not consumed (``from_pos`` if nothing was consumed).
    """
    end = from_pos
    length = len(string)
    while end < length:
        newline = string.find("\n", end)
        # the last line of the input may lack a terminating newline
        line_end = length if newline == -1 else newline + 1
        stripped = string[end:line_end].lstrip()
        if not stripped.startswith("//"):
            break
        text = stripped[2:]
        if text.startswith(" "):
            text = text[1:]
        buffer += text
        end = line_end
    return buffer, end
121
class Comment:
    """A marked documentation comment found in the scanned source.

    Attributes:
        comment -- comment text, beginning with one of the ``rst_types``
                   markers (e.g. "!cpp:function:: ...").
        start   -- index in the source where the comment begins.
        end     -- index in the source where the comment ends.
    """

    def __init__(self, comment, start, end):
        self.comment = comment
        self.start = start
        self.end = end

    def getRstType(self):
        """Return the directive name for this comment (see getRstType)."""
        return getRstType(self.comment)

    def __str__(self):
        """Return the comment body formatted for the reStructuredText output.

        The leading marker is removed and the first line is stripped of
        surrounding whitespace.  An "rst::" comment is returned as-is with a
        trailing newline.  For any other type the body is made to start with
        a blank line and every line is indented by three spaces so that it
        nests under the directive line written by the caller.
        """
        buffer = self.comment
        for rtype in rst_types:
            if buffer.startswith(rtype):
                buffer = buffer[len(rtype):]
                # strip only the marker that identifies this comment
                break

        # splitlines() yields [] for an empty body; keep one (empty) line so
        # the indexing below is always valid
        buffer_lines = buffer.splitlines() or ['']
        buffer_lines[0] = buffer_lines[0].strip()

        if self.getRstType() == "rst::":
            buffer_lines.append('')
            return '\n'.join(buffer_lines)

        if buffer_lines[0] != '':
            buffer_lines.insert(0, '')
        buffer_lines = ["   %s" % line for line in buffer_lines]
        buffer_lines.append('')
        return '\n'.join(buffer_lines)
155
def ExtractRst(string, fileh):
    """Write the reStructuredText found in marked comments to *fileh*.

    Parameters:
        string -- the full text of the source file to scan.
        fileh  -- output object; only its write() and flush() methods are
                  used.

    Every comment matched by ``rst_comment_regex`` becomes a Comment.  For
    each one, in order of appearance, a ``.. <directive> <signature>`` line
    is written (nothing for "rst::" comments) followed by the formatted
    comment body and a blank line.  Signatures that fall inside the body of
    a previously seen documented class get their function part prefixed with
    ``ClassName::``.
    """
    items = []

    for item in rst_comment_regex.finditer(string):
        start, end = item.span()
        itemdict = item.groupdict()
        if itemdict["single_comment"] is not None:
            # a '//' comment continues over the '//' lines directly below it
            buf = itemdict["single_comment"]
            comment, end = getNextCommentLine(string, end)
            buf += comment
            items.append(Comment(buf, start, end))

        elif itemdict["block_comment"] is not None:
            # drop the closing '*/' captured by the regex
            buf_lines = itemdict["block_comment"][:-2].splitlines()
            # the leading spaces of the second line set the indent that is
            # removed from every line after the first
            indent = 0
            if len(buf_lines) > 1:
                indent = len(buf_lines[1]) - len(buf_lines[1].lstrip(' '))
            bufa = [buf_lines[0]] + [line[indent:] for line in buf_lines[1:]]
            buf = '\n'.join(bufa) + '\n'
            items.append(Comment(buf, start, end))

    fileh.write('\n')
    namespaces = []
    for thing in items:
        # NOTE(review): rst_type is None for a '!cpp:' marker that is not
        # listed in rst_types; such items are still written out below.
        rst_type = thing.getRstType()

        # .. cpp:function:: SomeClass::func2(const char * filename, std::istream& foo)
        #    this is some of the documentation
        #    for this function

        signature, end = getNextCodeLine(string, rst_type, thing.end)

        # if we are a class work out the beginning and end so we can
        # give function signatures the correct namespace
        if rst_type == "cpp:class::":
            namespaces.append(
                {'name': signature, 'start': thing.end, 'end': end})
            fileh.write(".. %s %s\n" % (rst_type, signature))
        elif rst_type != "rst::":
            for namespace in namespaces:
                if namespace['start'] < end < namespace['end']:
                    func = func_signature_regex.search(signature)
                    # Signatures without a 'name(args)' part (members,
                    # types) give no match and are left unqualified.
                    if func is not None:
                        funcpart = func.group("sig_name")
                        signature = signature.replace(
                            funcpart,
                            "%s::%s" % (namespace['name'], funcpart))
                    break
            fileh.write(".. %s %s\n" % (rst_type, signature))

        fileh.write(str(thing))

        fileh.write('\n')

    fileh.flush()
222
# Script entry point: convert the header named by argv[1] into an rst file
# at argv[2], or, when RUNTEST is set, print the rst generated from the
# built-in sample header below.
if __name__ == "__main__":

    if not RUNTEST:

        if len(sys.argv) <= 2:
            sys.stderr.write("\nYou need to specify an input and output file\n\n")
            sys.exit(1)

        # read the whole input before the output file is opened for writing
        with open(sys.argv[1]) as infile:
            src = infile.read()
        with open(sys.argv[2], 'w') as output:
            ExtractRst(src, output)

    else:
        testdata = """

//!rst:: -------------

// this comment should be ignored

//!rst:: foobar
// this is apart of the same
// comment

// this is also ignored

/* this is
a block comment which is
ignored */

//!cpp:class::
// this is a comment about the class
class FooBar : public std::exception
{
    ...
};

/*!cpp:class::
this is also a comment about this class
*/
   
	
class FooBar2 : public std::exception
{
    ...
};

/*!cpp:class::
this is also a comment about this class with no new line */
class FooBar3 : public std::exception
{
    ...
};

//!cpp:class::
class SomeClass
{
public:
    
    //!cpp:function::
    // this is some cool function for
    // some purpose
    //    this line is indented
    static fooPtr func1();
    
    /*!cpp:function::
    this is a much better func for some other
    purpose
          this is also indented */
    static barPtr func2();
    
    /*!cpp:function:: this func wraps over two
    lines which needs
    to be caught
    */
    static weePtr func2(const char * filename,
                        std::istream& foo);
};

//!cpp:function:: the class namespace should still get set correctly
void foobar1();

//!cpp:class:: this is some super informative
// docs
class SomeClass
{
public:
    //!cpp:function:: the class namespace should still get set correctly
    void foobar2();
};

//!cpp:function:: the class namespace should still get set correctly
void foobar3();

/*!rst:: this is a rst block
**comment which needs**
to be supported
*/

"""
        # ExtractRst requires an output handle; show the result on stdout
        ExtractRst(testdata, sys.stdout)
324