PageRenderTime 61ms CodeModel.GetById 13ms app.highlight 42ms RepoModel.GetById 1ms app.codeStats 1ms

/indra/llmessage/llmessagetemplateparser.cpp

https://bitbucket.org/lindenlab/viewer-beta/
C++ | 761 lines | 615 code | 75 blank | 71 comment | 161 complexity | cb1db1c274742306b50ff45bfe4ecfb6 MD5 | raw file
  1/** 
  2 * @file llmessagetemplateparser.cpp
  3 * @brief LLMessageTemplateParser implementation
  4 *
  5 * $LicenseInfo:firstyear=2007&license=viewerlgpl$
  6 * Second Life Viewer Source Code
  7 * Copyright (C) 2010, Linden Research, Inc.
  8 * 
  9 * This library is free software; you can redistribute it and/or
 10 * modify it under the terms of the GNU Lesser General Public
 11 * License as published by the Free Software Foundation;
 12 * version 2.1 of the License only.
 13 * 
 14 * This library is distributed in the hope that it will be useful,
 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 17 * Lesser General Public License for more details.
 18 * 
 19 * You should have received a copy of the GNU Lesser General Public
 20 * License along with this library; if not, write to the Free Software
 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 22 * 
 23 * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
 24 * $/LicenseInfo$
 25 */
 26
 27#include "linden_common.h"
 28#include "llmessagetemplateparser.h"
 29#include <boost/tokenizer.hpp>
 30
 31
 32// What follows is a bunch of C functions to do validation.
 33
// Let's support a small subset of regular expressions here.
// The syntax is a string made up of one check code per token character:
//	a	- checks against alphanumeric				([A-Za-z0-9])
//	c	- checks against character					([A-Za-z])
//	f	- checks against first variable character	([A-Za-z_])
//	v	- checks against variable					([A-Za-z0-9_])
//	s	- checks against sign or digit of integer	([-0-9])
//	d	- checks against integer digit				([0-9])
//	*	- repeat last check for all remaining characters
 43
 44// checks 'a'
 45BOOL	b_return_alphanumeric_ok(char c)
 46{
 47	if (  (  (c < 'A')
 48		   ||(c > 'Z'))
 49		&&(  (c < 'a')
 50		   ||(c > 'z'))
 51		&&(  (c < '0')
 52		   ||(c > '9')))
 53	{
 54		return FALSE;
 55	}
 56	return TRUE;
 57}
 58
 59// checks 'c'
 60BOOL	b_return_character_ok(char c)
 61{
 62	if (  (  (c < 'A')
 63		   ||(c > 'Z'))
 64		&&(  (c < 'a')
 65		   ||(c > 'z')))
 66	{
 67		return FALSE;
 68	}
 69	return TRUE;
 70}
 71
 72// checks 'f'
 73BOOL	b_return_first_variable_ok(char c)
 74{
 75	if (  (  (c < 'A')
 76		   ||(c > 'Z'))
 77		&&(  (c < 'a')
 78		   ||(c > 'z'))
 79		&&(c != '_'))
 80	{
 81		return FALSE;
 82	}
 83	return TRUE;
 84}
 85
 86// checks 'v'
 87BOOL	b_return_variable_ok(char c)
 88{
 89	if (  (  (c < 'A')
 90		   ||(c > 'Z'))
 91		&&(  (c < 'a')
 92		   ||(c > 'z'))
 93		&&(  (c < '0')
 94		   ||(c > '9'))
 95		&&(c != '_'))
 96	{
 97		return FALSE;
 98	}
 99	return TRUE;
100}
101
102// checks 's'
103BOOL	b_return_signed_integer_ok(char c)
104{
105	if (  (  (c < '0')
106		   ||(c > '9'))
107		&&(c != '-'))
108	{
109		return FALSE;
110	}
111	return TRUE;
112}
113
114// checks 'd'
115BOOL	b_return_integer_ok(char c)
116{
117	if (  (c < '0')
118		||(c > '9'))
119	{
120		return FALSE;
121	}
122	return TRUE;
123}
124
125BOOL	(*gParseCheckCharacters[])(char c) =
126{
127	b_return_alphanumeric_ok,
128	b_return_character_ok,
129	b_return_first_variable_ok,
130	b_return_variable_ok,
131	b_return_signed_integer_ok,
132	b_return_integer_ok
133};
134
135S32 get_checker_number(char checker)
136{
137	switch(checker)
138	{
139	case 'a':
140		return 0;
141	case 'c':
142		return 1;
143	case 'f':
144		return 2;
145	case 'v':
146		return 3;
147	case 's':
148		return 4;
149	case 'd':
150		return 5;
151	case '*':
152		return 9999;
153	default:
154		return -1;
155	}
156}
157
158// check token based on passed simplified regular expression
159BOOL	b_check_token(const char *token, const char *regexp)
160{
161	S32 tptr, rptr = 0;
162	S32 current_checker, next_checker = 0;
163
164	current_checker = get_checker_number(regexp[rptr++]);
165
166	if (current_checker == -1)
167	{
168		llerrs << "Invalid regular expression value!" << llendl;
169		return FALSE;
170	}
171
172	if (current_checker == 9999)
173	{
174		llerrs << "Regular expression can't start with *!" << llendl;
175		return FALSE;
176	}
177
178	for (tptr = 0; token[tptr]; tptr++)
179	{
180		if (current_checker == -1)
181		{
182			llerrs << "Input exceeds regular expression!\nDid you forget a *?" << llendl;
183			return FALSE;
184		}
185
186		if (!gParseCheckCharacters[current_checker](token[tptr]))
187		{
188			return FALSE;
189		}
190		if (next_checker != 9999)
191		{
192			next_checker = get_checker_number(regexp[rptr++]);
193			if (next_checker != 9999)
194			{
195				current_checker = next_checker;
196			}
197		}
198	}
199	return TRUE;
200}
201
202// C variable can be made up of upper or lower case letters, underscores, or numbers, but can't start with a number
203BOOL	b_variable_ok(const char *token)
204{
205	if (!b_check_token(token, "fv*"))
206	{
207		llwarns << "Token '" << token << "' isn't a variable!" << llendl;
208		return FALSE;
209	}
210	return TRUE;
211}
212
213// An integer is made up of the digits 0-9 and may be preceded by a '-'
214BOOL	b_integer_ok(const char *token)
215{
216	if (!b_check_token(token, "sd*"))
217	{
218		llwarns << "Token isn't an integer!" << llendl;
219		return FALSE;
220	}
221	return TRUE;
222}
223
224// An integer is made up of the digits 0-9
225BOOL	b_positive_integer_ok(const char *token)
226{
227	if (!b_check_token(token, "d*"))
228	{
229		llwarns << "Token isn't an integer!" << llendl;
230		return FALSE;
231	}
232	return TRUE;
233}
234
235
236// Done with C functions, here's the tokenizer.
237
238typedef boost::tokenizer< boost::char_separator<char> > tokenizer;	
239
240LLTemplateTokenizer::LLTemplateTokenizer(const std::string & contents) : mStarted(false), mTokens()
241{
242	boost::char_separator<char> newline("\r\n", "", boost::keep_empty_tokens);
243	boost::char_separator<char> spaces(" \t");
244	U32 line_counter = 1;
245	
246	tokenizer line_tokens(contents, newline);
247	for(tokenizer::iterator line_iter = line_tokens.begin();
248		line_iter != line_tokens.end();
249		++line_iter, ++line_counter)
250	{
251		tokenizer word_tokens(*line_iter, spaces);
252		for(tokenizer::iterator word_iter = word_tokens.begin();
253			word_iter != word_tokens.end();
254			++word_iter)
255		{
256			if((*word_iter)[0] == '/')
257			{
258				break;   // skip to end of line on comments
259			}
260			positioned_token pt;// = new positioned_token();
261			pt.str = std::string(*word_iter);
262			pt.line = line_counter;
263			mTokens.push_back(pt);
264		}
265	}
266	mCurrent = mTokens.begin();
267}
268void LLTemplateTokenizer::inc()
269{
270	if(atEOF())
271	{
272		error("trying to increment token of EOF");
273	}
274	else if(mStarted)
275	{
276		++mCurrent;
277	}
278	else
279	{
280		mStarted = true;
281		mCurrent = mTokens.begin();
282	}
283}
284void LLTemplateTokenizer::dec()
285{
286	if(mCurrent == mTokens.begin())
287	{
288		if(mStarted)
289		{
290			mStarted = false;
291		}
292		else
293		{
294			error("trying to decrement past beginning of file");
295		}
296	}
297	else
298	{
299		mCurrent--;
300	}
301}
302
303std::string LLTemplateTokenizer::get() const
304{
305	if(atEOF())
306	{
307		error("trying to get EOF");
308	}
309	return mCurrent->str;
310}
311
312U32 LLTemplateTokenizer::line() const
313{
314	if(atEOF())
315	{
316		return 0;
317	}
318	return mCurrent->line;
319}
320
321bool LLTemplateTokenizer::atEOF() const
322{
323	return mCurrent == mTokens.end();
324}
325
326std::string LLTemplateTokenizer::next()
327{
328	inc();
329	return get();
330}
331
332bool LLTemplateTokenizer::want(const std::string & token)
333{
334	if(atEOF()) return false;
335	inc();
336	if(atEOF()) return false;
337	if(get() != token)
338	{
339		dec(); // back up a step
340		return false;
341	}
342	return true;
343}
344
345bool LLTemplateTokenizer::wantEOF()
346{
347	// see if the next token is EOF
348	if(atEOF()) return true;
349	inc();
350	if(!atEOF())
351	{
352		dec(); // back up a step
353		return false;
354	}
355	return true;
356}
357
358void LLTemplateTokenizer::error(std::string message) const
359{
360	if(atEOF())
361	{
362		llerrs << "Unexpected end of file: " << message << llendl;
363	}
364	else
365	{
366		llerrs << "Problem parsing message template at line "
367			   << line() << ", with token '" << get() << "' : "
368			   << message << llendl;
369	}
370}
371
372
373// Done with tokenizer, next is the parser.
374
375LLTemplateParser::LLTemplateParser(LLTemplateTokenizer & tokens):
376	mVersion(0.f),
377	mMessages()
378{
379	// the version number should be the first thing in the file
380	if (tokens.want("version"))
381	{
382		// version number
383		std::string vers_string = tokens.next();
384		mVersion = (F32)atof(vers_string.c_str());
385		
386		llinfos << "### Message template version " << mVersion << "  ###" << llendl;
387	}
388	else
389	{
390		llerrs << "Version must be first in the message template, found "
391			   << tokens.next() << llendl;
392	}
393
394	while(LLMessageTemplate * templatep = parseMessage(tokens))
395	{
396		if (templatep->getDeprecation() != MD_DEPRECATED)
397		{
398			mMessages.push_back(templatep);
399		}
400		else
401		{
402			delete templatep;
403		}
404	}
405
406	if(!tokens.wantEOF())
407	{
408		llerrs << "Expected end of template or a message, instead found: "
409			   << tokens.next() << " at " << tokens.line() << llendl;
410	}
411}
412
413F32 LLTemplateParser::getVersion() const
414{
415	return mVersion;
416}
417
418LLTemplateParser::message_iterator LLTemplateParser::getMessagesBegin() const
419{
420	return mMessages.begin();
421}
422
423LLTemplateParser::message_iterator LLTemplateParser::getMessagesEnd() const
424{
425	return mMessages.end();
426}
427
428
429// static
430LLMessageTemplate * LLTemplateParser::parseMessage(LLTemplateTokenizer & tokens)
431{
432	LLMessageTemplate	*templatep = NULL;
433	if(!tokens.want("{"))
434	{
435		return NULL;
436	}
437
438	// name first
439	std::string template_name = tokens.next();
440	
441	// is name a legit C variable name
442	if (!b_variable_ok(template_name.c_str()))
443	{
444		llerrs << "Not legit variable name: " << template_name << " at " << tokens.line() << llendl;
445	}
446
447	// ok, now get Frequency ("High", "Medium", or "Low")
448	EMsgFrequency frequency = MFT_LOW;
449	std::string freq_string = tokens.next();
450	if (freq_string == "High")
451	{
452		frequency = MFT_HIGH;
453	}
454	else if (freq_string == "Medium")
455	{
456		frequency = MFT_MEDIUM;
457	}
458	else if (freq_string == "Low" || freq_string == "Fixed")
459	{
460		frequency = MFT_LOW;
461	}
462	else
463	{
464		llerrs << "Expected frequency, got " << freq_string << " at " << tokens.line() << llendl;
465	}
466
467	// TODO more explicit checking here pls
468	U32 message_number = strtoul(tokens.next().c_str(),NULL,0);
469
470	switch (frequency) {
471	case MFT_HIGH:
472		break;
473	case MFT_MEDIUM:
474		message_number = (255 << 8) | message_number;
475		break;
476	case MFT_LOW:
477		message_number = (255 << 24) | (255 << 16) | message_number;
478		break;
479	default:
480		llerrs << "Unknown frequency enum: " << frequency << llendl;
481	}
482   
483	templatep = new LLMessageTemplate(
484		template_name.c_str(),
485		message_number,
486		frequency);
487		
488	// Now get trust ("Trusted", "NotTrusted")
489	std::string trust = tokens.next();
490	if (trust == "Trusted")
491	{
492		templatep->setTrust(MT_TRUST);
493	}
494	else if (trust == "NotTrusted")
495	{
496		templatep->setTrust(MT_NOTRUST);
497	}
498	else
499	{
500		llerrs << "Bad trust " << trust << " at " << tokens.line() << llendl;
501	}
502	
503	// get encoding
504	std::string encoding = tokens.next();
505	if(encoding == "Unencoded")
506	{
507		templatep->setEncoding(ME_UNENCODED);
508	}
509	else if(encoding == "Zerocoded")
510	{
511		templatep->setEncoding(ME_ZEROCODED);
512	}
513	else
514	{
515		llerrs << "Bad encoding " << encoding << " at " << tokens.line() << llendl;
516	}
517
518	// get deprecation
519	if(tokens.want("Deprecated"))
520	{
521		templatep->setDeprecation(MD_DEPRECATED);
522	}
523	else if (tokens.want("UDPDeprecated"))
524	{
525		templatep->setDeprecation(MD_UDPDEPRECATED);
526	}
527	else if (tokens.want("UDPBlackListed"))
528	{
529		templatep->setDeprecation(MD_UDPBLACKLISTED);
530	}
531	else if (tokens.want("NotDeprecated"))
532	{
533		// this is the default value, but it can't hurt to set it twice
534		templatep->setDeprecation(MD_NOTDEPRECATED);
535	}
536	else {
537		// It's probably a brace, let's just start block processing
538	}
539
540	while(LLMessageBlock * blockp = parseBlock(tokens))
541	{
542		templatep->addBlock(blockp);
543	}
544	
545	if(!tokens.want("}"))
546	{
547		llerrs << "Expecting closing } for message " << template_name
548			   << " at " << tokens.line() << llendl;
549	}
550	return templatep;
551}
552
553// static
554LLMessageBlock * LLTemplateParser::parseBlock(LLTemplateTokenizer & tokens)
555{
556	LLMessageBlock * blockp = NULL;
557
558	if(!tokens.want("{"))
559	{
560		return NULL;
561	}
562
563	// name first
564	std::string block_name = tokens.next();
565
566	// is name a legit C variable name
567	if (!b_variable_ok(block_name.c_str()))
568	{
569		llerrs << "not a legal block name: " << block_name
570			   << " at " << tokens.line() << llendl;
571	}
572
573	// now, block type ("Single", "Multiple", or "Variable")
574	std::string block_type = tokens.next();
575	// which one is it?
576	if (block_type == "Single")
577	{
578		// ok, we can create a block
579		blockp = new LLMessageBlock(block_name.c_str(), MBT_SINGLE);
580	}
581	else if (block_type == "Multiple")
582	{
583		// need to get the number of repeats
584		std::string repeats = tokens.next();
585		
586		// is it a legal integer
587		if (!b_positive_integer_ok(repeats.c_str()))
588		{
589			llerrs << "not a legal integer for block multiple count: "
590				   << repeats << " at " << tokens.line() << llendl;
591		}
592		
593		// ok, we can create a block
594		blockp = new LLMessageBlock(block_name.c_str(),
595									MBT_MULTIPLE,
596									atoi(repeats.c_str()));
597	}
598	else if (block_type == "Variable")
599	{
600		// ok, we can create a block
601		blockp = new LLMessageBlock(block_name.c_str(), MBT_VARIABLE);
602	}
603	else
604	{
605		llerrs << "bad block type: " << block_type
606			   << " at " << tokens.line() << llendl;
607	}
608
609
610	while(LLMessageVariable * varp = parseVariable(tokens))
611	{
612		blockp->addVariable(varp->getName(),
613							varp->getType(),
614							varp->getSize());
615		delete varp;
616	}
617
618	if(!tokens.want("}"))
619	{
620		llerrs << "Expecting closing } for block " << block_name
621			   << " at " << tokens.line() << llendl;
622	}
623	return blockp;
624   
625}
626
627// static
628LLMessageVariable * LLTemplateParser::parseVariable(LLTemplateTokenizer & tokens)
629{
630	LLMessageVariable * varp = NULL;
631	if(!tokens.want("{"))
632	{
633		return NULL;
634	}
635
636	std::string var_name = tokens.next();
637
638	if (!b_variable_ok(var_name.c_str()))
639	{
640		llerrs << "Not a legit variable name: " << var_name
641			   << " at " << tokens.line() << llendl;
642	}
643
644	std::string var_type = tokens.next();
645
646	if (var_type == "U8")
647	{
648		varp = new LLMessageVariable(var_name.c_str(), MVT_U8, 1);					
649	}
650	else if (var_type == "U16")
651	{
652		varp = new LLMessageVariable(var_name.c_str(), MVT_U16, 2);					
653	}
654	else if (var_type == "U32")
655	{
656		varp = new LLMessageVariable(var_name.c_str(), MVT_U32, 4);					
657	}
658	else if (var_type == "U64")
659	{
660		varp = new LLMessageVariable(var_name.c_str(), MVT_U64, 8);					
661	}
662	else if (var_type == "S8")
663	{
664		varp = new LLMessageVariable(var_name.c_str(), MVT_S8, 1);					
665	}
666	else if (var_type == "S16")
667	{
668		varp = new LLMessageVariable(var_name.c_str(), MVT_S16, 2);					
669	}
670	else if (var_type == "S32")
671	{
672		varp = new LLMessageVariable(var_name.c_str(), MVT_S32, 4);					
673	}
674	else if (var_type == "S64")
675	{
676		varp = new LLMessageVariable(var_name.c_str(), MVT_S64, 8);					
677	}
678	else if (var_type == "F32")
679	{
680		varp = new LLMessageVariable(var_name.c_str(), MVT_F32, 4);					
681	}
682	else if (var_type == "F64")
683	{
684		varp = new LLMessageVariable(var_name.c_str(), MVT_F64, 8);					
685	}
686	else if (var_type == "LLVector3")
687	{
688		varp = new LLMessageVariable(var_name.c_str(), MVT_LLVector3, 12);					
689	}
690	else if (var_type == "LLVector3d")
691	{
692		varp = new LLMessageVariable(var_name.c_str(), MVT_LLVector3d, 24);
693	}
694	else if (var_type == "LLVector4")
695	{
696		varp = new LLMessageVariable(var_name.c_str(), MVT_LLVector4, 16);					
697	}
698	else if (var_type == "LLQuaternion")
699	{
700		varp = new LLMessageVariable(var_name.c_str(), MVT_LLQuaternion, 12);
701	}
702	else if (var_type == "LLUUID")
703	{
704		varp = new LLMessageVariable(var_name.c_str(), MVT_LLUUID, 16);					
705	}
706	else if (var_type == "BOOL")
707	{
708		varp = new LLMessageVariable(var_name.c_str(), MVT_BOOL, 1);					
709	}
710	else if (var_type == "IPADDR")
711	{
712		varp = new LLMessageVariable(var_name.c_str(), MVT_IP_ADDR, 4);					
713	}
714	else if (var_type == "IPPORT")
715	{
716		varp = new LLMessageVariable(var_name.c_str(), MVT_IP_PORT, 2);
717	}
718	else if (var_type == "Fixed" || var_type == "Variable")
719	{
720		std::string variable_size = tokens.next();
721		
722		if (!b_positive_integer_ok(variable_size.c_str()))
723		{
724			llerrs << "not a legal integer variable size: " << variable_size
725				   << " at " << tokens.line() << llendl;
726		}
727
728		EMsgVariableType type_enum;
729		if(var_type == "Variable")
730		{
731			type_enum = MVT_VARIABLE;
732		}
733		else if(var_type == "Fixed")
734		{
735			type_enum = MVT_FIXED;
736		}
737		else
738		{
739			type_enum = MVT_FIXED; // removes a warning
740			llerrs << "bad variable type: " << var_type
741				   << " at " << tokens.line() << llendl;
742		}
743
744		varp = new LLMessageVariable(
745			var_name.c_str(),
746			type_enum,
747			atoi(variable_size.c_str()));
748	}
749	else
750	{
751		llerrs << "bad variable type:" << var_type
752			   << " at " << tokens.line() << llendl;
753	}
754
755	if(!tokens.want("}"))
756	{
757		llerrs << "Expecting closing } for variable " << var_name
758			   << " at " << tokens.line() << llendl;
759	}
760	return varp;
761}