PageRenderTime 68ms CodeModel.GetById 38ms RepoModel.GetById 0ms app.codeStats 1ms

/tags/LaTeXML-0.3.2/lib/LaTeXML/Token.pm

https://github.com/vismor/LaTeXML
Perl | 426 lines | 183 code | 50 blank | 193 comment | 36 complexity | c9b9c3b6038e2fa62d0e9ff8cdec29ca MD5 | raw file
  1. # /=====================================================================\ #
  2. # | LaTeXML::Token, LaTeXML::Tokens | #
  3. # | Representation of Token(s) | #
  4. # |=====================================================================| #
  5. # | Part of LaTeXML: | #
  6. # | Public domain software, produced as part of work done by the | #
  7. # | United States Government & not subject to copyright in the US. | #
  8. # |---------------------------------------------------------------------| #
  9. # | Bruce Miller <bruce.miller@nist.gov> #_# | #
  10. # | http://dlmf.nist.gov/LaTeXML/ (o o) | #
  11. # \=========================================================ooo==U==ooo=/ #
  12. #**********************************************************************
  13. # A Token represented as a pair: [string,catcode]
  14. # string is a character or control sequence.
  15. # Yes, a bit inefficient, but code is clearer...
  16. #**********************************************************************
  17. package LaTeXML::Token;
  18. use strict;
  19. use LaTeXML::Global;
  20. use LaTeXML::Object;
  21. our @ISA = qw(LaTeXML::Object);
  22. #======================================================================
  23. # See LaTeXML::Global for constructors.
  24. #======================================================================
  # Per-catcode tables, each indexed by catcode number (0 .. 17).
  #
  # @primitive_catcode flags the catcodes for which the catcode alone
  # identifies a token: for those, the token's string is ignored when
  # tokens are compared or when their CS name is computed.
  our @primitive_catcode = (
      1, 1, 1, 1,    #  0- 3: Escape, Begin, End, Math
      1, 1, 1, 1,    #  4- 7: Align, EOL, Parameter, Superscript
      1, 0, 1, 0,    #  8-11: Subscript, Ignore, Space, Letter
      0, 0, 0, 0,    # 12-15: Other, Active, Comment, Invalid
      0, 1,          # 16-17: ControlSequence, NotExpanded
  );

  # Character used by untex to display a token of each catcode
  # (undef where the token's own string is shown instead).
  my @standardchar = (
      "\\",  '{',   '}',   '$',
      '&',   "\n",  '#',   '^',
      '_',   undef, undef, undef,
      undef, undef, '%',   undef,
  );

  # Long catcode names; getCSName returns these for primitive catcodes.
  our @CC_NAME = qw(
      Escape Begin End Math
      Align EOL Parameter Superscript
      Subscript Ignore Space Letter
      Other Active Comment Invalid
      ControlSequence NotExpanded
  );

  # Short catcode names; used by stringify for error reporting.
  our @CC_SHORT_NAME = qw(
      T_ESCAPE T_BEGIN  T_END     T_MATH
      T_ALIGN  T_EOL    T_PARAM   T_SUPER
      T_SUB    T_IGNORE T_SPACE   T_LETTER
      T_OTHER  T_ACTIVE T_COMMENT T_INVALID
      T_CS     T_NOTEXPANDED
  );
  46. #======================================================================
  47. # Accessors.
  # Type predicate: always true for a Token.
  sub isaToken {
      return 1;
  }
  # Return the name under which definitions for this token are stored.
  # For the primitive catcodes this is the catcode's long name, so that
  # all the `different' tokens of such a catcode (eg. BEGIN) share one
  # name; for any other token it is the token's own string.
  sub getCSName {
      my ($token) = @_;
      my ($string, $catcode) = @$token;
      return $CC_NAME[$catcode] if $primitive_catcode[$catcode];
      return $string;
  }
  # Return the string (character or control sequence) part of the token.
  sub getString {
      my ($self) = @_;
      return $self->[0];
  }
  # Return the character code of the token's character part,
  # or 256 when the token is a control sequence.
  sub getCharcode {
      my ($self) = @_;
      return 256 if $self->[1] == CC_CS;
      return ord($self->[0]);
  }
  # Return the category code of the token.
  sub getCatcode {
      my ($self) = @_;
      return $self->[1];
  }
  # Look up the current meaning of this token in $STATE and return it,
  # but only when that meaning is a definition; otherwise return undef.
  sub getDefinition {
      my ($self) = @_;
      my $meaning = $STATE->lookupMeaning($self);
      return $meaning if defined $meaning && $meaning->isaDefinition;
      return undef;
  }
  # Return the token itself as a one-element list, so that a Token and
  # a Tokens can be used interchangeably.
  sub unlist {
      my ($self) = @_;
      return ($self);
  }
  # A bare token carries no source location: always the empty string.
  sub getLocator {
      return '';
  }
  67. #======================================================================
  # Return a `user readable' form of the token: the standard character
  # for the token's catcode when one is defined, otherwise the token's
  # own string.  Because the actual character may be replaced, use the
  # string (not untex) wherever the real character is needed internally.
  sub untex {
      my ($self) = @_;
      my $standard = $standardchar[ $$self[1] ];
      return $standard ? $standard : $$self[0];
  }
  # Return the token's string, exactly as stored.
  sub toString {
      my ($self) = @_;
      return $self->[0];
  }
  # Convenience, for a CS token: return whatever the token's definition
  # produces as the invocation of that definition on @args.
  # The token must currently have a definition; with none, the method
  # call on the undefined value dies.
  sub invocation {
      my ($self, @args) = @_;
      my $definition = $self->getDefinition;
      return $definition->invocation(@args);
  }
  80. #======================================================================
  # Methods for overloaded ops.
  # Compare two tokens.  They are equal when they are objects of the same
  # class with the same catcode, and either that catcode is one of the
  # primitive ones or their strings are equal.
  sub equals {
      my ($self, $other) = @_;
      my $comparable = defined $other && ref($self) eq ref($other);
      return $comparable unless $comparable;
      my $same_catcode = $$self[1] eq $$other[1];
      return $same_catcode unless $same_catcode;
      return $primitive_catcode[ $$self[1] ] || ($$self[0] eq $$other[0]);
  }
  # Primarily for error reporting: the short catcode name followed by
  # the token's string in square brackets.
  sub stringify {
      my ($self) = @_;
      my ($string, $catcode) = @$self;
      return $CC_SHORT_NAME[$catcode] . '[' . $string . ']';
  }
  95. #**********************************************************************
  96. # LaTeXML::Tokens
  97. # A blessed reference to a list of LaTeXML::Token's
  98. # It implements the core API of Mouth, as if pre-tokenized.
  99. #**********************************************************************
  100. package LaTeXML::Tokens;
  101. use strict;
  102. use LaTeXML::Global;
  103. use LaTeXML::Object;
  104. our @ISA = qw(LaTeXML::Object);
  # Construct a Tokens holding the given tokens, in order.
  sub new {
      my $class = shift;
      my @list  = @_;
      return bless \@list, $class;
  }
  # Return a list of the tokens making up this Tokens.
  sub unlist {
      my ($self) = @_;
      return @$self;
  }
  # Return a shallow copy of the Tokens: a new list object of the same
  # class holding the same token objects.
  sub clone {
      my ($self) = @_;
      my @copy = @$self;
      return bless \@copy, ref($self);
  }
  # Concatenate the toString forms of all the tokens, with no separator.
  sub toString {
      my ($self) = @_;
      return join('', map { $_->toString } @$self);
  }
  # Return a string containing the TeX form of the Tokens.
  # Comment tokens are skipped.  A space is inserted between a piece that
  # starts with a backslash and a following piece that starts with a word
  # character, so the two do not run together.
  sub untex {
      my ($self) = @_;
      my $string  = '';
      my $prevmac = 0;    # true when the previous piece began with a backslash
      foreach my $token (@$self) {
          next if $token->getCatcode == CC_COMMENT;
          my $s = $token->untex();
          # Skip only genuinely empty pieces.  A plain truth test would also
          # skip the "0" character and leave $prevmac stale, which put a
          # spurious space before the next word character.
          next unless defined $s && length $s;
          $string .= ' ' if $prevmac && ($s =~ /^\w/);
          $string .= $s;
          $prevmac = ($s =~ /^\\/);
      }
      return $string;
  }
  # Methods for overloaded ops.
  # Two Tokens are equal when they are of the same class, have the same
  # length, and each corresponding pair of tokens is equal.
  sub equals {
      my ($self, $other) = @_;
      return 0 unless defined $other && ref($self) eq ref($other);
      my $i = 0;
      # Advance over the common prefix of pairwise-equal tokens.
      $i++ while $i < @$self && $i < @$other && $self->[$i]->equals($other->[$i]);
      # Equal only if both lists were consumed completely.
      return !($i < @$self || $i < @$other);
  }
  # Debugging form: the toString of each token, comma separated,
  # wrapped in "Tokens[...]".
  sub stringify {
      my ($self) = @_;
      my @pieces = map { $_->toString } @$self;
      return 'Tokens[' . join(',', @pieces) . ']';
  }
  139. #======================================================================
  140. # The following implements the Mouth API, so that a Token list can
  141. # act as a pre-tokenized source of tokens.
  # Mouth API: nothing to clean up for a token list.
  sub finish {
      return;
  }
  # Mouth API: return the number of tokens still unread
  # (so the result is false exactly when the list is exhausted).
  sub hasMoreInput {
      my ($self) = @_;
      my $remaining = @$self;
      return $remaining;
  }
  # Mouth API: remove and return the next token; return nothing
  # (undef in scalar context) once the list is empty.
  sub readToken {
      my ($self) = @_;
      return @$self ? shift(@$self) : ();
  }
  # A token list carries no source location: always the empty string.
  sub getLocator {
      return '';
  }
  151. #**********************************************************************
  152. package LaTeXML::Number;
  153. use LaTeXML::Global;
  154. use LaTeXML::Object;
  155. our @ISA = qw(LaTeXML::Object);
  156. use strict;
  # Construct a Number; a missing or false value is stored as "0".
  sub new {
      my ($class, $number) = @_;
      my $value = $number ? $number : "0";
      return bless [$value], $class;
  }
  # Return the stored numeric value.
  sub valueOf {
      my ($self) = @_;
      return $self->[0];
  }
  # Return the stored value as its string form.
  sub toString {
      my ($self) = @_;
      return $self->[0];
  }
  # Return the TeX form: the toString form followed by \relax.
  sub untex {
      my ($self) = @_;
      return $self->toString . '\relax';
  }
  # Return the object itself.
  sub unlist {
      my ($self) = @_;
      return $self;
  }
  # Return a new object of the same class holding the negated value.
  sub negate {
      my ($self) = @_;
      my $class = ref $self;
      return $class->new(-$self->valueOf);
  }
  # Return a new object, of the class of the first operand, holding the
  # sum of the two operands' values.
  sub add {
      my ($self, $other) = @_;
      my $sum = $self->valueOf + $other->valueOf;
      return ref($self)->new($sum);
  }
  # Return a new object of the same class holding the value scaled by
  # $factor.  Note that the second argument is a plain number, not an object.
  sub multiply {
      my ($self, $factor) = @_;
      my $product = $self->valueOf * $factor;
      return ref($self)->new($product);
  }
  # Debugging form: the stored value wrapped in "Number[...]".
  sub stringify {
      my ($self) = @_;
      return 'Number[' . $self->[0] . ']';
  }
  169. #**********************************************************************
  170. package LaTeXML::Dimension;
  171. use LaTeXML::Global;
  172. our @ISA=qw(LaTeXML::Number);
  173. use strict;
  # Construct a Dimension.
  #   $sp : either a plain number, stored as given, or a string made of
  #         an optionally signed decimal number immediately followed by
  #         a two-letter unit (eg. "1.5pt", "-2em"), which is converted
  #         by multiplying with $STATE->convertUnit(unit).
  # A missing or false value is stored as "0".
  sub new {
      my ($class, $sp) = @_;
      $sp = "0" unless $sp;
      # The sign is optional; previously a signed value such as "-1pt"
      # failed to match and was stored as an unconverted string.
      if ($sp =~ /^([-+]?\d*\.?\d*)([a-zA-Z][a-zA-Z])$/) {    # Dimensions given.
          my ($number, $unit) = ($1, $2);
          $sp = $number * $STATE->convertUnit($unit);
      }
      return bless [$sp || "0"], $class;
  }
  # Return the dimension in points: the stored value divided by 65536,
  # followed by "pt".
  sub toString {
      my ($self) = @_;
      my $points = $self->[0] / 65536;
      return $points . 'pt';
  }
  # Debugging form: the raw stored value wrapped in "Dimension[...]".
  sub stringify {
      my ($self) = @_;
      return 'Dimension[' . $self->[0] . ']';
  }
  182. #**********************************************************************
  183. package LaTeXML::MuDimension;
  184. use LaTeXML::Global;
  185. our @ISA=qw(LaTeXML::Dimension);
  # Debugging form: the raw stored value wrapped in "MuDimension[...]".
  sub stringify {
      my ($self) = @_;
      return 'MuDimension[' . $self->[0] . ']';
  }
  187. #**********************************************************************
  188. package LaTeXML::Glue;
  189. use LaTeXML::Global;
  190. our @ISA=qw(LaTeXML::Dimension);
  191. use strict;
  # %fillcode maps a fill unit to its numeric order; @FILL is the inverse
  # mapping (index 0, the empty string, is used for finite glue).
  our %fillcode = (fil => 1, fill => 2, filll => 3);
  our @FILL = ('', 'fil', 'fill', 'filll');

  # Construct a Glue.
  #   $class->new($sp, $plus, $pfill, $minus, $mfill)
  #       stores the five components as given (false values become 0);
  #       $pfill and $mfill are numeric fill orders (indices into @FILL).
  #   $class->new($string)
  #       when none of the other components is defined, $string may be a
  #       plain unsigned number (stored as given) or a glue specification
  #       "<number><unit>[ plus <number><unit>][ minus <number><unit>]",
  #       where the plus/minus units may also be fil, fill or filll.
  #       Finite units are converted with $STATE->convertUnit.
  sub new {
      my ($class, $sp, $plus, $pfill, $minus, $mfill) = @_;
      my $components_given
          = defined $plus || defined $pfill || defined $minus || defined $mfill;
      if (!$components_given && defined $sp && $sp !~ /^\d*\.?\d*$/) {
          # The original pattern had an unterminated character class in the
          # plus clause, so the minus clause was never recognized; it also
          # required the plus clause.  Both clauses are optional here.
          if ($sp =~ /^ (\d*\.?\d*) (\w\w)
                        (?: \s+plus  (\d*\.?\d*) (filll|fill|fil|[a-zA-Z][a-zA-Z]) )?
                        (?: \s+minus (\d*\.?\d*) (filll|fill|fil|[a-zA-Z][a-zA-Z]) )?
                      $/x) {
              # Copy the captures before calling out to $STATE.
              my ($f, $u, $p, $pu, $m, $mu) = ($1, $2, $3, $4, $5, $6);
              $sp = $f * $STATE->convertUnit($u);
              if ($pu) {
                  if ($fillcode{$pu}) {
                      # Store the numeric fill order, which is what toString
                      # and add expect, rather than the unit string.
                      $plus  = $p;
                      $pfill = $fillcode{$pu};
                  }
                  else {
                      $plus  = $p * $STATE->convertUnit($pu);
                      $pfill = 0;
                  }
              }
              if ($mu) {
                  if ($fillcode{$mu}) {
                      $minus = $m;
                      $mfill = $fillcode{$mu};
                  }
                  else {
                      $minus = $m * $STATE->convertUnit($mu);
                      $mfill = 0;
                  }
              }
          }
      }
      return bless [$sp || "0", $plus || "0", $pfill || 0, $minus || "0", $mfill || 0], $class;
  }
  209. #sub getStretch { $_[0]->[1]; }
  210. #sub getShrink { $_[0]->[2]; }
  # Return the glue as a string: the natural size in points, followed by
  # " plus ..." and " minus ..." parts when those components are non-zero.
  # A component with a fill order is shown as its value followed by the
  # fill unit; a finite component is shown in points (value/65536).
  sub toString {
      my ($self) = @_;
      my ($sp, $plus, $pfill, $minus, $mfill) = @$self;
      my $text = ($sp / 65536) . "pt";
      if ($plus != 0) {
          my $stretch = $pfill ? $plus . $FILL[$pfill] : ($plus / 65536) . 'pt';
          $text .= ' plus ' . $stretch;
      }
      if ($minus != 0) {
          my $shrink = $mfill ? $minus . $FILL[$mfill] : ($minus / 65536) . 'pt';
          $text .= ' minus ' . $shrink;
      }
      return $text;
  }
  # Return a new object of the same class with the natural size, stretch
  # and shrink negated; the fill orders are carried over unchanged.
  sub negate {
      my ($self) = @_;
      my ($size, $stretch, $stretch_fill, $shrink, $shrink_fill) = @$self;
      my $class = ref $self;
      return $class->new(-$size, -$stretch, $stretch_fill, -$shrink, $shrink_fill);
  }
  # Return a new object, of the class of $self, representing $self + $other.
  # When $other is a Glue (or a subclass such as MuGlue) the natural sizes
  # are added and the stretch and shrink are combined: components of equal
  # fill order are added, otherwise the one of higher order wins.
  # For any other object only its valueOf is added to the natural size.
  sub add {
      my ($self, $other) = @_;
      my ($pts, $p, $pf, $m, $mf) = @$self;
      # isa, not an exact class-name comparison: comparing ref() against
      # 'LaTeXML::Glue' failed for subclasses and silently dropped the
      # other operand's stretch and shrink.
      if ($other->isa('LaTeXML::Glue')) {
          my ($pts2, $p2, $pf2, $m2, $mf2) = @$other;
          $pts += $pts2;
          if    ($pf == $pf2) { $p += $p2; }
          elsif ($pf < $pf2)  { $p = $p2; $pf = $pf2; }
          if    ($mf == $mf2) { $m += $m2; }
          elsif ($mf < $mf2)  { $m = $m2; $mf = $mf2; }
          return (ref $self)->new($pts, $p, $pf, $m, $mf);
      }
      return (ref $self)->new($pts + $other->valueOf, $p, $pf, $m, $mf);
  }
  # Return a new object of the same class with the natural size, stretch
  # and shrink each scaled by $other (a plain number); the fill orders
  # are carried over unchanged.
  sub multiply {
      my ($self, $other) = @_;
      my ($size, $stretch, $stretch_fill, $shrink, $shrink_fill) = @$self;
      my $class = ref $self;
      return $class->new($size * $other, $stretch * $other, $stretch_fill,
          $shrink * $other, $shrink_fill);
  }
  # Debugging form: all stored components, comma separated, in "Glue[...]".
  sub stringify {
      my ($self) = @_;
      return 'Glue[' . join(',', @$self) . ']';
  }
  239. #**********************************************************************
  240. package LaTeXML::MuGlue;
  241. use LaTeXML::Global;
  242. our @ISA=qw(LaTeXML::Glue);
  # Debugging form: all stored components, comma separated, in "MuGlue[...]".
  sub stringify {
      my ($self) = @_;
      return 'MuGlue[' . join(',', @$self) . ']';
  }
  244. #**********************************************************************
  245. 1;
  246. __END__
  247. =pod
  248. =head1 NAME
  249. C<LaTeXML::Token>, C<LaTeXML::Tokens>, C<LaTeXML::Number>, C<LaTeXML::Dimension>, etc. -- representation
  250. of tokens and related objects.
  251. =head1 DESCRIPTION
  252. This module defines Tokens (C<LaTeXML::Token>, C<LaTeXML::Tokens>)
  253. and other things (C<LaTeXML::Number>, C<LaTeXML::Dimension>, C<LaTeXML::MuDimension>,
  254. C<LaTeXML::Glue> and C<LaTeXML::MuGlue>) that get created during tokenization
  255. and expansion.
  256. A C<LaTeXML::Token> represents a TeX token which is a pair of a character or string and
  257. a category code. A C<LaTeXML::Tokens> is a list of tokens (and also implements the API
  258. of a L<LaTeXML::Mouth> so that tokens can be read from a list).
  259. The other classes (C<LaTeXML::Number>, C<LaTeXML::Dimension>, C<LaTeXML::MuDimension>,
  260. C<LaTeXML::Glue> and C<LaTeXML::MuGlue>) represent various core TeX data types that
  261. are parsed from tokens by the C<LaTeXML::Gullet>.
  262. =head2 Common methods
  263. The following methods apply to all objects.
  264. =over 4
  265. =item C<< @tokens = $object->unlist; >>
  266. Return a list of the tokens making up this C<$object>.
  267. =item C<< $string = $object->toString; >>
  268. Return a string representing C<$object>.
  269. =item C<< $string = $object->untex; >>
  270. Return the TeX form of C<$object>, suitable (hopefully) for processing by TeX.
  271. =back
  272. =head2 Token methods
  273. The following methods are specific to C<LaTeXML::Token>.
  274. =over 4
  275. =item C<< $string = $token->getCSName; >>
  276. Return the name under which definitions for the C<$token> are stored: the string or character
  277. part of the token or, for the special category codes, the name of the category (eg. C<< T_BEGIN->getCSName >> returns "Begin").
  278. =item C<< $string = $token->getString; >>
  279. Return the string or character part of the C<$token>.
  280. =item C<< $code = $token->getCharcode; >>
  281. Return the character code of the character part of the C<$token>, or 256 if it is a control sequence.
  282. =item C<< $code = $token->getCatcode; >>
  283. Return the catcode of the C<$token>.
  284. =item C<< $defn = $token->getDefinition; >>
  285. Return the current definition associated with C<$token> in C<$STATE>, or
  286. undef if none.
  287. =item C<< $tokens = $token->invocation(@args); >>
  288. Return the L<LaTeXML::Tokens> representing the invocation of C<$token> acting
  289. on the arguments in C<@args>. C<$token> must have an associated definition.
  290. =back
  291. =head2 Tokens methods
  292. The following methods are specific to C<LaTeXML::Tokens>.
  293. =over 4
  294. =item C<< $tokenscopy = $tokens->clone; >>
  295. Return a shallow copy of the C<$tokens>. This is useful before reading from a C<LaTeXML::Tokens>, since reading removes tokens from it.
  296. =item C<< $token = $tokens->readToken; >>
  297. Return (and remove) the next token from C<$tokens>. This is part of the public API of L<LaTeXML::Mouth>
  298. so that a C<LaTeXML::Tokens> can serve as a L<LaTeXML::Mouth>.
  299. =back
  300. =head2 Numerics methods
  301. These methods apply to the various numeric objects
  302. (C<LaTeXML::Number>, C<LaTeXML::Dimension>, C<LaTeXML::MuDimension>,
  303. C<LaTeXML::Glue> and C<LaTeXML::MuGlue>)
  304. =over 4
  305. =item C<< $n = $object->valueOf; >>
  306. Return the value in scaled points (ignoring shrink and stretch, if any).
  307. =item C<< $n = $object->negate; >>
  308. Return an object representing the negative of the C<$object>.
  309. =item C<< $n = $object->add($other); >>
  310. Return an object representing the sum of C<$object> and C<$other>
  311. =item C<< $n = $object->multiply($n); >>
  312. Return an object representing the product of C<$object> and C<$n> (a regular number).
  313. =back
  314. =head1 AUTHOR
  315. Bruce Miller <bruce.miller@nist.gov>
  316. =head1 COPYRIGHT
  317. Public domain software, produced as part of work done by the
  318. United States Government & not subject to copyright in the US.
  319. =cut