/ext/scintilla/src/Document.cxx
C++ | 2820 lines | 2408 code | 224 blank | 188 comment | 881 complexity | abc9d1f76baa83fbdd1917e385cb431d MD5 | raw file
Possible License(s): GPL-3.0, LGPL-3.0, MPL-2.0-no-copyleft-exception, GPL-2.0, LGPL-2.0
- // Scintilla source code edit control
- /** @file Document.cxx
- ** Text document that handles notifications, DBCS, styling, words and end of line.
- **/
- // Copyright 1998-2011 by Neil Hodgson <neilh@scintilla.org>
- // The License.txt file describes the conditions under which this software may be distributed.
-
- #include <stdlib.h>
- #include <string.h>
- #include <stdio.h>
- #include <assert.h>
- #include <ctype.h>
-
- #include <stdexcept>
- #include <string>
- #include <vector>
- #include <algorithm>
-
- #ifdef CXX11_REGEX
- #include <regex>
- #endif
-
- #include "Platform.h"
-
- #include "ILexer.h"
- #include "Scintilla.h"
-
- #include "CharacterSet.h"
- #include "SplitVector.h"
- #include "Partitioning.h"
- #include "RunStyles.h"
- #include "CellBuffer.h"
- #include "PerLine.h"
- #include "CharClassify.h"
- #include "Decoration.h"
- #include "CaseFolder.h"
- #include "Document.h"
- #include "RESearch.h"
- #include "UniConversion.h"
- #include "UnicodeFromUTF8.h"
-
- #ifdef SCI_NAMESPACE
- using namespace Scintilla;
- #endif
-
- static inline bool IsPunctuation(char ch) {
- return IsASCII(ch) && ispunct(ch);
- }
-
- void LexInterface::Colourise(int start, int end) {
- if (pdoc && instance && !performingStyle) {
- // Protect against reentrance, which may occur, for example, when
- // fold points are discovered while performing styling and the folding
- // code looks for child lines which may trigger styling.
- performingStyle = true;
-
- int lengthDoc = pdoc->Length();
- if (end == -1)
- end = lengthDoc;
- int len = end - start;
-
- PLATFORM_ASSERT(len >= 0);
- PLATFORM_ASSERT(start + len <= lengthDoc);
-
- int styleStart = 0;
- if (start > 0)
- styleStart = pdoc->StyleAt(start - 1);
-
- if (len > 0) {
- instance->Lex(start, len, styleStart, pdoc);
- instance->Fold(start, len, styleStart, pdoc);
- }
-
- performingStyle = false;
- }
- }
-
- int LexInterface::LineEndTypesSupported() {
- if (instance) {
- int interfaceVersion = instance->Version();
- if (interfaceVersion >= lvSubStyles) {
- ILexerWithSubStyles *ssinstance = static_cast<ILexerWithSubStyles *>(instance);
- return ssinstance->LineEndTypesSupported();
- }
- }
- return 0;
- }
-
- Document::Document() {
- refCount = 0;
- pcf = NULL;
- #ifdef _WIN32
- eolMode = SC_EOL_CRLF;
- #else
- eolMode = SC_EOL_LF;
- #endif
- dbcsCodePage = 0;
- lineEndBitSet = SC_LINE_END_TYPE_DEFAULT;
- endStyled = 0;
- styleClock = 0;
- enteredModification = 0;
- enteredStyling = 0;
- enteredReadOnlyCount = 0;
- insertionSet = false;
- tabInChars = 8;
- indentInChars = 0;
- actualIndentInChars = 8;
- useTabs = true;
- tabIndents = true;
- backspaceUnindents = false;
-
- matchesValid = false;
- regex = 0;
-
- UTF8BytesOfLeadInitialise();
-
- perLineData[ldMarkers] = new LineMarkers();
- perLineData[ldLevels] = new LineLevels();
- perLineData[ldState] = new LineState();
- perLineData[ldMargin] = new LineAnnotation();
- perLineData[ldAnnotation] = new LineAnnotation();
-
- cb.SetPerLine(this);
-
- pli = 0;
- }
-
- Document::~Document() {
- for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
- it->watcher->NotifyDeleted(this, it->userData);
- }
- for (int j=0; j<ldSize; j++) {
- delete perLineData[j];
- perLineData[j] = 0;
- }
- delete regex;
- regex = 0;
- delete pli;
- pli = 0;
- delete pcf;
- pcf = 0;
- }
-
- void Document::Init() {
- for (int j=0; j<ldSize; j++) {
- if (perLineData[j])
- perLineData[j]->Init();
- }
- }
-
- int Document::LineEndTypesSupported() const {
- if ((SC_CP_UTF8 == dbcsCodePage) && pli)
- return pli->LineEndTypesSupported();
- else
- return 0;
- }
-
- bool Document::SetDBCSCodePage(int dbcsCodePage_) {
- if (dbcsCodePage != dbcsCodePage_) {
- dbcsCodePage = dbcsCodePage_;
- SetCaseFolder(NULL);
- cb.SetLineEndTypes(lineEndBitSet & LineEndTypesSupported());
- return true;
- } else {
- return false;
- }
- }
-
- bool Document::SetLineEndTypesAllowed(int lineEndBitSet_) {
- if (lineEndBitSet != lineEndBitSet_) {
- lineEndBitSet = lineEndBitSet_;
- int lineEndBitSetActive = lineEndBitSet & LineEndTypesSupported();
- if (lineEndBitSetActive != cb.GetLineEndTypes()) {
- ModifiedAt(0);
- cb.SetLineEndTypes(lineEndBitSetActive);
- return true;
- } else {
- return false;
- }
- } else {
- return false;
- }
- }
-
- void Document::InsertLine(int line) {
- for (int j=0; j<ldSize; j++) {
- if (perLineData[j])
- perLineData[j]->InsertLine(line);
- }
- }
-
- void Document::RemoveLine(int line) {
- for (int j=0; j<ldSize; j++) {
- if (perLineData[j])
- perLineData[j]->RemoveLine(line);
- }
- }
-
- // Increase reference count and return its previous value.
- int Document::AddRef() {
- return refCount++;
- }
-
- // Decrease reference count and return its previous value.
- // Delete the document if reference count reaches zero.
- int SCI_METHOD Document::Release() {
- int curRefCount = --refCount;
- if (curRefCount == 0)
- delete this;
- return curRefCount;
- }
-
- void Document::SetSavePoint() {
- cb.SetSavePoint();
- NotifySavePoint(true);
- }
-
- void Document::TentativeUndo() {
- if (!TentativeActive())
- return;
- CheckReadOnly();
- if (enteredModification == 0) {
- enteredModification++;
- if (!cb.IsReadOnly()) {
- bool startSavePoint = cb.IsSavePoint();
- bool multiLine = false;
- int steps = cb.TentativeSteps();
- //Platform::DebugPrintf("Steps=%d\n", steps);
- for (int step = 0; step < steps; step++) {
- const int prevLinesTotal = LinesTotal();
- const Action &action = cb.GetUndoStep();
- if (action.at == removeAction) {
- NotifyModified(DocModification(
- SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action));
- } else if (action.at == containerAction) {
- DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO);
- dm.token = action.position;
- NotifyModified(dm);
- } else {
- NotifyModified(DocModification(
- SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action));
- }
- cb.PerformUndoStep();
- if (action.at != containerAction) {
- ModifiedAt(action.position);
- }
-
- int modFlags = SC_PERFORMED_UNDO;
- // With undo, an insertion action becomes a deletion notification
- if (action.at == removeAction) {
- modFlags |= SC_MOD_INSERTTEXT;
- } else if (action.at == insertAction) {
- modFlags |= SC_MOD_DELETETEXT;
- }
- if (steps > 1)
- modFlags |= SC_MULTISTEPUNDOREDO;
- const int linesAdded = LinesTotal() - prevLinesTotal;
- if (linesAdded != 0)
- multiLine = true;
- if (step == steps - 1) {
- modFlags |= SC_LASTSTEPINUNDOREDO;
- if (multiLine)
- modFlags |= SC_MULTILINEUNDOREDO;
- }
- NotifyModified(DocModification(modFlags, action.position, action.lenData,
- linesAdded, action.data));
- }
-
- bool endSavePoint = cb.IsSavePoint();
- if (startSavePoint != endSavePoint)
- NotifySavePoint(endSavePoint);
-
- cb.TentativeCommit();
- }
- enteredModification--;
- }
- }
-
- int Document::GetMark(int line) {
- return static_cast<LineMarkers *>(perLineData[ldMarkers])->MarkValue(line);
- }
-
- int Document::MarkerNext(int lineStart, int mask) const {
- return static_cast<LineMarkers *>(perLineData[ldMarkers])->MarkerNext(lineStart, mask);
- }
-
- int Document::AddMark(int line, int markerNum) {
- if (line >= 0 && line <= LinesTotal()) {
- int prev = static_cast<LineMarkers *>(perLineData[ldMarkers])->
- AddMark(line, markerNum, LinesTotal());
- DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
- NotifyModified(mh);
- return prev;
- } else {
- return 0;
- }
- }
-
- void Document::AddMarkSet(int line, int valueSet) {
- if (line < 0 || line > LinesTotal()) {
- return;
- }
- unsigned int m = valueSet;
- for (int i = 0; m; i++, m >>= 1)
- if (m & 1)
- static_cast<LineMarkers *>(perLineData[ldMarkers])->
- AddMark(line, i, LinesTotal());
- DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
- NotifyModified(mh);
- }
-
- void Document::DeleteMark(int line, int markerNum) {
- static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMark(line, markerNum, false);
- DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
- NotifyModified(mh);
- }
-
- void Document::DeleteMarkFromHandle(int markerHandle) {
- static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMarkFromHandle(markerHandle);
- DocModification mh(SC_MOD_CHANGEMARKER, 0, 0, 0, 0);
- mh.line = -1;
- NotifyModified(mh);
- }
-
- void Document::DeleteAllMarks(int markerNum) {
- bool someChanges = false;
- for (int line = 0; line < LinesTotal(); line++) {
- if (static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMark(line, markerNum, true))
- someChanges = true;
- }
- if (someChanges) {
- DocModification mh(SC_MOD_CHANGEMARKER, 0, 0, 0, 0);
- mh.line = -1;
- NotifyModified(mh);
- }
- }
-
- int Document::LineFromHandle(int markerHandle) {
- return static_cast<LineMarkers *>(perLineData[ldMarkers])->LineFromHandle(markerHandle);
- }
-
- int SCI_METHOD Document::LineStart(int line) const {
- return cb.LineStart(line);
- }
-
- bool Document::IsLineStartPosition(int position) const {
- return LineStart(LineFromPosition(position)) == position;
- }
-
- int SCI_METHOD Document::LineEnd(int line) const {
- if (line >= LinesTotal() - 1) {
- return LineStart(line + 1);
- } else {
- int position = LineStart(line + 1);
- if (SC_CP_UTF8 == dbcsCodePage) {
- unsigned char bytes[] = {
- static_cast<unsigned char>(cb.CharAt(position-3)),
- static_cast<unsigned char>(cb.CharAt(position-2)),
- static_cast<unsigned char>(cb.CharAt(position-1)),
- };
- if (UTF8IsSeparator(bytes)) {
- return position - UTF8SeparatorLength;
- }
- if (UTF8IsNEL(bytes+1)) {
- return position - UTF8NELLength;
- }
- }
- position--; // Back over CR or LF
- // When line terminator is CR+LF, may need to go back one more
- if ((position > LineStart(line)) && (cb.CharAt(position - 1) == '\r')) {
- position--;
- }
- return position;
- }
- }
-
- void SCI_METHOD Document::SetErrorStatus(int status) {
- // Tell the watchers an error has occurred.
- for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
- it->watcher->NotifyErrorOccurred(this, it->userData, status);
- }
- }
-
- int SCI_METHOD Document::LineFromPosition(int pos) const {
- return cb.LineFromPosition(pos);
- }
-
- int Document::LineEndPosition(int position) const {
- return LineEnd(LineFromPosition(position));
- }
-
- bool Document::IsLineEndPosition(int position) const {
- return LineEnd(LineFromPosition(position)) == position;
- }
-
- bool Document::IsPositionInLineEnd(int position) const {
- return position >= LineEnd(LineFromPosition(position));
- }
-
- int Document::VCHomePosition(int position) const {
- int line = LineFromPosition(position);
- int startPosition = LineStart(line);
- int endLine = LineEnd(line);
- int startText = startPosition;
- while (startText < endLine && (cb.CharAt(startText) == ' ' || cb.CharAt(startText) == '\t'))
- startText++;
- if (position == startText)
- return startPosition;
- else
- return startText;
- }
-
- int SCI_METHOD Document::SetLevel(int line, int level) {
- int prev = static_cast<LineLevels *>(perLineData[ldLevels])->SetLevel(line, level, LinesTotal());
- if (prev != level) {
- DocModification mh(SC_MOD_CHANGEFOLD | SC_MOD_CHANGEMARKER,
- LineStart(line), 0, 0, 0, line);
- mh.foldLevelNow = level;
- mh.foldLevelPrev = prev;
- NotifyModified(mh);
- }
- return prev;
- }
-
- int SCI_METHOD Document::GetLevel(int line) const {
- return static_cast<LineLevels *>(perLineData[ldLevels])->GetLevel(line);
- }
-
- void Document::ClearLevels() {
- static_cast<LineLevels *>(perLineData[ldLevels])->ClearLevels();
- }
-
- static bool IsSubordinate(int levelStart, int levelTry) {
- if (levelTry & SC_FOLDLEVELWHITEFLAG)
- return true;
- else
- return (levelStart & SC_FOLDLEVELNUMBERMASK) < (levelTry & SC_FOLDLEVELNUMBERMASK);
- }
-
- int Document::GetLastChild(int lineParent, int level, int lastLine) {
- if (level == -1)
- level = GetLevel(lineParent) & SC_FOLDLEVELNUMBERMASK;
- int maxLine = LinesTotal();
- int lookLastLine = (lastLine != -1) ? Platform::Minimum(LinesTotal() - 1, lastLine) : -1;
- int lineMaxSubord = lineParent;
- while (lineMaxSubord < maxLine - 1) {
- EnsureStyledTo(LineStart(lineMaxSubord + 2));
- if (!IsSubordinate(level, GetLevel(lineMaxSubord + 1)))
- break;
- if ((lookLastLine != -1) && (lineMaxSubord >= lookLastLine) && !(GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG))
- break;
- lineMaxSubord++;
- }
- if (lineMaxSubord > lineParent) {
- if (level > (GetLevel(lineMaxSubord + 1) & SC_FOLDLEVELNUMBERMASK)) {
- // Have chewed up some whitespace that belongs to a parent so seek back
- if (GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG) {
- lineMaxSubord--;
- }
- }
- }
- return lineMaxSubord;
- }
-
- int Document::GetFoldParent(int line) const {
- int level = GetLevel(line) & SC_FOLDLEVELNUMBERMASK;
- int lineLook = line - 1;
- while ((lineLook > 0) && (
- (!(GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG)) ||
- ((GetLevel(lineLook) & SC_FOLDLEVELNUMBERMASK) >= level))
- ) {
- lineLook--;
- }
- if ((GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG) &&
- ((GetLevel(lineLook) & SC_FOLDLEVELNUMBERMASK) < level)) {
- return lineLook;
- } else {
- return -1;
- }
- }
-
- void Document::GetHighlightDelimiters(HighlightDelimiter &highlightDelimiter, int line, int lastLine) {
- int level = GetLevel(line);
- int lookLastLine = Platform::Maximum(line, lastLine) + 1;
-
- int lookLine = line;
- int lookLineLevel = level;
- int lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
- while ((lookLine > 0) && ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) ||
- ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum >= (GetLevel(lookLine + 1) & SC_FOLDLEVELNUMBERMASK))))) {
- lookLineLevel = GetLevel(--lookLine);
- lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
- }
-
- int beginFoldBlock = (lookLineLevel & SC_FOLDLEVELHEADERFLAG) ? lookLine : GetFoldParent(lookLine);
- if (beginFoldBlock == -1) {
- highlightDelimiter.Clear();
- return;
- }
-
- int endFoldBlock = GetLastChild(beginFoldBlock, -1, lookLastLine);
- int firstChangeableLineBefore = -1;
- if (endFoldBlock < line) {
- lookLine = beginFoldBlock - 1;
- lookLineLevel = GetLevel(lookLine);
- lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
- while ((lookLine >= 0) && (lookLineLevelNum >= SC_FOLDLEVELBASE)) {
- if (lookLineLevel & SC_FOLDLEVELHEADERFLAG) {
- if (GetLastChild(lookLine, -1, lookLastLine) == line) {
- beginFoldBlock = lookLine;
- endFoldBlock = line;
- firstChangeableLineBefore = line - 1;
- }
- }
- if ((lookLine > 0) && (lookLineLevelNum == SC_FOLDLEVELBASE) && ((GetLevel(lookLine - 1) & SC_FOLDLEVELNUMBERMASK) > lookLineLevelNum))
- break;
- lookLineLevel = GetLevel(--lookLine);
- lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
- }
- }
- if (firstChangeableLineBefore == -1) {
- for (lookLine = line - 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
- lookLine >= beginFoldBlock;
- lookLineLevel = GetLevel(--lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK) {
- if ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) || (lookLineLevelNum > (level & SC_FOLDLEVELNUMBERMASK))) {
- firstChangeableLineBefore = lookLine;
- break;
- }
- }
- }
- if (firstChangeableLineBefore == -1)
- firstChangeableLineBefore = beginFoldBlock - 1;
-
- int firstChangeableLineAfter = -1;
- for (lookLine = line + 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
- lookLine <= endFoldBlock;
- lookLineLevel = GetLevel(++lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK) {
- if ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum < (GetLevel(lookLine + 1) & SC_FOLDLEVELNUMBERMASK))) {
- firstChangeableLineAfter = lookLine;
- break;
- }
- }
- if (firstChangeableLineAfter == -1)
- firstChangeableLineAfter = endFoldBlock + 1;
-
- highlightDelimiter.beginFoldBlock = beginFoldBlock;
- highlightDelimiter.endFoldBlock = endFoldBlock;
- highlightDelimiter.firstChangeableLineBefore = firstChangeableLineBefore;
- highlightDelimiter.firstChangeableLineAfter = firstChangeableLineAfter;
- }
-
- int Document::ClampPositionIntoDocument(int pos) const {
- return Platform::Clamp(pos, 0, Length());
- }
-
- bool Document::IsCrLf(int pos) const {
- if (pos < 0)
- return false;
- if (pos >= (Length() - 1))
- return false;
- return (cb.CharAt(pos) == '\r') && (cb.CharAt(pos + 1) == '\n');
- }
-
- int Document::LenChar(int pos) {
- if (pos < 0) {
- return 1;
- } else if (IsCrLf(pos)) {
- return 2;
- } else if (SC_CP_UTF8 == dbcsCodePage) {
- const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos));
- const int widthCharBytes = UTF8BytesOfLead[leadByte];
- int lengthDoc = Length();
- if ((pos + widthCharBytes) > lengthDoc)
- return lengthDoc - pos;
- else
- return widthCharBytes;
- } else if (dbcsCodePage) {
- return IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
- } else {
- return 1;
- }
- }
-
- bool Document::InGoodUTF8(int pos, int &start, int &end) const {
- int trail = pos;
- while ((trail>0) && (pos-trail < UTF8MaxBytes) && UTF8IsTrailByte(static_cast<unsigned char>(cb.CharAt(trail-1))))
- trail--;
- start = (trail > 0) ? trail-1 : trail;
-
- const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(start));
- const int widthCharBytes = UTF8BytesOfLead[leadByte];
- if (widthCharBytes == 1) {
- return false;
- } else {
- int trailBytes = widthCharBytes - 1;
- int len = pos - start;
- if (len > trailBytes)
- // pos too far from lead
- return false;
- char charBytes[UTF8MaxBytes] = {static_cast<char>(leadByte),0,0,0};
- for (int b=1; b<widthCharBytes && ((start+b) < Length()); b++)
- charBytes[b] = cb.CharAt(static_cast<int>(start+b));
- int utf8status = UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes), widthCharBytes);
- if (utf8status & UTF8MaskInvalid)
- return false;
- end = start + widthCharBytes;
- return true;
- }
- }
-
- // Normalise a position so that it is not halfway through a two byte character.
- // This can occur in two situations -
- // When lines are terminated with \r\n pairs which should be treated as one character.
- // When displaying DBCS text such as Japanese.
- // If moving, move the position in the indicated direction.
- int Document::MovePositionOutsideChar(int pos, int moveDir, bool checkLineEnd) const {
- //Platform::DebugPrintf("NoCRLF %d %d\n", pos, moveDir);
- // If out of range, just return minimum/maximum value.
- if (pos <= 0)
- return 0;
- if (pos >= Length())
- return Length();
-
- // PLATFORM_ASSERT(pos > 0 && pos < Length());
- if (checkLineEnd && IsCrLf(pos - 1)) {
- if (moveDir > 0)
- return pos + 1;
- else
- return pos - 1;
- }
-
- if (dbcsCodePage) {
- if (SC_CP_UTF8 == dbcsCodePage) {
- unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
- // If ch is not a trail byte then pos is valid intercharacter position
- if (UTF8IsTrailByte(ch)) {
- int startUTF = pos;
- int endUTF = pos;
- if (InGoodUTF8(pos, startUTF, endUTF)) {
- // ch is a trail byte within a UTF-8 character
- if (moveDir > 0)
- pos = endUTF;
- else
- pos = startUTF;
- }
- // Else invalid UTF-8 so return position of isolated trail byte
- }
- } else {
- // Anchor DBCS calculations at start of line because start of line can
- // not be a DBCS trail byte.
- int posStartLine = LineStart(LineFromPosition(pos));
- if (pos == posStartLine)
- return pos;
-
- // Step back until a non-lead-byte is found.
- int posCheck = pos;
- while ((posCheck > posStartLine) && IsDBCSLeadByte(cb.CharAt(posCheck-1)))
- posCheck--;
-
- // Check from known start of character.
- while (posCheck < pos) {
- int mbsize = IsDBCSLeadByte(cb.CharAt(posCheck)) ? 2 : 1;
- if (posCheck + mbsize == pos) {
- return pos;
- } else if (posCheck + mbsize > pos) {
- if (moveDir > 0) {
- return posCheck + mbsize;
- } else {
- return posCheck;
- }
- }
- posCheck += mbsize;
- }
- }
- }
-
- return pos;
- }
-
- // NextPosition moves between valid positions - it can not handle a position in the middle of a
- // multi-byte character. It is used to iterate through text more efficiently than MovePositionOutsideChar.
- // A \r\n pair is treated as two characters.
- int Document::NextPosition(int pos, int moveDir) const {
- // If out of range, just return minimum/maximum value.
- int increment = (moveDir > 0) ? 1 : -1;
- if (pos + increment <= 0)
- return 0;
- if (pos + increment >= Length())
- return Length();
-
- if (dbcsCodePage) {
- if (SC_CP_UTF8 == dbcsCodePage) {
- if (increment == 1) {
- // Simple forward movement case so can avoid some checks
- const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos));
- if (UTF8IsAscii(leadByte)) {
- // Single byte character or invalid
- pos++;
- } else {
- const int widthCharBytes = UTF8BytesOfLead[leadByte];
- char charBytes[UTF8MaxBytes] = {static_cast<char>(leadByte),0,0,0};
- for (int b=1; b<widthCharBytes; b++)
- charBytes[b] = cb.CharAt(static_cast<int>(pos+b));
- int utf8status = UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes), widthCharBytes);
- if (utf8status & UTF8MaskInvalid)
- pos++;
- else
- pos += utf8status & UTF8MaskWidth;
- }
- } else {
- // Examine byte before position
- pos--;
- unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
- // If ch is not a trail byte then pos is valid intercharacter position
- if (UTF8IsTrailByte(ch)) {
- // If ch is a trail byte in a valid UTF-8 character then return start of character
- int startUTF = pos;
- int endUTF = pos;
- if (InGoodUTF8(pos, startUTF, endUTF)) {
- pos = startUTF;
- }
- // Else invalid UTF-8 so return position of isolated trail byte
- }
- }
- } else {
- if (moveDir > 0) {
- int mbsize = IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
- pos += mbsize;
- if (pos > Length())
- pos = Length();
- } else {
- // Anchor DBCS calculations at start of line because start of line can
- // not be a DBCS trail byte.
- int posStartLine = LineStart(LineFromPosition(pos));
- // See http://msdn.microsoft.com/en-us/library/cc194792%28v=MSDN.10%29.aspx
- // http://msdn.microsoft.com/en-us/library/cc194790.aspx
- if ((pos - 1) <= posStartLine) {
- return pos - 1;
- } else if (IsDBCSLeadByte(cb.CharAt(pos - 1))) {
- // Must actually be trail byte
- return pos - 2;
- } else {
- // Otherwise, step back until a non-lead-byte is found.
- int posTemp = pos - 1;
- while (posStartLine <= --posTemp && IsDBCSLeadByte(cb.CharAt(posTemp)))
- ;
- // Now posTemp+1 must point to the beginning of a character,
- // so figure out whether we went back an even or an odd
- // number of bytes and go back 1 or 2 bytes, respectively.
- return (pos - 1 - ((pos - posTemp) & 1));
- }
- }
- }
- } else {
- pos += increment;
- }
-
- return pos;
- }
-
- bool Document::NextCharacter(int &pos, int moveDir) const {
- // Returns true if pos changed
- int posNext = NextPosition(pos, moveDir);
- if (posNext == pos) {
- return false;
- } else {
- pos = posNext;
- return true;
- }
- }
-
- // Return -1 on out-of-bounds
- int SCI_METHOD Document::GetRelativePosition(int positionStart, int characterOffset) const {
- int pos = positionStart;
- if (dbcsCodePage) {
- const int increment = (characterOffset > 0) ? 1 : -1;
- while (characterOffset != 0) {
- const int posNext = NextPosition(pos, increment);
- if (posNext == pos)
- return INVALID_POSITION;
- pos = posNext;
- characterOffset -= increment;
- }
- } else {
- pos = positionStart + characterOffset;
- if ((pos < 0) || (pos > Length()))
- return INVALID_POSITION;
- }
- return pos;
- }
-
- int Document::GetRelativePositionUTF16(int positionStart, int characterOffset) const {
- int pos = positionStart;
- if (dbcsCodePage) {
- const int increment = (characterOffset > 0) ? 1 : -1;
- while (characterOffset != 0) {
- const int posNext = NextPosition(pos, increment);
- if (posNext == pos)
- return INVALID_POSITION;
- if (abs(pos-posNext) > 3) // 4 byte character = 2*UTF16.
- characterOffset -= increment;
- pos = posNext;
- characterOffset -= increment;
- }
- } else {
- pos = positionStart + characterOffset;
- if ((pos < 0) || (pos > Length()))
- return INVALID_POSITION;
- }
- return pos;
- }
-
- int SCI_METHOD Document::GetCharacterAndWidth(int position, int *pWidth) const {
- int character;
- int bytesInCharacter = 1;
- if (dbcsCodePage) {
- const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(position));
- if (SC_CP_UTF8 == dbcsCodePage) {
- if (UTF8IsAscii(leadByte)) {
- // Single byte character or invalid
- character = leadByte;
- } else {
- const int widthCharBytes = UTF8BytesOfLead[leadByte];
- unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0};
- for (int b=1; b<widthCharBytes; b++)
- charBytes[b] = static_cast<unsigned char>(cb.CharAt(position+b));
- int utf8status = UTF8Classify(charBytes, widthCharBytes);
- if (utf8status & UTF8MaskInvalid) {
- // Report as singleton surrogate values which are invalid Unicode
- character = 0xDC80 + leadByte;
- } else {
- bytesInCharacter = utf8status & UTF8MaskWidth;
- character = UnicodeFromUTF8(charBytes);
- }
- }
- } else {
- if (IsDBCSLeadByte(leadByte)) {
- bytesInCharacter = 2;
- character = (leadByte << 8) | static_cast<unsigned char>(cb.CharAt(position+1));
- } else {
- character = leadByte;
- }
- }
- } else {
- character = cb.CharAt(position);
- }
- if (pWidth) {
- *pWidth = bytesInCharacter;
- }
- return character;
- }
-
- int SCI_METHOD Document::CodePage() const {
- return dbcsCodePage;
- }
-
- bool SCI_METHOD Document::IsDBCSLeadByte(char ch) const {
- // Byte ranges found in Wikipedia articles with relevant search strings in each case
- unsigned char uch = static_cast<unsigned char>(ch);
- switch (dbcsCodePage) {
- case 932:
- // Shift_jis
- return ((uch >= 0x81) && (uch <= 0x9F)) ||
- ((uch >= 0xE0) && (uch <= 0xFC));
- // Lead bytes F0 to FC may be a Microsoft addition.
- case 936:
- // GBK
- return (uch >= 0x81) && (uch <= 0xFE);
- case 949:
- // Korean Wansung KS C-5601-1987
- return (uch >= 0x81) && (uch <= 0xFE);
- case 950:
- // Big5
- return (uch >= 0x81) && (uch <= 0xFE);
- case 1361:
- // Korean Johab KS C-5601-1992
- return
- ((uch >= 0x84) && (uch <= 0xD3)) ||
- ((uch >= 0xD8) && (uch <= 0xDE)) ||
- ((uch >= 0xE0) && (uch <= 0xF9));
- }
- return false;
- }
-
// True for a space or a horizontal tab, false for everything else.
static inline bool IsSpaceOrTab(int ch) {
	switch (ch) {
	case ' ':
	case '\t':
		return true;
	default:
		return false;
	}
}
-
- // Need to break text into segments near lengthSegment but taking into
- // account the encoding to not break inside a UTF-8 or DBCS character
- // and also trying to avoid breaking inside a pair of combining characters.
- // The segment length must always be long enough (more than 4 bytes)
- // so that there will be at least one whole character to make a segment.
- // For UTF-8, text must consist only of valid whole characters.
- // In preference order from best to worst:
- // 1) Break after space
- // 2) Break before punctuation
- // 3) Break after whole character
-
- int Document::SafeSegment(const char *text, int length, int lengthSegment) const {
- if (length <= lengthSegment)
- return length;
- int lastSpaceBreak = -1;
- int lastPunctuationBreak = -1;
- int lastEncodingAllowedBreak = 0;
- for (int j=0; j < lengthSegment;) {
- unsigned char ch = static_cast<unsigned char>(text[j]);
- if (j > 0) {
- if (IsSpaceOrTab(text[j - 1]) && !IsSpaceOrTab(text[j])) {
- lastSpaceBreak = j;
- }
- if (ch < 'A') {
- lastPunctuationBreak = j;
- }
- }
- lastEncodingAllowedBreak = j;
-
- if (dbcsCodePage == SC_CP_UTF8) {
- j += UTF8BytesOfLead[ch];
- } else if (dbcsCodePage) {
- j += IsDBCSLeadByte(ch) ? 2 : 1;
- } else {
- j++;
- }
- }
- if (lastSpaceBreak >= 0) {
- return lastSpaceBreak;
- } else if (lastPunctuationBreak >= 0) {
- return lastPunctuationBreak;
- }
- return lastEncodingAllowedBreak;
- }
-
- EncodingFamily Document::CodePageFamily() const {
- if (SC_CP_UTF8 == dbcsCodePage)
- return efUnicode;
- else if (dbcsCodePage)
- return efDBCS;
- else
- return efEightBit;
- }
-
- void Document::ModifiedAt(int pos) {
- if (endStyled > pos)
- endStyled = pos;
- }
-
- void Document::CheckReadOnly() {
- if (cb.IsReadOnly() && enteredReadOnlyCount == 0) {
- enteredReadOnlyCount++;
- NotifyModifyAttempt();
- enteredReadOnlyCount--;
- }
- }
-
- // Document only modified by gateways DeleteChars, InsertString, Undo, Redo, and SetStyleAt.
- // SetStyleAt does not change the persistent state of a document
-
- bool Document::DeleteChars(int pos, int len) {
- if (pos < 0)
- return false;
- if (len <= 0)
- return false;
- if ((pos + len) > Length())
- return false;
- CheckReadOnly();
- if (enteredModification != 0) {
- return false;
- } else {
- enteredModification++;
- if (!cb.IsReadOnly()) {
- NotifyModified(
- DocModification(
- SC_MOD_BEFOREDELETE | SC_PERFORMED_USER,
- pos, len,
- 0, 0));
- int prevLinesTotal = LinesTotal();
- bool startSavePoint = cb.IsSavePoint();
- bool startSequence = false;
- const char *text = cb.DeleteChars(pos, len, startSequence);
- if (startSavePoint && cb.IsCollectingUndo())
- NotifySavePoint(!startSavePoint);
- if ((pos < Length()) || (pos == 0))
- ModifiedAt(pos);
- else
- ModifiedAt(pos-1);
- NotifyModified(
- DocModification(
- SC_MOD_DELETETEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
- pos, len,
- LinesTotal() - prevLinesTotal, text));
- }
- enteredModification--;
- }
- return !cb.IsReadOnly();
- }
-
- /**
- * Insert a string with a length.
- */
- int Document::InsertString(int position, const char *s, int insertLength) {
- if (insertLength <= 0) {
- return 0;
- }
- CheckReadOnly(); // Application may change read only state here
- if (cb.IsReadOnly()) {
- return 0;
- }
- if (enteredModification != 0) {
- return 0;
- }
- enteredModification++;
- insertionSet = false;
- insertion.clear();
- NotifyModified(
- DocModification(
- SC_MOD_INSERTCHECK,
- position, insertLength,
- 0, s));
- if (insertionSet) {
- s = insertion.c_str();
- insertLength = static_cast<int>(insertion.length());
- }
- NotifyModified(
- DocModification(
- SC_MOD_BEFOREINSERT | SC_PERFORMED_USER,
- position, insertLength,
- 0, s));
- int prevLinesTotal = LinesTotal();
- bool startSavePoint = cb.IsSavePoint();
- bool startSequence = false;
- const char *text = cb.InsertString(position, s, insertLength, startSequence);
- if (startSavePoint && cb.IsCollectingUndo())
- NotifySavePoint(!startSavePoint);
- ModifiedAt(position);
- NotifyModified(
- DocModification(
- SC_MOD_INSERTTEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
- position, insertLength,
- LinesTotal() - prevLinesTotal, text));
- if (insertionSet) { // Free memory as could be large
- std::string().swap(insertion);
- }
- enteredModification--;
- return insertLength;
- }
-
- void Document::ChangeInsertion(const char *s, int length) {
- insertionSet = true;
- insertion.assign(s, length);
- }
-
- int SCI_METHOD Document::AddData(char *data, int length) {
- try {
- int position = Length();
- InsertString(position, data, length);
- } catch (std::bad_alloc &) {
- return SC_STATUS_BADALLOC;
- } catch (...) {
- return SC_STATUS_FAILURE;
- }
- return 0;
- }
-
- void * SCI_METHOD Document::ConvertToDocument() {
- return this;
- }
-
- int Document::Undo() {
- int newPos = -1;
- CheckReadOnly();
- if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
- enteredModification++;
- if (!cb.IsReadOnly()) {
- bool startSavePoint = cb.IsSavePoint();
- bool multiLine = false;
- int steps = cb.StartUndo();
- //Platform::DebugPrintf("Steps=%d\n", steps);
- int coalescedRemovePos = -1;
- int coalescedRemoveLen = 0;
- int prevRemoveActionPos = -1;
- int prevRemoveActionLen = 0;
- for (int step = 0; step < steps; step++) {
- const int prevLinesTotal = LinesTotal();
- const Action &action = cb.GetUndoStep();
- if (action.at == removeAction) {
- NotifyModified(DocModification(
- SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action));
- } else if (action.at == containerAction) {
- DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO);
- dm.token = action.position;
- NotifyModified(dm);
- if (!action.mayCoalesce) {
- coalescedRemovePos = -1;
- coalescedRemoveLen = 0;
- prevRemoveActionPos = -1;
- prevRemoveActionLen = 0;
- }
- } else {
- NotifyModified(DocModification(
- SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action));
- }
- cb.PerformUndoStep();
- if (action.at != containerAction) {
- ModifiedAt(action.position);
- newPos = action.position;
- }
-
- int modFlags = SC_PERFORMED_UNDO;
- // With undo, an insertion action becomes a deletion notification
- if (action.at == removeAction) {
- newPos += action.lenData;
- modFlags |= SC_MOD_INSERTTEXT;
- if ((coalescedRemoveLen > 0) &&
- (action.position == prevRemoveActionPos || action.position == (prevRemoveActionPos + prevRemoveActionLen))) {
- coalescedRemoveLen += action.lenData;
- newPos = coalescedRemovePos + coalescedRemoveLen;
- } else {
- coalescedRemovePos = action.position;
- coalescedRemoveLen = action.lenData;
- }
- prevRemoveActionPos = action.position;
- prevRemoveActionLen = action.lenData;
- } else if (action.at == insertAction) {
- modFlags |= SC_MOD_DELETETEXT;
- coalescedRemovePos = -1;
- coalescedRemoveLen = 0;
- prevRemoveActionPos = -1;
- prevRemoveActionLen = 0;
- }
- if (steps > 1)
- modFlags |= SC_MULTISTEPUNDOREDO;
- const int linesAdded = LinesTotal() - prevLinesTotal;
- if (linesAdded != 0)
- multiLine = true;
- if (step == steps - 1) {
- modFlags |= SC_LASTSTEPINUNDOREDO;
- if (multiLine)
- modFlags |= SC_MULTILINEUNDOREDO;
- }
- NotifyModified(DocModification(modFlags, action.position, action.lenData,
- linesAdded, action.data));
- }
-
- bool endSavePoint = cb.IsSavePoint();
- if (startSavePoint != endSavePoint)
- NotifySavePoint(endSavePoint);
- }
- enteredModification--;
- }
- return newPos;
- }
-
- int Document::Redo() {
- int newPos = -1;
- CheckReadOnly();
- if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
- enteredModification++;
- if (!cb.IsReadOnly()) {
- bool startSavePoint = cb.IsSavePoint();
- bool multiLine = false;
- int steps = cb.StartRedo();
- for (int step = 0; step < steps; step++) {
- const int prevLinesTotal = LinesTotal();
- const Action &action = cb.GetRedoStep();
- if (action.at == insertAction) {
- NotifyModified(DocModification(
- SC_MOD_BEFOREINSERT | SC_PERFORMED_REDO, action));
- } else if (action.at == containerAction) {
- DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_REDO);
- dm.token = action.position;
- NotifyModified(dm);
- } else {
- NotifyModified(DocModification(
- SC_MOD_BEFOREDELETE | SC_PERFORMED_REDO, action));
- }
- cb.PerformRedoStep();
- if (action.at != containerAction) {
- ModifiedAt(action.position);
- newPos = action.position;
- }
-
- int modFlags = SC_PERFORMED_REDO;
- if (action.at == insertAction) {
- newPos += action.lenData;
- modFlags |= SC_MOD_INSERTTEXT;
- } else if (action.at == removeAction) {
- modFlags |= SC_MOD_DELETETEXT;
- }
- if (steps > 1)
- modFlags |= SC_MULTISTEPUNDOREDO;
- const int linesAdded = LinesTotal() - prevLinesTotal;
- if (linesAdded != 0)
- multiLine = true;
- if (step == steps - 1) {
- modFlags |= SC_LASTSTEPINUNDOREDO;
- if (multiLine)
- modFlags |= SC_MULTILINEUNDOREDO;
- }
- NotifyModified(
- DocModification(modFlags, action.position, action.lenData,
- linesAdded, action.data));
- }
-
- bool endSavePoint = cb.IsSavePoint();
- if (startSavePoint != endSavePoint)
- NotifySavePoint(endSavePoint);
- }
- enteredModification--;
- }
- return newPos;
- }
-
- void Document::DelChar(int pos) {
- DeleteChars(pos, LenChar(pos));
- }
-
- void Document::DelCharBack(int pos) {
- if (pos <= 0) {
- return;
- } else if (IsCrLf(pos - 2)) {
- DeleteChars(pos - 2, 2);
- } else if (dbcsCodePage) {
- int startChar = NextPosition(pos, -1);
- DeleteChars(startChar, pos - startChar);
- } else {
- DeleteChars(pos - 1, 1);
- }
- }
-
/**
 * Return the column of the next tab stop after @a pos, for non-negative
 * @a pos and tab stops placed every @a tabSize columns.
 */
static int NextTab(int pos, int tabSize) {
	const int stopsPassed = pos / tabSize;
	return (stopsPassed + 1) * tabSize;
}
-
/**
 * Build the white space needed to reach an indentation of @a indent columns.
 * @param indent Desired indentation in columns; values <= 0 give an empty string.
 * @param tabSize Width of a tab in columns.
 * @param insertSpaces When true only spaces are produced; when false as many
 *  tabs as fit are used and the remainder is filled with spaces.
 * @return The indentation string.
 */
static std::string CreateIndentation(int indent, int tabSize, bool insertSpaces) {
	std::string indentation;
	if (indent <= 0)
		return indentation;
	// A tab size that is not positive can never consume any indentation:
	// subtracting it in a loop would not terminate, so use spaces only.
	if (!insertSpaces && (tabSize > 0)) {
		indentation.append(static_cast<std::string::size_type>(indent / tabSize), '\t');
		indent %= tabSize;
	}
	indentation.append(static_cast<std::string::size_type>(indent), ' ');
	return indentation;
}
-
- int SCI_METHOD Document::GetLineIndentation(int line) {
- int indent = 0;
- if ((line >= 0) && (line < LinesTotal())) {
- int lineStart = LineStart(line);
- int length = Length();
- for (int i = lineStart; i < length; i++) {
- char ch = cb.CharAt(i);
- if (ch == ' ')
- indent++;
- else if (ch == '\t')
- indent = NextTab(indent, tabInChars);
- else
- return indent;
- }
- }
- return indent;
- }
-
- int Document::SetLineIndentation(int line, int indent) {
- int indentOfLine = GetLineIndentation(line);
- if (indent < 0)
- indent = 0;
- if (indent != indentOfLine) {
- std::string linebuf = CreateIndentation(indent, tabInChars, !useTabs);
- int thisLineStart = LineStart(line);
- int indentPos = GetLineIndentPosition(line);
- UndoGroup ug(this);
- DeleteChars(thisLineStart, indentPos - thisLineStart);
- return thisLineStart + InsertString(thisLineStart, linebuf.c_str(),
- static_cast<int>(linebuf.length()));
- } else {
- return GetLineIndentPosition(line);
- }
- }
-
- int Document::GetLineIndentPosition(int line) const {
- if (line < 0)
- return 0;
- int pos = LineStart(line);
- int length = Length();
- while ((pos < length) && IsSpaceOrTab(cb.CharAt(pos))) {
- pos++;
- }
- return pos;
- }
-
- int Document::GetColumn(int pos) {
- int column = 0;
- int line = LineFromPosition(pos);
- if ((line >= 0) && (line < LinesTotal())) {
- for (int i = LineStart(line); i < pos;) {
- char ch = cb.CharAt(i);
- if (ch == '\t') {
- column = NextTab(column, tabInChars);
- i++;
- } else if (ch == '\r') {
- return column;
- } else if (ch == '\n') {
- return column;
- } else if (i >= Length()) {
- return column;
- } else {
- column++;
- i = NextPosition(i, 1);
- }
- }
- }
- return column;
- }
-
- int Document::CountCharacters(int startPos, int endPos) const {
- startPos = MovePositionOutsideChar(startPos, 1, false);
- endPos = MovePositionOutsideChar(endPos, -1, false);
- int count = 0;
- int i = startPos;
- while (i < endPos) {
- count++;
- if (IsCrLf(i))
- i++;
- i = NextPosition(i, 1);
- }
- return count;
- }
-
- int Document::CountUTF16(int startPos, int endPos) const {
- startPos = MovePositionOutsideChar(startPos, 1, false);
- endPos = MovePositionOutsideChar(endPos, -1, false);
- int count = 0;
- int i = startPos;
- while (i < endPos) {
- count++;
- const int next = NextPosition(i, 1);
- if ((next - i) > 3)
- count++;
- i = next;
- }
- return count;
- }
-
- int Document::FindColumn(int line, int column) {
- int position = LineStart(line);
- if ((line >= 0) && (line < LinesTotal())) {
- int columnCurrent = 0;
- while ((columnCurrent < column) && (position < Length())) {
- char ch = cb.CharAt(position);
- if (ch == '\t') {
- columnCurrent = NextTab(columnCurrent, tabInChars);
- if (columnCurrent > column)
- return position;
- position++;
- } else if (ch == '\r') {
- return position;
- } else if (ch == '\n') {
- return position;
- } else {
- columnCurrent++;
- position = NextPosition(position, 1);
- }
- }
- }
- return position;
- }
-
- void Document::Indent(bool forwards, int lineBottom, int lineTop) {
- // Dedent - suck white space off the front of the line to dedent by equivalent of a tab
- for (int line = lineBottom; line >= lineTop; line--) {
- int indentOfLine = GetLineIndentation(line);
- if (forwards) {
- if (LineStart(line) < LineEnd(line)) {
- SetLineIndentation(line, indentOfLine + IndentSize());
- }
- } else {
- SetLineIndentation(line, indentOfLine - IndentSize());
- }
- }
- }
-
- // Convert line endings for a piece of text to a particular mode.
- // Stop at len or when a NUL is found.
- std::string Document::TransformLineEnds(const char *s, size_t len, int eolModeWanted) {
- std::string dest;
- for (size_t i = 0; (i < len) && (s[i]); i++) {
- if (s[i] == '\n' || s[i] == '\r') {
- if (eolModeWanted == SC_EOL_CR) {
- dest.push_back('\r');
- } else if (eolModeWanted == SC_EOL_LF) {
- dest.push_back('\n');
- } else { // eolModeWanted == SC_EOL_CRLF
- dest.push_back('\r');
- dest.push_back('\n');
- }
- if ((s[i] == '\r') && (i+1 < len) && (s[i+1] == '\n')) {
- i++;
- }
- } else {
- dest.push_back(s[i]);
- }
- }
- return dest;
- }
-
- void Document::ConvertLineEnds(int eolModeSet) {
- UndoGroup ug(this);
-
- for (int pos = 0; pos < Length(); pos++) {
- if (cb.CharAt(pos) == '\r') {
- if (cb.CharAt(pos + 1) == '\n') {
- // CRLF
- if (eolModeSet == SC_EOL_CR) {
- DeleteChars(pos + 1, 1); // Delete the LF
- } else if (eolModeSet == SC_EOL_LF) {
- DeleteChars(pos, 1); // Delete the CR
- } else {
- pos++;
- }
- } else {
- // CR
- if (eolModeSet == SC_EOL_CRLF) {
- pos += InsertString(pos + 1, "\n", 1); // Insert LF
- } else if (eolModeSet == SC_EOL_LF) {
- pos += InsertString(pos, "\n", 1); // Insert LF
- DeleteChars(pos, 1); // Delete CR
- pos--;
- }
- }
- } else if (cb.CharAt(pos) == '\n') {
- // LF
- if (eolModeSet == SC_EOL_CRLF) {
- pos += InsertString(pos, "\r", 1); // Insert CR
- } else if (eolModeSet == SC_EOL_CR) {
- pos += InsertString(pos, "\r", 1); // Insert CR
- DeleteChars(pos, 1); // Delete LF
- pos--;
- }
- }
- }
-
- }
-
- bool Document::IsWhiteLine(int line) const {
- int currentChar = LineStart(line);
- int endLine = LineEnd(line);
- while (currentChar < endLine) {
- if (cb.CharAt(currentChar) != ' ' && cb.CharAt(currentChar) != '\t') {
- return false;
- }
- ++currentChar;
- }
- return true;
- }
-
- int Document::ParaUp(int pos) const {
- int line = LineFromPosition(pos);
- line--;
- while (line >= 0 && IsWhiteLine(line)) { // skip empty lines
- line--;
- }
- while (line >= 0 && !IsWhiteLine(line)) { // skip non-empty lines
- line--;
- }
- line++;
- return LineStart(line);
- }
-
- int Document::ParaDown(int pos) const {
- int line = LineFromPosition(pos);
- while (line < LinesTotal() && !IsWhiteLine(line)) { // skip non-empty lines
- line++;
- }
- while (line < LinesTotal() && IsWhiteLine(line)) { // skip empty lines
- line++;
- }
- if (line < LinesTotal())
- return LineStart(line);
- else // end of a document
- return LineEnd(line-1);
- }
-
- CharClassify::cc Document::WordCharClass(unsigned char ch) const {
- if ((SC_CP_UTF8 == dbcsCodePage) && (!UTF8IsAscii(ch)))
- return CharClassify::ccWord;
- return charClass.GetClass(ch);
- }
-
- /**
- * Used by commmands that want to select whole words.
- * Finds the start of word at pos when delta < 0 or the end of the word when delta >= 0.
- */
- int Document::ExtendWordSelect(int pos, int delta, bool onlyWordCharacters) {
- CharClassify::cc ccStart = CharClassify::ccWord;
- if (delta < 0) {
- if (!onlyWordCharacters)
- ccStart = WordCharClass(cb.CharAt(pos-1));
- while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart))
- pos--;
- } else {
- if (!onlyWordCharacters && pos < Length())
- ccStart = WordCharClass(cb.CharAt(pos));
- while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
- pos++;
- }
- return MovePositionOutsideChar(pos, delta, true);
- }
-
- /**
- * Find the start of the next word in either a forward (delta >= 0) or backwards direction
- * (delta < 0).
- * This is looking for a transition between character classes although there is also some
- * additional movement to transit white space.
- * Used by cursor movement by word commands.
- */
- int Document::NextWordStart(int pos, int delta) {
- if (delta < 0) {
- while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace))
- pos--;
- if (pos > 0) {
- CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
- while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart)) {
- pos--;
- }
- }
- } else {
- CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
- while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
- pos++;
- while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace))
- pos++;
- }
- return pos;
- }
-
- /**
- * Find the end of the next word in either a forward (delta >= 0) or backwards direction
- * (delta < 0).
- * This is looking for a transition between character classes although there is also some
- * additional movement to transit white space.
- * Used by cursor movement by word commands.
- */
- int Document::NextWordEnd(int pos, int delta) {
- if (delta < 0) {
- if (pos > 0) {
- CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
- if (ccStart != CharClassify::ccSpace) {
- while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == ccStart) {
- pos--;
- }
- }
- while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace) {
- pos--;
- }
- }
- } else {
- while (pos < Length() && WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace) {
- pos++;
- }
- if (pos < Length()) {
- CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
- while (pos < Length() && WordCharClass(cb.CharAt(pos)) == ccStart) {
- pos++;
- }
- }
- }
- return pos;
- }
-
- /**
- * Check that the character at the given position is a word or punctuation character and that
- * the previous character is of a different character class.
- */
- bool Document::IsWordStartAt(int pos) const {
- if (pos > 0) {
- CharClassify::cc ccPos = WordCharClass(CharAt(pos));
- return (ccPos == CharClassify::ccWord || ccPos == CharClassify::ccPunctuation) &&
- (ccPos != WordCharClass(CharAt(pos - 1)));
- }
- return true;
- }
-
- /**
- * Check that the character at the given position is a word or punctuation character and that
- * the next character is of a different character class.
- */
- bool Document::IsWordEndAt(int pos) const {
- if (pos < Length()) {
- CharClassify::cc ccPrev = WordCharClass(CharAt(pos-1));
- return (ccPrev == CharClassify::ccWord || ccPrev == CharClassify::ccPunctuation) &&
- (ccPrev != WordCharClass(CharAt(pos)));
- }
- return true;
- }
-
- /**
- * Check that the given range is has transitions between character classes at both
- * ends and where the characters on the inside are word or punctuation characters.
- */
- bool Document::IsWordAt(int start, int end) const {
- return (start < end) && IsWordStartAt(start) && IsWordEndAt(end);
- }
-
- bool Document::MatchesWordOptions(bool word, bool wordStart, int pos, int length) const {
- return (!word && !wordStart) ||
- (word && IsWordAt(pos, pos + length)) ||
- (wordStart && IsWordStartAt(pos));
- }
-
- bool Document::HasCaseFolder(void) const {
- return pcf != 0;
- }
-
- void Document::SetCaseFolder(CaseFolder *pcf_) {
- delete pcf;
- pcf = pcf_;
- }
-
- Document::CharacterExtracted Document::ExtractCharacter(int position) const {
- const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(position));
- if (UTF8IsAscii(leadByte)) {
- // Common case: ASCII character
- return CharacterExtracted(leadByte, 1);
- }
- const int widthCharBytes = UTF8BytesOfLead[leadByte];
- unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 };
- for (int b=1; b<widthCharBytes; b++)
- charBytes[b] = static_cast<unsigned char>(cb.CharAt(position + b));
- int utf8status = UTF8Classify(charBytes, widthCharBytes);
- if (utf8status & UTF8MaskInvalid) {
- // Treat as invalid and use up just one byte
- return CharacterExtracted(unicodeReplacementChar, 1);
- } else {
- return CharacterExtracted(UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth);
- }
- }
-
- /**
- * Find text in document, supporting both forward and backward
- * searches (just pass minPos > maxPos to do a backward search)
- * Has not been tested with backwards DBCS searches yet.
- *
- * @param minPos Position where the search starts.
- * @param maxPos Position where the search ends.
- * @param search Bytes to look for; *length gives how many of them are used.
- * @param flags Combination of SCFIND_MATCHCASE, SCFIND_WHOLEWORD,
- *  SCFIND_WORDSTART and SCFIND_REGEXP.
- * @param length In: length of search. Out: in the case-insensitive UTF-8 and
- *  DBCS branches it is set to the length of the matched document text, which
- *  may differ from the search length after folding. Regular expression
- *  searches pass it on to the regex object.
- * @return Position of the match, -1 when nothing is found, or minPos when
- *  *length is not positive.
- *
- * Four strategies are used for plain searches: a byte comparison when case
- * matters, and for case-insensitive searches a folded comparison specialised
- * for UTF-8, for other multi-byte code pages and for single-byte text.
- * NOTE(review): the case-insensitive branches call pcf->Fold without checking
- * pcf, so a case folder appears to be required before they are reached.
- */
- long Document::FindText(int minPos, int maxPos, const char *search,
- int flags, int *length) {
- if (*length <= 0) // Nothing to search for
- return minPos;
- const bool caseSensitive = (flags & SCFIND_MATCHCASE) != 0;
- const bool word = (flags & SCFIND_WHOLEWORD) != 0;
- const bool wordStart = (flags & SCFIND_WORDSTART) != 0;
- const bool regExp = (flags & SCFIND_REGEXP) != 0;
- if (regExp) {
- if (!regex) // The regex object is created on first use
- regex = CreateRegexSearch(&charClass);
- return regex->FindText(this, minPos, maxPos, search, caseSensitive, word, wordStart, flags, length);
- } else {
-
- const bool forward = minPos <= maxPos;
- const int increment = forward ? 1 : -1;
-
- // Range endpoints should not be inside DBCS characters, but just in case, move them.
- const int startPos = MovePositionOutsideChar(minPos, increment, false);
- const int endPos = MovePositionOutsideChar(maxPos, increment, false);
-
- // Compute actual search ranges needed
- const int lengthFind = *length;
-
- //Platform::DebugPrintf("Find %d %d %s %d\n", startPos, endPos, ft->lpstrText, lengthFind);
- const int limitPos = Platform::Maximum(startPos, endPos); // A match may not extend past this position
- int pos = startPos;
- if (!forward) {
- // Back all of a character
- pos = NextPosition(pos, increment);
- }
- if (caseSensitive) {
- // Case sensitive: compare document bytes directly against search
- const int endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
- const char charStartSearch = search[0];
- while (forward ? (pos < endSearch) : (pos >= endSearch)) {
- if (CharAt(pos) == charStartSearch) { // Only compare the rest when the first byte matches
- bool found = (pos + lengthFind) <= limitPos;
- for (int indexSearch = 1; (indexSearch < lengthFind) && found; indexSearch++) {
- found = CharAt(pos + indexSearch) == search[indexSearch];
- }
- if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
- return pos;
- }
- }
- if (!NextCharacter(pos, increment))
- break;
- }
- } else if (SC_CP_UTF8 == dbcsCodePage) {
- // Case insensitive UTF-8: fold the search text once, then fold the document character by character
- const size_t maxFoldingExpansion = 4;
- std::vector<char> searchThing(lengthFind * UTF8MaxBytes * maxFoldingExpansion + 1);
- const int lenSearch = static_cast<int>(
- pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind));
- char bytes[UTF8MaxBytes + 1];
- char folded[UTF8MaxBytes * maxFoldingExpansion + 1];
- while (forward ? (pos < endPos) : (pos >= endPos)) {
- int widthFirstCharacter = 0;
- int posIndexDocument = pos;
- int indexSearch = 0;
- bool characterMatches = true;
- for (;;) {
- const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(posIndexDocument));
- bytes[0] = leadByte;
- int widthChar = 1;
- if (!UTF8IsAscii(leadByte)) {
- const int widthCharBytes = UTF8BytesOfLead[leadByte];
- for (int b=1; b<widthCharBytes; b++) {
- bytes[b] = cb.CharAt(posIndexDocument+b);
- }
- widthChar = UTF8Classify(reinterpret_cast<const unsigned char *>(bytes), widthCharBytes) & UTF8MaskWidth;
- }
- if (!widthFirstCharacter) // Remembered so a forward search can step over the whole first character
- widthFirstCharacter = widthChar;
- if ((posIndexDocument + widthChar) > limitPos)
- break;
- const int lenFlat = static_cast<int>(pcf->Fold(folded, sizeof(folded), bytes, widthChar));
- folded[lenFlat] = 0;
- // Does folded match the buffer
- characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
- if (!characterMatches)
- break;
- posIndexDocument += widthChar;
- indexSearch += lenFlat;
- if (indexSearch >= lenSearch)
- break;
- }
- if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
- if (MatchesWordOptions(word, wordStart, pos, posIndexDocument - pos)) {
- *length = posIndexDocument - pos; // Report the length of the document text matched
- return pos;
- }
- }
- if (forward) {
- pos += widthFirstCharacter;
- } else {
- if (!NextCharacter(pos, increment))
- break;
- }
- }
- } else if (dbcsCodePage) {
- // Case insensitive DBCS: characters are one or two bytes wide
- const size_t maxBytesCharacter = 2;
- const size_t maxFoldingExpansion = 4;
- std::vector<char> searchThing(lengthFind * maxBytesCharacter * maxFoldingExpansion + 1);
- const int lenSearch = static_cast<int>(
- pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind));
- while (forward ? (pos < endPos) : (pos >= endPos)) {
- int indexDocument = 0;
- int indexSearch = 0;
- bool characterMatches = true;
- while (characterMatches &&
- ((pos + indexDocument) < limitPos) &&
- (indexSearch < lenSearch)) {
- char bytes[maxBytesCharacter + 1];
- bytes[0] = cb.CharAt(pos + indexDocument);
- const int widthChar = IsDBCSLeadByte(bytes[0]) ? 2 : 1;
- if (widthChar == 2)
- bytes[1] = cb.CharAt(pos + indexDocument + 1);
- if ((pos + indexDocument + widthChar) > limitPos)
- break;
- char folded[maxBytesCharacter * maxFoldingExpansion + 1];
- const int lenFlat = static_cast<int>(pcf->Fold(folded, sizeof(folded), bytes, widthChar));
- folded[lenFlat] = 0;
- // Does folded match the buffer
- characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
- indexDocument += widthChar;
- indexSearch += lenFlat;
- }
- if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
- if (MatchesWordOptions(word, wordStart, pos, indexDocument)) {
- *length = indexDocument; // Report the length of the document text matched
- return pos;
- }
- }
- if (!NextCharacter(pos, increment))
- break;
- }
- } else {
- // Case insensitive single byte: fold each document byte and compare with the folded search text
- const int endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
- std::vector<char> searchThing(lengthFind + 1);
- pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
- while (forward ? (pos < endSearch) : (pos >= endSearch)) {
- bool found = (pos + lengthFind) <= limitPos;
- for (int indexSearch = 0; (indexSearch < lengthFind) && found; indexSearch++) {
- char ch = CharAt(pos + indexSearch);
- char folded[2];
- pcf->Fold(folded, sizeof(folded), &ch, 1);
- found = folded[0] == searchThing[indexSearch];
- }
- if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
- return pos;
- }
- if (!NextCharacter(pos, increment))
- break;
- }
- }
- }
- //Platform::DebugPrintf("Not found\n");
- return -1;
- }
-
- const char *Document::SubstituteByPosition(const char *text, int *length) {
- if (regex)
- return regex->SubstituteByPosition(this, text, length);
- else
- return 0;
- }
-
- int Document::LinesTotal() const {
- return cb.Lines();
- }
-
- void Document::SetDefaultCharClasses(bool includeWordClass) {
- charClass.SetDefaultCharClasses(includeWordClass);
- }
-
- void Document::SetCharClasses(const unsigned char *chars, CharClassify::cc newCharClass) {
- charClass.SetCharClasses(chars, newCharClass);
- }
-
- int Document::GetCharsOfClass(CharClassify::cc characterClass, unsigned char *buffer) {
- return charClass.GetCharsOfClass(characterClass, buffer);
- }
-
- void SCI_METHOD Document::StartStyling(int position, char) {
- endStyled = position;
- }
-
- bool SCI_METHOD Document::SetStyleFor(int length, char style) {
- if (enteredStyling != 0) {
- return false;
- } else {
- enteredStyling++;
- int prevEndStyled = endStyled;
- if (cb.SetStyleFor(endStyled, length, style)) {
- DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
- prevEndStyled, length);
- NotifyModified(mh);
- }
- endStyled += length;
- enteredStyling--;
- return true;
- }
- }
-
- bool SCI_METHOD Document::SetStyles(int length, const char *styles) {
- if (enteredStyling != 0) {
- return false;
- } else {
- enteredStyling++;
- bool didChange = false;
- int startMod = 0;
- int endMod = 0;
- for (int iPos = 0; iPos < length; iPos++, endStyled++) {
- PLATFORM_ASSERT(endStyled < Length());
- if (cb.SetStyleAt(endStyled, styles[iPos])) {
- if (!didChange) {
- startMod = endStyled;
- }
- didChange = true;
- endMod = endStyled;
- }
- }
- if (didChange) {
- DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
- startMod, endMod - startMod + 1);
- NotifyModified(mh);
- }
- enteredStyling--;
- return true;
- }
- }
-
- void Document::EnsureStyledTo(int pos) {
- if ((enteredStyling == 0) && (pos > GetEndStyled())) {
- IncrementStyleClock();
- if (pli && !pli->UseContainerLexing()) {
- int lineEndStyled = LineFromPosition(GetEndStyled());
- int endStyledTo = LineStart(lineEndStyled);
- pli->Colourise(endStyledTo, pos);
- } else {
- // Ask the watchers to style, and stop as soon as one responds.
- for (std::vector<WatcherWithUserData>::iterator it = watchers.begin();
- (pos > GetEndStyled()) && (it != watchers.end()); ++it) {
- it->watcher->NotifyStyleNeeded(this, it->userData, pos);
- }
- }
- }
- }
-
- void Document::LexerChanged() {
- // Tell the watchers the lexer has changed.
- for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
- it->watcher->NotifyLexerChanged(this, it->userData);
- }
- }
-
- int SCI_METHOD Document::SetLineState(int line, int state) {
- int statePrevious = static_cast<LineState *>(perLineData[ldState])->SetLineState(line, state);
- if (state != statePrevious) {
- DocModification mh(SC_MOD_CHANGELINESTATE, LineStart(line), 0, 0, 0, line);
- NotifyModified(mh);
- }
- return statePrevious;
- }
-
- int SCI_METHOD Document::GetLineState(int line) const {
- return static_cast<LineState *>(perLineData[ldState])->GetLineState(line);
- }
-
- int Document::GetMaxLineState() {
- return static_cast<LineState *>(perLineData[ldState])->GetMaxLineState();
- }
-
- void SCI_METHOD Document::ChangeLexerState(int start, int end) {
- DocModification mh(SC_MOD_LEXERSTATE, start, end-start, 0, 0, 0);
- NotifyModified(mh);
- }
-
- StyledText Document::MarginStyledText(int line) const {
- LineAnnotation *pla = static_cast<LineAnnotation *>(perLineData[ldMargin]);
- return StyledText(pla->Length(line), pla->Text(line),
- pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
- }
-
- void Document::MarginSetText(int line, const char *text) {
- static_cast<LineAnnotation *>(perLineData[ldMargin])->SetText(line, text);
- DocModification mh(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line);
- NotifyModified(mh);
- }
-
- void Document::MarginSetStyle(int line, int style) {
- static_cast<LineAnnotation *>(perLineData[ldMargin])->SetStyle(line, style);
- NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line));
- }
-
- void Document::MarginSetStyles(int line, const unsigned char *styles) {
- static_cast<LineAnnotation *>(perLineData[ldMargin])->SetStyles(line, styles);
- NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line));
- }
-
- void Document::MarginClearAll() {
- int maxEditorLine = LinesTotal();
- for (int l=0; l<maxEditorLine; l++)
- MarginSetText(l, 0);
- // Free remaining data
- static_cast<LineAnnotation *>(perLineData[ldMargin])->ClearAll();
- }
-
- StyledText Document::AnnotationStyledText(int line) const {
- LineAnnotation *pla = static_cast<LineAnnotation *>(perLineData[ldAnnotation]);
- return StyledText(pla->Length(line), pla->Text(line),
- pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
- }
-
- void Document::AnnotationSetText(int line, const char *text) {
- if (line >= 0 && line < LinesTotal()) {
- const int linesBefore = AnnotationLines(line);
- static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetText(line, text);
- const int linesAfter = AnnotationLines(line);
- DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line), 0, 0, 0, line);
- mh.annotationLinesAdded = linesAfter - linesBefore;
- NotifyModified(mh);
- }
- }
-
- void Document::AnnotationSetStyle(int line, int style) {
- static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetStyle(line, style);
- DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line), 0, 0, 0, line);
- NotifyModified(mh);
- }
-
- void Document::AnnotationSetStyles(int line, const unsigned char *styles) {
- if (line >= 0 && line < LinesTotal()) {
- static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetStyles(line, styles);
- }
- }
-
- int Document::AnnotationLines(int line) const {
- return static_cast<LineAnnotation *>(perLineData[ldAnnotation])->Lines(line);
- }
-
- void Document::AnnotationClearAll() {
- int maxEditorLine = LinesTotal();
- for (int l=0; l<maxEditorLine; l++)
- AnnotationSetText(l, 0);
- // Free remaining data
- static_cast<LineAnnotation *>(perLineData[ldAnnotation])->ClearAll();
- }
-
- void Document::IncrementStyleClock() {
- styleClock = (styleClock + 1) % 0x100000;
- }
-
- void SCI_METHOD Document::DecorationFillRange(int position, int value, int fillLength) {
- if (decorations.FillRange(position, value, fillLength)) {
- DocModification mh(SC_MOD_CHANGEINDICATOR | SC_PERFORMED_USER,
- position, fillLength);
- NotifyModified(mh);
- }
- }
-
- bool Document::AddWatcher(DocWatcher *watcher, void *userData) {
- WatcherWithUserData wwud(watcher, userData);
- std::vector<WatcherWithUserData>::iterator it =
- std::find(watchers.begin(), watchers.end(), wwud);
- if (it != watchers.end())
- return false;
- watchers.push_back(wwud);
- return true;
- }
-
- bool Document::RemoveWatcher(DocWatcher *watcher, void *userData) {
- std::vector<WatcherWithUserData>::iterator it =
- std::find(watchers.begin(), watchers.end(), WatcherWithUserData(watcher, userData));
- if (it != watchers.end()) {
- watchers.erase(it);
- return true;
- }
- return false;
- }
-
- void Document::NotifyModifyAttempt() {
- for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
- it->watcher->NotifyModifyAttempt(this, it->userData);
- }
- }
-
- void Document::NotifySavePoint(bool atSavePoint) {
- for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
- it->watcher->NotifySavePoint(this, it->userData, atSavePoint);
- }
- }
-
- void Document::NotifyModified(DocModification mh) {
- if (mh.modificationType & SC_MOD_INSERTTEXT) {
- decorations.InsertSpace(mh.position, mh.length);
- } else if (mh.modificationType & SC_MOD_DELETETEXT) {
- decorations.DeleteRange(mh.position, mh.length);
- }
- for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
- it->watcher->NotifyModified(this, mh, it->userData);
- }
- }
-
- bool Document::IsWordPartSeparator(char ch) const {
- return (WordCharClass(ch) == CharClassify::ccWord) && IsPunctuation(ch);
- }
-
- int Document::WordPartLeft(int pos) {
- if (pos > 0) {
- --pos;
- char startChar = cb.CharAt(pos);
- if (IsWordPartSeparator(startChar)) {
- while (pos > 0 && IsWordPartSeparator(cb.CharAt(pos))) {
- --pos;
- }
- }
- if (pos > 0) {
- startChar = cb.CharAt(pos);
- --pos;
- if (IsLowerCase(startChar)) {
- while (pos > 0 && IsLowerCase(cb.CharAt(pos)))
- --pos;
- if (!IsUpperCase(cb.CharAt(pos)) && !IsLowerCase(cb.CharAt(pos)))
- ++pos;
- } else if (IsUpperCase(startChar)) {
- while (pos > 0 && IsUpperCase(cb.CharAt(pos)))
- --pos;
- if (!IsUpperCase(cb.CharAt(pos)))
- ++pos;
- } else if (IsADigit(startChar)) {
- while (pos > 0 && IsADigit(cb.CharAt(pos)))
- --pos;
- if (!IsADigit(cb.CharAt(pos)))
- ++pos;
- } else if (IsPunctuation(startChar)) {
- while (pos > 0 && IsPunctuation(cb.CharAt(pos)))
- --pos;
- if (!IsPunctuation(cb.CharAt(pos)))
- ++pos;
- } else if (isspacechar(startChar)) {
- while (pos > 0 && isspacechar(cb.CharAt(pos)))
- --pos;
- if (!isspacechar(cb.CharAt(pos)))
- ++pos;
- } else if (!IsASCII(startChar)) {
- while (pos > 0 && !IsASCII(cb.CharAt(pos)))
- --pos;
- if (IsASCII(cb.CharAt(pos)))
- ++pos;
- } else {
- ++pos;
- }
- }
- }
- return pos;
- }
-
- int Document::WordPartRight(int pos) {
- char startChar = cb.CharAt(pos);
- int length = Length();
- if (IsWordPartSeparator(startChar)) {
- while (pos < length && IsWordPartSeparator(cb.CharAt(pos)))
- ++pos;
- startChar = cb.CharAt(pos);
- }
- if (!IsASCII(startChar)) {
- while (pos < length && !IsASCII(cb.CharAt(pos)))
- ++pos;
- } else if (IsLowerCase(startChar)) {
- while (pos < length && IsLowerCase(cb.CharAt(pos)))
- ++pos;
- } else if (IsUpperCase(startChar)) {
- if (IsLowerCase(cb.CharAt(pos + 1))) {
- ++pos;
- while (pos < length && IsLowerCase(cb.CharAt(pos)))
- ++pos;
- } else {
- while (pos < length && IsUpperCase(cb.CharAt(pos)))
- ++pos;
- }
- if (IsLowerCase(cb.CharAt(pos)) && IsUpperCase(cb.CharAt(pos - 1)))
- --pos;
- } else if (IsADigit(startChar)) {
- while (pos < length && IsADigit(cb.CharAt(pos)))
- ++pos;
- } else if (IsPunctuation(startChar)) {
- while (pos < length && IsPunctuation(cb.CharAt(pos)))
- ++pos;
- } else if (isspacechar(startChar)) {
- while (pos < length && isspacechar(cb.CharAt(pos)))
- ++pos;
- } else {
- ++pos;
- }
- return pos;
- }
-
/** Report whether @a c is a line feed or a carriage return. */
bool IsLineEndChar(char c) {
	switch (c) {
	case '\n':
	case '\r':
		return true;
	default:
		return false;
	}
}
-
- int Document::ExtendStyleRange(int pos, int delta, bool singleLine) {
- int sStart = cb.StyleAt(pos);
- if (delta < 0) {
- while (pos > 0 && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
- pos--;
- pos++;
- } else {
- while (pos < (Length()) && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
- pos++;
- }
- return pos;
- }
-
/**
 * Return the brace that pairs with @a ch for the (), [], {} and <> pairs,
 * or '\0' when @a ch is not one of those eight characters.
 */
static char BraceOpposite(char ch) {
	// Each opener is immediately followed by its closer.
	static const char bracePairs[] = "()[]{}<>";
	const int pairedChars = 8;
	for (int i = 0; i < pairedChars; i += 2) {
		const char opener = bracePairs[i];
		const char closer = bracePairs[i + 1];
		if (ch == opener)
			return closer;
		if (ch == closer)
			return opener;
	}
	return '\0';
}
-
- // TODO: should be able to extend styled region to find matching brace
- int Document::BraceMatch(int position, int /*maxReStyle*/) {
- char chBrace = CharAt(position);
- char chSeek = BraceOpposite(chBrace);
- if (chSeek == '\0')
- return - 1;
- char styBrace = static_cast<char>(StyleAt(position));
- int direction = -1;
- if (chBrace == '(' || chBrace == '[' || chBrace == '{' || chBrace == '<')
- direction = 1;
- int depth = 1;
- position = NextPosition(position, direction);
- while ((position >= 0) && (position < Length())) {
- char chAtPos = CharAt(position);
- char styAtPos = static_cast<char>(StyleAt(position));
- if ((position > GetEndStyled()) || (styAtPos == styBrace)) {
- if (chAtPos == chBrace)
- depth++;
- if (chAtPos == chSeek)
- depth--;
- if (depth == 0)
- return position;
- }
- int positionBeforeMove = position;
- position = NextPosition(position, direction);
- if (position == positionBeforeMove)
- break;
- }
- return - 1;
- }
-
- /**
- * Implementation of RegexSearchBase for the default built-in regular expression engine
- */
- class BuiltinRegex : public RegexSearchBase {
- public:
- explicit BuiltinRegex(CharClassify *charClassTable) : search(charClassTable) {}
-
- virtual ~BuiltinRegex() {
- }
-
- virtual long FindText(Document *doc, int minPos, int maxPos, const char *s,
- bool caseSensitive, bool word, bool wordStart, int flags,
- int *length);
-
- virtual const char *SubstituteByPosition(Document *doc, const char *text, int *length);
-
- private:
- RESearch search;
- std::string substituted;
- };
-
- namespace {
-
- /**
- * RESearchRange keeps track of search range.
- */
- class RESearchRange {
- public:
- const Document *doc;
- int increment;
- int startPos;
- int endPos;
- int lineRangeStart;
- int lineRangeEnd;
- int lineRangeBreak;
- RESearchRange(const Document *doc_, int minPos, int maxPos) : doc(doc_) {
- increment = (minPos <= maxPos) ? 1 : -1;
-
- // Range endpoints should not be inside DBCS characters, but just in case, move them.
- startPos = doc->MovePositionOutsideChar(minPos, 1, false);
- endPos = doc->MovePositionOutsideChar(maxPos, 1, false);
-
- lineRangeStart = doc->LineFromPosition(startPos);
- lineRangeEnd = doc->LineFromPosition(endPos);
- if ((increment == 1) &&
- (startPos >= doc->LineEnd(lineRangeStart)) &&
- (lineRangeStart < lineRangeEnd)) {
- // the start position is at end of line or between line end characters.
- lineRangeStart++;
- startPos = doc->LineStart(lineRangeStart);
- } else if ((increment == -1) &&
- (startPos <= doc->LineStart(lineRangeStart)) &&
- (lineRangeStart > lineRangeEnd)) {
- // the start position is at beginning of line.
- lineRangeStart--;
- startPos = doc->LineEnd(lineRangeStart);
- }
- lineRangeBreak = lineRangeEnd + increment;
- }
- Range LineRange(int line) const {
- Range range(doc->LineStart(line), doc->LineEnd(line));
- if (increment == 1) {
- if (line == lineRangeStart)
- range.start = startPos;
- if (line == lineRangeEnd)
- range.end = endPos;
- } else {
- if (line == lineRangeEnd)
- range.start = endPos;
- if (line == lineRangeStart)
- range.end = startPos;
- }
- return range;
- }
- };
-
- // Define a way for the Regular Expression code to access the document
- class DocumentIndexer : public CharacterIndexer {
- Document *pdoc;
- int end;
- public:
- DocumentIndexer(Document *pdoc_, int end_) :
- pdoc(pdoc_), end(end_) {
- }
-
- virtual ~DocumentIndexer() {
- }
-
- virtual char CharAt(int index) {
- if (index < 0 || index >= end)
- return 0;
- else
- return pdoc->CharAt(index);
- }
- };
-
- #ifdef CXX11_REGEX
-
- class ByteIterator : public std::iterator<std::bidirectional_iterator_tag, char> {
- public:
- const Document *doc;
- Position position;
- ByteIterator(const Document *doc_ = 0, Position position_ = 0) : doc(doc_), position(position_) {
- }
- ByteIterator(const ByteIterator &other) {
- doc = other.doc;
- position = other.position;
- }
- ByteIterator &operator=(const ByteIterator &other) {
- if (this != &other) {
- doc = other.doc;
- position = other.position;
- }
- return *this;
- }
- char operator*() const {
- return doc->CharAt(position);
- }
- ByteIterator &operator++() {
- position++;
- return *this;
- }
- ByteIterator operator++(int) {
- ByteIterator retVal(*this);
- position++;
- return retVal;
- }
- ByteIterator &operator--() {
- position--;
- return *this;
- }
- bool operator==(const ByteIterator &other) const {
- return doc == other.doc && position == other.position;
- }
- bool operator!=(const ByteIterator &other) const {
- return doc != other.doc || position != other.position;
- }
- int Pos() const {
- return position;
- }
- int PosRoundUp() const {
- return position;
- }
- };
-
- // On Windows, wchar_t is 16 bits wide and on Unix it is 32 bits wide.
- // Would be better to use sizeof(wchar_t) or similar to differentiate
- // but easier for now to hard-code platforms.
- // C++11 has char16_t and char32_t but neither Clang nor Visual C++
- // appear to allow specializing basic_regex over these.
-
- #ifdef _WIN32
- #define WCHAR_T_IS_16 1
- #else
- #define WCHAR_T_IS_16 0
- #endif
-
- #if WCHAR_T_IS_16
-
- // On Windows, report non-BMP characters as 2 separate surrogates as that
- // matches wregex since it is based on wchar_t.
- class UTF8Iterator : public std::iterator<std::bidirectional_iterator_tag, wchar_t> {
- // These 3 fields determine the iterator position and are used for comparisons
- const Document *doc;
- Position position;
- size_t characterIndex;
- // Remaining fields are derived from the determining fields so are excluded in comparisons
- unsigned int lenBytes;
- size_t lenCharacters;
- wchar_t buffered[2];
- public:
- UTF8Iterator(const Document *doc_ = 0, Position position_ = 0) :
- doc(doc_), position(position_), characterIndex(0), lenBytes(0), lenCharacters(0) {
- buffered[0] = 0;
- buffered[1] = 0;
- if (doc) {
- ReadCharacter();
- }
- }
- UTF8Iterator(const UTF8Iterator &other) {
- doc = other.doc;
- position = other.position;
- characterIndex = other.characterIndex;
- lenBytes = other.lenBytes;
- lenCharacters = other.lenCharacters;
- buffered[0] = other.buffered[0];
- buffered[1] = other.buffered[1];
- }
- UTF8Iterator &operator=(const UTF8Iterator &other) {
- if (this != &other) {
- doc = other.doc;
- position = other.position;
- characterIndex = other.characterIndex;
- lenBytes = other.lenBytes;
- lenCharacters = other.lenCharacters;
- buffered[0] = other.buffered[0];
- buffered[1] = other.buffered[1];
- }
- return *this;
- }
- wchar_t operator*() const {
- assert(lenCharacters != 0);
- return buffered[characterIndex];
- }
- UTF8Iterator &operator++() {
- if ((characterIndex + 1) < (lenCharacters)) {
- characterIndex++;
- } else {
- position += lenBytes;
- ReadCharacter();
- characterIndex = 0;
- }
- return *this;
- }
- UTF8Iterator operator++(int) {
- UTF8Iterator retVal(*this);
- if ((characterIndex + 1) < (lenCharacters)) {
- characterIndex++;
- } else {
- position += lenBytes;
- ReadCharacter();
- characterIndex = 0;
- }
- return retVal;
- }
- UTF8Iterator &operator--() {
- if (characterIndex) {
- characterIndex--;
- } else {
- position = doc->NextPosition(position, -1);
- ReadCharacter();
- characterIndex = lenCharacters - 1;
- }
- return *this;
- }
- bool operator==(const UTF8Iterator &other) const {
- // Only test the determining fields, not the character widths and values derived from this
- return doc == other.doc &&
- position == other.position &&
- characterIndex == other.characterIndex;
- }
- bool operator!=(const UTF8Iterator &other) const {
- // Only test the determining fields, not the character widths and values derived from this
- return doc != other.doc ||
- position != other.position ||
- characterIndex != other.characterIndex;
- }
- int Pos() const {
- return position;
- }
- int PosRoundUp() const {
- if (characterIndex)
- return position + lenBytes; // Force to end of character
- else
- return position;
- }
- private:
- void ReadCharacter() {
- Document::CharacterExtracted charExtracted = doc->ExtractCharacter(position);
- lenBytes = charExtracted.widthBytes;
- if (charExtracted.character == unicodeReplacementChar) {
- lenCharacters = 1;
- buffered[0] = static_cast<wchar_t>(charExtracted.character);
- } else {
- lenCharacters = UTF16FromUTF32Character(charExtracted.character, buffered);
- }
- }
- };
-
- #else
-
- // On Unix, report non-BMP characters as single characters
-
- class UTF8Iterator : public std::iterator<std::bidirectional_iterator_tag, wchar_t> {
- const Document *doc;
- Position position;
- public:
- UTF8Iterator(const Document *doc_=0, Position position_=0) : doc(doc_), position(position_) {
- }
- UTF8Iterator(const UTF8Iterator &other) {
- doc = other.doc;
- position = other.position;
- }
- UTF8Iterator &operator=(const UTF8Iterator &other) {
- if (this != &other) {
- doc = other.doc;
- position = other.position;
- }
- return *this;
- }
- wchar_t operator*() const {
- Document::CharacterExtracted charExtracted = doc->ExtractCharacter(position);
- return charExtracted.character;
- }
- UTF8Iterator &operator++() {
- position = doc->NextPosition(position, 1);
- return *this;
- }
- UTF8Iterator operator++(int) {
- UTF8Iterator retVal(*this);
- position = doc->NextPosition(position, 1);
- return retVal;
- }
- UTF8Iterator &operator--() {
- position = doc->NextPosition(position, -1);
- return *this;
- }
- bool operator==(const UTF8Iterator &other) const {
- return doc == other.doc && position == other.position;
- }
- bool operator!=(const UTF8Iterator &other) const {
- return doc != other.doc || position != other.position;
- }
- int Pos() const {
- return position;
- }
- int PosRoundUp() const {
- return position;
- }
- };
-
- #endif
-
- std::regex_constants::match_flag_type MatchFlags(const Document *doc, int startPos, int endPos) {
- std::regex_constants::match_flag_type flagsMatch = std::regex_constants::match_default;
- if (!doc->IsLineStartPosition(startPos))
- flagsMatch |= std::regex_constants::match_not_bol;
- if (!doc->IsLineEndPosition(endPos))
- flagsMatch |= std::regex_constants::match_not_eol;
- return flagsMatch;
- }
-
- template<typename Iterator, typename Regex>
- bool MatchOnLines(const Document *doc, const Regex ®exp, const RESearchRange &resr, RESearch &search) {
- bool matched = false;
- std::match_results<Iterator> match;
-
- // MSVC and libc++ have problems with ^ and $ matching line ends inside a range
- // If they didn't then the line by line iteration could be removed for the forwards
- // case and replaced with the following 4 lines:
- // Iterator uiStart(doc, startPos);
- // Iterator uiEnd(doc, endPos);
- // flagsMatch = MatchFlags(doc, startPos, endPos);
- // matched = std::regex_search(uiStart, uiEnd, match, regexp, flagsMatch);
-
- // Line by line.
- for (int line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) {
- const Range lineRange = resr.LineRange(line);
- Iterator itStart(doc, lineRange.start);
- Iterator itEnd(doc, lineRange.end);
- std::regex_constants::match_flag_type flagsMatch = MatchFlags(doc, lineRange.start, lineRange.end);
- matched = std::regex_search(itStart, itEnd, match, regexp, flagsMatch);
- // Check for the last match on this line.
- if (matched) {
- if (resr.increment == -1) {
- while (matched) {
- Iterator itNext(doc, match[0].second.PosRoundUp());
- flagsMatch = MatchFlags(doc, itNext.Pos(), lineRange.end);
- std::match_results<Iterator> matchNext;
- matched = std::regex_search(itNext, itEnd, matchNext, regexp, flagsMatch);
- if (matched) {
- if (match[0].first == match[0].second) {
- // Empty match means failure so exit
- return false;
- }
- match = matchNext;
- }
- }
- matched = true;
- }
- break;
- }
- }
- if (matched) {
- for (size_t co = 0; co < match.size(); co++) {
- search.bopat[co] = match[co].first.Pos();
- search.eopat[co] = match[co].second.PosRoundUp();
- size_t lenMatch = search.eopat[co] - search.bopat[co];
- search.pat[co].resize(lenMatch);
- for (size_t iPos = 0; iPos < lenMatch; iPos++) {
- search.pat[co][iPos] = doc->CharAt(iPos + search.bopat[co]);
- }
- }
- }
- return matched;
- }
-
- long Cxx11RegexFindText(Document *doc, int minPos, int maxPos, const char *s,
- bool caseSensitive, int *length, RESearch &search) {
- const RESearchRange resr(doc, minPos, maxPos);
- try {
- //ElapsedTime et;
- std::regex::flag_type flagsRe = std::regex::ECMAScript;
- // Flags that apper to have no effect:
- // | std::regex::collate | std::regex::extended;
- if (!caseSensitive)
- flagsRe = flagsRe | std::regex::icase;
-
- // Clear the RESearch so can fill in matches
- search.Clear();
-
- bool matched = false;
- if (SC_CP_UTF8 == doc->dbcsCodePage) {
- unsigned int lenS = static_cast<unsigned int>(strlen(s));
- std::vector<wchar_t> ws(lenS + 1);
- #if WCHAR_T_IS_16
- size_t outLen = UTF16FromUTF8(s, lenS, &ws[0], lenS);
- #else
- size_t outLen = UTF32FromUTF8(s, lenS, reinterpret_cast<unsigned int *>(&ws[0]), lenS);
- #endif
- ws[outLen] = 0;
- std::wregex regexp;
- #if defined(__APPLE__)
- // Using a UTF-8 locale doesn't change to Unicode over a byte buffer so '.'
- // is one byte not one character.
- // However, on OS X this makes wregex act as Unicode
- std::locale localeU("en_US.UTF-8");
- regexp.imbue(localeU);
- #endif
- regexp.assign(&ws[0], flagsRe);
- matched = MatchOnLines<UTF8Iterator>(doc, regexp, resr, search);
-
- } else {
- std::regex regexp;
- regexp.assign(s, flagsRe);
- matched = MatchOnLines<ByteIterator>(doc, regexp, resr, search);
- }
-
- int posMatch = -1;
- if (matched) {
- posMatch = search.bopat[0];
- *length = search.eopat[0] - search.bopat[0];
- }
- // Example - search in doc/ScintillaHistory.html for
- // [[:upper:]]eta[[:space:]]
- // On MacBook, normally around 1 second but with locale imbued -> 14 seconds.
- //double durSearch = et.Duration(true);
- //Platform::DebugPrintf("Search:%9.6g \n", durSearch);
- return posMatch;
- } catch (std::regex_error &) {
- // Failed to create regular expression
- throw RegexError();
- } catch (...) {
- // Failed in some other way
- return -1;
- }
- }
-
- #endif
-
- }
-
- long BuiltinRegex::FindText(Document *doc, int minPos, int maxPos, const char *s,
- bool caseSensitive, bool, bool, int flags,
- int *length) {
-
- #ifdef CXX11_REGEX
- if (flags & SCFIND_CXX11REGEX) {
- return Cxx11RegexFindText(doc, minPos, maxPos, s,
- caseSensitive, length, search);
- }
- #endif
-
- const RESearchRange resr(doc, minPos, maxPos);
-
- const bool posix = (flags & SCFIND_POSIX) != 0;
-
- const char *errmsg = search.Compile(s, *length, caseSensitive, posix);
- if (errmsg) {
- return -1;
- }
- // Find a variable in a property file: \$(\([A-Za-z0-9_.]+\))
- // Replace first '.' with '-' in each property file variable reference:
- // Search: \$(\([A-Za-z0-9_-]+\)\.\([A-Za-z0-9_.]+\))
- // Replace: $(\1-\2)
- int pos = -1;
- int lenRet = 0;
- const char searchEnd = s[*length - 1];
- const char searchEndPrev = (*length > 1) ? s[*length - 2] : '\0';
- for (int line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) {
- int startOfLine = doc->LineStart(line);
- int endOfLine = doc->LineEnd(line);
- if (resr.increment == 1) {
- if (line == resr.lineRangeStart) {
- if ((resr.startPos != startOfLine) && (s[0] == '^'))
- continue; // Can't match start of line if start position after start of line
- startOfLine = resr.startPos;
- }
- if (line == resr.lineRangeEnd) {
- if ((resr.endPos != endOfLine) && (searchEnd == '$') && (searchEndPrev != '\\'))
- continue; // Can't match end of line if end position before end of line
- endOfLine = resr.endPos;
- }
- } else {
- if (line == resr.lineRangeEnd) {
- if ((resr.endPos != startOfLine) && (s[0] == '^'))
- continue; // Can't match start of line if end position after start of line
- startOfLine = resr.endPos;
- }
- if (line == resr.lineRangeStart) {
- if ((resr.startPos != endOfLine) && (searchEnd == '$') && (searchEndPrev != '\\'))
- continue; // Can't match end of line if start position before end of line
- endOfLine = resr.startPos;
- }
- }
-
- DocumentIndexer di(doc, endOfLine);
- int success = search.Execute(di, startOfLine, endOfLine);
- if (success) {
- pos = search.bopat[0];
- // Ensure only whole characters selected
- search.eopat[0] = doc->MovePositionOutsideChar(search.eopat[0], 1, false);
- lenRet = search.eopat[0] - search.bopat[0];
- // There can be only one start of a line, so no need to look for last match in line
- if ((resr.increment == -1) && (s[0] != '^')) {
- // Check for the last match on this line.
- int repetitions = 1000; // Break out of infinite loop
- while (success && (search.eopat[0] <= endOfLine) && (repetitions--)) {
- success = search.Execute(di, pos+1, endOfLine);
- if (success) {
- if (search.eopat[0] <= minPos) {
- pos = search.bopat[0];
- lenRet = search.eopat[0] - search.bopat[0];
- } else {
- success = 0;
- }
- }
- }
- }
- break;
- }
- }
- *length = lenRet;
- return pos;
- }
-
- const char *BuiltinRegex::SubstituteByPosition(Document *doc, const char *text, int *length) {
- substituted.clear();
- DocumentIndexer di(doc, doc->Length());
- search.GrabMatches(di);
- for (int j = 0; j < *length; j++) {
- if (text[j] == '\\') {
- if (text[j + 1] >= '0' && text[j + 1] <= '9') {
- unsigned int patNum = text[j + 1] - '0';
- unsigned int len = search.eopat[patNum] - search.bopat[patNum];
- if (!search.pat[patNum].empty()) // Will be null if try for a match that did not occur
- substituted.append(search.pat[patNum].c_str(), len);
- j++;
- } else {
- j++;
- switch (text[j]) {
- case 'a':
- substituted.push_back('\a');
- break;
- case 'b':
- substituted.push_back('\b');
- break;
- case 'f':
- substituted.push_back('\f');
- break;
- case 'n':
- substituted.push_back('\n');
- break;
- case 'r':
- substituted.push_back('\r');
- break;
- case 't':
- substituted.push_back('\t');
- break;
- case 'v':
- substituted.push_back('\v');
- break;
- case '\\':
- substituted.push_back('\\');
- break;
- default:
- substituted.push_back('\\');
- j--;
- }
- }
- } else {
- substituted.push_back(text[j]);
- }
- }
- *length = static_cast<int>(substituted.length());
- return substituted.c_str();
- }
-
- #ifndef SCI_OWNREGEX
-
- #ifdef SCI_NAMESPACE
-
- RegexSearchBase *Scintilla::CreateRegexSearch(CharClassify *charClassTable) {
- return new BuiltinRegex(charClassTable);
- }
-
- #else
-
- RegexSearchBase *CreateRegexSearch(CharClassify *charClassTable) {
- return new BuiltinRegex(charClassTable);
- }
-
- #endif
-
- #endif