qTox  Version: nightly | Commit: bc751c8e1cac455f9690654fcfe0f560d2d7dfdd
textformatter.cpp
Go to the documentation of this file.
1 /*
2  Copyright © 2017-2019 by The qTox Project Contributors
3 
4  This file is part of qTox, a Qt-based graphical interface for Tox.
5 
6  qTox is libre software: you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation, either version 3 of the License, or
9  (at your option) any later version.
10 
11  qTox is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  GNU General Public License for more details.
15 
16  You should have received a copy of the GNU General Public License
17  along with qTox. If not, see <http://www.gnu.org/licenses/>.
18 */
19 
20 #include "textformatter.h"
21 
22 #include <QRegularExpression>
23 #include <QVector>
24 
25 // clang-format off
26 
27 // Note: escaping of '\' is only needed because QStringLiteral is broken by linebreak
28 static const QString SINGLE_SIGN_PATTERN = QStringLiteral("(?<=^|\\s)"
29  "[%1]"
30  "(?!\\s)"
31  "([^%1\\n]+?)"
32  "(?<!\\s)"
33  "[%1]"
34  "(?=$|\\s)");
35 
36 static const QString SINGLE_SLASH_PATTERN = QStringLiteral("(?<=^|\\s)"
37  "/"
38  "(?!\\s)"
39  "([^/\\n]+?)"
40  "(?<!\\s)"
41  "/"
42  "(?=$|\\s)");
43 
44 static const QString DOUBLE_SIGN_PATTERN = QStringLiteral("(?<=^|\\s)"
45  "[%1]{2}"
46  "(?!\\s)"
47  "([^\\n]+?)"
48  "(?<!\\s)"
49  "[%1]{2}"
50  "(?=$|\\s)");
51 
52 static const QString MULTILINE_CODE = QStringLiteral("(?<=^|\\s)"
53  "```"
54  "(?!`)"
55  "((.|\\n)+?)"
56  "(?<!`)"
57  "```"
58  "(?=$|\\s)");
59 
60 #define REGEXP_WRAPPER_PAIR(pattern, wrapper)\
61 {QRegularExpression(pattern,QRegularExpression::UseUnicodePropertiesOption),QStringLiteral(wrapper)}
62 
63 static const QPair<QRegularExpression, QString> REGEX_TO_WRAPPER[] {
64  REGEXP_WRAPPER_PAIR(SINGLE_SLASH_PATTERN, "<i>%1</i>"),
65  REGEXP_WRAPPER_PAIR(SINGLE_SIGN_PATTERN.arg('*'), "<b>%1</b>"),
66  REGEXP_WRAPPER_PAIR(SINGLE_SIGN_PATTERN.arg('_'), "<u>%1</u>"),
67  REGEXP_WRAPPER_PAIR(SINGLE_SIGN_PATTERN.arg('~'), "<s>%1</s>"),
68  REGEXP_WRAPPER_PAIR(SINGLE_SIGN_PATTERN.arg('`'), "<font color=#595959><code>%1</code></font>"),
69  REGEXP_WRAPPER_PAIR(DOUBLE_SIGN_PATTERN.arg('*'), "<b>%1</b>"),
70  REGEXP_WRAPPER_PAIR(DOUBLE_SIGN_PATTERN.arg('/'), "<i>%1</i>"),
71  REGEXP_WRAPPER_PAIR(DOUBLE_SIGN_PATTERN.arg('_'), "<u>%1</u>"),
72  REGEXP_WRAPPER_PAIR(DOUBLE_SIGN_PATTERN.arg('~'), "<s>%1</s>"),
73  REGEXP_WRAPPER_PAIR(MULTILINE_CODE, "<font color=#595959><code>%1</code></font>"),
74 };
75 
76 #undef REGEXP_WRAPPER_PAIR
77 
78 static const QString HREF_WRAPPER = QStringLiteral(R"(<a href="%1">%1</a>)");
79 static const QString WWW_WRAPPER = QStringLiteral(R"(<a href="http://%1">%1</a>)");
80 
81 static const QVector<QRegularExpression> WWW_WORD_PATTERN = {
82  QRegularExpression(QStringLiteral(R"((?<=^|\s)\S*((www\.)\S+))"))
83 };
84 
85 static const QVector<QRegularExpression> URI_WORD_PATTERNS = {
86  // Note: This does not match only strictly valid URLs, but we broaden search to any string following scheme to
87  // allow UTF-8 "IRI"s instead of ASCII-only URLs
88  QRegularExpression(QStringLiteral(R"((?<=^|\s)\S*((((http[s]?)|ftp)://)\S+))")),
89  QRegularExpression(QStringLiteral(R"((?<=^|\s)\S*((file|smb)://([\S| ]*)))")),
90  QRegularExpression(QStringLiteral(R"((?<=^|\s)\S*(tox:[a-zA-Z\d]{76}))")),
91  QRegularExpression(QStringLiteral(R"((?<=^|\s)\S*(mailto:\S+@\S+\.\S+))")),
92  QRegularExpression(QStringLiteral(R"((?<=^|\s)\S*(magnet:[?]((xt(.\d)?=urn:)|(mt=)|(kt=)|(tr=)|(dn=)|(xl=)|(xs=)|(as=)|(x.))[\S| ]+))")),
93  QRegularExpression(QStringLiteral(R"((?<=^|\s)\S*(gemini://\S+))")),
94 };
95 
96 
97 // clang-format on
98 
// Outcome of checking a URI candidate against the characters around it.
struct MatchingUri
{
    // true when the candidate was accepted; false by default
    bool valid = false;
    // number of characters of the accepted URI; 0 by default
    int length = 0;
};
103 
104 // pairs of characters that are ignored when surrounding a URI
105 static const QPair<QString, QString> URI_WRAPPING_CHARS[] = {
106  {QString("("), QString(")")},
107  {QString("["), QString("]")},
108  {QString("&quot;"), QString("&quot;")},
109  {QString("'"), QString("'")}
110 };
111 
112 // characters which are ignored from the end of URI
113 static const QChar URI_ENDING_CHARS[] = {
114  QChar::fromLatin1('?'),
115  QChar::fromLatin1('.'),
116  QChar::fromLatin1('!'),
117  QChar::fromLatin1(':'),
118  QChar::fromLatin1(',')
119 };
120 
126 MatchingUri stripSurroundingChars(const QStringRef wrappedUri, const int startOfBareUri)
127 {
128  bool matchFound;
129  int curValidationStartPos = 0;
130  int curValidationEndPos = wrappedUri.length();
131  do {
132  matchFound = false;
133  for (auto const& surroundChars : URI_WRAPPING_CHARS)
134  {
135  const int openingCharLength = surroundChars.first.length();
136  const int closingCharLength = surroundChars.second.length();
137  if (surroundChars.first == wrappedUri.mid(curValidationStartPos, openingCharLength) &&
138  surroundChars.second == wrappedUri.mid(curValidationEndPos - closingCharLength, closingCharLength)) {
139  curValidationStartPos += openingCharLength;
140  curValidationEndPos -= closingCharLength;
141  matchFound = true;
142  break;
143  }
144  }
145  for (QChar const endChar : URI_ENDING_CHARS) {
146  const int charLength = 1;
147  if (endChar == wrappedUri.at(curValidationEndPos - charLength)) {
148  curValidationEndPos -= charLength;
149  matchFound = true;
150  break;
151  }
152  }
153  } while (matchFound);
154  MatchingUri strippedMatch;
155  if (startOfBareUri != curValidationStartPos) {
156  strippedMatch.valid = false;
157  } else {
158  strippedMatch.valid = true;
159  strippedMatch.length = curValidationEndPos - startOfBareUri;
160  }
161  return strippedMatch;
162 }
163 
172 QString highlight(const QString& message, const QVector<QRegularExpression>& patterns, const QString& wrapper)
173 {
174  QString result = message;
175  for (const QRegularExpression& exp : patterns) {
176  const int startLength = result.length();
177  int offset = 0;
178  QRegularExpressionMatchIterator iter = exp.globalMatch(result);
179  while (iter.hasNext()) {
180  const QRegularExpressionMatch match = iter.next();
181  const int uriWithWrapMatch{0};
182  const int uriWithoutWrapMatch{1};
183  MatchingUri matchUri = stripSurroundingChars(match.capturedRef(uriWithWrapMatch),
184  match.capturedStart(uriWithoutWrapMatch) - match.capturedStart(uriWithWrapMatch));
185  if (!matchUri.valid) {
186  continue;
187  }
188  const QString wrappedURL = wrapper.arg(match.captured(uriWithoutWrapMatch).left(matchUri.length));
189  result.replace(match.capturedStart(uriWithoutWrapMatch) + offset, matchUri.length, wrappedURL);
190  offset = result.length() - startLength;
191  }
192  }
193  return result;
194 }
195 
201 QString highlightURI(const QString& message)
202 {
203  QString result = highlight(message, URI_WORD_PATTERNS, HREF_WRAPPER);
204  result = highlight(result, WWW_WORD_PATTERN, WWW_WRAPPER);
205  return result;
206 }
207 
213 static bool isTagIntersection(const QString& str)
214 {
215  const QRegularExpression TAG_PATTERN("(?<=<)/?[a-zA-Z0-9]+(?=>)");
216 
217  int openingTagCount = 0;
218  int closingTagCount = 0;
219 
220  QRegularExpressionMatchIterator iter = TAG_PATTERN.globalMatch(str);
221  while (iter.hasNext()) {
222  iter.next().captured()[0] == '/' ? ++closingTagCount : ++openingTagCount;
223  }
224  return openingTagCount != closingTagCount;
225 }
226 
234 QString applyMarkdown(const QString& message, bool showFormattingSymbols)
235 {
236  QString result = message;
237  for (const QPair<QRegularExpression, QString>& pair : REGEX_TO_WRAPPER) {
238  QRegularExpressionMatchIterator iter = pair.first.globalMatch(result);
239  int offset = 0;
240  while (iter.hasNext()) {
241  const QRegularExpressionMatch match = iter.next();
242  QString captured = match.captured(!showFormattingSymbols);
243  if (isTagIntersection(captured)) {
244  continue;
245  }
246 
247  const int length = match.capturedLength();
248  const QString wrappedText = pair.second.arg(captured);
249  const int startPos = match.capturedStart() + offset;
250  result.replace(startPos, length, wrappedText);
251  offset += wrappedText.length() - length;
252  }
253  }
254  return result;
255 }
textformatter.h
applyMarkdown
QString applyMarkdown(const QString &message, bool showFormattingSymbols)
HistMessageContentType::message
@ message
REGEXP_WRAPPER_PAIR
#define REGEXP_WRAPPER_PAIR(pattern, wrapper)
Definition: textformatter.cpp:60
highlightURI
QString highlightURI(const QString &message)