我有下面这段代码,它把较长的行拆分成由等长字符串组成的数组(不拆开单词),并且会处理像 [[u;#fff;]some text] 这样的格式标记:拆分文本时保证每个字符串都可以独立地转换为 HTML:
// Matches one formatting block of the form [[spec]text].
// Group 1 is the spec: style flags taken from "!gbiuso" followed by
// semicolon-separated fields. Group 2 is the formatted text, which may
// contain an escaped \] ; the final closing ] is optional, so a block that
// was cut off before its end still matches.
var format_re = /\[\[([!gbiuso]*;[^;\]]*;[^;\]]*(?:;|[^\]()]*);?[^\]]*)\]([^\]]*\\\][^\]]*|[^\]]*|[^\[]*\[[^\]]*)\]?/gi;
// Captures (group 1) the opening part of a formatting block, i.e. [[spec]
var format_begin_re = /(\[\[[!gbiuso]*;[^;]*;[^\]]*\])/i;
// Matches an opening formatting spec located at the very end of a string,
// with or without the ] that terminates the spec.
var format_last_re = /\[\[[!gbiuso]*;[^;]*;[^\]]*\]?$/i;
/**
 * Split a string into chunks of `length` visible characters while keeping
 * [[spec]text] formatting blocks valid inside every chunk.
 *
 * Characters that belong to a formatting spec are not counted, and an HTML
 * entity (&...;) is counted as a single character.
 *
 * @param {string} str - text to split; may contain newlines and formatting.
 * @param {number} length - number of visible characters per chunk.
 * @param {boolean} words - when truthy, break at the last recorded space
 *     instead of in the middle of a word, and trim the chunk.
 * @returns {Array} the chunks, in order; empty input lines yield ''.
 * @throws {Error} when an '&' is not followed by a ';'-terminated entity.
 */
$.terminal.split_equal = function(str, length, words) {
// true from the moment "[[" is seen until the block's closing "]"
var formatting = false;
// true while inside the text part of a formatting block (after the spec's "]")
var in_text = false;
// opening spec of a block that was left open by the previous chunk
var prev_format = '';
var result = [];
// add format text as 5th parameter to formatting it's used for
// data attribute in format function
var array = str.replace(format_re, function(_, format, text) {
var semicolons = format.match(/;/g).length;
// missing semicolons: pad the spec so the appended text copy always
// lands in the same field
if (semicolons == 2) {
semicolons = ';;';
} else if (semicolons == 3) {
semicolons = ';';
} else {
semicolons = '';
}
// return '[[' + format + ']' + text + ']';
// closing bracket will break formatting so we need to escape
// those using html entity equivalent
// The copy of the text stored in the spec has \] unescaped and real
// newlines replaced by the two characters "\n", so the later split on
// newlines does not cut through the spec.
return '[[' + format + semicolons +
text.replace(/\\\]/g, ']').replace(/\n/g, '\\n') + ']' +
text + ']';
}).split(/\n/g);
// Process every input line separately; `formatting`, `in_text` and
// `prev_format` deliberately survive from one line to the next.
for (var i = 0, len = array.length; i < len; ++i) {
if (array[i] === '') {
result.push('');
continue;
}
var line = array[i];
// index in `line` where the chunk currently being built starts
var first_index = 0;
// visible characters counted for the current chunk
var count = 0;
// index of the last break opportunity found in visible text, -1 if none
var space = -1;
for (var j=0, jlen=line.length; j<jlen; ++j) {
if (line[j] === '[' && line[j+1] === '[') {
// NOTE(review): `in_text` is not reset here, so a stale value from an
// earlier block can leak into this one -- confirm whether that is intended.
formatting = true;
} else if (formatting && line[j] === ']') {
// first "]" ends the spec and starts the text, second "]" ends the block
if (in_text) {
formatting = false;
in_text = false;
} else {
in_text = true;
}
} else if ((formatting && in_text) || !formatting) {
// only characters outside of a formatting spec are counted
if (line[j] === '&') { // treat entity as one character
var m = line.substring(j).match(/^(&[^;]+;)/);
if (!m) {
// should never happen if used by terminal,
// because it always calls $.terminal.encode
// before this function
throw new Error("Unclosed html entity in line " +
(i+1) + ' at char ' + (j+1));
}
j+=m[1].length-2; // because continue adds 1 to j
// if entity is at the end there is no next loop
// issue #77
// NOTE(review): `output` is declared with `var` further down, so here it
// holds whatever the previous chunk left in it (or undefined). After the
// adjustment above j sits one character before the entity's ';', so this
// comparison looks like it can never be true -- verify.
if (j === jlen-1) {
result.push(output + m[1]);
}
continue;
} else if (line[j] === ']' && line[j-1] === '\\') {
// escape \] counts as one character
--count;
} else {
++count;
}
}
// Reads `line` and `j` from the enclosing scope: is the text right before
// position j a break character?
// NOTE(review): the first test compares a 6-character substring with a
// 1-character literal; it looks like an HTML entity was decoded when this
// snippet was copied -- confirm against the original source.
function is_space() {
return line.substring(j-6, j) == ' ' ||
line.substring(j-1, j) == ' ';
}
// remember the break opportunity only when it lies in visible text
if (is_space() && ((formatting && in_text) || !formatting)) {
space = j;
}
// emit a chunk when it is full or the line ends (never inside a spec)
if ((count === length || j === jlen-1) &&
((formatting && in_text) || !formatting)) {
var output;
// look ahead from the last break opportunity to decide whether breaking
// on a word boundary is possible at all
var after = line.substring(space, j+length+1);
// jQuery is used to reduce the markup/entities to plain text
var text = $('<span>' + after + '</span>').text();
var can_break = text.match(/\s/);
if (words && space != -1 && j !== jlen-1 && can_break) {
// get text to last space
output = line.substring(first_index, space);
// rewind so the characters after the space are scanned again
j = space-1;
space = -1;
} else {
output = line.substring(first_index, j+1);
}
if (words) {
// strip leading and trailing whitespace from the chunk
output = output.replace(/^( |\s)+|( |\s)+$/g, '');
}
first_index = j+1;
count = 0;
if (prev_format) {
// re-open the formatting that the previous chunk had to close early
output = prev_format + output;
// a string argument to match() is turned into a RegExp; this just
// checks whether the chunk contains a literal "]"
if (output.match(']')) {
prev_format = '';
}
}
// Fix output if formatting not closed
// format_re has the g flag, so `matched` holds every block in the chunk
var matched = output.match(format_re);
if (matched) {
var last = matched[matched.length-1];
if (last[last.length-1] !== ']') {
// last block is unterminated: close it here and reopen it next chunk
prev_format = last.match(format_begin_re)[1];
output += ']';
} else if (output.match(format_last_re)) {
// chunk ends with a bare opening spec: move it to the next chunk
// (line_len is assigned but not used anywhere in this function)
var line_len = output.length;
// why this line ???
//var f_len = line_len-last[last.length-1].length;
output = output.replace(format_last_re, '');
prev_format = last.match(format_begin_re)[1];
}
}
result.push(output);
}
}
}
return result;
};它几乎正常工作,但有些线条比它应该喜欢的更短:
is cracker.The term
在这个 fiddle 中,勾选复选框去掉格式之后,它就能正常工作。我已经折腾了好几个小时,仍然不知道为什么这一行会更短,非常感谢任何帮助。
发布于 2015-12-28 21:19:50
下面是如何修复原始代码:
在第40行之后添加以下内容:
in_text = false;
代码使用 in_text 标志来判断当前位置是否处于普通文本中,但在进入格式标记区域时并没有清除该标志。这正是问题中所描述的“某些行过短”这一主要问题的原因。
将第76/77行的if语句更改为:
if (is_space() && ((formatting && in_text) || !formatting || (line[j] === '[' && line[j+1] === '['))) {
这解决了一个较小的问题:在普通文本与格式化文本之间的空格处原先不会换行。
这里的工作小提琴:https://jsfiddle.net/2w10xp3m/1/
发布于 2015-12-28 21:16:43
我想我已经用一种简单得多的方法解决了这个问题。首先拆分所有单词,然后重新组装行,同时跟踪当前格式。见JsFiddle。
JavaScript
$.terminal.split_equal = function(str, length, words) {
var result = [],
currentFormat = null,
currentLine = '',
currentLineLengthWithoutFormatting = 0;
// 1. Split words on
words = str.split(/ /g);
// 2. Re-assemble lines while keeping track of current formats
words.forEach(function(word) {
// Keep track of current format
var format = word.match(/^\[\[([^\]]+)\]/g),
wordWithFormatting, wordLength;
if (format !== null && format[0]) {
currentFormat = format[0];
word = word.slice(format[0].length);
}
// Apply current format to each word separatly
wordLength = word.length;
wordWithFormatting = (currentFormat || '') + word;
if (currentFormat) {
if (word.indexOf(']') !== -1) {
wordLength--;
currentFormat = null;
} else {
wordWithFormatting += ']';
}
}
// Assemble line
if (currentLineLengthWithoutFormatting + wordLength <= length) {
// Word still fits on current line
if (currentLineLengthWithoutFormatting > 0) {
currentLine += ' ';
currentLineLengthWithoutFormatting++;
}
} else {
// Need to start new line
result.push(currentLine);
currentLine = '';
currentLineLengthWithoutFormatting = 0;
}
currentLine += wordWithFormatting;
currentLineLengthWithoutFormatting += wordLength;
});
if (currentLineLengthWithoutFormatting > 0)
result.push(currentLine);
return result;
};发布于 2019-07-18 20:51:18
npm 包 paragraph-builder(上文译作“段落建设者”)可以把连续的文本分割成若干所谓的“段落”,这些段落分布均匀,字数大致相同。这种“段落”的概念似乎正是你想要的。
您可以定义段落的字数。您可以将段落原则扩展到页面,考虑到页面平均包含的字符数和空格大致相同。
这个段落生成器是一个 Node 脚本,用于从连续文本生成段落。它输出的文本中每个段落的大小大致相同,从而使段落在文本中均匀分布。它不会在像 "1.2" 这样的数字处进行拆分。
有一个选项可以定义段落之间的中断字符,或者可以将段落提取到字符串数组中,您可以从中应用html标记<p>。检查其文件以获得进一步的澄清。
https://stackoverflow.com/questions/34474825
复制相似问题