Skip to content

Commit

Permalink
Fix balena push "Segmentation fault" on Windows (replace 'mmmagic' wi…
Browse files Browse the repository at this point in the history
…th 'isBinaryFile')

Connects-to: #1611
Change-type: patch
  • Loading branch information
pdcastro committed Feb 12, 2020
1 parent 69714a6 commit 3be23df
Show file tree
Hide file tree
Showing 6 changed files with 175 additions and 63 deletions.
1 change: 0 additions & 1 deletion automation/build-bin.ts
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,6 @@ async function buildPkg() {
// [platform, [source path], [destination path]]
['*', ['open', 'xdg-open'], ['xdg-open']],
['darwin', ['denymount', 'bin', 'denymount'], ['denymount']],
['win32', ['mmmagic', 'magic', 'magic.mgc'], ['mmmagic', 'magic.mgc']],
];
await Bluebird.map(paths, ([platform, source, dest]) => {
if (platform === '*' || platform === process.platform) {
Expand Down
157 changes: 129 additions & 28 deletions lib/utils/eol-conversion.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,35 +15,19 @@
* limitations under the License.
*/

import { fs } from 'mz';
import Logger = require('./logger');

const globalLogger = Logger.getLogger();

// File size threshold (bytes) over which analysis/conversion is not performed.
// 10 * 1000 * 1000 is 10,000,000 bytes, i.e. 10 MB in decimal units (not 10 MiB).
const LARGE_FILE_THRESHOLD = 10 * 1000 * 1000;

// Encodings whose files are eligible for EOL conversion. The list is
// intentionally conservative for now: `convertEolInPlace()` only works with
// UTF-8 or single-byte encodings, so multi-byte encodings must not be added
// here without revisiting that function.
const CONVERTIBLE_ENCODINGS = ['ascii', 'utf-8'];

/**
 * Ask libmagic (through the 'mmmagic' bindings) for the MIME encoding of a
 * data buffer.
 *
 * @param data Bytes whose encoding should be detected
 * @returns The encoding name reported by mmmagic, with 'us-ascii' renamed to
 * 'ascii' so that it matches the name used by node's Buffer API
 */
async function detectEncoding(data: Buffer): Promise<string> {
  // Loaded lazily so that the native module is only required when needed
  const { Magic, MAGIC_MIME_ENCODING } = await import('mmmagic');
  const detector = new Magic(MAGIC_MIME_ENCODING);

  // Hand-written promise wrapper: `promisify(magic.detect)` was found to fail
  // with 'Illegal Invocation' (presumably because the detached method loses
  // its `this` binding - not confirmed).
  return new Promise((resolve, reject) => {
    detector.detect(data, (error, reported) => {
      if (error) {
        reject(error);
        return;
      }
      // mmmagic reports ascii as 'us-ascii', but node Buffer uses 'ascii'
      resolve(reported === 'us-ascii' ? 'ascii' : reported);
    });
  });
}
// Maximum number of bytes to consider when detecting the file encoding
const DETECT_MAX_BYTES = 1024;

/**
* Convert EOL (CRLF → LF) in place, i.e. modifying the input buffer.
Expand Down Expand Up @@ -90,10 +74,7 @@ export async function readFileWithEolConversion(
filepath: string,
convertEol: boolean,
): Promise<Buffer> {
const { fs } = await import('mz');
const fileBuffer = await fs.readFile(filepath);
const Logger = await import('./logger');
const globalLogger = Logger.getLogger();

// Skip processing of very large files
const fileStats = await fs.stat(filepath);
Expand All @@ -111,7 +92,7 @@ export async function readFileWithEolConversion(
}

// Skip further processing of files that don't contain CRLF
if (!fileBuffer.includes('\r\n', 0, encoding)) {
if (!fileBuffer.includes('\r\n')) {
return fileBuffer;
}

Expand All @@ -136,3 +117,123 @@ export async function readFileWithEolConversion(
return fileBuffer;
}
}

/**
 * Attempt to detect the encoding of a data buffer.
 * Code copied and modified from the npm package 'isbinaryfile' (MIT licence)
 * https://github.com/gjtorikian/isBinaryFile/blob/master/src/index.ts
 *
 * Detection happens in two stages:
 *  1. Byte-order marks / magic numbers at the very start of the data.
 *  2. A scan of at most DETECT_MAX_BYTES leading bytes. A NUL byte, an
 *     unexpected control byte or a malformed UTF-8 sequence classifies the
 *     data as 'binary'; otherwise it is reported as 'utf-8'.
 *
 * UTF-8 validation is structural only (lead byte followed by the expected
 * number of continuation bytes, for 2-, 3- and 4-byte sequences). Overlong
 * forms and surrogate code points are not rejected.
 *
 * @returns one of the possible values: '' (empty file), 'utf-8', 'utf-16',
 * 'utf-32', 'gb-18030', 'pdf', and 'binary'.
 *
 * Note: pure ASCII data is identified as 'utf-8' (ASCII is indeed a subset
 * of UTF-8).
 *
 * @param fileBuffer File contents whose encoding should be detected
 * @param bytesRead Optional "file size" if smaller than the buffer size.
 * Values larger than the buffer are clamped to the buffer length.
 */
export async function detectEncoding(
  fileBuffer: Buffer,
  bytesRead?: number,
): Promise<string> {
  // Never consider more bytes than the buffer actually holds
  const size = Math.min(bytesRead ?? fileBuffer.length, fileBuffer.length);
  // empty file
  if (size <= 0) {
    return '';
  }

  // Only the first DETECT_MAX_BYTES bytes are scanned for binary content
  const scanEnd = Math.min(size, DETECT_MAX_BYTES);

  // True if the data begins with exactly the given byte signature
  const startsWith = (...signature: number[]): boolean =>
    size >= signature.length &&
    signature.every((byte, idx) => fileBuffer[idx] === byte);

  // True if the byte at `idx` is a UTF-8 continuation byte (0b10xxxxxx)
  const isContinuation = (idx: number): boolean =>
    fileBuffer[idx] > 127 && fileBuffer[idx] < 192;

  // UTF-8 BOM
  if (startsWith(0xef, 0xbb, 0xbf)) {
    return 'utf-8';
  }
  // UTF-32 BE BOM
  if (startsWith(0x00, 0x00, 0xfe, 0xff)) {
    return 'utf-32';
  }
  // UTF-32 LE BOM. Must be tested before the UTF-16 LE BOM, which is a
  // prefix of it (FF FE).
  if (startsWith(0xff, 0xfe, 0x00, 0x00)) {
    return 'utf-32';
  }
  // GB BOM (https://en.wikipedia.org/wiki/GB_18030)
  if (startsWith(0x84, 0x31, 0x95, 0x33)) {
    return 'gb-18030';
  }
  // '%PDF-' magic number. PDF is binary.
  if (startsWith(0x25, 0x50, 0x44, 0x46, 0x2d)) {
    return 'pdf';
  }
  // UTF-16 BE BOM
  if (startsWith(0xfe, 0xff)) {
    return 'utf-16';
  }
  // UTF-16 LE BOM
  if (startsWith(0xff, 0xfe)) {
    return 'utf-16';
  }

  for (let i = 0; i < scanEnd; i++) {
    const c = fileBuffer[i];
    if (c === 0) {
      // NULL byte
      return 'binary';
    }
    if (c === 27) {
      // ESC character used in ANSI escape sequences for text color (log files)
      continue;
    }
    if ((c >= 7 && c <= 14) || (c >= 32 && c <= 127)) {
      // Common control characters (BEL..SO) and printable ASCII (plus DEL)
      continue;
    }

    // Anything else must be the lead byte of a multi-byte UTF-8 sequence.
    // 0xC0/0xC1 and 0xF5..0xFF can never start a valid sequence.
    let continuationCount: number;
    if (c > 193 && c < 224) {
      continuationCount = 1;
    } else if (c > 223 && c < 240) {
      continuationCount = 2;
    } else if (c > 239 && c < 245) {
      continuationCount = 3;
    } else {
      return 'binary';
    }

    // The continuation bytes may lie just past the scan window (a character
    // straddling the DETECT_MAX_BYTES boundary is still valid text), but they
    // must exist within the data: a sequence cut short by the end of the data
    // is malformed.
    if (i + continuationCount >= size) {
      return 'binary';
    }
    for (let k = 1; k <= continuationCount; k++) {
      if (!isContinuation(i + k)) {
        return 'binary';
      }
    }
    i += continuationCount;
  }

  return 'utf-8';
}
17 changes: 0 additions & 17 deletions npm-shrinkwrap.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 0 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,6 @@
"@types/lodash": "4.14.112",
"@types/mixpanel": "2.14.0",
"@types/mkdirp": "0.5.2",
"@types/mmmagic": "0.4.16-alpha",
"@types/mocha": "^5.2.7",
"@types/mz": "0.0.32",
"@types/net-keepalive": "^0.4.0",
Expand Down Expand Up @@ -205,7 +204,6 @@
"minimatch": "^3.0.4",
"mixpanel": "^0.10.3",
"mkdirp": "^0.5.1",
"mmmagic": "^0.5.3",
"moment": "^2.24.0",
"moment-duration-format": "^2.3.2",
"mz": "^2.7.0",
Expand Down
14 changes: 0 additions & 14 deletions patches/mmmagic+0.5.3.patch

This file was deleted.

47 changes: 46 additions & 1 deletion tests/utils/eol-conversion.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,13 @@
*/

import { expect } from 'chai';
import { fs } from 'mz';
import * as path from 'path';

import { convertEolInPlace } from '../../build/utils/eol-conversion';
import {
convertEolInPlace,
detectEncoding,
} from '../../build/utils/eol-conversion';

describe('convertEolInPlace() function', function() {
it('should return expected values', () => {
Expand Down Expand Up @@ -53,3 +58,43 @@ describe('convertEolInPlace() function', function() {
}
});
});

// Checks detectEncoding() against real files found in the local node_modules
// folder: native addons must be reported as 'binary' and launcher scripts as
// 'utf-8'. File paths are resolved relative to the current working directory.
describe('detectEncoding() function', function() {
  it('should correctly detect the encoding of a few selected files', async () => {
    // Native addons, given relative to the node_modules folder
    const nativeAddons = [
      'lzma-native/binding-v4.0.6-node-v64-darwin-x64/lzma_native.node',
      'net-keepalive/node_modules/ffi-napi/build/Release/ffi_bindings.node',
      'net-keepalive/node_modules/ffi-napi/build/Release/nothing.node',
      'ext2fs/build/Release/bindings.node',
      'drivelist/build/Release/drivelist.node',
      'resin-cli-visuals/node_modules/drivelist/build/Release/drivelist.node',
      '@balena.io/usb/build/Release/usb_bindings.node',
      'xxhash/build/Release/hash.node',
      'mountutils/build/Release/MountUtils.node',
      'fsevents/build/Release/fse.node',
    ];
    // Launcher scripts, given relative to the working directory
    const launcherScripts = [
      'node_modules/.bin/etcher-image-write',
      'node_modules/.bin/mocha',
      'node_modules/.bin/rimraf',
      'node_modules/.bin/gulp',
      'node_modules/.bin/prettier',
      'node_modules/.bin/coffeelint',
      'node_modules/.bin/tsc',
      'node_modules/.bin/resin-lint',
      'node_modules/.bin/balena-preload',
      'node_modules/.bin/catch-uncommitted',
    ];

    // One [file path, expected encoding] pair per sample: binaries first,
    // then text files, checked strictly one after the other.
    const expectations: Array<[string, string]> = [
      ...nativeAddons.map((rel): [string, string] => [
        path.join('node_modules', rel),
        'binary',
      ]),
      ...launcherScripts.map((rel): [string, string] => [rel, 'utf-8']),
    ];

    for (const [samplePath, expected] of expectations) {
      const contents = await fs.readFile(samplePath);
      const detected = await detectEncoding(contents);
      expect(detected).to.equal(expected);
    }
  });
});

0 comments on commit 3be23df

Please sign in to comment.