Skip to content

Commit

Permalink
convert bash's ANSI-C quoted strings to their value
Browse files Browse the repository at this point in the history
  • Loading branch information
verhovsky committed Apr 7, 2021
1 parent 6c60415 commit 73ef3f9
Show file tree
Hide file tree
Showing 7 changed files with 212 additions and 12 deletions.
26 changes: 24 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ $ node example.js --foo=99.3
* default: `true`
* key: `parse-positional-numbers`

Should positional keys that look like numbers be treated as such.
Should positional keys that look like numbers be treated as such?

```console
$ node example.js 99.3
Expand All @@ -262,6 +262,28 @@ $ node example.js 99.3
{ _: ['99.3'] }
```

### parse bash ANSI-C strings

* default: `false`
* key: `parse-bash-ansi-c-strings`

Should arguments that look like [ANSI-C quoted strings](https://www.gnu.org/software/bash/manual/html_node/ANSI_002dC-Quoting.html) (a bash-only feature) be treated as such?

_if disabled:_

```console
> example.js $'hello\nworld'
{ _: ["$'hello\\nworld'"] }
```

_if enabled:_

```console
> example.js $'hello\nworld'
{ _: ['hello\nworld'] }
```


### boolean negation

* default: `true`
Expand All @@ -274,7 +296,7 @@ $ node example.js --no-foo
{ _: [], foo: false }
```

_if disabled:_
_If disabled:_

```console
$ node example.js --no-foo
Expand Down
3 changes: 2 additions & 1 deletion lib/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import { format } from 'util'
import { readFileSync } from 'fs'
import { normalize, resolve } from 'path'
import { ArgsInput, Arguments, Parser, Options, DetailedArguments } from './yargs-parser-types.js'
import { camelCase, decamelize, looksLikeNumber } from './string-utils.js'
import { camelCase, decamelize, looksLikeNumber, parseAnsiCQuotedString } from './string-utils.js'
import { YargsParser } from './yargs-parser.js'

// See https://github.com/yargs/yargs-parser#supported-nodejs-versions for our
Expand Down Expand Up @@ -51,4 +51,5 @@ yargsParser.detailed = function (args: ArgsInput, opts?: Partial<Options>): Deta
yargsParser.camelCase = camelCase
yargsParser.decamelize = decamelize
yargsParser.looksLikeNumber = looksLikeNumber
yargsParser.parseAnsiCQuotedString = parseAnsiCQuotedString
export default yargsParser
55 changes: 55 additions & 0 deletions lib/string-utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,58 @@ export function looksLikeNumber (x: null | undefined | number | string): boolean
if (x.length > 1 && x[0] === '0') return false
return /^[-]?(?:\d+(?:\.\d*)?|\.\d+)(e[-+]?\d+)?$/.test(x)
}

/**
 * Converts a bash ANSI-C quoted string (the `$'some text'` form, a bash-only
 * feature) to the value it denotes.
 * https://www.gnu.org/software/bash/manual/html_node/ANSI_002dC-Quoting.html
 *
 * The first two characters and the last character of `str` are dropped
 * without being inspected, so callers are expected to have checked that the
 * input starts with `$'` and ends with `'`.
 *
 * @param str - the quoted string, including the leading `$'` and trailing `'`.
 * @returns the text between the quotes with every recognised backslash escape
 *   replaced by the character it stands for; unrecognised escapes are left
 *   untouched.
 * @throws Error when a `\c` escape is followed by a character whose code point
 *   is not between 2 and 126 (inclusive).
 * @throws RangeError (from `String.fromCodePoint`) when a `\U` escape names a
 *   code point greater than 10FFFF.
 */
export function parseAnsiCQuotedString (str: string): string {
  // Receives one full regex match (backslash included) and returns its value.
  function unescapeChar (x: string): string {
    switch (x.slice(0, 2)) {
      case '\\\\':
        return '\\'
      case '\\a':
        // Alert (bell). JavaScript has no `\a` escape: the literal '\a' is
        // just the letter 'a', so the code point has to be spelled out.
        return '\x07'
      case '\\b':
        return '\b'
      case '\\e':
      case '\\E':
        return '\u001b'
      case '\\f':
        return '\f'
      case '\\n':
        return '\n'
      case '\\r':
        return '\r'
      case '\\t':
        return '\t'
      case '\\v':
        return '\v'
      case "\\'":
        return "'"
      case '\\"':
        return '"'
      case '\\?':
        return '?'
      case '\\c': {
        // The regex below only matches `\c` together with the character that
        // follows it, so index 2 always exists here.
        const target = x.codePointAt(2)!
        // NOTE: bash handles all characters but we don't.
        if (target > 1 && target < 127) {
          // NOTE: if this returns a 0x00 (null) character, it will cause bash to
          // terminate the string at that character, but we just return the null
          // character in the result.
          // `\c?` is DEL; every other accepted character keeps its low five
          // bits, which is the same for upper- and lower-case ASCII letters.
          return target === 0x3f ? '\x7F' : String.fromCodePoint(target & 0b00011111)
        }
        throw Error("Unexpected control character in ANSI-C quoted string: '\\u{" + target.toString(16) + "}'")
      }
      case '\\x':
      case '\\u':
      case '\\U':
        // Hexadecimal character literal
        // NOTE: unlike bash, this will throw if the code point is greater than 10FFFF
        return String.fromCodePoint(parseInt(x.slice(2), 16))
    }
    // Octal character literal (one to three digits), reduced to eight bits
    return String.fromCodePoint(parseInt(x.slice(1), 8) % 256)
  }

  // `c\\\\` is listed before `c.` so that `\c\\` is consumed as one escape;
  // the `s` flag lets `c.` also accept a newline as the control target.
  const ANSI_BACKSLASHES = /\\(\\|a|b|e|E|f|n|r|t|v|'|"|\?|[0-7]{1,3}|x[0-9A-Fa-f]{1,2}|u[0-9A-Fa-f]{1,4}|U[0-9A-Fa-f]{1,8}|c\\\\|c.)/gs
  return str.substring(2, str.length - 1).replace(ANSI_BACKSLASHES, unescapeChar)
}
7 changes: 5 additions & 2 deletions lib/yargs-parser-types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,12 @@ export interface Configuration {
'nargs-eats-options': boolean;
/** The prefix to use for negated boolean variables. Default is `'no-'` */
'negation-prefix': string;
/** Should positional values that look like numbers be parsed? Default is `true` */
'parse-positional-numbers': boolean;
/** Should values that look like ANSI-C quoted strings (a bash-only feature) be parsed? Default is `false` */
'parse-bash-ansi-c-strings': boolean;
/** Should keys that look like numbers be treated as such? Default is `true` */
'parse-numbers': boolean;
/** Should positional values that look like numbers be parsed? Default is `true` */
'parse-positional-numbers': boolean;
/** Should unparsed flags be stored in -- or _? Default is `false` */
'populate--': boolean;
/** Should a placeholder be added for keys not set via the corresponding CLI argument? Default is `false` */
Expand Down Expand Up @@ -149,6 +151,7 @@ export interface Parser {
camelCase(str: string): string;
decamelize(str: string, joinString?: string): string;
looksLikeNumber(x: null | undefined | number | string): boolean;
parseAnsiCQuotedString(str: string): string;
}

export type StringFlag = Dictionary<string[]>;
Expand Down
25 changes: 19 additions & 6 deletions lib/yargs-parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,12 @@ import type {
ValueOf,
YargsParserMixin
} from './yargs-parser-types.js'
import { camelCase, decamelize, looksLikeNumber } from './string-utils.js'
import {
camelCase,
decamelize,
looksLikeNumber,
parseAnsiCQuotedString
} from './string-utils.js'

let mixin: YargsParserMixin
export class YargsParser {
Expand Down Expand Up @@ -69,6 +74,7 @@ export class YargsParser {
'negation-prefix': 'no-',
'parse-numbers': true,
'parse-positional-numbers': true,
'parse-bash-ansi-c-strings': false,
'populate--': false,
'set-placeholder-key': false,
'short-option-groups': true,
Expand Down Expand Up @@ -600,11 +606,18 @@ export class YargsParser {

function processValue (key: string, val: any) {
// strings may be quoted, clean this up as we assign values.
if (typeof val === 'string' &&
(val[0] === "'" || val[0] === '"') &&
val[val.length - 1] === val[0]
) {
val = val.substring(1, val.length - 1)
if (typeof val === 'string') {
if ((val[0] === "'" || val[0] === '"') &&
val[val.length - 1] === val[0]
) {
val = val.substring(1, val.length - 1)
} else if (configuration['parse-bash-ansi-c-strings'] && val.slice(0, 2) === "$'" && val[val.length - 1] === "'") {
try {
val = parseAnsiCQuotedString(val)
} catch (err) {
error = err
}
}
}

// handle parsing boolean arguments --foo=true --bar false.
Expand Down
14 changes: 13 additions & 1 deletion test/string-utils.cjs
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
/* global describe, it */

const { strictEqual } = require('assert')
const { camelCase, decamelize, looksLikeNumber } = require('../build/index.cjs')
const {
camelCase,
decamelize,
looksLikeNumber,
parseAnsiCQuotedString
} = require('../build/index.cjs')

describe('string-utils', function () {
describe('camelCase', () => {
Expand Down Expand Up @@ -33,4 +38,11 @@ describe('string-utils', function () {
strictEqual(looksLikeNumber('apple'), false)
})
})

describe('parseAnsiCQuotedString', () => {
  it('lets a control code literal eat a backslash in ANSI-C quoted strings', () => {
    // In `\c\ ` the control escape consumes the single backslash that follows
    // it ('\\' is 0x5C, and 0x5C & 0x1F is 0x1C); the trailing space is kept.
    const testCase = "$'\\c\\ '"
    strictEqual(parseAnsiCQuotedString(testCase), '\x1C ')
  })
})
})
94 changes: 94 additions & 0 deletions test/yargs-parser.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -3589,6 +3589,100 @@ describe('yargs-parser', function () {
})
})

// see: https://github.com/yargs/yargs-parser/issues/346
// Exercises the 'parse-bash-ansi-c-strings' configuration flag: values of the
// form $'...' are left alone by default and are unescaped when it is enabled.
describe('ANSI-C quoted strings', () => {
// With no configuration the $'...' text must come back unchanged, both as a
// positional argument and as an option value.
it('does not parse ANSI-C quoted strings by default', function () {
const args = parser(["$'\\n'"])
args._[0].should.equal("$'\\n'")
const args2 = parser("--foo $'\\t'")
args2.foo.should.equal("$'\\t'")
})

// Walks through each escape family with the flag enabled.
it('handles bash ANSI-C quoted strings', () => {
const args = parser("--foo $'text with \\n newline'", {
configuration: {
'parse-bash-ansi-c-strings': true
}
})
args.foo.should.equal('text with \n newline')

// Double quotes shouldn't work
const args2 = parser('--foo $"text without \\n newline"', {
configuration: {
'parse-bash-ansi-c-strings': true
}
})
args2.foo.should.equal('$"text without \\n newline"')

// Every single-character escape, concatenated in one argument.
const characters = '\\\\' + '\\a' + '\\b' + '\\e' + '\\E' + '\\f' + '\\n' + '\\r' + '\\t' + '\\v' + "\\'" + '\\"' + '\\?'
const args3 = parser("--foo $'" + characters + "'", {
configuration: {
'parse-bash-ansi-c-strings': true
}
})
// NOTE(review): JavaScript has no `\a` escape, so the '\a' in the expected
// literal below is just the letter 'a'. This assertion therefore does not
// check that `\a` yields BEL (U+0007); '\x07' would be needed for that.
args3.foo.should.equal('\\\a\b\u001b\u001b\f\n\r\t\v\'"?') // eslint-disable-line

// \x takes at most two hex digits; the extra "FF" stays as literal text.
const args4 = parser("--foo $'text \\xFFFF with \\xFF hex'", {
configuration: {
'parse-bash-ansi-c-strings': true
}
})
args4.foo.should.equal('text \u00FFFF with \u00FF hex')
// \u takes at most four hex digits; the extra "FF" stays as literal text.
const args5 = parser("--foo $'text \\uFFFFFF\\uFFFF with \\uFF hex'", {
configuration: {
'parse-bash-ansi-c-strings': true
}
})
args5.foo.should.equal('text \uFFFFFF\uFFFF with \u00FF hex')
// \U takes up to eight hex digits, so it can reach code points above U+FFFF.
const args6 = parser("--foo $'text \\U10FFFF\\UFFFF with \\U00FF hex'", {
configuration: {
'parse-bash-ansi-c-strings': true
}
})
const longCodePoint = String.fromCodePoint(0x10FFFF)
args6.foo.should.equal(`text ${longCodePoint}\uFFFF with \u00FF hex`)

// \c consumes exactly one following character; letters are case-insensitive
// (\cA -> 0x01, \cz -> 0x1A) and digits are accepted too (\c1 -> 0x11, \c0 -> 0x10).
const args7 = parser("--foo $'text \\cAB \\cz with \\c12 control \\c011 chars'", {
configuration: {
'parse-bash-ansi-c-strings': true
}
})
args7.foo.should.equal('text \u0001B \u001A with \u00112 control \u001011 chars')

// Octal escapes take one to three digits in 0-7; in `\129` the 9 is not an
// octal digit, so only `\12` is consumed and the 9 stays literal.
const args8 = parser("--foo $'text \\0 \\001 with \\12 \\123 \\129 octal'", {
configuration: {
'parse-bash-ansi-c-strings': true
}
})
args8.foo.should.equal('text \u0000 \u0001 with \u000A \u0053 \u000A9 octal')
})

// `\c\` and `\c\\` must both produce 0x1C, leaving the following 't' alone.
it('handles backslashes in ANSI-C quoted strings', () => {
const args = parser("--foo $'\\c\\t'", {
configuration: {
'parse-bash-ansi-c-strings': true
}
})
// A double backslash in a control code should be treated like a single backslash
const args2 = parser("--foo $'\\c\\\\t'", {
configuration: {
'parse-bash-ansi-c-strings': true
}
})
args.foo.should.equal(args2.foo)
args.foo.should.equal('\x1Ct')
})

// `\c` followed by U+007F is expected to be rejected; the failure is read
// from the `error` field of the detailed result rather than being thrown.
it('throws error for unsupported characters in control code escapes', () => {
const args = parser.detailed("--foo $'\\c\u007F'", {
configuration: {
'parse-bash-ansi-c-strings': true
}
})
args.error.message.should.match(/Unexpected control character/)
})
})

// see: https://github.com/yargs/yargs-parser/issues/144
it('number/string types should use default when no right-hand value', () => {
let argv = parser(['--foo'], {
Expand Down

0 comments on commit 73ef3f9

Please sign in to comment.