stringmagic/man/string_magic.Rd at master · cran/stringmagic

History

1237 lines (1008 loc) · 62.7 KB

Raw

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359

360

361

362

363

364

365

366

367

368

369

370

371

372

373

374

375

376

377

378

379

380

381

382

383

384

385

386

387

388

389

390

391

392

393

394

395

396

397

398

399

400

401

402

403

404

405

406

407

408

409

410

411

412

413

414

415

416

417

418

419

420

421

422

423

424

425

426

427

428

429

430

431

432

433

434

435

436

437

438

439

440

441

442

443

444

445

446

447

448

449

450

451

452

453

454

455

456

457

458

459

460

461

462

463

464

465

466

467

468

469

470

471

472

473

474

475

476

477

478

479

480

481

482

483

484

485

486

487

488

489

490

491

492

493

494

495

496

497

498

499

500

501

502

503

504

505

506

507

508

509

510

511

512

513

514

515

516

517

518

519

520

521

522

523

524

525

526

527

528

529

530

531

532

533

534

535

536

537

538

539

540

541

542

543

544

545

546

547

548

549

550

551

552

553

554

555

556

557

558

559

560

561

562

563

564

565

566

567

568

569

570

571

572

573

574

575

576

577

578

579

580

581

582

583

584

585

586

587

588

589

590

591

592

593

594

595

596

597

598

599

600

601

602

603

604

605

606

607

608

609

610

611

612

613

614

615

616

617

618

619

620

621

622

623

624

625

626

627

628

629

630

631

632

633

634

635

636

637

638

639

640

641

642

643

644

645

646

647

648

649

650

651

652

653

654

655

656

657

658

659

660

661

662

663

664

665

666

667

668

669

670

671

672

673

674

675

676

677

678

679

680

681

682

683

684

685

686

687

688

689

690

691

692

693

694

695

696

697

698

699

700

701

702

703

704

705

706

707

708

709

710

711

712

713

714

715

716

717

718

719

720

721

722

723

724

725

726

727

728

729

730

731

732

733

734

735

736

737

738

739

740

741

742

743

744

745

746

747

748

749

750

751

752

753

754

755

756

757

758

759

760

761

762

763

764

765

766

767

768

769

770

771

772

773

774

775

776

777

778

779

780

781

782

783

784

785

786

787

788

789

790

791

792

793

794

795

796

797

798

799

800

801

802

803

804

805

806

807

808

809

810

811

812

813

814

815

816

817

818

819

820

821

822

823

824

825

826

827

828

829

830

831

832

833

834

835

836

837

838

839

840

841

842

843

844

845

846

847

848

849

850

851

852

853

854

855

856

857

858

859

860

861

862

863

864

865

866

867

868

869

870

871

872

873

874

875

876

877

878

879

880

881

882

883

884

885

886

887

888

889

890

891

892

893

894

895

896

897

898

899

900

901

902

903

904

905

906

907

908

909

910

911

912

913

914

915

916

917

918

919

920

921

922

923

924

925

926

927

928

929

930

931

932

933

934

935

936

937

938

939

940

941

942

943

944

945

946

947

948

949

950

951

952

953

954

955

956

957

958

959

960

961

962

963

964

965

966

967

968

969

970

971

972

973

974

975

976

977

978

979

980

981

982

983

984

985

986

987

988

989

990

991

992

993

994

995

996

997

998

999

1000

% Generated by roxygen2: do not edit by hand

% Please edit documentation in R/string_magic_doc.R, R/string_magic_main.R

\name{string_magic}

\alias{string_magic}

\alias{.string_magic}

\alias{sma}

\title{String interpolation with operation chaining}

\usage{

string_magic(

...,

.envir = parent.frame(),

.data = list(),

.sep = "",

.vectorize = FALSE,

.delim = c("{", "}"),

.last = NULL,

.post = NULL,

.nest = FALSE,

.collapse = NULL,

.invisible = FALSE,

.default = NULL,

.trigger = TRUE,

.check = TRUE,

.class = NULL,

.help = NULL,

.namespace = NULL

)

.string_magic(

...,

.envir = parent.frame(),

.data = list(),

.sep = "",

.vectorize = FALSE,

.delim = c("{", "}"),

.collapse = NULL,

.last = NULL,

.nest = FALSE,

.trigger = TRUE,

.namespace = NULL

)

sma(

...,

.envir = parent.frame(),

.data = list(),

.sep = "",

.vectorize = FALSE,

.delim = c("{", "}"),

.last = NULL,

.post = NULL,

.nest = FALSE,

.collapse = NULL,

.invisible = FALSE,

.default = NULL,

.trigger = TRUE,

.check = TRUE,

.class = NULL,

.help = NULL,

.namespace = NULL

)

}

\arguments{

\item{...}{Character scalars that will be collapsed with the argument \code{.sep}. Note that

named arguments are used for substitution.

To interpolate, you can

use \code{"{x}"} within each character string to insert the value of \code{x} in the string.

You can add string operations in each \code{"{}"} instance with the syntax \code{"'arg'op ? x"}

(resp. \code{"'arg'op ! x"}) to apply the operation \code{'op'} with the argument \code{'arg'} to \code{x}

(resp. the verbatim of \code{x}). Otherwise, what to say? Ah, nesting is enabled, and since

there's over 50 operators, it's a bit complicated to sort you out in this small space.

Use the argument \code{.help = "keyword"} (or \code{.help = TRUE}) to obtain a selective help

from the main documentation.

Note that in interpolations you have access to the special variables: \code{.now} and \code{.date}

to get the current time; and the special function \code{.now("format")} to

format the time. Ex: \code{.now('\%Y-\%m \%H:\%M')}.}

\item{.envir}{An environment used to evaluate the variables in \code{"{}"}. By default the variables are

evaluated using the environment from where the function is called or using the named

arguments passed to the function.}

\item{.data}{A list used to evaluate the variables in \code{"{}"}. Default is the empty list.

By default the variables are evaluated using the environment from where the

function is called or using the named arguments passed to the function.}

\item{.sep}{Character scalar, default is the empty string \code{""}. It is used to collapse all

the elements in \code{...} before applying any operation.}

\item{.vectorize}{Logical scalar, default is \code{FALSE}. If \code{TRUE}, elements in \code{...} are

NOT collapsed together, but instead vectorised.}

\item{.delim}{Character vector of length 1 or 2. Default is \code{c("{", "}")}. Defines

the opening and the closing delimiters for interpolation.

If of length 1, it must be of the form: 1) the opening delimiter,

2) a single space, 3) the closing delimiter. Ex: \code{".[ ]"} is equivalent to \code{c(".[", "]")}.

The default value is equivalent to \code{"{ }"}.}

\item{.last}{Character scalar, a function, or \code{NULL} (default). If provided and character:

it must be a \code{string_magic} chain of operations of the form \code{"'arg1'op1, op2, etc"}. All these operations

are applied just before returning the vector. If a function,

it will be applied to the resulting vector.}

\item{.post}{Function or \code{NULL} (default). If not \code{NULL}, this function will be applied

after all the processing, just before returning the object. This function can have

extra arguments which will be caught directly in the \code{...} argument of \code{string_magic}.

For example if \code{.post = head}, you can directly pass the argument \code{n = 3} to \code{string_magic}'s arguments.}

\item{.nest}{Logical, default is \code{FALSE}. If \code{TRUE}, it will nest the original string within

interpolation delimiters, so that you can apply operations directly on the string. Example:

\code{string_magic("upper ! hello")} returns "upper ! hello", while \code{string_magic("upper ! hello", .nest = TRUE)}

returns \code{"HELLO"}.}

\item{.collapse}{Character scalar, default is \code{NULL}. If provided, the character vector

that should be returned is collapsed with the value of this argument. This leads

to return a string of length 1.}

\item{.invisible}{Logical scalar, default is \code{FALSE}. Whether the object returned should be

invisible (i.e. not printed on the console).}

\item{.default}{Character scalar or \code{NULL} (default). If provided, it must be

a sequence of \code{string_magic} operations. It will be applied as a default to any interpolation.

Ex: if \code{x = 1:2}, then \code{string_magic("x = {x}", .default = "enum")} leads to "x = 1 and 2",

and is equivalent to \code{string_magic("x = {enum?x}")}. Note that these default operations do

not apply to nested expressions. That is \code{string_magic("{!x{1:2}}", .default = "enum")} leads

to \code{c("x1", "x2")} and NOT \code{"x1 and 2"}.}

\item{.trigger}{Logical, default is \code{TRUE}. If \code{FALSE}, this function is not run. Can be

useful in debugging situations where we want conditional evaluations.}

\item{.check}{Logical scalar, default is \code{TRUE}. Whether to enable error-handling (i.e.

human readable error messages).

Without error-handling you can save something of the order of 40us. Useful only

in long loops.}

\item{.class}{Character vector representing the class to give to the object returned.

By default it is \code{NULL}. Note that the class \code{string_magic} has a specific \code{print} method, usually

nicer for small vectors (it \code{\link[base:cat]{base::cat()}}s the elements).}

\item{.help}{Character scalar or \code{TRUE}, default is \code{NULL}. This argument

is used to generate a dynamic help on the console. If \code{TRUE}, the user can select which

topic to read from the main documentation, with the possibility to search for keywords and

navigate the help pages. If a character scalar, then a regex search is performed on the main

documentation and any section containing a match is displayed. The user can easily

navigate across matches.}

\item{.namespace}{Character scalar or \code{NULL} (default). \strong{Only useful for package developers.}

As a regular end-user you shouldn't care! If your package uses \code{string_magic}, you should care.

It is useful \strong{only} if your package uses 'custom' \code{string_magic} operations, set with

\code{\link[=string_magic_register_fun]{string_magic_register_fun()}} or \code{\link[=string_magic_register_ops]{string_magic_register_ops()}}.}

}

\value{

It returns a character vector whose length depends on the elements and operations in the interpolations.

}

\description{

This is firstly a string interpolation tool. On top of this it can apply, and chain, over 50

basic string operations to the interpolated variables. Advanced support for pluralization.

}

\details{

There are over 50 basic string operations, it supports pluralization, string operations can be

nested, operations can be applied group-wise or conditionally and

operators have sensible defaults.

You can also declare your own operations with \code{\link[=string_magic_register_fun]{string_magic_register_fun()}} or \code{\link[=string_magic_register_ops]{string_magic_register_ops()}}.

They will be seamlessly integrated to \code{string_magic}.

The function \code{.string_magic} (prefixed with a dot) is a leaner version of the function \code{string_magic}.

It does the same operations but with the following differences:

\itemize{

\item there is no error handling: meaning that the error messages, if any, will be poor and

hard to understand

\item default options are not applied: hence the user must always explicitly provide the arguments

}

This leads to a faster processing time (of about 50 microseconds) at the cost of user experience.

If you want to change the default values of \code{string_magic} (like changing the delimiter), use

the function \code{\link[=string_magic_alias]{string_magic_alias()}}.

Use the argument \code{.help} to which

you can pass keywords or regular expressions and fetch selected pieces from the main documentation.

}

\section{Functions}{

\itemize{

\item \code{string_magic()}: String interpolation with operation chaining

\item \code{.string_magic()}: A simpler version of \code{string_magic} without any error handling to save a few microseconds

\item \code{sma()}: Alias to \code{string_magic}

}}

\section{Interpolation and string operations}{

Principle:

To interpolate a variable, say \code{x}, simply use \code{{x}}. For example \verb{x = "world"; string_magic("hello \{x\}")} leads

to "hello world".

To any interpolation you can add operations. Taking the previous example, say we want to display

"hello W O R L D". This means upper casing all letters of the interpolated variable and adding a space between

each of them. Do you think we can do that? Of course yes:

\code{string_magic("hello {upper, ''s, c ? x}")}. And that's it.

Now let's explain what happened. Within the \code{{}} \emph{box}, we first write a set of

operations, here "upper, ''s, c", then add "?" and finally write

the variable to interpolate, "x". The operations (explained in more details

below) are \code{upper}, upper-casing all letters, ''s: splitting

with the empty string, 'c': concatenating with spaces the vector of string that was just split.

The question mark means that the expression coming after it is to be evaluated

(this is opposed to the exclamation mark presented next).

The syntax is always the same: \code{{operations ? expression}}, where the operations section

is a \emph{comma separated} list of operations.

These operations are of the form \verb{'arg'op}, with \code{arg} the argument to the operator

code \code{op}. These operations are performed sequentially from left to right.

Some operations, like \code{upper}, accept options. You attach options to an operation

with a dot followed by the option name. Formally: \code{op.option1.option2}, etc.

Example: \verb{x = "hi there. what's up? fine." ; string_magic("He said: \{upper.sentence, Q ? x\}")}.

Leads to: \verb{He said: "Hi there. What's up? Fine."}.

Both operators and options are partially matched. So \code{string_magic("He said: {up.s, Q ? x}")} would

also work.

}

\section{Verbatim interpolation and nesting}{

Principle:

Instead of interpolating a variable, say \code{x}, with \code{{x}}, you can use an exclamation

mark to trigger verbatim evaluation.

For example \code{string_magic("hello {!x}")} would lead to "hello x". It's a

bit disappointing, right? What's the point of doing that? Wait until the next two paragraphs.

Verbatim evaluation is a powerful way to apply operations to plain text. For example:

\code{string_magic("hello {upper, ''s, c ! world}")} leads to "hello W O R L D".

(A note in passing. The spaces surrounding the exclamation mark are not necessary,

but when one space is present on both sides of the \code{!}, then the verbatim

expression only begins after it. Ex: \code{"{upper! hi}"} leads to " HI" while \code{"{upper ! hi}"}

leads to "HI" and \code{"{upper !  hi}"} leads to " HI".)

The second advantage of verbatim evaluations is \emph{nesting}. Anything in a verbatim

expression is evaluated with the function \code{string_magic}.

This means that any \emph{box} will be evaluated as previously described. Let's

give an example. You want to write the expression of a polynomial of order n: a + bx + cx^2 + etc.

You can do that with nesting. Assume we have \code{n = 2}.

Then \code{string_magic("poly({n}): {' + 'c ! {letters[1 + 0:n]}x^{0:n}}")} leads to

"poly(2): ax^0 + bx^1 + cx^2".

How does it work? The verbatim expression (the one following the exclamation mark),

here \code{"{letters[1 + 0:n]}x^{0:n}"}, is evaluated with \code{string_magic}.

\code{string_magic("{letters[1 + 0:n]}x^{0:n}")} leads to the vector c("ax^0", "bx^1", "cx^2").

The operation \verb{' + 'c} then concatenates (or collapses) that vector with ' + '.

This value is then appended to the previous string.

We could refine by adding a cleaning operation in which we replace "x^0" and "^1"

by the empty string. Let's do it:

\code{string_magic("poly({n}): {' + 'c, 'x\\\\^0|\\\\^1'r ! {letters[1 + 0:n]}x^{0:n}}")} leads to

"poly(2): a + bx + cx^2", what we wanted.

You can try to write a function to express the polynomial as before: although it is

a simple task, my guess is that it will require more typing.

}

\section{Operations}{

General syntax:

As seen in the previous sections, within a \emph{box} (i.e. \code{"{}"}), multiple operations

can be performed.

We can do so by stacking the operation codes in a comma-separated enumeration.

Operations can have arguments, and operations can also have options. The general

syntax, with argument and options, is:

\verb{\{'arg1'op1.optionA.optionB, arg2 op2.optionC, `arg3`op3, 51op4 ? x\}}

The argument can appear in four forms: a) inside single or double quotes just

before the operation name (\code{arg1} above),

b) verbatim, separated with a space, just before the operation name (\code{arg2} above),

c) inside backtick quotes the argument is evaluated from the environment (\code{arg3} above),

or d) when the argument is an integer it can be juxtaposed to the operation name (like in \code{op4} above).

The options are always dot separated and attached to the operation name, they are

specific to each operation.

Both the operation name and the option names are partially matched.

}

\section{Basic string operations}{

This section describes some of the most common string operations: extracting, replacing, collapsing, splitting, etc.

These functions accept generic flags ("ignore", "fixed", "word") in their patterns (syntax: "flags/pattern").

Please see the dedicated section for more information on flags.

\itemize{

\item s, split, S, Split: splits the string according to a pattern.

The operations have different defaults: \code{' '}

for \code{s} and 'split', and \code{',[ \\t\\n]*'} for \code{S} and 'Split' (i.e. comma separation).

Ex.1: \code{string_magic("{S ! romeo, juliet}")} leads to the vector c("romeo", "juliet").

Ex.2: \code{string_magic("{'f/+'s, '-'c ! 5 + 2} = 3")} leads to "5 - 2 = 3" (note the flag "fixed" in \code{s}'s pattern).

\item c, C: to concatenate multiple strings into a single one. The two operations are

identical; only their defaults change. c: default is \code{' '}, C: default is \code{', | and '}.

The syntax of the argument is 's1' or 's1|s2'. s1 is the string used to concatenate

(think \code{paste(x, collapse = s1)}). In arguments of the form \code{'s1|s2'}, \code{s2} will be used to concatenate the last two elements.

Ex.1: \verb{x = 1:4; string_magic("Et \{' et 'c ? x\}!")} leads to "Et 1 et 2 et 3 et 4!".

Ex.2: \code{string_magic("Choose: {', | or 'c ? 2:4}?")} leads to "Choose: 2, 3 or 4?".

\item x, X: extracts patterns from a string. Both have the same default: \code{'[[:alnum:]]+'}.

\code{x} extracts the first match while \code{X} extracts \strong{all} the matches.

Ex.1: \verb{x = c("6 feet under", "mahogany") ; string_magic("\{'\\\\w\{3\}'x ? x\}")} leads to the vector c("fee", "mah").

Ex.2: \verb{x = c("6 feet under", "mahogany") ; string_magic("\{'\\\\w\{3\}'X ? x\}")} leads to the

vector c("fee", "und", "mah", "oga").

\item extract: extracts multiple patterns from a string, this is an alias to the operation \code{X} described above.

Use the option "first" to extract only the first match for each string (behavior becomes like \code{x}).

Ex: \verb{x = c("margo: 32, 1m75", "luke doe: 27, 1m71") ; string_magic("\{'^\\\\w+'extract ? x\} is \{'\\\\d+'extract.first ? x\}")}

leads to c("margo is 32", "luke is 27").

\item r, R: replacement within a string. The two operations are identical and have no default.

The syntax is \code{'old'} or \code{'old => new'} with \code{'old'} the pattern to find and \code{new} the replacement. If \code{new} is missing, it is

considered the empty string. This operation also accepts the flag "total" which instructs to

replace the full string in case the pattern is found.

Ex.1: \code{string_magic("{'e'r ! Where is the letter e?}")} leads to "Whr is th lttr ?".

Ex.2: \code{string_magic("{'(?<!\\\\b)e => a'R ! Where is the letter e?}")} leads to "Whara is tha lattar e?".

Ex.3: \code{string_magic("{'t/e => here'r ! Where is the letter e?}")} leads to "here".

\item clean: replacement within a string. Similar to the operation \code{r}, except that here the comma is

a pattern separator, see detailed explanations in \code{\link[=string_clean]{string_clean()}}. Ex: \code{string_magic("{'f/[, ]'clean ! x[a]}")}

leads to "xa".

\item get: restricts the string only to values respecting a pattern. This operation has no default.

Accepts the options "equal" and "in".

By default it uses the same syntax as \code{\link[=string_get]{string_get()}} so that you can use regex flags and

include logical operations with \code{' & '} and \code{' | '} to detect patterns.

If the option "equal" is used, a simple string equality with the argument is tested (hence

no flags are accepted). If the option "in" is used, the argument is first split with respect to commas

and then set inclusion is tested.

Example: \verb{x = row.names(mtcars) ; string_magic("Mercedes models: \{'Merc & [[:alpha:]]$'get, '^.+ 'r, C ? x\}")}

leads to "Mercedes models: 240D, 280C, 450SE, 450SL and 450SLC".

\item is: detects if a pattern is present in a string, returns a logical vector. This operation has no default.

Accepts the options "equal" and "in".

By default it uses the same syntax as \code{\link[=string_is]{string_is()}} so that you can use regex flags and

include logical operations with \code{' & '} and \code{' | '} to detect patterns.

If the option "equal" is used, a simple string equality with the argument is tested (hence

no flags are accepted). If the option "in" is used, the argument is first split with respect to commas

and then set inclusion is tested.

Mostly useful as the final operation in a \code{\link[=string_ops]{string_ops()}} call.

Example: \verb{x = c("Mark", "Lucas") ; string_magic("Mark? \{'i/mark'is, C ? x\}")} leads to "Mark? TRUE and FALSE".

\item which: returns the index of string containing a specified pattern. With no default, can be applied

to a logical vector directly.

By default it uses the same syntax as string_which() so that you can use regex flags and

include logical operations with \code{' & '} and \code{' | '} to detect patterns.

If the option "equal" is used, a simple string equality with the argument is tested (hence

no flags are accepted). If the option "in" is used, the argument is first split with respect to commas

and then set inclusion is tested.

Mostly useful as the final operation in a \code{\link[=string_ops]{string_ops()}} call.

Ex.1: \verb{x = c("Mark", "Lucas") ; string_magic("Mark is number \{'i/mark'which ? x\}.")} leads to

"Mark is number 1.".

}

\section{Operations changing the length or the order}{

\itemize{

\item first: keeps only the first \code{n} elements. Example: \code{string_magic("First 3 numbers: {3 first, C ? mtcars$mpg}.")}

leads to "First 3 numbers: 21, 21 and 22.8.". Negative numbers as argument remove the

first \code{n} values. You can add a second argument in the form \verb{'n1|n2'first} in which case the first \code{n1} and last

\code{n2} values are kept; \code{n1} and \code{n2} must be positive numbers.

\item K, Ko, KO: keeps only the first \code{n} elements; has more options than \code{first}. The syntax is \verb{'n'K},

\verb{'n|s'K}, \verb{'n||s'K}. The values Ko and KO only accept the two first syntax (with \code{n} only).

\code{n} provides the number of elements to keep. If \code{s} is provided and the number of

elements are greater than \code{n}, then in 'n|s' the string \code{s} is added at the end, and

if 'n||s' the string s replaces the nth element.

The string \code{s} accepts special values:

\itemize{

\item \verb{:n:} or \verb{:N:} which gives the total number of items in digits or letters (N)

\item \verb{:rest:} or \verb{:REST:} which gives the number of elements that have been truncated in digits or letters (REST)

Ex: \code{string_magic("{'3|:rest: others'K ? 1:200}")} leads to the vector \code{c("1", "2", "3", "197 others")}.

\item The operator 'n'Ko is like \verb{'n||:rest: others'K} and 'n'KO is like \verb{'n||:REST: others'K}.

}

\item last: keeps only the last \code{n} elements. Example: \code{string_magic("Last 3 numbers: {3 last, C ? mtcars$mpg}.")}

leads to "Last 3 numbers: 19.7, 15 and 21.4.". Negative numbers as argument remove the

last \code{n} values.

\item sort: sorts the vector in increasing order. Accepts optional arguments and the option "num".

Example: \verb{x = c("sort", "me") ; string_magic("\{sort, c ? x\}")} leads to "me sort".

If an argument is provided, it must be a regex pattern that will be applied to

the vector using \code{\link[=string_clean]{string_clean()}}. The sorting will be applied to the modified version of the vector

and the original vector will be ordered according to this sorting.

Ex: \code{x = c("Jon Snow", "Khal Drogo")}; \code{string_magic("{'.+ 'sort, C?x}")} leads to

"Khal Drogo and Jon Snow". The option "num" sorts over a numeric version

(with silent conversion) of the vector and reorders the original vector accordingly.

Values which could not be converted are last.

\strong{Important note}: the sorting operation is applied before any character conversion.

If previous operations were applied, it is likely that numeric data were transformed to character.

Note the difference: \verb{x = c(20, 100, 10); string_magic("\{sort, ' + 'c ? x\}")} leads to "10 + 20 + 100"

while \code{string_magic("{n, sort, ' + 'c ? x}")} leads to "10 + 100 + 20" because the operation "n"

first transformed the numeric vector into character.

\item dsort: sorts the vector in decreasing order. It accepts an optional argument and

the option "num". Example: \code{string_magic("5 = {dsort, ' + 'c ? 2:3}")}

leads to "5 = 3 + 2". See the operation "sort" for a description of the argument and the option.

\item rev: reverses the vector. Example: \code{string_magic("{rev, ''c ? 1:3}")} leads to "321".

\item unik: makes the string vector unique. Example: \code{string_magic("Iris species: {unik, C ? iris$Species}.")}

leads to "Iris species: setosa, versicolor and virginica.".

\item table: computes the frequency of each element and attaches each element to its frequency.

Accepts an argument which must be a character string representing a \code{string_magic} interpolation

with the following variables: \code{x} (the element), \code{n} (its count) and \code{s} (its share). The default is \code{'{x} ({n ? n})'}. By default the resulting string vector is sorted by decreasing frequency.

You can change how the vector is sorted with five options: \code{sort} (sorts on the elements),

\code{dsort} (decreasing sort), \code{fsort} (sorts on frequency),

\code{dfsort} (decreasing sort on freq. -- default),

\code{nosort} (keeps the order of the first elements). Note that you can combine several sorts

(to resolve the ties of elements with same frequencies).

Example: \code{string_magic("Freq. of months: {'{x} ({n})'table, enum ? month.name[airquality$Month]}.")}

\item each: repeats each element of the vector \code{n} times. Option "c" then collapses the full vector

with the empty string as a separator. Ex.1: \code{string_magic("{/x, y}{2 each ? 1:2}")} leads to the

vector \code{c("x1", "y1", "x2", "y2")}. Ex.2: \code{string_magic("Large number: 1{5 each.c ! 0}")} leads to

"Large number: 100000".

\item times: repeats the vector sequence \code{n} times. Option "c" then collapses the full vector

with the empty string as a separator. Example: \code{string_magic("What{6 times.c ! ?}")} leads to "What??????".

\item rm: removes elements from the vector. Options: "empty", "blank", "noalpha", "noalnum", "all".

The \emph{optional} argument represents the pattern used to detect strings to be deleted.

Ex.1: \code{x = c("Luke", "Charles")}; \code{string_magic("{'i/lu'rm ? x}")} leads to "Charles". By default it removes

empty strings. Option "blank" removes strings containing only blank characters (spaces, tab, newline).

Option "noalpha" removes strings not containing letters. Option "noalnum" removes strings not

containing alpha numeric characters. Option "all" removes all strings (useful in conditions, see

the dedicated section). If an argument is provided, only the options "empty" and "blank" are available.

Ex.2: \code{x = c("I want to enter.", "Age?", "21")}; \code{string_magic("Nightclub conversation: {rm.noalpha, c ! - {x}}")}

leads to "Nightclub conversation: - I want to enter. - Age?"

\item nuke: removes all elements, equivalent to \code{rm.all} but possibly more explicit (not sure).

Useful in conditions, see the dedicated section.

Example: \verb{x = c(5, 7, 453, 647); string_magic("Small numbers only: \{if(.>20 ; nuke), C ? x\}")} leads

to "Small numbers only: 5 and 7";

\item insert: inserts a new element to the vector. Options: "right" and "both". Option "right" adds

the new element to the right. Option "both" inserts the new element on the two sides of the vector.

Example: \code{string_magic("{'3'insert.right, ' + 'c ? 1:2}")} leads to "1 + 2 + 3".

\item \code{dp} or \code{deparse}: Deparses an object and keeps only the first characters of

the deparsed string. Accepts a number as argument. In that case only the first \code{n}

characters are kept. Accepts option \code{long}: in that case all the

lines of the deparsed object are first collapsed.

Example: \verb{fml = y ~ x1 + x2; string_magic("The estimated model is \{dp ? fml\}.")}

}

\section{Formatting operations}{

\itemize{

\item lower: lower cases the full string.

\item upper: upper cases the full string. Options: "first" and "sentence".

Option "first" upper cases only the first character. Option "sentence"

upper cases the first letter after punctuation.

Ex: \verb{x = "hi. how are you? fine." ; string_magic("\{upper.sentence ? x\}")} leads

to "Hi. How are you? Fine.".

\item title: applies a title case to the string. Options: "force" and "ignore".

Option "force" first puts everything to lowercase before applying the title case.

Option "ignore" ignores a few small prepositions ("a", "the", "of", etc).

Ex: \verb{x = "bryan is in the KITCHEN" ; string_magic("\{title.force.ignore ? x\}")} leads to "Bryan Is in the Kitchen".

\item ws: normalizes whitespaces (WS). It trims the whitespaces on the edges and transforms any succession

of whitespaces into a single one. Can also be used to further clean the string with its options.

Options: "punct", "digit", "isolated". Option "punct" cleans the punctuation. Option "digit" cleans digits.

Option "isolated" cleans isolated letters. WS normalization always comes after any of these options.

\strong{Important note:} punctuation (or digits) are replaced with WS and \strong{not}

the empty string. This means that \code{string_magic("{ws.punct ! Meg's car}")} will become "Meg s car".

\item trimws: trims the white spaces on both ends of the strings.

\item q, Q, bq: to add quotes to the strings. q: single quotes, Q: double quotes, bq:

back quotes. \verb{x = c("Mark", "Pam"); string_magic("Hello \{q, C ? x\}!")} leads to "Hello 'Mark' and 'Pam'!".

\item format, Format: applies the base R's function \code{\link[base:format]{base::format()}} to the string.

By default, the values are left aligned, \emph{even numbers} (differently from \code{\link[base:format]{base::format()}}'s behavior).

The upper case command (\code{Format}) applies right alignment. Options: "0", "zero", "left", "right", "center".

Options "0" or "zero" fills the blanks with 0s: useful to format numbers. Option "right" right aligns,

and "center" centers the strings. Default is left alignment.

Ex: \verb{x = c(1, 12345); string_magic("left: \{format.0, q, C ? x\}, right: \{Format, q, C ? x\}")}

leads to "left: '000001' and '12,345', right: ' 1' and '12,345'".

\item \%: applies \code{\link[base:sprintf]{base::sprintf()}} formatting. The syntax is 'arg'\% with arg an sprintf formatting,

or directly the sprintf formatting, e.g. \verb{\% 5s}. Example: \code{string_magic("pi = {\%.3f ? pi}")} leads

to "pi = 3.142".

\item stopwords: removes basic English stopwords (the snowball list is used).

The stopwords are replaced with an empty space but the left and right WS are

untouched. So WS normalization may be needed (see operation \code{ws}).

\verb{x = c("He is tall", "He isn't young"); string_magic("Is he \{stop, ws, C ? x\}?")} leads to "Is he tall and young?".

\item ascii: turns all letters into ASCII with transliteration. Failed translations

are transformed into question marks. Options: "silent", "utf8". By default, if some conversion fails

a warning is prompted. Option "silent" disables the warning in case of failed conversion. The conversion

is done with \code{\link[base:iconv]{base::iconv()}}, option "utf8" indicates that the source encoding is UTF-8, which can be useful

in some cases.

\item round, r0 to r6: formats numbers by rounding at a given level. Options: \code{0} to \code{9}, \code{int}, \code{nocomma}, \code{s0}-\code{s9}.

Option \code{0} to \code{9} controls the number of digits to round at.

Option \code{int} is whether to preserve integers from formatting.

Option \code{nocomma} controls whether to drop the comma separating the thousands.

\code{s0}-\code{s9} also keeps a given number of significant digits.

\item signif, s0 to s6: formats numbers by displaying a certain number of significant digits. Options: \code{0} to \code{9}, \code{int}, \code{nocomma}, \code{r0}-\code{r9}.

Option \code{0} to \code{9} controls the number of significant digits to display.

Option \code{int} is whether to preserve integers from formatting.

Option \code{nocomma} controls whether to drop the comma separating the thousands.

\code{r0}-\code{r9} also rounds at a given number of decimals.

\item n: formats integers by adding a comma to separate thousands. Options: "letter", "upper", "0", "zero".

The option "letter" writes the number in letters (large numbers keep their numeric format). The option

"upper" is like the option "letter" but uppercases the first letter. Options "0" or "zero" left pads

numeric vectors with 0s. Ex.1: \verb{x = 5; string_magic("He's \{N ? x\} years old.")} leads to "He's five years old.".

Ex.2: \verb{x = c(5, 12, 52123); string_magic("She owes \{n.0, '$'paste, C ? x\}.")} leads to

"She owes $5, $12 and $52,123.".

\item N: same as \code{n} but automatically adds the option "letter".

\item nth: when applied to a number, these operators write them as a rank. Options: "letter",

"upper", "compact".

Ex.1: \verb{n = c(3, 7); string_magic("They finished \{nth, enum ? n\}!")} leads to "They finished 3rd and 7th!".

Option "letter" tries to write the numbers in letters, but note that it stops at 20. Option "upper"

is the same as "letter" but uppercases the first letter. Option "compact" aggregates

consecutive sequences in the form "start_n_th to end_n_th".

Ex.2: \code{string_magic("They arrived {nth.compact ? 5:20}.")} leads to "They arrived 5th to 20th.".

\item Nth: same as \code{nth}, but automatically adds the option "letter". Example:

\verb{n = c(3, 7); string_magic("They finished \{Nth, enum ? n\}!")} leads to "They finished third and seventh!".

\item ntimes: writes numbers in the form \code{n} times. Options: "letter", "upper". Option

"letter" writes the number in letters (up to 100). Option "upper" does the same as "letter"

and uppercases the first letter. Example: \code{string_magic("They lost {C ! {ntimes ? c(1, 12)} against {S!Real, Barcelona}}.")}

leads to "They lost once against Real and 12 times against Barcelona.".

\item Ntimes: same as \code{ntimes} but automatically adds the option "letter".

Example: \verb{x = 5; string_magic("This paper was rejected \{Ntimes ? x\}...")} leads to

"This paper was rejected five times...".

\item firstchar, lastchar: to select the first/last characters of each element.

Ex: \code{string_magic("{19 firstchar, 9 lastchar ! This is a very long sentence}")} leads to "very long".

Negative numbers remove the first/last characters.

\item k: to keep only the first n characters (like \code{firstchar} but with more options). The

argument can be of the form \verb{'n'k}, \verb{'n|s'k} or \verb{'n||s'k} with \code{n} a number and \code{s} a string.

\code{n} provides the number of characters to keep. Optionally, only for strings whose

length is greater than \code{n}, after truncation, the string \code{s} can be appended at the end.

The difference between 'n|s' and 'n||s' is that in the second case the strings

will always be of maximum size \code{n}, while in the first case they can be of length \code{n + nchar(s)}.

Ex: \code{string_magic("{4k ! long sentence}")} leads to "long", \code{string_magic("{'4|..'k ! long sentence}") }

leads to "long..", \code{string_magic("{'4||..'k ! long sentence}")} leads to "lo..".

\item fill, align (alias), width (alias): fills the character strings up to a size in order

to fit a given width. Options: "left", "right", "center".

Accepts arguments of the form \code{'n'} or \code{'n|s'}, with \code{n} a number and \code{s} a symbol.

Default is left-alignment of the strings.

Option "right" right aligns and "center" centers the strings. When using \code{'n|s'}, the symbol \code{s}

is used for the filling. By default if no argument is provided, the

maximum size of the character string is used. See help for \code{\link[=string_fill]{string_fill()}} for more information.

Ex.1: \code{string_magic("Numbers: {'5|0'fill.right, C ? c(1, 55)}")} leads to "Numbers: 00001 and 00055".

\item paste, append: pastes some character to all elements of the string. This operation has no default.

Options: "left", "both", "right", "front", "back", "delete". By default, a string is pasted on the left.

Option "right" pastes on the right and "both" pastes on both sides. Option "front" only

pastes on the first element while option "back" only pastes on the last element. Option "delete"

first replaces all elements with the empty string.

Example: \code{string_magic("6 = {'|'paste.both, ' + 'c ? -3:-1}")} leads to "6 = |-3| + |-2| + |-1|".

The argument can be of the form \code{s} or \code{s1|s2}. If of the second form, this is equivalent

to chaining two \code{paste} operations, once on the left and once on the right: \verb{'s1'paste, 's2'paste.right}.

\item join: joins lines ending with a double backslash. Ex: \code{x = "the sun \\\\\\n is shining"};

\code{string_magic("{join ? x}")} leads to "the sun is shining".

\item escape: adds backslashes in front of specific characters. Options \code{"nl"}, \code{"tab"}.

Option \code{"nl"} escapes the newlines (\verb{\\n}), leading them to be displayed as \code{"\\\\\\\\n"}.

Option \code{"tab"} does the same for tabs (\code{"\\t"}). This is useful to make the value free

of space formatters.

The default behavior is to escape both newlines and tabs.

}

}

\section{Other operations}{

\itemize{

\item num: converts to numeric. Options: "warn", "soft", "rm", "clear". By default, the conversion

is performed silently and elements that failed to convert are turned into NA.

Option "warn" displays a warning if the conversion to numeric fails.

Option "soft" does not convert if the conversion of at least one element fails.

Option "rm" converts and removes the elements that could not be converted.

Option "clear" turns failed conversions into the empty string, and hence leads to a character vector.

Example: \verb{x = c(5, "six"); string_magic("Compare \{num, C, q ? x\} with \{num.rm, C, q ? x\}.")} leads to

"Compare '5 and NA' with '5'.", and \code{string_magic("Compare {num.soft, C, q ? x} with {num.clear, C, q ? x}.")}

leads to "Compare '5 and six' with '5 and '.".

\item enum: enumerates the elements. It creates a single string containing the comma

separated list of elements.

If there are more than 7 elements, only the first 6 are shown and the number of

items left is written.

For example \code{string_magic("{enum ? 1:5}")} leads to "1, 2, 3, 4, and 5".

You can add the following options by appending the letter to enum after a dot:

\itemize{

\item q, Q, or bq: to quote the elements

\item or, nor: to finish with an 'or' (or 'nor') instead of an 'and'

\item comma: to finish the enumeration with ", " instead of ", and".

\item i, I, a, A, 1: to enumerate with this prefix, like in: i) one, and ii) two

\item a number: to tell the number of items to display

Ex.1: \verb{x = c("Marv", "Nancy"); string_magic("The main characters are \{enum ? x\}.")} leads to

"The main characters are Marv and Nancy.".

Ex.2: \verb{x = c("orange", "milk", "rice"); string_magic("Shopping list: \{enum.i.q ? x\}.")} leads to

"Shopping list: i) 'orange', ii) 'milk', and iii) 'rice'."

}

\item len: gives the length of the vector. Options "letter", "upper", "num".

Option "letter" writes the length in words (up to 100). Option "upper" is the same

as letter but uppercases the first letter.

By default, commas are added to separate thousands. Use option "num" to preserve

a regular numeric format.

Example: \code{string_magic("Size = {len ? 1:5000}")} leads to "Size = 5,000".

\item swidth: stands for screen width. Formats the string to fit a given width

by cutting at word boundaries and adding newlines appropriately.

Accepts arguments of the form \code{'n'} or \code{'n|s'}, with \code{n} a number and \code{s} a string.

An argument of the form \code{'n|s'} will add \code{s} at the beginning of each line. Further,

by default a trailing white space is added to \code{s}; to remove this

behavior, add an underscore at the end of it.

The argument \code{n} is either an integer giving the target character

width (minimum is 15), or it can be a fraction expressing the

target size as a fraction of the current screen. Finally it can be an expression that

uses the variable \code{.sw} which will capture the value of the current screen width.

Ex.1: \code{string_magic("{15 swidth ! this is a long sentence}")} leads to "this is a long\\nsentence".

Ex.2: \code{string_magic("{15 swidth.#> ! this is a long sentence}")} leads to "#> this is a long\\n#> sentence".

\item difftime: displays a formatted time difference. Option "silent" does not report a warning if the

operation fails. It accepts either objects of class \code{POSIXt} or \code{difftime}.

Example: \verb{x = Sys.time() ; Sys.sleep(0.5) ; string_magic("Time: \{difftime ? x\}")} leads to something

like "Time: 514ms".

}

}

\section{Group-wise operations}{

In \code{string_magic}, the splitting operation \code{s} (or \code{S}) keeps a memory of the strings

that were split. Use the tilde operator, of the form \verb{~(op1, op2)}, to apply operations

group-wise, to each of the split strings.

Better with an example. \code{x = c("Oreste, Hermione", "Hermione, Pyrrhus", "Pyrrhus, Andromaque") ;}

\code{string_magic("Troubles ahead: {S, ~(' loves 'c), C ? x}.")} leads to

"Troubles ahead: Oreste loves Hermione, Hermione loves Pyrrhus and Pyrrhus loves Andromaque.".

Almost all operations can be applied group-wise (although only operations changing the order or

the length of the strings really matter).

}

\section{Conditional operations}{

There are two operators to apply operations conditionally: \code{if} and \code{vif}, the latter

standing for \emph{verbatim if}.

The syntax of \code{if} is \verb{if(cond ; ops_true ; ops_false)} with \code{cond} a

condition (i.e. logical operation) on the value being interpolated, \code{ops_true} a comma-separated

sequence of operations if the condition is \code{TRUE} and \code{ops_false} an \emph{optional} sequence of

operations if the condition is \code{FALSE}.

Ex.1: Let's take a sentence, delete words of 4 characters or fewer, and trim

words of 7+ characters.

x = "Songe Cephise a cette nuit cruelle qui fut pour tout un peuple une nuit eternelle"

\code{string_magic("{' 's, if(.nchar<=4 ; nuke ; '7|..'k), c ? x}")}.

Let's break it down. First the sentence is split w.r.t. spaces, leading to a vector

of words. Then we use the special variable \code{.nchar} in \code{if}'s condition to refer

to the number of characters of the current vector (the words). The words with

4 characters or fewer are nuked (i.e. removed), and the other words are

trimmed at 7 characters. Finally the modified vector of words is collapsed with

the function \code{c}, leading to the result.

The condition \code{cond} accepts the following special values: \code{.} (the dot), \code{.nchar}, \code{.C}, \code{.len}, \code{.N}.

The dot, \code{.}, refers to the current vector. \code{.nchar} represents the number of characters

of the current vector (equivalent to \code{nchar(.)}). \code{.C} is an alias to \code{.nchar}.

\code{.len} represents the length of the current vector (equivalent to \code{length(.)}).

\code{.N} is an alias to \code{.len}.

If a condition leads to a result of length 1, then the operations are applied to

the full string vector and not element-wise (as was the case in Ex.1). Contrary to element-wise conditions

for which operations modifying the length of the vectors are forbidden (apart from nuking),

such operations are fine in full-string conditions.

Ex.2: \code{x = string_magic("x{1:10}")}; \code{string_magic("y = {if(.N>4 ; 3 first, '...'insert.right), ' + 'c ? x}")}

leads to "y = x1 + x2 + x3 + ...". The same operation applied to \code{x = string_magic("x{1:4}")}

leads to "y = x1 + x2 + x3 + x4".

For \code{vif}, the syntax is \verb{vif(cond ; verb_true ; verb_false)} with \code{verb_true}

a verbatim value with which the vector will be replaced if the condition is \code{TRUE}.

This is similar for \code{verb_false}. The condition works as in \code{if}.

Ex.3: \verb{x = c(1, 25, 12, 6) ; string_magic("Values: \{vif(.<10 ; <10), C ? x\}")} leads to

"Values: <10, 25, 12 and <10". As we can see values lower than 10 are replaced

with "<10" while other values are not modified.

Ex.4: \code{x = string_magic("x{1:10}")}; \code{string_magic("y = {vif(.N>4 ; {S!{x[1]}, ..., {last?x}}), ' + 'c ? x}")}

leads to "y = x1 + ... + x10".

Let's break it down. If the length of the vector is greater than 4 (here it's 10), then

the full string is replaced with \code{"{S!{x[1]}, ..., {last?x}}"}. Interpolation applies to

such string. Hence the split operation \code{S} breaks the string w.r.t.

the commas (default behavior), leading to the vector \code{c("{x[1]}", "...", "{last?x}")}. Since the

string contains curly brackets, interpolation is applied again. This leads to

the vector \code{c("x1", "...", "x10")}. Finally, this vector is collapsed with ' + ' leading

to the final string.

Note that there are many ways to get to the same result. Here is another example:

\code{string_magic("y = {vif(.N>4 ; {x[1]} + ... + {last?x} ; {' + 'c ? x}) ? x}")}.

The \code{vif} condition allows the use of '.' to refer to the current value in

\code{verb_true} and \code{verb_false}, as illustrated by the last example:

Ex.5: \code{string_magic("{4 last, vif(. \%\% 2 ; x{.} ; y{rev?.}), C ? 1:11}")}

leads to "y10, x9, y8 and x11".

}

\section{Special interpolation}{

if-else:

Using an ampersand ("&") as the first character of an interpolation leads to an \emph{if-else} operation.

Using two ampersands ("&&") leads to a slightly different operation described at the end of this section.

The syntax is as follows: \verb{\{&cond ; verb_true ; verb_false\}} with \code{cond} a

condition (i.e. logical operation) on the value being interpolated, \code{verb_true}

a verbatim value with which the vector will be replaced if the condition is \code{TRUE} and

\code{verb_false} an \emph{optional} verbatim value with which the vector will be replaced if the condition is \code{FALSE}.

If not provided, \code{verb_false} is considered to be the empty string unless the operator is

the double ampersand described at the end of this section.

Note that in \code{cond}, you can use the function \code{len}, an alias to \code{length}.

Ex.1: \code{x = 1:5}; \code{string_magic("x is {&len(x)<10 ; short ; {`log10(.N)-1`times, ''c ! very }long}")}

leads to "x is short". With \code{x = 1:50}, it leads to "x is long", and to "x is very very long"

if \code{x = 1:5000}.

If a condition leads to a result of length 1, the full string is replaced by the verbatim

expression. Further, this expression will be interpolated if requested. This was the case

in Ex.1 where \code{verb_false} was interpolated.

If the condition's length is greater than 1, then each logical value equal to \code{TRUE} is replaced

by \code{verb_true}, and \code{FALSE} or \code{NA} values are replaced with \code{verb_false}. Note,

importantly, that \strong{no interpolation is performed in that case}.

Ex.2: \verb{x = 1:3 ; string_magic("x is \{&x == 2 ; two ; not two\}")} leads to the vector

\code{c("x is not two", "x is two", "x is not two")}.

In that example, when an element of x equals 2, it is replaced with "two"; otherwise it is

replaced with "not two".

Using the two ampersand operator ("&&") is like the simple ampersand version but the

default for \code{verb_false} is the variable used in the condition itself. So the syntax is

\verb{\{&&cond ; verb_true\}} and \emph{it does not accept} \code{verb_false}.

Ex.3: \verb{i = 3 ; string_magic("i = \{&&i == 3 ; three\}")} leads to "i = three", and to "i = 5" if \code{i = 5}.

Pluralization:

There is advanced support for pluralization which greatly facilitates the writing of messages

in natural language.

There are two ways to pluralize: over length or over value. To trigger a "pluralization" interpolation

use as first character:

\itemize{

\item \code{$} to pluralize over the length of a variable (see Ex.2)

\item \verb{#} to pluralize over the value of a variable (see Ex.1)

}

Ex.1: \verb{x = 5; string_magic("I bought \{N?x\} book\{#s\}.")} leads to "I bought five books.".

If \code{x = 1}, this leads to "I bought one book.".

The syntax is \verb{\{#plural_ops ? variable\}} or \verb{\{#plural_ops\}} where \code{plural_ops} are

specific pluralization operations which will be described below.

The pluralization is performed \emph{always} with respect to the value of a variable.

You can either add the variable explicitly (\verb{\{#plural_ops ? variable\}}) or refer

to it implicitly (\verb{\{#plural_ops\}}). If implicit, then the algorithm will look at the

previous variable that was interpolated and pluralize over it. This is exactly what happens in

Ex.1 where \code{x} was interpolated in \code{{N?x}} and plural operation \code{s} in \verb{\{#s\}} then applied to

\code{x}. It was equivalent to have \verb{\{#s ? x\}}. If a variable wasn't interpolated before, then

the next interpolated variable will be used (see Ex.2). If no variable is interpolated

at all, an error is thrown.

Ex.2: \verb{x = c("J.", "M."); string_magic("My BFF\{$s, are\} \{C?x\}!")} leads to "My BFFs are J. and M.!".

If \code{x = "S."}, this leads to "My BFF is S.!".

Pluralizing accepts the following operations:

\itemize{

\item s, es: adds an "s" (or "es") if it is plural (> 1), nothing otherwise. Accepts the option \code{0} or \code{zero} which

treats a 0-length or a 0-value as plural.

\item y or ies: adds a 'y' if singular and 'ies' if plural (>1). Accepts the option \code{0} or \code{zero} which

treats a 0-length or a 0-value as plural.

\item enum: enumerates the elements (see help for the regular operation \code{enum})

\item n, N, len, Len: add the number of elements ("len") or the value ("n") of the variable as a formatted number or

in letters (upper case versions). Accepts the options \code{letter} (to write in letter)

and \code{upper} (to uppercase the first letter).

\item nth, ntimes: writes the value of the variable as an order (nth) or a frequency (ntimes). Accepts the option \code{letter}

to write the numbers in letters (uppercase version of the operator does the same).

\item is, or any verb: conjugates the verb appropriately

}

You can chain operations, in that case a whitespace is automatically added between them.

Ex.3: \verb{x = c(7, 3, 18); string_magic("The winning number\{$s, is, enum ? sort(x)\}.")}

leads to "The winning numbers are 3, 7 and 18.". With \code{x = 7} this leads to

"The winning number is 7.".

On top of the previous operations, there is a special operation allowing to add verbatim text depending on

the situation. The syntax is as follows:

\itemize{

\item \verb{(s1;s2)}: adds verbatim 's1' if singular and 's2' if plural (>1)

\item \verb{(s1;s2;s3)}: adds verbatim 's1' if zero, 's2' if singular (=1) and 's3' if plural

\item \verb{(s1;;s3)}: adds verbatim 's1' if zero, 's3' if singular or plural (i.e. >=1)

}

These case-dependent verbatim values \strong{are interpolated} (if appropriate). In these interpolations

you need not refer explicitly to the variable for pluralization interpolations.

Ex.4: \verb{x = 3; string_magic("\{#(Sorry, nothing found.;;\{#N.upper\} match\{#es, were\} found.)?x\}")} leads to

"Three matches were found.". If "x = 1", this leads to "One match was found." and if "x = 0" this leads

to "Sorry, nothing found.".

}

\section{Escaping and special cases}{

The opening and closing brackets, \code{{}}, are special characters and cannot be used as regular text.

To bypass their special meaning, you need to escape them with a double backslash.

Ex.1: \code{string_magic("open = \\\\\\\\{, close = }")} leads to \code{"open = {, close = }"}.

Ex.2: \code{string_magic("many {5 times.c ! \\\\\\\\}}")} leads to \verb{many \}\}\}\}\}}.

You only need to escape the special delimiters which the algorithm is currently looking for.

As you can see, you don't need to escape the closing bracket in Ex.1 since no box

was open. On the other hand, you need to escape it in Ex.2.

Alternatively, use the argument \code{.delim} to set custom delimiters.

Ex.3: \code{string_magic("I {'can {write} {{what}} I want'}")} leads to \code{"I can {write} {{what}} I want"}.

Since \code{{expr}} evaluates \code{expr}, the stuff inside the \emph{box}, you can pass a

character string and it will stay untouched.

In the few operations expecting a semi-colon (if-else and pluralization), it can also be

escaped with a double backslash.

In interpolations, the exclamation mark (\code{!}) signals a verbatim expression. But what

if you use it to mean the logical operation \emph{not} in an operation-free interpolation?

In that case, you need a hack: use a question mark (\verb{?}) first to indicate to the

algorithm that you want to evaluate the expression.

Ex.4: \code{string_magic("{!TRUE} is {?!TRUE}")} leads to "TRUE is FALSE". The first expression is

taken verbatim while the second is evaluated.

}

\section{Generic regular expression flags}{

All \code{stringmagic} functions support generic flags in regular-expression patterns.

The flags are useful to quickly give extra instructions, similarly to \emph{usual}

\href{https://javascript.info/regexp-introduction}{regular expression flags}.

Here the syntax is "flag1, flag2/pattern". That is: flags are a comma separated list of flag-names

separated from the pattern with a slash (\code{/}). Example: \code{string_which(c("hello...", "world"), "fixed/.")} returns \code{1}.

Here the flag "fixed" removes the regular expression meaning of "." which would have otherwise meant \emph{"any character"}.

The no-flag version \code{string_which(c("hello...", "world"), ".")} returns \code{1:2}.

Alternatively, and this is recommended, you can collate the initials of the flags instead of using a

comma separated list. For example: "if/dt[" will apply the flags "ignore" and "fixed" to the pattern "dt[".

The four flags always available are: "ignore", "fixed", "word" and "magic".

\itemize{

\item "ignore" instructs to ignore the case. Technically, it adds the perl-flag "(?i)"

at the beginning of the pattern.

\item "fixed" removes the regular expression interpretation, so that the characters ".", "$", "^", "["

(among others) lose their special meaning and are treated for what they are: simple characters.

\item "word" adds word boundaries (\code{"\\\\b"} in regex language) to the pattern. Further, the comma (\code{","})

becomes a word separator. Technically, "word/one, two" is treated as "\\b(one|two)\\b". Example:

\code{string_clean("Am I ambushed?", "wi/am")} leads to " I ambushed?" thanks to the flags "ignore" and "word".

\item "magic" allows to interpolate variables inside the pattern before regex interpretation.

For example if \code{letters = "aiou"} then \code{string_clean("My great goose!", "magic/[{letters}] => e")}

leads to \code{"My greet geese!"}

}

}

\examples{

# BASIC USAGE ####

x = c("Romeo", "Juliet")

# {x} inserts x

string_magic("Hello {x}!")

# elements in ... are collapsed with "" (default)

string_magic("Hello {x[1]}, ",

"how is {x[2]} doing?")

# Splitting a comma separated string

# The mechanism is explained later

string_vec("J. Mills, David, Agnes, Dr Strong")

# Nota: this is equivalent to (explained later)

string_magic("{', *'S ! J. Mills, David, Agnes, Dr Strong}")

# Applying low level operations to strings

# Two main syntax:

# A) expression evaluation

# {operation ? x}

# | |

# | \-> the expression to be evaluated

# \-> ? means that the expression will be evaluated

# B) verbatim

# {operation ! x}

# | |

# | \-> the expression taken as verbatim (here 'x')

# \-> ! means that the expression is taken as verbatim

# operation: usually 'arg'op with op an operation code.

# Example: splitting

x = "hello dear"

string_magic("{' 's ? x}")

# x is split by ' '

string_magic("{' 's ! hello dear}")

# 'hello dear' is split by ' '

# had we used ?, there would have been an error

# There are 50+ string operators

# Operators usually have a default value

# Operations can have options

# Operations can be chained by separating them with a comma

# Example: default of 's' is ' ' + chaining with collapse

string_magic("{s, ' my 'c ! hello dear}")

# Nesting

# {operations ! s1{expr}s2}

# | |

# | \-> expr will be interpolated then added to the string

# \-> nesting requires verbatim evaluation: '!'

string_magic("The variables are: {C ! x{1:4}}.")

# This one is ugly but it shows triple nesting

string_magic("The variables are: {ws, C ! {2 times ! x{1:4}}{','s, 4 each ! ,_sq}}.")

# Splitting

# s: split with fixed pattern, default is ' '

string_magic("{s ! a b c}")

string_magic("{' b 's !a b c}")

# S: same as 's' but default is ',[ \t\n]*'

string_magic("{S !a, b, c}")

string_magic("{'[[:punct:] ]+'S ! a! b; c}")

# add regex flags: e.g. fixed search

string_magic("{'f/.'s ! hi.there}")

# Collapsing

# c and C do the same, their default is different

# syntax: 's1|s2' with

# - s1 the string used for collapsing

# - s2 (optional) the string used for the last collapse

# c: default is ' '

string_magic("{c ? 1:3}")

# C: default is ', | and '

string_magic("{C ? 1:3}")

string_magic("{', | or 'c ? 1:4}")

# Extraction

# extract: to extract patterns (option first)

# x: alias to extract.first

# X: alias to extract

# syntax: 'pattern'x

# Default is '[[:alnum:]]+'

x = "This years is... 2020"

string_magic("{x ? x}") # similar to string_magic("{extract.first ? x}")

string_magic("{X ? x}") # similar to string_magic("{extract ? x}")

string_magic("{'\\\\d+'x ? x}")

# STRING FORMATTING ####

# upper, lower, title

# upper case the first letter

string_magic("{upper.first ! julia mills}")

# title case

string_magic("{title ! julia mills}")

# upper all letters

string_magic("{upper ! julia mills}")

# lower case

string_magic("{lower ! JULIA MILLS}")

# q, Q, bq: single, double, back quote

string_magic("{S, q, C ! Julia, David, Wilkins}")

View remainder of file in raw view

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

FilesExpand file tree

string_magic.Rd

Latest commit

History

string_magic.Rd

File metadata and controls