| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
|
|
| |
| |
| |
| |
|
|
| #include <config.h> |
|
|
| #include <stdio.h> |
| #include <getopt.h> |
| #include <sys/types.h> |
| #include "system.h" |
|
|
| #include "assure.h" |
| #include "fadvise.h" |
| #include "getndelim2.h" |
|
|
| #include "set-fields.h" |
|
|
| |
/* Name of this program, passed to the --version handler and to
   emit_ancillary_info () by usage ().  */
#define PROGRAM_NAME "cut"

/* Author list handed to the --version handler.  */
#define AUTHORS \
  proper_name ("David M. Ihnat"), \
  proper_name ("David MacKenzie"), \
  proper_name ("Jim Meyering")

/* Report MESSAGE as a diagnostic, then call usage (EXIT_FAILURE),
   which does not return.

   MESSAGE is printed through an explicit "%s" format rather than
   being used as the format string itself, so that a '%' occurring in
   the (possibly translated) text is printed literally instead of
   being interpreted as a conversion specification.  */
#define FATAL_ERROR(Message)                                            \
  do                                                                    \
    {                                                                   \
      error (0, 0, "%s", (Message));                                    \
      usage (EXIT_FAILURE);                                             \
    }                                                                   \
  while (0)
|
|
|
|
| |
| |
| |
| |
/* Range pair currently being matched against the input position.
   cut_bytes and cut_fields reset it to frp at the start of every
   line; next_item advances it once the position passes its 'hi'.  */
static struct field_range_pair *current_rp;

/* Buffer that cut_fields hands to getndelim2 to hold the first field
   of a line while it is still unknown whether the line contains a
   delimiter.  Freed and reset to a null pointer when getndelim2
   reports failure.  */
static char *field_1_buffer;

/* Size bookkeeping for field_1_buffer; passed by address to
   getndelim2 together with the buffer.  */
static size_t field_1_bufsize;

/* Set by -s: in field mode, lines whose first field is not terminated
   by the field delimiter are not printed.  */
static bool suppress_non_delimited;

/* Set by --complement: main passes SETFLD_COMPLEMENT to set_fields.  */
static bool complement;

/* Input field delimiter: the -d argument, or TAB when -d is absent.  */
static unsigned char delim;

/* Input and output line terminator: newline by default, NUL with -z.  */
static unsigned char line_delim = '\n';

/* Number of bytes of output_delimiter_string written each time the
   output delimiter is emitted.  */
static size_t output_delimiter_length;

/* Bytes written between selected fields (and, in byte mode, between
   ranges).  Points at output_delimiter_default unless
   --output-delimiter was given.  */
static char *output_delimiter_string;

/* One-byte storage that receives 'delim' when no --output-delimiter
   option was given.  */
static char output_delimiter_default[1];

/* True once "-" has been processed as an input; main then closes
   stdin before exiting.  */
static bool have_read_stdin;
|
|
| |
| |
/* Codes returned by getopt_long for the long-only options.  They
   start just above CHAR_MAX so that they cannot coincide with any
   single-character option.  */
enum
{
  OUTPUT_DELIMITER_OPTION = CHAR_MAX + 1,
  COMPLEMENT_OPTION
};
|
|
| static struct option const longopts[] = |
| { |
| {"bytes", required_argument, nullptr, 'b'}, |
| {"characters", required_argument, nullptr, 'c'}, |
| {"fields", required_argument, nullptr, 'f'}, |
| {"delimiter", required_argument, nullptr, 'd'}, |
| {"only-delimited", no_argument, nullptr, 's'}, |
| {"output-delimiter", required_argument, nullptr, OUTPUT_DELIMITER_OPTION}, |
| {"complement", no_argument, nullptr, COMPLEMENT_OPTION}, |
| {"zero-terminated", no_argument, nullptr, 'z'}, |
| {GETOPT_HELP_OPTION_DECL}, |
| {GETOPT_VERSION_OPTION_DECL}, |
| {nullptr, 0, nullptr, 0} |
| }; |
|
|
/* Print usage information and terminate the process with STATUS.

   When STATUS is not EXIT_SUCCESS the function only delegates to
   emit_try_help (); otherwise it writes the complete option summary
   and the description of LIST syntax to standard output.  In both
   cases it ends by calling exit (STATUS), so it never returns to the
   caller.  */
| void |
| usage (int status) |
| { |
/* Failure path: no full help text, just the hint helper.  */
| if (status != EXIT_SUCCESS) |
| emit_try_help (); |
| else |
| { |
| printf (_("\ |
| Usage: %s OPTION... [FILE]...\n\ |
| "), |
| program_name); |
| fputs (_("\ |
| Print selected parts of lines from each FILE to standard output.\n\ |
| "), stdout); |
|
|
/* Shared notes emitted by helpers defined outside this file.  */
| emit_stdin_note (); |
| emit_mandatory_arg_note (); |
|
|
| fputs (_("\ |
| -b, --bytes=LIST select only these bytes\n\ |
| -c, --characters=LIST select only these characters\n\ |
| -d, --delimiter=DELIM use DELIM instead of TAB for field delimiter\n\ |
| "), stdout); |
| fputs (_("\ |
| -f, --fields=LIST select only these fields; also print any line\n\ |
| that contains no delimiter character, unless\n\ |
| the -s option is specified\n\ |
| -n (ignored)\n\ |
| "), stdout); |
| fputs (_("\ |
| --complement complement the set of selected bytes, characters\n\ |
| or fields\n\ |
| "), stdout); |
| fputs (_("\ |
| -s, --only-delimited do not print lines not containing delimiters\n\ |
| --output-delimiter=STRING use STRING as the output delimiter\n\ |
| the default is to use the input delimiter\n\ |
| "), stdout); |
| fputs (_("\ |
| -z, --zero-terminated line delimiter is NUL, not newline\n\ |
| "), stdout); |
| fputs (HELP_OPTION_DESCRIPTION, stdout); |
| fputs (VERSION_OPTION_DESCRIPTION, stdout); |
| fputs (_("\ |
| \n\ |
| Use one, and only one of -b, -c or -f. Each LIST is made up of one\n\ |
| range, or many ranges separated by commas. Selected input is written\n\ |
| in the same order that it is read, and is written exactly once.\n\ |
| "), stdout); |
| fputs (_("\ |
| Each range is one of:\n\ |
| \n\ |
| N N'th byte, character or field, counted from 1\n\ |
| N- from N'th byte, character or field, to end of line\n\ |
| N-M from N'th to M'th (included) byte, character or field\n\ |
| -M from first to M'th (included) byte, character or field\n\ |
| "), stdout); |
| emit_ancillary_info (PROGRAM_NAME); |
| } |
/* Reached on both paths: the process ends here with STATUS.  */
| exit (status); |
| } |
|
|
|
|
| |
| |
|
|
| static inline void |
| next_item (uintmax_t *item_idx) |
| { |
| (*item_idx)++; |
| if ((*item_idx) > current_rp->hi) |
| current_rp++; |
| } |
|
|
| |
|
|
| static inline bool |
| print_kth (uintmax_t k) |
| { |
| return current_rp->lo <= k; |
| } |
|
|
| |
|
|
| static inline bool |
| is_range_start_index (uintmax_t k) |
| { |
| return k == current_rp->lo; |
| } |
|
|
| |
|
|
| static void |
| cut_bytes (FILE *stream) |
| { |
| uintmax_t byte_idx; |
| |
| |
| bool print_delimiter; |
|
|
| byte_idx = 0; |
| print_delimiter = false; |
| current_rp = frp; |
| while (true) |
| { |
| int c; |
|
|
| c = getc (stream); |
|
|
| if (c == line_delim) |
| { |
| if (putchar (c) < 0) |
| write_error (); |
| byte_idx = 0; |
| print_delimiter = false; |
| current_rp = frp; |
| } |
| else if (c == EOF) |
| { |
| if (byte_idx > 0) |
| { |
| if (putchar (line_delim) < 0) |
| write_error (); |
| } |
| break; |
| } |
| else |
| { |
| next_item (&byte_idx); |
| if (print_kth (byte_idx)) |
| { |
| if (output_delimiter_string != output_delimiter_default) |
| { |
| if (print_delimiter && is_range_start_index (byte_idx)) |
| { |
| if (fwrite (output_delimiter_string, sizeof (char), |
| output_delimiter_length, stdout) |
| != output_delimiter_length) |
| write_error (); |
| } |
| print_delimiter = true; |
| } |
|
|
| if (putchar (c) < 0) |
| write_error (); |
| } |
| } |
| } |
| } |
|
|
| |
|
|
| static void |
| cut_fields (FILE *stream) |
| { |
| int c; |
| uintmax_t field_idx = 1; |
| bool found_any_selected_field = false; |
| bool buffer_first_field; |
|
|
| current_rp = frp; |
|
|
| c = getc (stream); |
| if (c == EOF) |
| return; |
|
|
| ungetc (c, stream); |
| c = 0; |
|
|
| |
| |
| |
| |
| |
| |
| buffer_first_field = (suppress_non_delimited ^ !print_kth (1)); |
|
|
| while (true) |
| { |
| if (field_idx == 1 && buffer_first_field) |
| { |
| ssize_t len; |
| size_t n_bytes; |
|
|
| len = getndelim2 (&field_1_buffer, &field_1_bufsize, 0, |
| GETNLINE_NO_LIMIT, delim, line_delim, stream); |
| if (len < 0) |
| { |
| free (field_1_buffer); |
| field_1_buffer = nullptr; |
| if (ferror (stream) || feof (stream)) |
| break; |
| xalloc_die (); |
| } |
|
|
| n_bytes = len; |
| affirm (n_bytes != 0); |
|
|
| c = 0; |
|
|
| |
| |
| |
| if (to_uchar (field_1_buffer[n_bytes - 1]) != delim) |
| { |
| if (suppress_non_delimited) |
| { |
| |
| } |
| else |
| { |
| if (fwrite (field_1_buffer, sizeof (char), n_bytes, stdout) |
| != n_bytes) |
| write_error (); |
| |
| if (field_1_buffer[n_bytes - 1] != line_delim) |
| { |
| if (putchar (line_delim) < 0) |
| write_error (); |
| } |
| c = line_delim; |
| } |
| continue; |
| } |
|
|
| if (print_kth (1)) |
| { |
| |
| if (fwrite (field_1_buffer, sizeof (char), n_bytes - 1, stdout) |
| != n_bytes - 1) |
| write_error (); |
|
|
| |
| if (delim == line_delim) |
| { |
| int last_c = getc (stream); |
| if (last_c != EOF) |
| { |
| ungetc (last_c, stream); |
| found_any_selected_field = true; |
| } |
| } |
| else |
| { |
| found_any_selected_field = true; |
| } |
| } |
| next_item (&field_idx); |
| } |
|
|
| int prev_c = c; |
|
|
| if (print_kth (field_idx)) |
| { |
| if (found_any_selected_field) |
| { |
| if (fwrite (output_delimiter_string, sizeof (char), |
| output_delimiter_length, stdout) |
| != output_delimiter_length) |
| write_error (); |
| } |
| found_any_selected_field = true; |
|
|
| while ((c = getc (stream)) != delim && c != line_delim && c != EOF) |
| { |
| if (putchar (c) < 0) |
| write_error (); |
| prev_c = c; |
| } |
| } |
| else |
| { |
| while ((c = getc (stream)) != delim && c != line_delim && c != EOF) |
| prev_c = c; |
| } |
|
|
| |
| if (delim == line_delim && c == delim) |
| { |
| int last_c = getc (stream); |
| if (last_c != EOF) |
| ungetc (last_c, stream); |
| else |
| c = last_c; |
| } |
|
|
| if (c == delim) |
| next_item (&field_idx); |
| else if (c == line_delim || c == EOF) |
| { |
| if (found_any_selected_field |
| || !(suppress_non_delimited && field_idx == 1)) |
| { |
| |
| if (c == line_delim || prev_c != line_delim |
| || delim == line_delim) |
| { |
| if (putchar (line_delim) < 0) |
| write_error (); |
| } |
| } |
| if (c == EOF) |
| break; |
|
|
| |
| field_idx = 1; |
| current_rp = frp; |
| found_any_selected_field = false; |
| } |
| } |
| } |
|
|
| |
| |
|
|
| static bool |
| cut_file (char const *file, void (*cut_stream) (FILE *)) |
| { |
| FILE *stream; |
|
|
| if (streq (file, "-")) |
| { |
| have_read_stdin = true; |
| stream = stdin; |
| assume (stream); |
| } |
| else |
| { |
| stream = fopen (file, "r"); |
| if (stream == nullptr) |
| { |
| error (0, errno, "%s", quotef (file)); |
| return false; |
| } |
| } |
|
|
| fadvise (stream, FADVISE_SEQUENTIAL); |
|
|
| cut_stream (stream); |
|
|
| int err = errno; |
| if (!ferror (stream)) |
| err = 0; |
| if (streq (file, "-")) |
| clearerr (stream); |
| else if (fclose (stream) == EOF) |
| err = errno; |
| if (err) |
| { |
| error (0, err, "%s", quotef (file)); |
| return false; |
| } |
| return true; |
| } |
|
|
| int |
| main (int argc, char **argv) |
| { |
| int optc; |
| bool ok; |
| bool delim_specified = false; |
| bool byte_mode = false; |
| char *spec_list_string = nullptr; |
|
|
| initialize_main (&argc, &argv); |
| set_program_name (argv[0]); |
| setlocale (LC_ALL, ""); |
| bindtextdomain (PACKAGE, LOCALEDIR); |
| textdomain (PACKAGE); |
|
|
| atexit (close_stdout); |
|
|
| |
| suppress_non_delimited = false; |
|
|
| delim = '\0'; |
| have_read_stdin = false; |
|
|
| while ((optc = getopt_long (argc, argv, "b:c:d:f:nsz", longopts, nullptr)) |
| != -1) |
| { |
| switch (optc) |
| { |
| case 'b': |
| case 'c': |
| |
| byte_mode = true; |
| FALLTHROUGH; |
| case 'f': |
| |
| if (spec_list_string) |
| FATAL_ERROR (_("only one list may be specified")); |
| spec_list_string = optarg; |
| break; |
|
|
| case 'd': |
| |
| |
| if (optarg[0] != '\0' && optarg[1] != '\0') |
| FATAL_ERROR (_("the delimiter must be a single character")); |
| delim = optarg[0]; |
| delim_specified = true; |
| break; |
|
|
| case OUTPUT_DELIMITER_OPTION: |
| |
| |
| output_delimiter_length = (optarg[0] == '\0' |
| ? 1 : strlen (optarg)); |
| output_delimiter_string = optarg; |
| break; |
|
|
| case 'n': |
| break; |
|
|
| case 's': |
| suppress_non_delimited = true; |
| break; |
|
|
| case 'z': |
| line_delim = '\0'; |
| break; |
|
|
| case COMPLEMENT_OPTION: |
| complement = true; |
| break; |
|
|
| case_GETOPT_HELP_CHAR; |
| case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); |
| default: |
| usage (EXIT_FAILURE); |
| } |
| } |
|
|
| if (!spec_list_string) |
| FATAL_ERROR (_("you must specify a list of bytes, characters, or fields")); |
|
|
| if (byte_mode) |
| { |
| if (delim_specified) |
| FATAL_ERROR (_("an input delimiter may be specified only\ |
| when operating on fields")); |
|
|
| if (suppress_non_delimited) |
| FATAL_ERROR (_("suppressing non-delimited lines makes sense\n\ |
| \tonly when operating on fields")); |
| } |
|
|
| set_fields (spec_list_string, |
| ((byte_mode ? SETFLD_ERRMSG_USE_POS : 0) |
| | (complement ? SETFLD_COMPLEMENT : 0))); |
|
|
| if (!delim_specified) |
| delim = '\t'; |
|
|
| if (output_delimiter_string == nullptr) |
| { |
| output_delimiter_default[0] = delim; |
| output_delimiter_string = output_delimiter_default; |
| output_delimiter_length = 1; |
| } |
|
|
| void (*cut_stream) (FILE *) = byte_mode ? cut_bytes : cut_fields; |
| if (optind == argc) |
| ok = cut_file ("-", cut_stream); |
| else |
| for (ok = true; optind < argc; optind++) |
| ok &= cut_file (argv[optind], cut_stream); |
|
|
|
|
| if (have_read_stdin && fclose (stdin) == EOF) |
| { |
| error (0, errno, "-"); |
| ok = false; |
| } |
|
|
| return ok ? EXIT_SUCCESS : EXIT_FAILURE; |
| } |
|
|