pax_global_header00006660000000000000000000000064151504544370014522gustar00rootroot0000000000000052 comment=0a48df61be5c5dfb83ed91a1791d9ffec4f934ef pg_csv-1.0.2/000077500000000000000000000000001515045443700130035ustar00rootroot00000000000000pg_csv-1.0.2/.clang-format000066400000000000000000000020021515045443700153500ustar00rootroot00000000000000AlignAfterOpenBracket: Align AlignConsecutiveAssignments: true AlignConsecutiveDeclarations: true AlignOperands: true AllowAllParametersOfDeclarationOnNextLine: false AllowShortFunctionsOnASingleLine: Empty AllowShortLoopsOnASingleLine: false AlwaysBreakAfterDefinitionReturnType: None AlwaysBreakAfterReturnType: None AlwaysBreakBeforeMultilineStrings: false BinPackArguments: true BinPackParameters: true ColumnLimit: 100 IndentPPDirectives: AfterHash MaxEmptyLinesToKeep: 1 PointerAlignment: Right SpaceBeforeAssignmentOperators: true SpaceBeforeParens: ControlStatements SpaceInEmptyParentheses: false SpacesBeforeTrailingComments: 1 BracedInitializerIndentWidth: 2 # includes IncludeBlocks: Preserve SortIncludes: true # indentation IndentWidth: 2 TabWidth: 2 UseTab: Never # if AllowShortIfStatementsOnASingleLine: true # case IndentCaseLabels: false AllowShortCaseLabelsOnASingleLine: true AlignConsecutiveShortCaseStatements: Enabled: true AcrossEmptyLines: true AcrossComments: true AlignCaseColons: true pg_csv-1.0.2/.github/000077500000000000000000000000001515045443700143435ustar00rootroot00000000000000pg_csv-1.0.2/.github/workflows/000077500000000000000000000000001515045443700164005ustar00rootroot00000000000000pg_csv-1.0.2/.github/workflows/ci.yaml000066400000000000000000000050711515045443700176620ustar00rootroot00000000000000name: CI on: [push, pull_request] jobs: test: runs-on: ubuntu-latest strategy: matrix: pg-version: ['12', '13', '14', '15', '16', '17'] steps: - uses: actions/checkout@v4 - name: Install Nix uses: cachix/install-nix-action@v30 with: nix_path: nixpkgs=channel:nixos-unstable - name: Use Cachix Cache uses: cachix/cachix-action@v10 with: name: nxpg authToken: ${{ secrets.CACHIX_AUTH_TOKEN }} - name: Build run: nix-shell --run "xpg -v ${{ matrix.pg-version }} build" - name: Run tests run: nix-shell --run "xpg -v ${{ matrix.pg-version }} test" - if: ${{ failure() }} run: | cat regression.out cat regression.diffs loadtest: strategy: matrix: kind: ['postgrest', 'csv_agg', 'csv_agg_options'] name: Loadtest runs-on: ubuntu-24.04 steps: - uses: actions/checkout@v4 - name: Install Nix uses: cachix/install-nix-action@v30 with: nix_path: nixpkgs=channel:nixos-unstable - name: Use Cachix Cache uses: cachix/cachix-action@v10 with: name: nxpg authToken: ${{ secrets.CACHIX_AUTH_TOKEN }} - name: Run loadtest run: nix-shell --run "pg_csv-loadtest ${{ matrix.kind }}" >> "$GITHUB_STEP_SUMMARY" coverage: runs-on: ubuntu-latest strategy: matrix: pg-version: ['17'] steps: - uses: actions/checkout@v4 - name: Install Nix uses: cachix/install-nix-action@v30 with: nix_path: nixpkgs=channel:nixos-unstable - name: Use Cachix Cache uses: cachix/cachix-action@v10 with: name: nxpg authToken: ${{ secrets.CACHIX_AUTH_TOKEN }} - name: Run coverage run: nix-shell --run "xpg -v ${{ matrix.pg-version }} coverage" - name: Send coverage to Coveralls uses: coverallsapp/github-action@v2.3.6 with: github-token: ${{ secrets.GITHUB_TOKEN }} files: ./build-${{ matrix.pg-version }}/coverage.info style: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Install Nix uses: cachix/install-nix-action@v30 with: nix_path: nixpkgs=channel:nixos-unstable - name: Use Cachix Cache uses: cachix/cachix-action@v10 with: name: nxpg authToken: ${{ secrets.CACHIX_AUTH_TOKEN }} - name: Run style check run: nix-shell --run "pg_csv-style-check" pg_csv-1.0.2/.gitignore000066400000000000000000000001621515045443700147720ustar00rootroot00000000000000*.control regression.* results/ *.so *.o *.bc *.diffs pgbench_log.* .history pg_csv--*.sql !pg_csv--*--*.sql tags pg_csv-1.0.2/LICENSE000066400000000000000000000020401515045443700140040ustar00rootroot00000000000000Copyright (c) 2025 Steve Chavez Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. pg_csv-1.0.2/Makefile000066400000000000000000000040121515045443700144400ustar00rootroot00000000000000SRC_DIR = src # the `-Wno`s quiet C90 warnings PG_CFLAGS = -std=c11 -Wextra -Wall -Werror \ -Wno-declaration-after-statement \ -Wno-vla \ -Wno-long-long ifeq ($(COVERAGE), 1) PG_CFLAGS += --coverage endif ifeq ($(CC),gcc) GCC_MAJ := $(firstword $(subst ., ,$(shell $(CC) -dumpfullversion -dumpversion))) GCC_GE14 = $(shell test $(GCC_MAJ) -ge 14; echo $$?) ifeq ($(GCC_GE14),0) PG_CFLAGS += -Wmissing-variable-declarations endif endif UNAME_S := $(shell uname -s) ifeq ($(UNAME_S),Darwin) SHARED_EXT := dylib else SHARED_EXT := so endif EXTENSION = pg_csv EXTVERSION = 1.0.1 DATA = $(wildcard sql/*--*.sql) EXTRA_CLEAN = sql/$(EXTENSION)--$(EXTVERSION).sql $(EXTENSION).control TESTS = $(wildcard test/sql/*.sql) REGRESS = $(patsubst test/sql/%.sql,%,$(TESTS)) REGRESS_OPTS = --inputdir=test MODULE_big = $(EXTENSION) SRC = $(wildcard $(SRC_DIR)/*.c) ifdef BUILD_DIR OBJS = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.o, $(SRC)) else OBJS = $(patsubst $(SRC_DIR)/%.c, src/%.o, $(SRC)) # if no BUILD_DIR, just build on src so standard PGXS `make` works endif PG_CONFIG = pg_config PG_CPPFLAGS := $(CPPFLAGS) -DEXTVERSION=\"$(EXTVERSION)\" all: sql/$(EXTENSION)--$(EXTVERSION).sql $(EXTENSION).control build: $(BUILD_DIR)/$(EXTENSION).$(SHARED_EXT) sql/$(EXTENSION)--$(EXTVERSION).sql $(EXTENSION).control $(BUILD_DIR)/.gitignore: sql/$(EXTENSION)--$(EXTVERSION).sql $(EXTENSION).control mkdir -p $(BUILD_DIR)/extension cp $(EXTENSION).control $(BUILD_DIR)/extension cp sql/$(EXTENSION)--$(EXTVERSION).sql $(BUILD_DIR)/extension echo "*" > $(BUILD_DIR)/.gitignore $(BUILD_DIR)/%.o: $(SRC_DIR)/%.c $(BUILD_DIR)/.gitignore $(CC) $(CPPFLAGS) $(CFLAGS) -c $< -o $@ $(BUILD_DIR)/$(EXTENSION).$(SHARED_EXT): $(EXTENSION).$(SHARED_EXT) mv $? $@ sql/$(EXTENSION)--$(EXTVERSION).sql: sql/$(EXTENSION).sql cp $< $@ $(EXTENSION).control: $(EXTENSION).control.in sed "s/@EXTVERSION@/$(EXTVERSION)/g" $(EXTENSION).control.in > $@ PGXS := $(shell $(PG_CONFIG) --pgxs) include $(PGXS) .PHONY: test test: make installcheck pg_csv-1.0.2/README.md000066400000000000000000000110601515045443700142600ustar00rootroot00000000000000# pg_csv ![PostgreSQL version](https://img.shields.io/badge/postgresql-12+-blue.svg) [![Coverage Status](https://coveralls.io/repos/github/PostgREST/pg_csv/badge.svg)](https://coveralls.io/github/PostgREST/pg_csv) [![Tests](https://github.com/PostgREST/pg_csv/actions/workflows/ci.yaml/badge.svg)](https://github.com/PostgREST/pg_csv/actions) Postgres has CSV support on the [COPY](https://www.postgresql.org/docs/current/sql-copy.html) command, but `COPY` has problems: - It uses a special protocol, so it doesn't work with other standard features like [prepared statements](https://www.postgresql.org/docs/current/sql-prepare.html), [pipeline mode](https://www.postgresql.org/docs/current/libpq-pipeline-mode.html#LIBPQ-PIPELINE-USING) or [pgbench](https://www.postgresql.org/docs/current/pgbench.html). - Is not composable. You can't use COPY inside CTEs, subqueries, view definitions or as function arguments. `pg_csv` offers flexible CSV processing as a solution. - Includes a CSV aggregate that composes with SQL expressions. - Native C extension, x2 times faster than SQL queries that try to output CSV (see our [CI results](https://github.com/PostgREST/pg_csv/actions/runs/17367727912)). - No dependencies except Postgres. ## Installation PostgreSQL >= 12 is supported. Clone this repo and run: ```bash make && make install ``` To install the extension: ```psql create extension pg_csv; ``` ## csv_agg Aggregate that builds a CSV respecting [RFC 4180](https://www.ietf.org/rfc/rfc4180.txt), quoting as required. ```sql create table projects as select * from ( values (1, 'Death Star OS', 1), (2, 'Windows 95 Rebooted', 1), (3, 'Project "Comma,Please"', 2), (4, 'Escape ""Plan""', 2), (NULL, 'NULL & Void', NULL) ) as _(id, name, client_id); ``` ```sql select csv_agg(x) from projects x; csv_agg -------------------------------- id,name,client_id + 1,Death Star OS,1 + 2,Windows 95 Rebooted,1 + 3,"Project ""Comma,Please""",2+ 4,"Escape """"Plan""""",2 + ,NULL & Void, (1 row) ``` ### Custom Delimiter Custom delimiters can be used to produce different formats like pipe-separated values, tab-separated values or semicolon-separated values. ```sql select csv_agg(x, csv_options(delimiter := '|')) from projects x; csv_agg ----------------------------- id|name|client_id + 1|Death Star OS|1 + 2|Windows 95 Rebooted|1 + 3|Open Source Lightsabers|2+ 4|Galactic Payroll System|2+ 7|Bugzilla Revival|3 (1 row) select csv_agg(x, csv_options(delimiter := E'\t')) from projects x; csv_agg ----------------------------------- id name client_id + 1 Death Star OS 1 + 2 Windows 95 Rebooted 1+ 3 Open Source Lightsabers 2+ 4 Galactic Payroll System 2+ 7 Bugzilla Revival 3 (1 row) ``` > [!NOTE] > - Newline, carriage return and double quotes are not supported as delimiters to maintain the integrity of the separated values format. > - The delimiter can only be a single char, if a longer string is specified only the first char will be used. > - Why use a `csv_options` constructor function instead of extra arguments? Aggregates don't support named arguments in postgres, see a discussion on https://github.com/PostgREST/pg_csv/pull/2#issuecomment-3155740589. ### BOM You can include a byte-order mark (BOM) to make the CSV compatible with Excel. ```sql select csv_agg(x, csv_options(bom := true)) from projects x; csv_agg ------------------- id,name,client_id+ 1,Death Star OS,1 2,Windows 95 Rebooted,1 3,Open Source Lightsabers,2 4,Galactic Payroll System,2 5,Bugzilla Revival,3 (1 row) ``` ### Header You can omit or include the CSV header. ```sql select csv_agg(x, csv_options(header := false)) from projects x; csv_agg ----------------------------- 1,Death Star OS,1 + 2,Windows 95 Rebooted,1 + 3,Open Source Lightsabers,2+ 4,Galactic Payroll System,2+ 7,Bugzilla Revival,3 (1 row) ``` ### Null string NULL values are represented by an empty string by default. This can be changed with the `nullstr` option. ```sql SELECT csv_agg(x, csv_options(nullstr:='')) AS body FROM projects x; body -------------------------------- id,name,client_id + 1,Death Star OS,1 + 2,Windows 95 Rebooted,1 + 3,"Project ""Comma,Please""",2+ 4,"Escape """"Plan""""",2 + ,NULL & Void, (1 row) ``` ## Limitations - For large bulk exports and imports, `COPY ... CSV` should still be preferred as its faster due to streaming support. pg_csv-1.0.2/bench/000077500000000000000000000000001515045443700140625ustar00rootroot00000000000000pg_csv-1.0.2/bench/csv_agg.sql000066400000000000000000000001521515045443700162120ustar00rootroot00000000000000\set lim random(1000, 2000) select csv_agg(t) from ( select * from orders_customers limit :lim ) as t; pg_csv-1.0.2/bench/csv_agg_options.sql000066400000000000000000000002641515045443700177710ustar00rootroot00000000000000\set lim random(1000, 2000) select csv_agg(t, csv_options(delimiter:='|', bom:=true, header:=false, nullstr:='')) from ( select * from orders_customers limit :lim ) as t; pg_csv-1.0.2/bench/init.sql000066400000000000000000000112561515045443700155530ustar00rootroot00000000000000-- based on the northwind database https://github.com/pthom/northwind_psql -- the idea is to use the aggregate over a relation with lots of columns to test the performance create extension if not exists pg_csv; CREATE TABLE customers ( customer_id CHAR(5) PRIMARY KEY, company_name TEXT NOT NULL, contact_name TEXT, contact_title TEXT, address TEXT, city TEXT, region TEXT, postal_code TEXT, country TEXT, phone TEXT, fax TEXT ); CREATE TABLE orders ( order_id BIGSERIAL PRIMARY KEY, customer_id CHAR(5) NOT NULL REFERENCES customers(customer_id) ON DELETE CASCADE, employee_id SMALLINT, order_date DATE, required_date DATE, shipped_date DATE, freight NUMERIC(10,2) DEFAULT 0 CHECK (freight >= 0), ship_name TEXT, ship_address TEXT, ship_city TEXT, ship_region TEXT, ship_postal_code TEXT, ship_country TEXT ); -- generate seed data -- three groups of 100 by city/country INSERT INTO customers ( customer_id, company_name, contact_name, contact_title, address, city, region, postal_code, country, phone, fax ) SELECT ('C' || lpad(i::text, 4, '0'))::char(5) AS customer_id, 'Company ' || i AS company_name, 'Contact ' || i AS contact_name, CASE WHEN i <= 100 THEN 'Owner' WHEN i <= 200 THEN 'Sales Manager' ELSE 'Purchasing' END AS contact_title, i::text || ' Main Street' AS address, CASE WHEN i <= 100 THEN 'Seattle' WHEN i <= 200 THEN 'London' ELSE 'Sao Paulo' END AS city, CASE WHEN i <= 100 THEN 'WA' WHEN i <= 200 THEN NULL ELSE 'SP' END AS region, (10000 + i)::text AS postal_code, CASE WHEN i <= 100 THEN 'USA' WHEN i <= 200 THEN 'UK' ELSE 'Brazil' END AS country, '+1-555-' || lpad(i::text, 4, '0') AS phone, CASE WHEN right(i::text, 1) IN ('0','5') THEN NULL ELSE '+1-555-' || lpad((i + 1000)::text, 4, '0') END AS fax FROM generate_series(1, 300) AS s(i); -- 2700 orders, 9 orders per customer WITH base AS ( SELECT c.customer_id, c.company_name, c.address, c.city, c.region, c.postal_code, c.country FROM customers c ) INSERT INTO orders ( customer_id, employee_id, order_date, required_date, shipped_date, freight, ship_name, ship_address, ship_city, ship_region, ship_postal_code, ship_country ) SELECT b.customer_id, n::smallint AS employee_id, (DATE '2024-01-01' + (n || ' day')::interval)::date AS order_date, (DATE '2024-01-01' + ((n + 7) || ' day')::interval)::date AS required_date, CASE WHEN n = 9 THEN NULL ELSE (DATE '2024-01-01' + ((n + 3) || ' day')::interval)::date END AS shipped_date, (10 + n)::numeric(10,2) AS freight, b.company_name AS ship_name, b.address AS ship_address, b.city AS ship_city, b.region AS ship_region, b.postal_code AS ship_postal_code, b.country AS ship_country FROM base b CROSS JOIN generate_series(1, 9) AS n; -- create a view to have more columns CREATE OR REPLACE VIEW orders_customers AS SELECT o.order_id, o.customer_id, c.company_name, c.contact_name, c.contact_title, c.address AS customer_address, c.city AS customer_city, c.region AS customer_region, c.postal_code AS customer_postal_code, c.country AS customer_country, c.phone, c.fax, o.employee_id, o.order_date, o.required_date, o.shipped_date, o.freight, o.ship_name, o.ship_address, o.ship_city, o.ship_region, o.ship_postal_code, o.ship_country FROM orders o JOIN customers c USING (customer_id); pg_csv-1.0.2/bench/postgrest.sql000066400000000000000000000007101515045443700166330ustar00rootroot00000000000000\set lim random(1000, 2000) with pgrst_source as ( select * from orders_customers limit :lim ) select (select coalesce(string_agg(a.k, ','), '') from (select json_object_keys(r)::text as k from (select row_to_json(hh) as r from pgrst_source as hh limit 1) _) a) || e'\n' || coalesce(string_agg(substring(_postgrest_t::text, 2, length(_postgrest_t::text) - 2), e'\n'), '') as body from ( select * from pgrst_source limit :lim) _postgrest_t; pg_csv-1.0.2/pg_csv.control.in000066400000000000000000000001271515045443700162730ustar00rootroot00000000000000default_version = '@EXTVERSION@' relocatable = true module_pathname = '$libdir/pg_csv' pg_csv-1.0.2/shell.nix000066400000000000000000000023651515045443700146400ustar00rootroot00000000000000with import (builtins.fetchTarball { name = "2025-06-16"; url = "https://github.com/NixOS/nixpkgs/archive/e6f23dc08d3624daab7094b701aa3954923c6bbb.tar.gz"; sha256 = "sha256:0m0xmk8sjb5gv2pq7s8w7qxf7qggqsd3rxzv3xrqkhfimy2x7bnx"; }) {}; mkShellNoCC { buildInputs = let xpg = import (fetchFromGitHub { owner = "steve-chavez"; repo = "xpg"; rev = "v1.5.2"; sha256 = "sha256-NwhOi/BAZX0JdtFhtV3wgjagNTO5Kmq2Oy3sa+GyDv8="; }); style = writeShellScriptBin "pg_csv-style" '' ${clang-tools}/bin/clang-format -i src/* ''; styleCheck = writeShellScriptBin "pg_csv-style-check" '' ${clang-tools}/bin/clang-format -i src/* ${git}/bin/git diff-index --exit-code HEAD -- '*.c' ''; loadtest = writeShellScriptBin "pg_csv-loadtest" '' set -euo pipefail file=./bench/$1.sql cat <delimiter = ','; csv_opts->bom = false; csv_opts->header = true; csv_opts->nullstr = NULL; if (opts_hdr == NULL) return; TupleDesc desc = lookup_rowtype_tupdesc(HeapTupleHeaderGetTypeId(opts_hdr), HeapTupleHeaderGetTypMod(opts_hdr)); Datum values[csv_options_count]; bool nulls[csv_options_count]; heap_deform_tuple( &(HeapTupleData){.t_len = HeapTupleHeaderGetDatumLength(opts_hdr), .t_data = opts_hdr}, desc, values, nulls); if (!nulls[0]) { csv_opts->delimiter = DatumGetChar(values[0]); if (is_reserved(csv_opts->delimiter)) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("delimiter cannot be newline, carriage return or " "double quote"))); } if (!nulls[1]) { csv_opts->bom = DatumGetBool(values[1]); } if (!nulls[2]) { csv_opts->header = DatumGetBool(values[2]); } if (!nulls[3]) { csv_opts->nullstr = DatumGetTextPP(values[3]); } ReleaseTupleDesc(desc); } void csv_append_field(StringInfo buf, const char *s, size_t n, char delim) { if (!needs_quote(s, n, delim)) { appendBinaryStringInfo(buf, s, n); } else { appendStringInfoChar(buf, DQUOTE); for (size_t j = 0; j < n; j++) { char c = s[j]; if (c == DQUOTE) appendStringInfoChar(buf, DQUOTE); appendStringInfoChar(buf, c); } appendStringInfoChar(buf, DQUOTE); } } pg_csv-1.0.2/src/aggs.h000066400000000000000000000012551515045443700146670ustar00rootroot00000000000000#ifndef AGGS_H #define AGGS_H // mirrors the SQL csv_options type typedef struct { char delimiter; bool bom; bool header; text *nullstr; } CsvOptions; #define csv_options_count 4 typedef struct { StringInfoData accum_buf; bool header_done; bool first_row; TupleDesc tupdesc; int nullstr_len; CsvOptions *options; char *cached_nullstr; } CsvAggState; extern const char NEWLINE; extern const char BOM[3]; extern const char DQUOTE; extern const char CR; void parse_csv_options(HeapTupleHeader opts_hdr, CsvOptions *csv_opts); void csv_append_field(StringInfo buf, const char *s, size_t n, char delim); #endif pg_csv-1.0.2/src/pg_csv.c000066400000000000000000000106311515045443700152200ustar00rootroot00000000000000// This is the top module, all SQL exposed functions will be in this file #define PG_PRELUDE_IMPL #include "pg_prelude.h" #include "aggs.h" PG_MODULE_MAGIC; // aggregate final function PG_FUNCTION_INFO_V1(csv_agg_finalfn); Datum csv_agg_finalfn(PG_FUNCTION_ARGS) { if (PG_ARGISNULL(0)) PG_RETURN_NULL(); CsvAggState *state = (CsvAggState *)PG_GETARG_POINTER(0); if (state->tupdesc != NULL) ReleaseTupleDesc(state->tupdesc); PG_RETURN_TEXT_P(cstring_to_text_with_len(state->accum_buf.data, state->accum_buf.len)); } // aggregate transition function PG_FUNCTION_INFO_V1(csv_agg_transfn); Datum csv_agg_transfn(PG_FUNCTION_ARGS) { CsvAggState *state = !PG_ARGISNULL(0) ? (CsvAggState *)PG_GETARG_POINTER(0) : NULL; HeapTupleHeader next = !PG_ARGISNULL(1) ? PG_GETARG_HEAPTUPLEHEADER(1) : NULL; // first call when the accumulator is NULL // pretty standard stuff, for example see the jsonb_agg transition function // https://github.com/postgres/postgres/blob/3c4e26a62c31ebe296e3aedb13ac51a7a35103bd/src/backend/utils/adt/jsonb.c#L1521 if (state == NULL) { MemoryContext aggctx, oldctx; if (!AggCheckCallContext(fcinfo, &aggctx)) elog(ERROR, "%s called in non‑aggregate context", __func__); // here we extend the lifetime of the CsvAggState until the aggregate finishes oldctx = MemoryContextSwitchTo(aggctx); state = palloc(sizeof(CsvAggState)); initStringInfo(&state->accum_buf); state->header_done = false; state->first_row = true; state->tupdesc = NULL; state->nullstr_len = 0; state->cached_nullstr = NULL; state->options = palloc(sizeof(CsvOptions)); // we'll parse the csv options only once HeapTupleHeader opts_hdr = PG_NARGS() >= 3 && !PG_ARGISNULL(2) ? PG_GETARG_HEAPTUPLEHEADER(2) : NULL; parse_csv_options(opts_hdr, state->options); if (state->options->nullstr) { state->cached_nullstr = text_to_cstring(state->options->nullstr); state->nullstr_len = VARSIZE_ANY_EXHDR(state->options->nullstr); } MemoryContextSwitchTo(oldctx); } if (next == NULL) PG_RETURN_POINTER(state); // skip NULL rows // build header and cache tupdesc once if (!state->header_done) { TupleDesc tdesc = lookup_rowtype_tupdesc(HeapTupleHeaderGetTypeId(next), HeapTupleHeaderGetTypMod(next)); if (state->options->bom) appendBinaryStringInfo(&state->accum_buf, BOM, sizeof(BOM)); // build header row if (state->options->header) { for (int i = 0; i < tdesc->natts; i++) { Form_pg_attribute att = TupleDescAttr(tdesc, i); if (att->attisdropped) // pg always keeps dropped columns, guard against this continue; if (i > 0) // only append delimiter after the first value appendStringInfoChar(&state->accum_buf, state->options->delimiter); char *cstr = NameStr(att->attname); csv_append_field(&state->accum_buf, cstr, strlen(cstr), state->options->delimiter); } appendStringInfoChar(&state->accum_buf, NEWLINE); } state->tupdesc = tdesc; state->header_done = true; } // build body int tuple_natts = state->tupdesc->natts; Datum *datums = (Datum *)palloc(mul_size(tuple_natts, sizeof(Datum))); bool *nulls = (bool *)palloc(mul_size(tuple_natts, sizeof(bool))); // extract the values of the next row heap_deform_tuple( &(HeapTupleData){ .t_len = HeapTupleHeaderGetDatumLength(next), .t_data = next, }, state->tupdesc, datums, nulls); // newline before every data row except the first // we do this to avoid trimming the last newline once we're done with all rows if (!state->first_row) appendStringInfoChar(&state->accum_buf, NEWLINE); state->first_row = false; // create next row for (int i = 0; i < tuple_natts; i++) { Form_pg_attribute att = TupleDescAttr(state->tupdesc, i); if (att->attisdropped) // pg always keeps dropped columns, guard against this continue; if (i > 0) appendStringInfoChar(&state->accum_buf, state->options->delimiter); if (nulls[i]) { if (state->cached_nullstr) csv_append_field(&state->accum_buf, state->cached_nullstr, state->nullstr_len, state->options->delimiter); } else { char *cstr = datum_to_cstring(datums[i], att->atttypid); csv_append_field(&state->accum_buf, cstr, strlen(cstr), state->options->delimiter); } } PG_RETURN_POINTER(state); } pg_csv-1.0.2/src/pg_prelude.h000066400000000000000000000031561515045443700160760ustar00rootroot00000000000000#ifndef PG_PRELUDE_H #define PG_PRELUDE_H // pragmas needed to pass compiling with -Wextra #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-parameter" #pragma GCC diagnostic ignored "-Wsign-compare" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #pragma GCC diagnostic pop char *datum_to_cstring(Datum datum, Oid typeoid); #endif /* PG_PRELUDE_H */ #ifdef PG_PRELUDE_IMPL char *datum_to_cstring(Datum datum, Oid typeoid) { Oid out_func; bool is_varlena; getTypeOutputInfo(typeoid, &out_func, &is_varlena); return OidOutputFunctionCall(out_func, datum); } #endif /* PG_PRELUDE_IMPL */ pg_csv-1.0.2/test/000077500000000000000000000000001515045443700137625ustar00rootroot00000000000000pg_csv-1.0.2/test/expected/000077500000000000000000000000001515045443700155635ustar00rootroot00000000000000pg_csv-1.0.2/test/expected/00_init.out000066400000000000000000000015661515045443700175660ustar00rootroot00000000000000CREATE TABLE projects ( id integer , name text , project_name text , client_id integer , subclient_id int ); -- ensure these dropped column cases are tested ALTER TABLE projects DROP COLUMN project_name; ALTER TABLE projects DROP COLUMN subclient_id; INSERT INTO projects VALUES (1, 'Windows 7', 1); INSERT INTO projects VALUES (2, 'has,comma', 1); INSERT INTO projects VALUES (NULL, NULL, NULL); INSERT INTO projects VALUES (4, 'OSX', 2); INSERT INTO projects VALUES (NULL, 'has"quote', NULL); INSERT INTO projects VALUES (5, 'has,comma and "quote"', 7); INSERT INTO projects VALUES (6, E'has \n LF', 7); INSERT INTO projects VALUES (7, E'has \r CR', 8); INSERT INTO projects VALUES (8, E'has \r\n CRLF"', 8); create extension if not exists pg_csv; CREATE TABLE nasty ( "unusual"",names" INTEGER GENERATED ALWAYS AS IDENTITY, text TEXT ); INSERT INTO nasty (text) VALUES ('test'); pg_csv-1.0.2/test/expected/bom.out000066400000000000000000000014461515045443700170760ustar00rootroot00000000000000-- this is done to avoid failing on a pure psql change that happened on postgres 16 -- on pg <= 15 the BOM output adds one extra space, on pg 16 it doesn't \pset format unaligned \pset tuples_only on \echo -- include BOM (byte-order mark) SELECT csv_agg(x, csv_options(bom := true)) AS body FROM projects x; id,name,client_id 1,Windows 7,1 2,"has,comma",1 ,, 4,OSX,2 ,"has""quote", 5,"has,comma and ""quote""",7 6,"has LF",7 7,"has CR",8 8,"has CRLF""",8 \echo -- include BOM with custom delimiter SELECT csv_agg(x, csv_options(delimiter := ';', bom := true)) AS body FROM projects x; id;name;client_id 1;Windows 7;1 2;has,comma;1 ;; 4;OSX;2 ;"has""quote"; 5;"has,comma and ""quote""";7 6;"has LF";7 7;"has CR";8 8;"has CRLF""";8 \echo \pset format aligned \pset tuples_only off pg_csv-1.0.2/test/expected/delimiters.out000066400000000000000000000043531515045443700204620ustar00rootroot00000000000000-- semicolon delimiter SELECT csv_agg(x, csv_options(';')) AS body FROM projects x; body ------------------------------- id;name;client_id + 1;Windows 7;1 + 2;has,comma;1 + ;; + 4;OSX;2 + ;"has""quote"; + 5;"has,comma and ""quote""";7+ 6;"has + LF";7 + 7;"has \r CR";8 + 8;"has \r + CRLF""";8 (1 row) -- pipe delimiter, named params work too SELECT csv_agg(x, csv_options(delimiter := '|')) AS body FROM projects x; body ------------------------------- id|name|client_id + 1|Windows 7|1 + 2|has,comma|1 + || + 4|OSX|2 + |"has""quote"| + 5|"has,comma and ""quote"""|7+ 6|"has + LF"|7 + 7|"has \r CR"|8 + 8|"has \r + CRLF"""|8 (1 row) -- tab delimiter SELECT csv_agg(x, csv_options(E'\t')) AS body FROM projects x; body ------------------------------------------- id name client_id + 1 Windows 7 1 + 2 has,comma 1 + + 4 OSX 2 + "has""quote" + 5 "has,comma and ""quote""" 7+ 6 "has + LF" 7 + 7 "has \r CR" 8 + 8 "has \r + CRLF""" 8 (1 row) -- newline is forbidden as delimiter SELECT csv_agg(x, csv_options(E'\n')) AS body FROM projects x; ERROR: delimiter cannot be newline, carriage return or double quote -- double quote is forbidden as delimiter SELECT csv_agg(x, csv_options('"')) AS body FROM projects x; ERROR: delimiter cannot be newline, carriage return or double quote -- carriage return is forbidden as delimiter SELECT csv_agg(x, csv_options(E'\r')) AS body FROM projects x; ERROR: delimiter cannot be newline, carriage return or double quote pg_csv-1.0.2/test/expected/header.out000066400000000000000000000036501515045443700175500ustar00rootroot00000000000000-- header SELECT csv_agg(x, csv_options(header:=true)) AS body FROM projects x; body ------------------------------- id,name,client_id + 1,Windows 7,1 + 2,"has,comma",1 + ,, + 4,OSX,2 + ,"has""quote", + 5,"has,comma and ""quote""",7+ 6,"has + LF",7 + 7,"has \r CR",8 + 8,"has \r + CRLF""",8 (1 row) -- no header SELECT csv_agg(x, csv_options(header:=false)) AS body FROM projects x; body ------------------------------- 1,Windows 7,1 + 2,"has,comma",1 + ,, + 4,OSX,2 + ,"has""quote", + 5,"has,comma and ""quote""",7+ 6,"has + LF",7 + 7,"has \r CR",8 + 8,"has \r + CRLF""",8 (1 row) -- no header with delimiter SELECT csv_agg(x, csv_options(delimiter:='|', header:=false)) AS body FROM projects x; body ------------------------------- 1|Windows 7|1 + 2|has,comma|1 + || + 4|OSX|2 + |"has""quote"| + 5|"has,comma and ""quote"""|7+ 6|"has + LF"|7 + 7|"has \r CR"|8 + 8|"has \r + CRLF"""|8 (1 row) -- see bom.sql for an explanation of these settings \pset format unaligned \pset tuples_only on \echo -- no header with delimiter and BOM SELECT csv_agg(x, csv_options(delimiter:='|', header:=false, bom := true)) AS body FROM projects x; 1|Windows 7|1 2|has,comma|1 || 4|OSX|2 |"has""quote"| 5|"has,comma and ""quote"""|7 6|"has LF"|7 7|"has CR"|8 8|"has CRLF"""|8 \echo \pset format aligned \pset tuples_only off pg_csv-1.0.2/test/expected/nullstr.out000066400000000000000000000031331515045443700200170ustar00rootroot00000000000000-- custom null string SELECT csv_agg(x, csv_options(nullstr:='')) AS body FROM projects x; body ------------------------------- id,name,client_id + 1,Windows 7,1 + 2,"has,comma",1 + ,, + 4,OSX,2 + ,"has""quote", + 5,"has,comma and ""quote""",7+ 6,"has + LF",7 + 7,"has \r CR",8 + 8,"has \r + CRLF""",8 (1 row) -- custom null string with no header SELECT csv_agg(x, csv_options(nullstr:='NULL', header:=false)) AS body FROM projects x; body ------------------------------- 1,Windows 7,1 + 2,"has,comma",1 + NULL,NULL,NULL + 4,OSX,2 + NULL,"has""quote",NULL + 5,"has,comma and ""quote""",7+ 6,"has + LF",7 + 7,"has \r CR",8 + 8,"has \r + CRLF""",8 (1 row) -- custom null string with no header and delimiter SELECT csv_agg(x, csv_options(nullstr:='~', delimiter:='|', header:=false)) AS body FROM projects x; body ------------------------------- 1|Windows 7|1 + 2|has,comma|1 + ~|~|~ + 4|OSX|2 + ~|"has""quote"|~ + 5|"has,comma and ""quote"""|7+ 6|"has + LF"|7 + 7|"has \r CR"|8 + 8|"has \r + CRLF"""|8 (1 row) pg_csv-1.0.2/test/expected/postgrest.out000066400000000000000000000050101515045443700203420ustar00rootroot00000000000000-- recreate postgrest problem in https://github.com/PostgREST/postgrest/issues/3627 create function getproject(id int) returns setof projects language sql as $_$ select * from projects where id = $1; $_$; \echo -- this is postgREST CSV query, it produces the same result as our `csv_agg` (see ../expected/sanity.out) WITH pgrst_source AS ( SELECT "projects".* FROM "projects" ) SELECT (SELECT coalesce(string_agg(a.k, ','), '') FROM (SELECT json_object_keys(r)::text as k FROM (SELECT row_to_json(hh) as r from pgrst_source as hh limit 1) _) a) || E'\n' || coalesce(string_agg(substring(_postgrest_t::text, 2, length(_postgrest_t::text) - 2), E'\n'), '') AS body FROM ( SELECT * FROM pgrst_source ) _postgrest_t; body ------------------------------- id,name,client_id + 1,"Windows 7",1 + 2,"has,comma",1 + ,, + 4,OSX,2 + ,"has""quote", + 5,"has,comma and ""quote""",7+ 6,"has + LF",7 + 7,"has \r CR",8 + 8,"has \r + CRLF""",8 (1 row) -- postgREST CSV query with RPC and a filter, it selects columns in a particular order (client_id, id, name) -- and it produces out of order CSV header names WITH pgrst_source AS ( SELECT "pgrst_call"."client_id", "pgrst_call"."id", "pgrst_call"."name" FROM "getproject"("id" := 2) pgrst_call ) SELECT (SELECT coalesce(string_agg(a.k, ','), '') FROM ( SELECT json_object_keys(r)::text as k FROM (SELECT row_to_json(hh) as r from pgrst_source as hh limit 1 ) s ) a) || E'\n' || coalesce(string_agg(substring(_postgrest_t::text, 2, length(_postgrest_t::text) - 2), E'\n'), '') AS body FROM (SELECT "projects"."name", "projects"."id", "projects"."client_id" FROM "pgrst_source" AS "projects" ) _postgrest_t; body ------------------- client_id,id,name+ "has,comma",2,1 (1 row) -- same as above but with our csv_agg query with RPC and filter, it selects columns in a particular order (client_id, id, name) -- and it now produces correct order of CSV header names WITH pgrst_source AS ( SELECT "pgrst_call"."client_id", "pgrst_call"."id", "pgrst_call"."name" FROM "getproject"("id" := 2) pgrst_call ) SELECT csv_agg(_postgrest_t) AS body FROM (SELECT "projects"."name", "projects"."id", "projects"."client_id" FROM "pgrst_source" AS "projects") _postgrest_t; body ------------------- name,id,client_id+ "has,comma",2,1 (1 row) pg_csv-1.0.2/test/expected/sanity.out000066400000000000000000000013011515045443700176160ustar00rootroot00000000000000SELECT csv_agg(x) AS body FROM projects x; body ------------------------------- id,name,client_id + 1,Windows 7,1 + 2,"has,comma",1 + ,, + 4,OSX,2 + ,"has""quote", + 5,"has,comma and ""quote""",7+ 6,"has + LF",7 + 7,"has \r CR",8 + 8,"has \r + CRLF""",8 (1 row) -- proves that https://github.com/PostgREST/postgrest/issues/1371#issuecomment-519248984 is solved select csv_agg(x) from nasty x; csv_agg ------------------------ "unusual"",names",text+ 1,test (1 row) pg_csv-1.0.2/test/sql/000077500000000000000000000000001515045443700145615ustar00rootroot00000000000000pg_csv-1.0.2/test/sql/00_init.sql000066400000000000000000000015711515045443700165500ustar00rootroot00000000000000CREATE TABLE projects ( id integer , name text , project_name text , client_id integer , subclient_id int ); -- ensure these dropped column cases are tested ALTER TABLE projects DROP COLUMN project_name; ALTER TABLE projects DROP COLUMN subclient_id; INSERT INTO projects VALUES (1, 'Windows 7', 1); INSERT INTO projects VALUES (2, 'has,comma', 1); INSERT INTO projects VALUES (NULL, NULL, NULL); INSERT INTO projects VALUES (4, 'OSX', 2); INSERT INTO projects VALUES (NULL, 'has"quote', NULL); INSERT INTO projects VALUES (5, 'has,comma and "quote"', 7); INSERT INTO projects VALUES (6, E'has \n LF', 7); INSERT INTO projects VALUES (7, E'has \r CR', 8); INSERT INTO projects VALUES (8, E'has \r\n CRLF"', 8); create extension if not exists pg_csv; CREATE TABLE nasty ( "unusual"",names" INTEGER GENERATED ALWAYS AS IDENTITY, text TEXT ); INSERT INTO nasty (text) VALUES ('test'); pg_csv-1.0.2/test/sql/bom.sql000066400000000000000000000007561515045443700160670ustar00rootroot00000000000000-- this is done to avoid failing on a pure psql change that happened on postgres 16 -- on pg <= 15 the BOM output adds one extra space, on pg 16 it doesn't \pset format unaligned \pset tuples_only on \echo -- include BOM (byte-order mark) SELECT csv_agg(x, csv_options(bom := true)) AS body FROM projects x; \echo -- include BOM with custom delimiter SELECT csv_agg(x, csv_options(delimiter := ';', bom := true)) AS body FROM projects x; \echo \pset format aligned \pset tuples_only off pg_csv-1.0.2/test/sql/delimiters.sql000066400000000000000000000011371515045443700174450ustar00rootroot00000000000000-- semicolon delimiter SELECT csv_agg(x, csv_options(';')) AS body FROM projects x; -- pipe delimiter, named params work too SELECT csv_agg(x, csv_options(delimiter := '|')) AS body FROM projects x; -- tab delimiter SELECT csv_agg(x, csv_options(E'\t')) AS body FROM projects x; -- newline is forbidden as delimiter SELECT csv_agg(x, csv_options(E'\n')) AS body FROM projects x; -- double quote is forbidden as delimiter SELECT csv_agg(x, csv_options('"')) AS body FROM projects x; -- carriage return is forbidden as delimiter SELECT csv_agg(x, csv_options(E'\r')) AS body FROM projects x; pg_csv-1.0.2/test/sql/header.sql000066400000000000000000000011031515045443700165250ustar00rootroot00000000000000-- header SELECT csv_agg(x, csv_options(header:=true)) AS body FROM projects x; -- no header SELECT csv_agg(x, csv_options(header:=false)) AS body FROM projects x; -- no header with delimiter SELECT csv_agg(x, csv_options(delimiter:='|', header:=false)) AS body FROM projects x; -- see bom.sql for an explanation of these settings \pset format unaligned \pset tuples_only on \echo -- no header with delimiter and BOM SELECT csv_agg(x, csv_options(delimiter:='|', header:=false, bom := true)) AS body FROM projects x; \echo \pset format aligned \pset tuples_only off pg_csv-1.0.2/test/sql/nullstr.sql000066400000000000000000000005761515045443700170150ustar00rootroot00000000000000-- custom null string SELECT csv_agg(x, csv_options(nullstr:='')) AS body FROM projects x; -- custom null string with no header SELECT csv_agg(x, csv_options(nullstr:='NULL', header:=false)) AS body FROM projects x; -- custom null string with no header and delimiter SELECT csv_agg(x, csv_options(nullstr:='~', delimiter:='|', header:=false)) AS body FROM projects x; pg_csv-1.0.2/test/sql/postgrest.sql000066400000000000000000000036511515045443700173410ustar00rootroot00000000000000-- recreate postgrest problem in https://github.com/PostgREST/postgrest/issues/3627 create function getproject(id int) returns setof projects language sql as $_$ select * from projects where id = $1; $_$; \echo -- this is postgREST CSV query, it produces the same result as our `csv_agg` (see ../expected/sanity.out) WITH pgrst_source AS ( SELECT "projects".* FROM "projects" ) SELECT (SELECT coalesce(string_agg(a.k, ','), '') FROM (SELECT json_object_keys(r)::text as k FROM (SELECT row_to_json(hh) as r from pgrst_source as hh limit 1) _) a) || E'\n' || coalesce(string_agg(substring(_postgrest_t::text, 2, length(_postgrest_t::text) - 2), E'\n'), '') AS body FROM ( SELECT * FROM pgrst_source ) _postgrest_t; -- postgREST CSV query with RPC and a filter, it selects columns in a particular order (client_id, id, name) -- and it produces out of order CSV header names WITH pgrst_source AS ( SELECT "pgrst_call"."client_id", "pgrst_call"."id", "pgrst_call"."name" FROM "getproject"("id" := 2) pgrst_call ) SELECT (SELECT coalesce(string_agg(a.k, ','), '') FROM ( SELECT json_object_keys(r)::text as k FROM (SELECT row_to_json(hh) as r from pgrst_source as hh limit 1 ) s ) a) || E'\n' || coalesce(string_agg(substring(_postgrest_t::text, 2, length(_postgrest_t::text) - 2), E'\n'), '') AS body FROM (SELECT "projects"."name", "projects"."id", "projects"."client_id" FROM "pgrst_source" AS "projects" ) _postgrest_t; -- same as above but with our csv_agg query with RPC and filter, it selects columns in a particular order (client_id, id, name) -- and it now produces correct order of CSV header names WITH pgrst_source AS ( SELECT "pgrst_call"."client_id", "pgrst_call"."id", "pgrst_call"."name" FROM "getproject"("id" := 2) pgrst_call ) SELECT csv_agg(_postgrest_t) AS body FROM (SELECT "projects"."name", "projects"."id", "projects"."client_id" FROM "pgrst_source" AS "projects") _postgrest_t; pg_csv-1.0.2/test/sql/sanity.sql000066400000000000000000000002611515045443700166100ustar00rootroot00000000000000SELECT csv_agg(x) AS body FROM projects x; -- proves that https://github.com/PostgREST/postgrest/issues/1371#issuecomment-519248984 is solved select csv_agg(x) from nasty x;