Lab 2 - Unix Pipes

Instructions

Intro

In this lab you'll use the pipe system call to connect the output of one process with the input of another. This is a simple pipeline that you'll build using the fork, exec, and pipe system calls.

Your code will execute two commands (ls and tr) in such a way that the output of the first command (ls) becomes the input of the second command (tr). Both commands should run as child processes of the main program, which creates them by calling fork twice. These commands are hardcoded in shell.cpp to be ls and tr. The final output should display the output of ls in all caps, i.e.,

$ ls -al / | tr a-z A-Z

TOTAL 17
DRWXR-XR-X   22 ROOT  WHEEL   704 NOV 22 02:17 .
DRWXR-XR-X   22 ROOT  WHEEL   704 NOV 22 02:17 ..
----------    1 ROOT  ADMIN     0 NOV 22 02:17 .FILE
DRWXR-XR-X    2 ROOT  WHEEL    64 NOV 22 02:17 .NOFOLLOW
DRWXR-XR-X    2 ROOT  WHEEL    64 NOV 22 02:17 .RESOLVE
DRWXR-XR-X    2 ROOT  WHEEL    64 NOV 22 02:17 .VOL
LRWXR-XR-X    1 ROOT  ADMIN    36 NOV 22 02:17 .VOLUMEICON.ICNS -> SYSTEM/VOLUMES/DATA/.VOLUMEICON.ICNS
DRWXRWXR-X  133 ROOT  ADMIN  4256 JAN 28 10:26 APPLICATIONS
DRWXR-XR-X@  39 ROOT  WHEEL  1248 NOV 22 02:17 BIN
DRWXR-XR-X    2 ROOT  WHEEL    64 FEB 26  2022 CORES
DR-XR-XR-X    4 ROOT  WHEEL  8275 JAN 20 08:18 DEV
LRWXR-XR-X@   1 ROOT  WHEEL    11 NOV 22 02:17 ETC -> PRIVATE/ETC
LRWXR-XR-X    1 ROOT  WHEEL    25 JAN 20 08:19 HOME -> /SYSTEM/VOLUMES/DATA/HOME
DRWXR-XR-X   77 ROOT  WHEEL  2464 JAN 15 21:07 LIBRARY
DRWXR-XR-X    7 ROOT  WHEEL   224 OCT 14 11:24 OPT
DRWXR-XR-X    6 ROOT  WHEEL   192 JAN 20 08:19 PRIVATE
DRWXR-XR-X@  76 ROOT  WHEEL  2432 NOV 22 02:17 SBIN
DRWXR-XR-X@  10 ROOT  WHEEL   320 NOV 22 02:17 SYSTEM
LRWXR-XR-X@   1 ROOT  WHEEL    11 NOV 22 02:17 TMP -> PRIVATE/TMP
DRWXR-XR-X   10 ROOT  ADMIN   320 JAN  7 22:46 USERS
DRWXR-XR-X@  11 ROOT  WHEEL   352 NOV 22 02:17 USR
LRWXR-XR-X@   1 ROOT  WHEEL    11 NOV 22 02:17 VAR -> PRIVATE/VAR
DRWXR-XR-X    3 ROOT  WHEEL    96 JAN 28 08:25 VOLUMES

1. Task

Implement a C++ program that mimics the Unix pipeline ls -al / | tr a-z A-Z. You can use fork(), pipe(), dup2(), and execvp() to create two child processes and connect them via a pipe.

Introduction to fork(), exec(), pipe(), and wait()

/*
    ============================================================
    Introduction to fork(), exec(), pipe(), and wait()
    ============================================================
    This program is a teaching example that demonstrates how
    basic UNIX system calls work. It is NOT meant to solve
    a particular problem — instead, it shows three small demos:

      1. fork()  → how a parent and child process are created.
      2. exec()  → how a process can replace itself with another
                   program (for example, running "ls").
      3. pipe()  → how two processes can communicate by passing
                   data through a pipe (child writes, parent reads).

    Each section includes clear printouts so you can see the
    difference between parent and child processes, and how data
    moves between them. Use this code as a learning reference
    to understand system calls, not as a final solution to a lab.
*/

#include <unistd.h>   // fork, execvp, pipe, read, write, close
#include <sys/wait.h> // wait, waitpid
#include <iostream>   // std::cout, std::cerr

using namespace std;

// Teaching walkthrough of three UNIX process primitives, run one after another:
//   1. fork()  - parent and child each print a greeting
//   2. exec()  - a child replaces itself with "ls -1"
//   3. pipe()  - a child sends one message to the parent through a pipe
//
// Returns 0 when all three demos ran, 1 when fork(), pipe() or read() fails in
// the parent. Children never return from main(): they leave through _exit() so
// that only the original process runs the normal end-of-program cleanup.
int main() {
    // ============================================================
    // DEMO 1: fork()
    // ============================================================
    // std::endl flushes, so nothing is sitting in the output buffer when the
    // process is duplicated below.
    std::cout << "=== DEMO: fork() ===" << std::endl;

    // fork() creates a new process by duplicating the current one
    // - Parent gets the child's PID (a positive number)
    // - Child gets 0
    // - On failure nobody is created and -1 is returned
    pid_t pid = fork();

    if (pid == -1) {
        std::cerr << "fork failed\n";
        return 1;
    }

    if (pid == 0) {
        // This branch runs in the CHILD process.
        // _exit() skips the stream flush that returning from main() would
        // do, so the greeting is flushed explicitly first.
        std::cout << "Hello from the CHILD process! (pid=" << getpid() << ")\n"
                  << std::flush;
        _exit(0); // child exits here
    }

    // Only the PARENT process gets this far
    std::cout << "Hello from the PARENT process! (pid=" << getpid()
              << "), child=" << pid << "\n";

    // wait() makes the parent pause until the child finishes
    wait(nullptr);

    // ============================================================
    // DEMO 2: exec()
    // ============================================================
    std::cout << "\n=== DEMO: exec() ===" << std::endl;
    // Flush before forking: text still buffered here would be copied into the
    // child along with the rest of the process.
    std::cout << "Now the child will be REPLACED by another program (\"ls\")\n"
              << std::flush;

    pid = fork();
    if (pid == -1) {
        std::cerr << "fork failed\n";
        return 1;
    }

    if (pid == 0) {
        // In the CHILD process
        // Prepare arguments for "ls -1"
        // Note: argv must end with nullptr
        char* args[] = {(char*)"ls", (char*)"-1", nullptr};

        // execvp() REPLACES this process with "ls"
        execvp(args[0], args);

        // execvp() only returns when it could not start the program
        std::cerr << "exec failed\n";
        _exit(1);
    }

    // PARENT waits again for the child to finish
    wait(nullptr);

    // ============================================================
    // DEMO 3: pipe()
    // ============================================================
    std::cout << "\n=== DEMO: pipe() ===" << std::endl;

    // A pipe is a unidirectional data channel
    // fds[0] = read end, fds[1] = write end
    int fds[2];
    if (pipe(fds) == -1) {
        std::cerr << "pipe failed\n";
        return 1;
    }

    pid = fork();
    if (pid == -1) {
        std::cerr << "fork failed\n";
        // The pipe was created successfully, so release it before giving up.
        close(fds[0]);
        close(fds[1]);
        return 1;
    }

    if (pid == 0) {
        // CHILD process writes into the pipe

        close(fds[0]); // close unused read end

        const char msg[] = "Message from CHILD process\n";
        // sizeof(msg) counts the terminating '\0'; send only the text itself.
        const ssize_t expected = static_cast<ssize_t>(sizeof(msg) - 1);
        const ssize_t written = write(fds[1], msg, sizeof(msg) - 1);

        close(fds[1]); // close write end when done
        // Report a failed or short write through the exit status.
        _exit(written == expected ? 0 : 1);
    }

    // PARENT process reads from the pipe

    close(fds[1]); // close unused write end

    char buffer[100]; // space to store message
    // Ask for one byte less than the buffer holds so there is always room
    // for the terminator written below.
    const ssize_t n = read(fds[0], buffer, sizeof(buffer) - 1);
    close(fds[0]); // close read end

    if (n == -1) {
        // read() failed: n must not be used as an index.
        std::cerr << "read failed\n";
        wait(nullptr); // still reap the child
        return 1;
    }

    buffer[n] = '\0'; // null-terminate string (n == 0 means the child sent nothing)
    std::cout << "Parent received: " << buffer;

    wait(nullptr); // wait for child

    // ============================================================
    // End of demo
    // ============================================================
    std::cout << "\n=== END OF DEMO ===" << std::endl;
    return 0;
}

2. Starter Code

You will be given the file shell.cpp that contains the skeleton of the implementation and a makefile to compile it. In shell.cpp, you have the arrays “cmd1” and “cmd2”, which you can use directly with execvp.

In the main function, create an unnamed pipe with the pipe() system call. When you call fork() to create the child processes, each child inherits both ends of the pipe, so each child needs to close the end it does not use. You also need to connect the stdout of ls with the stdin of tr.

/****************
LE2: Introduction to Unnamed Pipes
****************/
#include <unistd.h> // pipe, fork, dup2, execvp, close
using namespace std;

// Starter skeleton for the two-command pipeline: cmd1's output is meant to
// become cmd2's input through an unnamed pipe. Only the two argument arrays
// are provided; the steps listed under the TODO are still to be written.
int main () {
    // lists all the files in the root directory in the long format
    // (argv-style array: program name first, nullptr as the last entry)
    char* cmd1[] = {(char*) "ls", (char*) "-al", (char*) "/", nullptr};
    // translates all input from lowercase to uppercase
    char* cmd2[] = {(char*) "tr", (char*) "a-z", (char*) "A-Z", nullptr};

    // TODO: add functionality
    // Create pipe

    // Create child to run first command
    // In child, redirect output to write end of pipe
    // Close the read end of the pipe on the child side.
    // In child, execute the command

    // Create another child to run second command
    // In child, redirect input to the read end of the pipe
    // Close the write end of the pipe on the child side.
    // Execute the second command.

    // Reset the input and output file descriptors of the parent
    // (i.e. close the parent's own copies of both pipe ends).
}

3. How to do the lab

This lab is about understanding how a Unix pipeline works under the hood—the same mechanism you use every time you type something like

ls | tr
cat file.txt | grep hello

into the shell. Instead of the shell connecting the programs through a pipe, your program becomes a “mini shell” and creates the pipe (a one-way channel for bytes) between the two programs itself. Your program:

Starts two child processes using fork(). One's intended to become ls while the other is intended to become tr.
It uses the dup2() system call to rebind the stdout of the first command to the write end of the pipe, and the stdin of the second command to the read end of the pipe.
The forked child processes then exec() the two programs: ls and tr.

Note that the mechanism highlights some degree of elegance about the fork() system call. Instead of providing a whole range of options to precisely specify the attributes of the child process, the child changes its own attributes using standard system calls, and then calls exec().

By the end of the lab, you'll see the output of ls -al / printed on the terminal in ALL CAPS, proof that data really flowed from one process to another. Learning how to do this is a useful skill that you might use later for constructing longer pipelines, implementing shells, and for process management.

Process

1. Create pipe

    // Create pipe
    // fds[0] = read end, fds[1] = write end
    int pipefds[2];
    if (pipe(pipefds) == -1) {
        cerr << "pipe failed\n";
        return 1;
    }

	int pipefds[2]

This declares an integer array that pipe() will fill in with the two file descriptors you can use.

	pipe(pipefds)

The OS picks two free entries from your file descriptor table, let's say 7 and 11.
It stores them in the array: pipefds[0] is the read end (7) and pipefds[1] is the write end (11).
Now your file descriptor table is populated.
Now you can say write(11, "...", #bytes), and whatever you write can be read back with read(7, ...).

2. Create child to run first command

    // Create child to run first command
    pid_t ls_pid = fork();    // parent gets the child's PID - child gets 0
    if (ls_pid == -1) {
        cerr << "fork failed\n";
        return 1;
    }

    // In child, redirect output to write end of pipe
    if (ls_pid == 0) {
        dup2(pipefds[1], STDOUT_FILENO);    // STDOUT_FILENO == 1 (standard output)

        // Close the read end of the pipe on the child side.
        close(pipefds[0]);                  // close unused read end
        close(pipefds[1]);                  // also closed redundant write end

        // In child, execute the command    // execvp():
        execvp(cmd1[0], cmd1);              // 1st arg takes the name of the command
                                            // 2nd arg takes the list of arguments
    }

What is `STDOUT_FILENO`?

STDOUT_FILENO is a constant defined by Unix that represents the file descriptor number for standard output.

STDOUT_FILENO == 1

Similarly:

STDIN_FILENO  == 0   // standard input
STDOUT_FILENO == 1   // standard output
STDERR_FILENO == 2   // standard error

They come from:

#include <unistd.h>

So when you write:

dup2(pipefds[1], STDOUT_FILENO);

You're really saying:

dup2(pipefds[1], 1);

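When ls runs normally, it prints to file descriptor 1 (stdout), which points to your terminal. To send that output into the pipe instead, fd 1 has to be re-pointed at the pipe's write end before ls starts.

That’s what this line does:

dup2(pipefds[1], STDOUT_FILENO);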

"Hey kernel, make stdout (fd 1) go into the pipe instead of the terminal."

Likewise, this line in the second child:

dup2(pipefds[0], STDIN_FILENO);

"Make stdin (fd 0) read from the pipe instead of the keyboard."

Close the duplicated fd after `dup2()` in each child

After:

dup2(pipefds[1], STDOUT_FILENO);

You should usually also do:

close(pipefds[1]);

because stdout (fd 1) now points to that same underlying pipe endpoint, and keeping the original open is redundant.

Same for the other child:

dup2(pipefds[0], STDIN_FILENO);
close(pipefds[0]);

Why it matters:

avoids extra open references
reduces the risk of EOF not being delivered when expected (a reader only sees EOF once every copy of the pipe's write end has been closed)
cleaner / more correct pattern

3. Create another child to run second command

    // Create another child to run second command
    pid_t tr_pid = fork();
    if (tr_pid == -1) {
        cerr << "fork failed\n";
        return 1;
    }

    // In child, redirect input to the read end of the pipe
    if (tr_pid == 0) {
        dup2(pipefds[0], STDIN_FILENO);     // STDIN_FILENO == 0 (standard input)

        // Close the write end of the pipe on the child side.
        close(pipefds[1]);                  // close unused write end
        close(pipefds[0]);                  // also closed redundant read end

        // Execute the second command.
        execvp(cmd2[0], cmd2);
    }

The `exec` family (there are many):

Function	Args style	PATH search
execl	list	NO
execlp	list	YES
execv	array	NO
execvp	array	YES

Example with execlp:

execlp(cmd, arg0, arg1, ..., NULL);

This is easier for:
- 1–2 commands
- Teaching purposes
- Simple pipelines

Example with execvp:

char *args[] = {"ls", "-l", NULL};
execvp("ls", args);

execvp uses an argument array
You use this when:
- Commands are parsed dynamically
- You're building a shell
- Arguments come from user input
Since in this lab the commands are already given as argument arrays (cmd1 and cmd2, each ending with nullptr), execvp is the natural fit: it takes the array as-is and searches PATH for the program.

4. Reset input and output file descriptors of the parent

    // Reset the input and output file descriptors of the parent.
    close(pipefds[0]);      // close both ends of the pipe
    close(pipefds[1]);

    // Parent waits for both children before exit
    int status;
    waitpid(ls_pid, &status, 0);
    waitpid(tr_pid, &status, 0);

    return 0;

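To finish the program, the parent first closes both ends of the pipe. This matters: while the parent still holds the write end open, tr never sees end-of-file on its input and never exits.
Then the parent waits for both children before exiting, in order to avoid zombie processes.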

Final solution

/****************
LE2: Introduction to Unnamed Pipes
****************/
#include <unistd.h>     // pipe, fork, dup2, execvp, close
#include <iostream>     // std::cout, std::cerr
#include <sys/wait.h>   // wait, waitpid

using namespace std;

// Runs the pipeline `ls -al / | tr a-z A-Z`.
//
// The parent creates one pipe and forks two children. The first child points
// its stdout at the pipe's write end and becomes ls; the second points its
// stdin at the pipe's read end and becomes tr. The parent then closes its own
// copies of both pipe ends and waits for both children.
//
// Returns 0 once both children have been waited for, 1 if pipe() or fork()
// fails. A child that cannot set up its redirection or start its command
// prints a message and terminates with _exit() instead of returning here.
int main () {
    // lists all the files in the root directory in the long format
    char* cmd1[] = {(char*) "ls", (char*) "-al", (char*) "/", nullptr};
    // translates all input from lowercase to uppercase
    char* cmd2[] = {(char*) "tr", (char*) "a-z", (char*) "A-Z", nullptr};

    // Create pipe
    int pipefds[2];   // pipefds[0] = read end, pipefds[1] = write end
    if (pipe(pipefds) == -1) {
        std::cerr << "pipe failed\n";
        return 1;
    }

    // Create child to run first command
    pid_t ls_pid = fork();    // parent gets the child's PID - child gets 0
    if (ls_pid == -1) {
        std::cerr << "fork failed\n";
        close(pipefds[0]);
        close(pipefds[1]);
        return 1;
    }

    if (ls_pid == 0) {
        // In child, redirect output to write end of pipe.
        // STDOUT_FILENO == 1 (standard output)
        if (dup2(pipefds[1], STDOUT_FILENO) == -1) {
            // Without the redirect ls would print to the original stdout
            // instead of feeding tr, so stop here.
            std::cerr << "dup2 failed\n";
            _exit(1);
        }

        // Close the read end of the pipe on the child side.
        close(pipefds[0]);                  // close unused read end
        close(pipefds[1]);                  // fd 1 now refers to the write end; the original is redundant

        // In child, execute the command.
        // execvp(): 1st arg is the name of the command,
        //           2nd arg is the nullptr-terminated argument list.
        execvp(cmd1[0], cmd1);

        // execvp() only returns when the command could not be started.
        // _exit() ends the child without running the parent's exit-time
        // cleanup; 127 follows the shell convention for "command not found".
        std::cerr << "exec failed\n";
        _exit(127);
    }

    // Create another child to run second command
    pid_t tr_pid = fork();
    if (tr_pid == -1) {
        std::cerr << "fork failed\n";
        // Close our pipe ends first so the already-running first child is not
        // left writing into a pipe nobody will read, then reap it.
        close(pipefds[0]);
        close(pipefds[1]);
        waitpid(ls_pid, nullptr, 0);
        return 1;
    }

    if (tr_pid == 0) {
        // In child, redirect input to the read end of the pipe.
        // STDIN_FILENO == 0 (standard input)
        if (dup2(pipefds[0], STDIN_FILENO) == -1) {
            std::cerr << "dup2 failed\n";
            _exit(1);
        }

        // Close the write end of the pipe on the child side.
        close(pipefds[1]);                  // close unused write end
        close(pipefds[0]);                  // fd 0 now refers to the read end; the original is redundant

        // Execute the second command.
        execvp(cmd2[0], cmd2);

        // Only reached when execvp() failed.
        std::cerr << "exec failed\n";
        _exit(127);
    }

    // Reset the input and output file descriptors of the parent.
    // The parent never uses the pipe itself; while it keeps the write end
    // open, the reader cannot see end-of-file.
    close(pipefds[0]);      // close both ends of the pipe
    close(pipefds[1]);

    // Parent waits for both children before exit.
    // The exit statuses are not used, so no status buffer is passed.
    waitpid(ls_pid, nullptr, 0);
    waitpid(tr_pid, nullptr, 0);

    return 0;
}

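Note that the final solution also handles exec failures: execvp() only returns if it fails, so the lines after it run only when the command could not be started.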