Lab 2 - Unix Pipes

Instructions

Intro

In this lab you'll use the pipe system call to connect the output of one process with the input of another. This is a simple pipeline that you'll build using the fork, exec, and pipe system calls.

Your code will execute two commands (ls and tr) in such a way that the output of the first command (ls) becomes the input of the second command (tr). Both commands should be child processes of the main program by calling fork twice. These commands are hardcoded in shell.cpp to be ls and tr. The final output should display the output of ls as all caps. I.e.,

$ ls -al / | tr a-z A-Z

TOTAL 17
DRWXR-XR-X   22 ROOT  WHEEL   704 NOV 22 02:17 .
DRWXR-XR-X   22 ROOT  WHEEL   704 NOV 22 02:17 ..
----------    1 ROOT  ADMIN     0 NOV 22 02:17 .FILE
DRWXR-XR-X    2 ROOT  WHEEL    64 NOV 22 02:17 .NOFOLLOW
DRWXR-XR-X    2 ROOT  WHEEL    64 NOV 22 02:17 .RESOLVE
DRWXR-XR-X    2 ROOT  WHEEL    64 NOV 22 02:17 .VOL
LRWXR-XR-X    1 ROOT  ADMIN    36 NOV 22 02:17 .VOLUMEICON.ICNS -> SYSTEM/VOLUMES/DATA/.VOLUMEICON.ICNS
DRWXRWXR-X  133 ROOT  ADMIN  4256 JAN 28 10:26 APPLICATIONS
DRWXR-XR-X@  39 ROOT  WHEEL  1248 NOV 22 02:17 BIN
DRWXR-XR-X    2 ROOT  WHEEL    64 FEB 26  2022 CORES
DR-XR-XR-X    4 ROOT  WHEEL  8275 JAN 20 08:18 DEV
LRWXR-XR-X@   1 ROOT  WHEEL    11 NOV 22 02:17 ETC -> PRIVATE/ETC
LRWXR-XR-X    1 ROOT  WHEEL    25 JAN 20 08:19 HOME -> /SYSTEM/VOLUMES/DATA/HOME
DRWXR-XR-X   77 ROOT  WHEEL  2464 JAN 15 21:07 LIBRARY
DRWXR-XR-X    7 ROOT  WHEEL   224 OCT 14 11:24 OPT
DRWXR-XR-X    6 ROOT  WHEEL   192 JAN 20 08:19 PRIVATE
DRWXR-XR-X@  76 ROOT  WHEEL  2432 NOV 22 02:17 SBIN
DRWXR-XR-X@  10 ROOT  WHEEL   320 NOV 22 02:17 SYSTEM
LRWXR-XR-X@   1 ROOT  WHEEL    11 NOV 22 02:17 TMP -> PRIVATE/TMP
DRWXR-XR-X   10 ROOT  ADMIN   320 JAN  7 22:46 USERS
DRWXR-XR-X@  11 ROOT  WHEEL   352 NOV 22 02:17 USR
LRWXR-XR-X@   1 ROOT  WHEEL    11 NOV 22 02:17 VAR -> PRIVATE/VAR
DRWXR-XR-X    3 ROOT  WHEEL    96 JAN 28 08:25 VOLUMES

1. Task

Implement a C++ program that mimics the Unix pipeline ls -al / | tr a-z A-Z. You can use fork(), pipe(), dup2(), and execvp() to create two child processes and connect them via a pipe.

Introduction to fork(), exec(), pipe(), and wait()

/*
    ============================================================
    Introduction to fork(), exec(), pipe(), and wait()
    ============================================================
    This program is a teaching example that demonstrates how
    basic UNIX system calls work. It is NOT meant to solve
    a particular problem — instead, it shows three small demos:

      1. fork()  → how a parent and child process are created.
      2. exec()  → how a process can replace itself with another
                   program (for example, running "ls").
      3. pipe()  → how two processes can communicate by passing
                   data through a pipe (child writes, parent reads).

    Each section includes clear printouts so you can see the
    difference between parent and child processes, and how data
    moves between them. Use this code as a learning reference
    to understand system calls, not as a final solution to a lab.
*/

#include <unistd.h>   // fork, execvp, pipe, read, write, close
#include <sys/wait.h> // wait, waitpid
#include <iostream>   // std::cout, std::cerr

using namespace std;

// Runs three small, independent demos in sequence: fork(), exec(), and pipe().
// Returns 0 when every demo completes, or 1 as soon as a system call fails.
int main() {
    // ============================================================
    // DEMO 1: fork()
    // ============================================================
    cout << "=== DEMO: fork() ===" << endl;

    // fork() creates a new process by duplicating the current one
    // - Parent gets the child's PID (a positive number)
    // - Child gets 0
    pid_t pid = fork();

    if (pid == -1) {
        cerr << "fork failed\n";
        return 1;
    }

    if (pid == 0) {
        // This branch runs in the CHILD process
        cout << "Hello from the CHILD process! (pid=" << getpid() << ")\n";
        // Returning from main (rather than _exit) flushes the line above.
        return 0; // child exits here
    } else {
        // This branch runs in the PARENT process
        cout << "Hello from the PARENT process! (pid=" << getpid()
             << "), child=" << pid << "\n";

        // wait() makes the parent pause until the child finishes
        wait(nullptr);
    }

    // ============================================================
    // DEMO 2: exec()
    // ============================================================
    cout << "\n=== DEMO: exec() ===" << endl;
    // Flush (endl) BEFORE forking: otherwise, when stdout is redirected to a
    // file or pipe, this line sits in the output buffer, gets copied into the
    // child, and ends up printed after the output of "ls" (or twice).
    cout << "Now the child will be REPLACED by another program (\"ls\")" << endl;

    pid = fork();
    if (pid == -1) {
        cerr << "fork failed\n";
        return 1;
    }

    if (pid == 0) {
        // In the CHILD process
        // Prepare arguments for "ls -1"
        // Note: argv must end with nullptr
        char* args[] = {(char*)"ls", (char*)"-1", nullptr};

        // execvp() REPLACES this process with "ls"
        execvp(args[0], args);

        // If execvp returns, something went wrong.
        // _exit() ends the child immediately, without running the parent's
        // exit-time cleanup a second time in this copy of the process.
        cerr << "exec failed\n";
        _exit(1);
    } else {
        // PARENT waits again for the child to finish
        wait(nullptr);
    }

    // ============================================================
    // DEMO 3: pipe()
    // ============================================================
    cout << "\n=== DEMO: pipe() ===" << endl;

    // A pipe is a unidirectional data channel
    // fds[0] = read end, fds[1] = write end
    int fds[2];
    if (pipe(fds) == -1) {
        cerr << "pipe failed\n";
        return 1;
    }

    pid = fork();
    if (pid == -1) {
        cerr << "fork failed\n";
        close(fds[0]); // do not leak the pipe on the error path
        close(fds[1]);
        return 1;
    }

    if (pid == 0) {
        // CHILD process writes into the pipe

        close(fds[0]); // close unused read end

        const char msg[] = "Message from CHILD process\n";
        // write data into the pipe; sizeof(msg) - 1 sends the text without
        // the trailing '\0' (the parent adds its own terminator)
        if (write(fds[1], msg, sizeof(msg) - 1) == -1) {
            cerr << "write failed\n";
            close(fds[1]);
            return 1;
        }

        close(fds[1]); // close write end when done
        return 0;
    } else {
        // PARENT process reads from the pipe

        close(fds[1]); // close unused write end

        char buffer[100];              // space to store message
        // Read at most sizeof(buffer) - 1 bytes so there is always room for
        // the terminating '\0'. read() returns -1 on error, so the result
        // must be checked before it is used as an index.
        ssize_t n = read(fds[0], buffer, sizeof(buffer) - 1);
        if (n < 0) {
            cerr << "read failed\n";
            close(fds[0]);
            wait(nullptr);
            return 1;
        }
        buffer[n] = '\0';              // null-terminate string

        cout << "Parent received: " << buffer;

        close(fds[0]); // close read end
        wait(nullptr); // wait for child
    }

    // ============================================================
    // End of demo
    // ============================================================
    cout << "\n=== END OF DEMO ===" << endl;
    return 0;
}

2. Starter Code

You will be given the file shell.cpp that contains the skeleton of the implementation and a makefile to compile it. In shell.cpp, you have the arrays “cmd1” and “cmd2”, which you can use directly with execvp.

In the main function, you should create an unnamed pipe using the pipe() system call. When you call fork() to create the child processes, each child inherits both ends of the pipe. So, in each child you need to close the end it does not use (and, after dup2(), the original descriptor of the end it does use); the parent must close both ends as well. You also need to connect the stdout of ls with the stdin of tr.

/****************
LE2: Introduction to Unnamed Pipes
****************/
#include <unistd.h> // pipe, fork, dup2, execvp, close
using namespace std;

// Starter skeleton for the pipeline `ls -al / | tr a-z A-Z`.
// Only the two hardcoded argument arrays are provided; the pipe/fork/dup2/
// execvp logic described by the comments below is left for you to write.
int main () {
    // lists all the files in the root directory in the long format
    // (argv-style array: program name first, nullptr last)
    char* cmd1[] = {(char*) "ls", (char*) "-al", (char*) "/", nullptr};
    // translates all input from lowercase to uppercase
    // (same layout; both arrays can be passed directly to execvp)
    char* cmd2[] = {(char*) "tr", (char*) "a-z", (char*) "A-Z", nullptr};

    // TODO: add functionality
    // Create pipe

    // Create child to run first command
    // In child, redirect output to write end of pipe
    // Close the read end of the pipe on the child side.
    // In child, execute the command

    // Create another child to run second command
    // In child, redirect input to the read end of the pipe
    // Close the write end of the pipe on the child side.
    // Execute the second command.

    // Reset the input and output file descriptors of the parent.
    // (i.e. close the parent's copies of both pipe ends)
}

3. How to do the lab

This lab is about understanding how a Unix pipeline works under the hood—the same mechanism you use every time you type something like

ls | tr
cat file.txt | grep hello

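into the shell. Instead of the shell connecting programs through pipes, your program becomes a “mini shell” and creates a pipe (a one-way channel for bytes) between the two programs. Your program:

1. creates the pipe with pipe(),
2. forks a first child, points its stdout at the write end of the pipe with dup2(), and runs ls with execvp(),
3. forks a second child, points its stdin at the read end of the pipe with dup2(), and runs tr with execvp(),
4. closes both ends of the pipe in the parent and waits for both children to finish.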

Note that the mechanism highlights some degree of elegance about the fork() system call. Instead of providing a whole range of options to precisely specify the attributes of the child process, the child changes its own attributes using standard system calls, and then calls exec().

By the end of the lab, you'll see the output of ls -al / printed on the terminal in ALL CAPS, proof that data really flowed from one process to another. Learning how to do this is a useful skill that you might use later for constructing longer pipelines, implementing shells, and for process management.

Process

1. Create pipe

    // Create pipe
    // pipefds[0] = read end, pipefds[1] = write end
    int pipefds[2];
    if (pipe(pipefds) == -1) {
        cerr << "pipe failed\n";
        return 1;
    }
    // Bare-bones form of the same step, without the error check:
    //     int pipefds[2];
    //     pipe(pipefds);

2. Create child to run first command

    // Create child to run first command
    pid_t ls_pid = fork();    // parent gets the child's PID - child gets 0
    if (ls_pid == -1) {
        cerr << "fork failed\n";
        close(pipefds[0]);                  // do not leak the pipe on the error path
        close(pipefds[1]);
        return 1;
    }

    // In child, redirect output to write end of pipe
    if (ls_pid == 0) {
        // STDOUT_FILENO == 1 (standard output)
        if (dup2(pipefds[1], STDOUT_FILENO) == -1) {
            cerr << "dup2 failed\n";
            _exit(1);
        }

        // Close the read end of the pipe on the child side.
        close(pipefds[0]);                  // close unused read end
        close(pipefds[1]);                  // also close the now-redundant write end

        // In child, execute the command.
        // execvp(): 1st arg is the name of the command,
        //           2nd arg is the nullptr-terminated list of arguments.
        execvp(cmd1[0], cmd1);

        // execvp() only returns if it failed. The child must stop here;
        // otherwise it would fall through and run the parent's code.
        cerr << "exec failed\n";
        _exit(1);
    }
What is STDOUT_FILENO?

STDOUT_FILENO is a constant defined by Unix that represents the file descriptor number for standard output.

STDOUT_FILENO == 1

Similarly:

STDIN_FILENO  == 0   // standard input
STDOUT_FILENO == 1   // standard output
STDERR_FILENO == 2   // standard error

They come from:

#include <unistd.h>

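So when you write:

dup2(pipefds[1], STDOUT_FILENO);

You're really saying:

dup2(pipefds[1], 1);

That’s what this line does: it makes file descriptor 1 (standard output) refer to the write end of the pipe, so everything the process prints to stdout goes into the pipe:

dup2(pipefds[1], STDOUT_FILENO);

This line does the mirror image: it makes file descriptor 0 (standard input) refer to the read end of the pipe, so the process reads its input from the pipe:

dup2(pipefds[0], STDIN_FILENO);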
Close the duplicated fd after dup2() in each child

After:

dup2(pipefds[1], STDOUT_FILENO);

You should usually also do:

close(pipefds[1]);

because stdout (fd 1) now points to that same underlying pipe endpoint, and keeping the original open is redundant.

Same for the other child:

dup2(pipefds[0], STDIN_FILENO);
close(pipefds[0]);

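Why it matters: tr keeps reading until it sees end-of-file on the pipe, and end-of-file is only reported once every descriptor that refers to the write end has been closed (in ls, in tr itself, and in the parent). If any copy of the write end stays open, tr never sees end-of-file and the pipeline hangs. Closing the descriptors you no longer need also keeps them from leaking into the programs started by exec.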

3. Create another child to run second command

    // Create another child to run second command
    pid_t tr_pid = fork();
    if (tr_pid == -1) {
        cerr << "fork failed\n";
        close(pipefds[0]);                  // do not leak the pipe on the error path
        close(pipefds[1]);
        waitpid(ls_pid, nullptr, 0);        // reap the first child before giving up
        return 1;
    }

    // In child, redirect input to the read end of the pipe
    if (tr_pid == 0) {
        // STDIN_FILENO == 0 (standard input)
        if (dup2(pipefds[0], STDIN_FILENO) == -1) {
            cerr << "dup2 failed\n";
            _exit(1);
        }

        // Close the write end of the pipe on the child side.
        close(pipefds[1]);                  // close unused write end
        close(pipefds[0]);                  // also close the now-redundant read end

        // Execute the second command.
        execvp(cmd2[0], cmd2);

        // execvp() only returns if it failed. The child must stop here;
        // otherwise it would fall through and run the parent's code.
        cerr << "exec failed\n";
        _exit(1);
    }
The exec family (there are many):

Function | Args style | PATH search
execl    | list       | NO
execlp   | list       | YES
execv    | array      | NO
execvp   | array      | YES

Example with execlp:

execlp(cmd, arg0, arg1, ..., NULL);

Example with execvp:

// String literals are const in C++, so they are cast to char* explicitly
// (same style as cmd1/cmd2); the array must end with nullptr.
char* args[] = {(char*) "ls", (char*) "-l", nullptr};
execvp(args[0], args);

4. Reset input and output file descriptors of the parent

    // Reset the input and output file descriptors of the parent.
    // The parent does not read or write the pipe itself, so it closes its
    // copies of both ends. A reader of a pipe only gets end-of-file after
    // every open copy of the write end is closed, so the parent's copy has
    // to be closed too or tr would wait for more input forever.
    close(pipefds[0]);      // close both ends of the pipe
    close(pipefds[1]);

    // Parent waits for both children before exit
    // (status is overwritten by the second call and is not inspected)
    int status;
    waitpid(ls_pid, &status, 0);
    waitpid(tr_pid, &status, 0);

    return 0;

Final solution

/****************
LE2: Introduction to Unnamed Pipes
****************/
#include <unistd.h>     // pipe, fork, dup2, execvp, close
#include <iostream>     // std::cout, std::cerr
#include <sys/wait.h>   // wait, waitpid

using namespace std;

// Mimics the shell pipeline `ls -al / | tr a-z A-Z`.
//
// Creates one unnamed pipe and two child processes: the first runs ls with
// its stdout redirected into the pipe, the second runs tr with its stdin
// redirected from the pipe. The parent closes its own copies of the pipe
// ends, waits for both children, and (like a shell) returns the exit status
// of the last command in the pipeline. Returns 1 if a system call fails or
// tr does not exit normally.
int main () {
    // lists all the files in the root directory in the long format
    char* cmd1[] = {(char*) "ls", (char*) "-al", (char*) "/", nullptr};
    // translates all input from lowercase to uppercase
    char* cmd2[] = {(char*) "tr", (char*) "a-z", (char*) "A-Z", nullptr};

    // Create pipe
    int pipefds[2];   // pipefds[0] = read end, pipefds[1] = write end
    if (pipe(pipefds) == -1) {
        cerr << "pipe failed\n";
        return 1;
    }

    // Create child to run first command
    pid_t ls_pid = fork();    // parent gets the child's PID - child gets 0
    if (ls_pid == -1) {
        cerr << "fork failed\n";
        close(pipefds[0]);                  // do not leak the pipe on the error path
        close(pipefds[1]);
        return 1;
    }

    // In child, redirect output to write end of pipe
    if (ls_pid == 0) {
        // STDOUT_FILENO == 1 (standard output)
        if (dup2(pipefds[1], STDOUT_FILENO) == -1) {
            cerr << "dup2 failed\n";
            _exit(1);
        }

        // Close the read end of the pipe on the child side.
        close(pipefds[0]);                  // close unused read end
        close(pipefds[1]);                  // also close the now-redundant write end

        // In child, execute the command.
        // execvp(): 1st arg is the name of the command,
        //           2nd arg is the nullptr-terminated list of arguments.
        execvp(cmd1[0], cmd1);

        // Only reached if execvp() failed. _exit() ends the child at once,
        // without running the parent's exit-time cleanup in this copy.
        cerr << "exec failed\n";
        _exit(1);
    }

    // Create another child to run second command
    pid_t tr_pid = fork();
    if (tr_pid == -1) {
        cerr << "fork failed\n";
        close(pipefds[0]);
        close(pipefds[1]);
        waitpid(ls_pid, nullptr, 0);        // reap the first child before giving up
        return 1;
    }

    // In child, redirect input to the read end of the pipe
    if (tr_pid == 0) {
        // STDIN_FILENO == 0 (standard input)
        if (dup2(pipefds[0], STDIN_FILENO) == -1) {
            cerr << "dup2 failed\n";
            _exit(1);
        }

        // Close the write end of the pipe on the child side.
        close(pipefds[1]);                  // close unused write end
        close(pipefds[0]);                  // also close the now-redundant read end

        // Execute the second command.
        execvp(cmd2[0], cmd2);

        // Only reached if execvp() failed.
        cerr << "exec failed\n";
        _exit(1);
    }

    // Reset the input and output file descriptors of the parent.
    // The parent's copy of the write end must be closed as well: tr only
    // sees end-of-file once every copy of the write end is closed.
    close(pipefds[0]);      // close both ends of the pipe
    close(pipefds[1]);

    // Parent waits for both children before exit
    int ls_status = 0;
    int tr_status = 0;
    waitpid(ls_pid, &ls_status, 0);
    if (waitpid(tr_pid, &tr_status, 0) == -1) {
        cerr << "waitpid failed\n";
        return 1;
    }

    // Report the result of the last command in the pipeline, as a shell does.
    return WIFEXITED(tr_status) ? WEXITSTATUS(tr_status) : 1;
}