Data Visualization with Python

The data visualization notebook can be downloaded from here.

The soltuion for the visualization exercises can be found on here


The data files needed for this tutorials can be found on the /data/data_visualization directory.

Introduction to SQL

Data Definition Language


CREATE DATABASE Examples;
CREATE TABLE Students(
 ID INTEGER,
 FirstName VARCHAR(50),
 LastName VARCHAR(50),
 PRIMARY KEY (ID)
);
CREATE TABLE Classes(
 ID INTEGER,
 AcademicYear INTEGER,
 Class VARCHAR(2),
 PRIMARY KEY (ID, AcademicYear)
);

ALTER TABLE Students
ADD COLUMN  DOB DATE NOT NULL;
                        

Data Manipulation Language


INSERT INTO Students 
(`ID`,`FirstName`,`LastName`,`DOB`)
VALUES 
(1001,'John','Smith','1/1/2000');

INSERT INTO Students 
(`ID`,`FirstName`,`LastName`)
VALUES 
(1001,'John','Smith’);

UPDATE Students
SET
`FirstName` = 'Caroline',
`LastName` = 'Herschel',
`DOB` = '2/2/2000'
WHERE `ID` = 1002;

DELET FROM Students
WHERE `ID` = 1003;
                        
                        

Database Query using SELECT


SELECT `emp_no`,`birth_date`,`first_name`,`last_name`,`gender`,`hire_date` FROM `employees`;
SELECT * FROM employees.employees;
SELECT * FROM employees.employees where gender='F';
SELECT * FROM employees.employees where gender='F' and hire_date<'1988-1-1';
SELECT * FROM employees.employees where gender='F' and First_Name='Gao';
SELECT * FROM employees.employees where gender='F' and First_Name like 'G%';
SELECT * FROM employees.employees where gender='F' and First_Name like '%G%';
SELECT * FROM employees.employees where birth_date>='1963-1-1' and birth_date<'1964-1-1';
SELECT * FROM employees.employees where birth_date>='1963-1-1' and birth_date<'1964-1-1' 
order by birth_date;

SELECT * FROM employees inner join dept_emp on employees.emp_no=dept_emp.emp_no;
SELECT * FROM employees inner join dept_emp on employees.emp_no=dept_emp.emp_no 
Order by employees.emp_no;

SELECT * FROM employees inner join dept_emp on employees.emp_no=dept_emp.emp_no 
where dept_emp.dept_no='d008'
Order by employees.emp_no;


SELECT * FROM employees inner join dept_emp on employees.emp_no=dept_emp.emp_no 
where dept_emp.dept_no='d008' and employees.gender='M'
Order by employees.emp_no;

SELECT employees.emp_no,employees.birth_date,employees.first_name,
employees.last_name,employees.gender,employees.hire_date,dept_emp.dept_no,
dept_emp.from_date,dept_emp.to_date,departments.dept_name 
FROM employees inner join dept_emp on employees.emp_no=dept_emp.emp_no 
inner join departments on  dept_emp.dept_no=departments.dept_no 
where dept_emp.dept_no='d008'
Order by employees.emp_no;
                        

Aggregate Functions


SELECT count(*) FROM employees ; 

SELECT min(birth_date) , max(birth_date) FROM employees ;

SELECT min(birth_date) , max(birth_date) FROM employees where gender='F' ;

SELECT count(*) FROM employees where gender='F' ;

select sum(salaries.salary) 
from employees inner join salaries on employees.emp_no=salaries.emp_no 
where employees.emp_no='10001';

select avg(salaries.salary) 
from employees inner join salaries on employees.emp_no=salaries.emp_no 
where employees.emp_no='10001';


SELECT departments.dept_name, count(*) 
FROM employees inner join dept_emp on employees.emp_no=dept_emp.emp_no 
inner join departments on  dept_emp.dept_no=departments.dept_no 
group by departments.dept_name;


select employees.emp_no,employees.first_name,employees.last_name, avg(salaries.salary)
 from employees inner join salaries on employees.emp_no=salaries.emp_no
group by employees.emp_no,employees.first_name,employees.last_name

select employees.emp_no,employees.first_name,employees.last_name, avg(salaries.salary)
 from employees inner join salaries on employees.emp_no=salaries.emp_no
group by employees.emp_no,employees.first_name,employees.last_name
having avg(salaries.salary)>50000
                        

Introduction to HPC

Sample PBS Script

                        #!/bin/sh -f
# Remarks: A line beginning with # is a comment.
# A line beginning with #PBS is a PBS directive.
# PBS directives must come first; 
# any directives after the first executable statement are ignored.
############### The PBS directives #########################

# Set the name of the job (up to 15 characters, no blank spaces, start with alphanumeric character)

#PBS -N TestJob

# By default, the standard output and error streams are sent to files in the current working directory 
# with names:
# 	job_name.osequence_number  <-  output stream
# 	job_name.esequence_number  <-  error stream
# where job_name is the name of the job and sequence_number is the job number 
# assigned when the job is submitted. 
# Use the directives below to change the files to which the standard output and error streams are sent.

#    #PBS -o stdout_file
#    #PBS -e stderr_file

# Specify the maximum wall clock time. Format:   hhhh:mm:ss   hours:minutes:seconds
# Be sure to specify a reasonable value here. 
# If the job does not finish by the time reached, the job is terminated.

#PBS -l walltime=6:00:00

# Specify the queue.
#PBS -q gstar

# Specify the maximum amount of physical memory required. 
# kb for kilobytes, mb for megabytes, gb for gigabytes. 
# Take some care in setting this value.  
# Setting it too large can result in your job waiting in the queue 
# for sufficient resources to become available.

#PBS -l mem=512mb

# Specify the number of nodes requested and the number of processors per node.

#PBS -l nodes=1:ppn=1

##########################################
#   Output some useful job information.  #
##########################################

echo ------------------------------------------------------
echo -n 'Job is running on node '; cat $PBS_NODEFILE
echo ------------------------------------------------------
echo PBS: qsub is running on $PBS_O_HOST
echo PBS: originating queue is $PBS_O_QUEUE
echo PBS: executing queue is $PBS_QUEUE
echo PBS: working directory is $PBS_O_WORKDIR
echo PBS: execution mode is $PBS_ENVIRONMENT
echo PBS: job identifier is $PBS_JOBID
echo PBS: job name is $PBS_JOBNAME
echo PBS: node file is $PBS_NODEFILE
echo PBS: current home directory is $PBS_O_HOME
echo PBS: PATH = $PBS_O_PATH
echo ------------------------------------------------------

Submitting batch jobs

qsub pbsfile.sh

Submitting Interactive jobs

qsub -I -l nodes=1:ppn=1 -l mem=512mb

Get the expected start time

showstart JobID

Check Job Status

checkjob JobID
qstat -f JobID

Show the current queue status

qstat
showq

Introduction to Source Control (git and github)

The source control and github tutorial can be found here.