Data Visualization with Python
The data visualization notebook can be downloaded from here.
The soltuion for the visualization exercises can be found on here
The data files needed for this tutorials can be found on the
/data/data_visualization
directory.
Introduction to SQL
Data Definition Language
CREATE DATABASE Examples;
CREATE TABLE Students(
ID INTEGER,
FirstName VARCHAR(50),
LastName VARCHAR(50),
PRIMARY KEY (ID)
);
CREATE TABLE Classes(
ID INTEGER,
AcademicYear INTEGER,
Class VARCHAR(2),
PRIMARY KEY (ID, AcademicYear)
);
ALTER TABLE Students
ADD COLUMN DOB DATE NOT NULL;
Data Manipulation Language
INSERT INTO Students
(`ID`,`FirstName`,`LastName`,`DOB`)
VALUES
(1001,'John','Smith','1/1/2000');
INSERT INTO Students
(`ID`,`FirstName`,`LastName`)
VALUES
(1001,'John','Smith’);
UPDATE Students
SET
`FirstName` = 'Caroline',
`LastName` = 'Herschel',
`DOB` = '2/2/2000'
WHERE `ID` = 1002;
DELET FROM Students
WHERE `ID` = 1003;
Database Query using SELECT
SELECT `emp_no`,`birth_date`,`first_name`,`last_name`,`gender`,`hire_date` FROM `employees`;
SELECT * FROM employees.employees;
SELECT * FROM employees.employees where gender='F';
SELECT * FROM employees.employees where gender='F' and hire_date<'1988-1-1';
SELECT * FROM employees.employees where gender='F' and First_Name='Gao';
SELECT * FROM employees.employees where gender='F' and First_Name like 'G%';
SELECT * FROM employees.employees where gender='F' and First_Name like '%G%';
SELECT * FROM employees.employees where birth_date>='1963-1-1' and birth_date<'1964-1-1';
SELECT * FROM employees.employees where birth_date>='1963-1-1' and birth_date<'1964-1-1'
order by birth_date;
SELECT * FROM employees inner join dept_emp on employees.emp_no=dept_emp.emp_no;
SELECT * FROM employees inner join dept_emp on employees.emp_no=dept_emp.emp_no
Order by employees.emp_no;
SELECT * FROM employees inner join dept_emp on employees.emp_no=dept_emp.emp_no
where dept_emp.dept_no='d008'
Order by employees.emp_no;
SELECT * FROM employees inner join dept_emp on employees.emp_no=dept_emp.emp_no
where dept_emp.dept_no='d008' and employees.gender='M'
Order by employees.emp_no;
SELECT employees.emp_no,employees.birth_date,employees.first_name,
employees.last_name,employees.gender,employees.hire_date,dept_emp.dept_no,
dept_emp.from_date,dept_emp.to_date,departments.dept_name
FROM employees inner join dept_emp on employees.emp_no=dept_emp.emp_no
inner join departments on dept_emp.dept_no=departments.dept_no
where dept_emp.dept_no='d008'
Order by employees.emp_no;
Aggregate Functions
SELECT count(*) FROM employees ;
SELECT min(birth_date) , max(birth_date) FROM employees ;
SELECT min(birth_date) , max(birth_date) FROM employees where gender='F' ;
SELECT count(*) FROM employees where gender='F' ;
select sum(salaries.salary)
from employees inner join salaries on employees.emp_no=salaries.emp_no
where employees.emp_no='10001';
select avg(salaries.salary)
from employees inner join salaries on employees.emp_no=salaries.emp_no
where employees.emp_no='10001';
SELECT departments.dept_name, count(*)
FROM employees inner join dept_emp on employees.emp_no=dept_emp.emp_no
inner join departments on dept_emp.dept_no=departments.dept_no
group by departments.dept_name;
select employees.emp_no,employees.first_name,employees.last_name, avg(salaries.salary)
from employees inner join salaries on employees.emp_no=salaries.emp_no
group by employees.emp_no,employees.first_name,employees.last_name
select employees.emp_no,employees.first_name,employees.last_name, avg(salaries.salary)
from employees inner join salaries on employees.emp_no=salaries.emp_no
group by employees.emp_no,employees.first_name,employees.last_name
having avg(salaries.salary)>50000
Introduction to HPC
Sample PBS Script
#!/bin/sh -f
# Remarks: A line beginning with # is a comment.
# A line beginning with #PBS is a PBS directive.
# PBS directives must come first;
# any directives after the first executable statement are ignored.
############### The PBS directives #########################
# Set the name of the job (up to 15 characters, no blank spaces, start with alphanumeric character)
#PBS -N TestJob
# By default, the standard output and error streams are sent to files in the current working directory
# with names:
# job_name.osequence_number <- output stream
# job_name.esequence_number <- error stream
# where job_name is the name of the job and sequence_number is the job number
# assigned when the job is submitted.
# Use the directives below to change the files to which the standard output and error streams are sent.
# #PBS -o stdout_file
# #PBS -e stderr_file
# Specify the maximum wall clock time. Format: hhhh:mm:ss hours:minutes:seconds
# Be sure to specify a reasonable value here.
# If the job does not finish by the time reached, the job is terminated.
#PBS -l walltime=6:00:00
# Specify the queue.
#PBS -q gstar
# Specify the maximum amount of physical memory required.
# kb for kilobytes, mb for megabytes, gb for gigabytes.
# Take some care in setting this value.
# Setting it too large can result in your job waiting in the queue
# for sufficient resources to become available.
#PBS -l mem=512mb
# Specify the number of nodes requested and the number of processors per node.
#PBS -l nodes=1:ppn=1
##########################################
# Output some useful job information. #
##########################################
echo ------------------------------------------------------
echo -n 'Job is running on node '; cat $PBS_NODEFILE
echo ------------------------------------------------------
echo PBS: qsub is running on $PBS_O_HOST
echo PBS: originating queue is $PBS_O_QUEUE
echo PBS: executing queue is $PBS_QUEUE
echo PBS: working directory is $PBS_O_WORKDIR
echo PBS: execution mode is $PBS_ENVIRONMENT
echo PBS: job identifier is $PBS_JOBID
echo PBS: job name is $PBS_JOBNAME
echo PBS: node file is $PBS_NODEFILE
echo PBS: current home directory is $PBS_O_HOME
echo PBS: PATH = $PBS_O_PATH
echo ------------------------------------------------------
Submitting batch jobs
qsub pbsfile.sh
Submitting Interactive jobs
qsub -I -l nodes=1:ppn=1 -l mem=512mb
Get the expected start time
showstart JobID
Check Job Status
checkjob JobID
qstat -f JobID
Show the current queue status
qstat
showq
Introduction to Source Control (git and github)
The source control and github tutorial can be found here.