/*
    xd -- yet another heXDump with Japanese support

    revision history:
    	0.0: Oct. 20, 2010 by Dai ISHIJIMA
	1.0: Aug.  3, 2018 (aka. xd2)

*/

#include <stdio.h>
#include <stdlib.h>
#include "codeconv.h"
#include "ucs2jis.h"

/* Boolean values used for the option flags in main(). */
#define YES 1
#define NO  0

/* NDUMP bytes are shown per output line; the work buffer holds twice that. */
#define NDUMP 16
#define BUFLEN ((NDUMP) * 2)

/*
 * Character-code identifiers.  SJIS, EUC, UTF8 and JIS have the same
 * values as the option letters main() accepts for them.
 */
#define SJIS 's'
#define EUC 'e'
#define UTF8 'w'
#define UCS2 'u'
#define JIS 'j'
#define UNKNOWN (EOF)

/* String terminator. */
#define EOS '\0'

/* Size of the code-priority string filled in by usecode(). */
#define RANKSIZ 8

/*
 * Drop the first remaining command-line argument.
 * Wrapped in do { } while (0) so that "shift;" behaves as one statement
 * even under an unbraced if/else; written as two bare statements, only
 * the --argc would have been guarded by the condition.
 */
#define shift do { --argc; ++argv; } while (0)

/* Program name (argv[0]), used in diagnostics. */
char *prog;

/*
 * Mark the first len slots of buf as empty by storing EOF in each.
 * Nothing is written when len is zero or negative.
 */
void initbuf(int len, int *buf)
{
    int left;

    for (left = len; left > 0; left--) {
	*buf++ = EOF;
    }
}


/*
 * Advance the buffer window by half its length and refill it from fp.
 *
 * The second half of buf is moved to the front, then bytes are read
 * from fp into the free slots until the buffer is full or getc()
 * returns EOF.  Slots that could not be filled hold EOF.
 *
 * Returns buf[0] after refilling, which is EOF once no data is left.
 */
int readbuf(FILE *fp, int len, int *buf)
{
    int half = len / 2;
    int pos;
    int ch;

    /* Copy the second half of the buffer over the first half. */
    for (pos = 0; pos < half; pos++) {
	buf[pos] = buf[pos + half];
    }
    /* The rest of the buffer no longer holds valid data. */
    while (pos < len) {
	buf[pos++] = EOF;
    }

    /* Find the first empty slot; new input is appended from there. */
    pos = 0;
    while ((pos < len) && (buf[pos] != EOF)) {
	++pos;
    }

    /* Read until the buffer is full or the stream is exhausted. */
    while (pos < len) {
	ch = getc(fp);
	if (ch == EOF) {
	    break;
	}
	buf[pos++] = ch;
    }

    /* Pad whatever could not be filled. */
    while (pos < len) {
	buf[pos++] = EOF;
    }
    return buf[0];
}


/*
 * Print the first len entries of buf as two-digit hex values, each
 * followed by a space.  Entries equal to EOF are printed as three
 * blanks so the columns stay aligned, and one extra blank is inserted
 * after the middle entry.
 */
void dumphex(int len, int *buf)
{
    int middle = (len / 2) - 1;
    int col;

    for (col = 0; col < len; col++) {
	if (buf[col] != EOF) {
	    printf("%02x ", buf[col]);
	}
	else {
	    printf("   ");
	}
	if (col == middle) {
	    printf(" ");
	}
    }
}


/*
 * Convert a JIS X 0201 / JIS X 0208 character to the output encoding
 * and print it on stdout.
 *
 *   len  - 1 for a single-byte (half-width) character, 2 for a
 *          double-byte character
 *   ch   - the character value; for len == 2 it is passed through
 *          jtos() or jtoe() before printing
 *   code - SJIS or EUC selects that output encoding; any other value
 *          selects a fallback that prints printable ASCII bytes as
 *          they are and "." for everything else
 *
 * For SJIS and EUC nothing is printed when len is neither 1 nor 2.
 */
void mbput(int len, int ch, int code)
{
    int lo = lobyte(ch);
    int lo_ascii = (' ' <= lo) && (lo <= '~');
    int lo_kana = (0xa1 <= lo) && (lo <= 0xdf);
    int hi;
    int outch;

    switch (code) {
    case SJIS:
	if (len == 2) {
	    outch = jtos(ch);
	    printf("%c%c", hibyte(outch), lobyte(outch));
	}
	else if (len == 1) { /* half-width */
	    if (lo_ascii) {
		putchar(lo);
	    }
	    else if (lo_kana) {
		/* half-width kana: emitted unchanged */
		printf("%c", ch);
	    }
	    else {
		putchar('.');
	    }
	}
	break;

    case EUC:
	if (len == 2) {
	    outch = jtoe(ch);
	    printf("%c%c", hibyte(outch), lobyte(outch));
	}
	else if (len == 1) { /* half-width */
	    if (lo_ascii) {
		putchar(lo);
	    }
	    else if (lo_kana) {
		/* half-width kana: prefixed with the byte 0x8e */
		printf("%c%c", 0x8e, lo);
	    }
	    else {
		putchar('.');
	    }
	}
	break;

    default:
	/* No conversion available: show only printable ASCII bytes. */
	if (len > 1) {
	    hi = hibyte(ch);
	    putchar(((' ' <= hi) && (hi <= '~')) ? hi : '.');
	}
	putchar(lo_ascii ? lo : '.');
	break;
    }
}


/*
 * Try to decode one multi-byte character starting at buf[0] and print it
 * through mbput() using the output encoding outcode.
 *
 *   buf      - current position in the byte buffer; buf[1] is read as
 *              well, and utoj() is handed the pointer itself
 *   coderank - EOS-terminated list of candidate encodings, tried in order
 *              when the current encoding is UNKNOWN or has just failed;
 *              only SJIS, EUC and UTF8 entries are acted on here
 *   persist  - not referenced in this function
 *   code     - in/out: the encoding currently assumed for the input.
 *              Set to UNKNOWN when decoding with it fails, and to the
 *              matching encoding when a candidate from coderank succeeds
 *   outcode  - output encoding, passed straight to mbput()
 *
 * Returns the number of buffer elements consumed (for UTF8 this is the
 * value utoj() stored -- presumably the sequence length, TODO confirm
 * against ucs2jis.c), or 0 when no character was recognized.
 */
int mbdump(int *buf, char *coderank, int persist, int *code, int outcode)
{
    int mblen;
    int jischar;
    
    mblen = 0;
    /* First try the encoding that is currently assumed. */
    if (*code == SJIS) {
	if (iskana(*buf)) {
	    /* single-byte character: one element consumed */
	    mbput(1, *buf, outcode);
	    mblen = 1;
	}
	else {
	    jischar = stoj(hilo(*buf, *(buf + 1)));
	    if (validjis(jischar)) {
		mbput(2, jischar, outcode);
		mblen = 2;
	    }
	    else {
		/* not decodable as SJIS: fall through to re-detection */
		*code = UNKNOWN;
		mblen = 0;
	    }
	}
    }
    else if (*code == EUC) {
	if (*buf == 0x8e) {
	    /* 0x8e prefix: the following element is handled as a
	       single-byte kana, but two elements are consumed */
	    if (iskana(*(buf + 1))) {
		mbput(1, *(buf + 1), outcode);
		mblen = 2;
	    }
	    else {
		*code = UNKNOWN;
		mblen = 0;
	    }
	}
	else {
	    jischar = etoj(hilo(*buf, *(buf + 1)));
	    if (validjis(jischar)) {
		mbput(2, jischar, outcode);
		mblen = 2;
	    }
	    else {
		*code = UNKNOWN;
		mblen = 0;
	    }
	}
    }
    else if (*code == UTF8) {
	if (*buf >= 0xc0) {
	    /* utoj() stores the consumed length in mblen */
	    jischar = utoj(buf, &mblen);
	    if (iskana(jischar)) {
		mbput(1, jischar, outcode);
	    }
	    else {
		if (validjis(jischar)) {
		    mbput(2, jischar, outcode);
		    /* NOTE(review): the extra blank presumably keeps the
		       text column aligned when more than two input bytes
		       produced one two-column character -- confirm */
		    if (mblen > 2) {
			printf(" ");
		    }
		}
		else {
		    mblen = 0;
		    *code = UNKNOWN;
		}
	    }
	}
	else {
	    mblen = 0;
	    *code = UNKNOWN;
	}
    }
    /* Encoding unknown (or it just failed): try each candidate in
       coderank order and adopt the first one that decodes. */
    if ((*code == UNKNOWN) && (mblen == 0)) {
	while (*coderank != EOS) {
	    if (*coderank == SJIS) {
		/*
		if (iskana(*buf)) {
		    *code = SJIS;
		    mbput(1, *buf, outcode);
		    mblen = 1;
		}
		*/
		/* single-byte kana detection is disabled (see the
		   commented-out code above); only two-byte characters
		   can switch the state to SJIS */
		if (issjis(*buf)) {
		    jischar = stoj(hilo(*buf, *(buf + 1)));
		    if (validjis(jischar)) {
			*code = SJIS;
			mbput(2, jischar, outcode);
			mblen = 2;
		    }
		}
	    }
	    else if (*coderank == EUC) {
		/*
		if ((*buf == 0x8e) && (iskana(*(buf + 1)))) {
		    *code = EUC;
		    mbput(1, *buf, outcode);
		    mblen = 2;
		}
		*/
		/* likewise, the 0x8e kana form is not used for detection;
		   both elements must be >= 0xa1 */
		if ((*buf >= 0xa1) && (*(buf + 1) >= 0xa1)) {
		    jischar = etoj(hilo(*buf, *(buf + 1)));
		    if (validjis(jischar)) {
			*code = EUC;
			mbput(2, jischar, outcode);
			mblen = 2;
		    }
		}
	    }
	    else if (*coderank == UTF8) {
		if (*buf >= 0xc0) {
		    jischar = utoj(buf, &mblen);
		    if (iskana(jischar)) {
			mbput(1, jischar, outcode);
			*code = UTF8;
		    }
		    else {
			if (validjis(jischar)) {
			    mbput(2, jischar, outcode);
			    if (mblen > 2) {
				printf(" ");
			    }
			    *code = UTF8;
			}
			else {
			    /* discard the length utoj() reported so the
			       next candidate is still tried */
			    mblen = 0;
			    *code = UNKNOWN;
			}
		    }
		}
		else {
		    mblen = 0;
		    *code = UNKNOWN;
		}
	    }
	    /* a candidate matched: stop searching */
	    if (mblen > 0) {
		break;
	    }
	    ++coderank;
	}
    }
    return(mblen);
}


/*
 * Print the character column for one dump line.
 *
 *   len      - number of buffer entries belonging to this line
 *   buf      - byte buffer; entries beyond len may be read by mbdump()
 *   coderank, persist, outcode - passed through to mbdump()
 *
 * EOF entries are shown as a blank, control characters and 0x7f as ".",
 * printable ASCII as itself; anything else is handed to mbdump(), and
 * "." is printed when it recognizes nothing.  A control character
 * resets the detected encoding to UNKNOWN.
 *
 * Two pieces of state are kept between calls: the detected encoding,
 * and the number of entries at the start of the next line that were
 * already consumed by a multi-byte character crossing the line end
 * (those positions are shown as blanks on the next call).
 */
void dumpchar(int len, int *buf, char *coderank, int persist, int outcode)
{
    static int ofs = 0;
    static int code = UNKNOWN;
    int pos;
    int used;
    int byte;

    /* Skip what the previous line's last character already covered. */
    for (pos = 0; pos < ofs; pos++) {
	putchar(' ');
    }

    while (pos < len) {
	byte = buf[pos];
	if (byte == EOF) {
	    putchar(' ');
	}
	else if (byte < ' ') {
	    putchar('.');
	    code = UNKNOWN;
	}
	else if (byte <= '~') {
	    putchar(byte);
	}
	else if (byte == 0x7f) {
	    putchar('.');
	}
	else {
	    used = mbdump(&buf[pos], coderank, persist, &code, outcode);
	    if (used == 0) {
		putchar('.');
	    }
	    else if (used > 1) {
		/* skip the trailing entries of the character */
		pos += used - 1;
	    }
	}
	++pos;
    }

    /* Remember how far the last character ran past this line. */
    ofs = pos - len;
    fflush(stdout);
}


/*
 * Dump the whole stream fp: one line per NDUMP bytes, consisting of the
 * hexadecimal offset, the hex column (dumphex) and the character
 * column (dumpchar).
 *
 *   fp       - input stream, read until readbuf() reports EOF
 *   coderank, persist, outcode - passed through to dumpchar()
 *
 * The buffer holds BUFLEN (two lines) of data so that dumpchar() can
 * look past the end of the current line.
 */
void dump(FILE *fp, char *coderank, int persist, int outcode)
{
    int buf[BUFLEN];
    /* Unsigned so that it matches the %lx conversion and cannot hit
       signed overflow on very large inputs. */
    unsigned long ofs = 0;

    initbuf(BUFLEN, buf);
    while (readbuf(fp, BUFLEN, buf) != EOF) {
	printf("%04lx ", ofs);
	dumphex(NDUMP, buf);
	printf("  ");
	dumpchar(NDUMP, buf, coderank, persist, outcode);
	printf("\n");
	ofs += NDUMP;
    }
}


/*
 * Append code to the EOS-terminated list coderank unless it is already
 * present.  No bounds check is made: the caller must supply a buffer
 * with room for one more entry plus the terminator.
 */
void usecode(char *coderank, int code)
{
    char *slot;

    for (slot = coderank; *slot != EOS; slot++) {
	if (*slot == code) {
	    /* already listed */
	    return;
	}
    }
    slot[0] = code;
    slot[1] = EOS;
}


/*
 * Print a usage summary on stderr.
 * The option list mirrors what main() accepts: -s/-e/-w/-j add an input
 * encoding candidate, -S/-E/-W select the output encoding, -p toggles
 * the persist flag, and "-" as a file argument stands for stdin.
 */
void usage(void)
{
    fprintf(stderr,
	    "Usage: %s [-s] [-e] [-w] [-j] [-S | -E | -W] [-p] [file | -] ...\n",
	    prog);
}


/*
 * Entry point: parse options, then dump each named file (or stdin).
 *
 * Options (one letter per argument):
 *   -s -e -w -j  add SJIS / EUC / UTF8 / JIS to the list of input
 *                encodings to try, in the order given
 *   -S -E -W     select SJIS / EUC / UTF8 as the output encoding
 *                (EUC is the default)
 *   -p           toggle the persist flag
 * When no input encoding is given, all four are tried in the order
 * SJIS, EUC, UTF8, JIS.  A file argument of exactly "-" means stdin.
 *
 * Exits with status 1 on an unknown option or an unreadable file.
 */
int main(int argc, char *argv[])
{
    char coderank[RANKSIZ];
    int outcode = EUC;
    int persist = NO;
    FILE *fp;

    prog = *argv;
    shift;
    coderank[0] = EOS;
    ucs2init();
    while ((argc > 0) && (argv[0][0] == '-') && (argv[0][1] != EOS)) {
	switch (argv[0][1]) {
	case 's':
	    usecode(coderank, SJIS);
	    break;
	case 'e':
	    usecode(coderank, EUC);
	    break;
	case 'w':
	    usecode(coderank, UTF8);
	    break;
	case 'j':
	    usecode(coderank, JIS);
	    break;
	case 'S':
	    outcode = SJIS;
	    break;
	case 'E':
	    outcode = EUC;
	    break;
	case 'W':
	    outcode = UTF8;
	    break;
	case 'p':
	    persist = !persist;
	    break;
	default:
	    /* unknown option: report it as a failure */
	    usage();
	    exit(1);
	}
	shift;
    }
    if (coderank[0] == EOS) {
	usecode(coderank, SJIS);
	usecode(coderank, EUC);
	usecode(coderank, UTF8);
	usecode(coderank, JIS);
    }
    if (argc > 0) {
	while (argc > 0) {
	    /* only a lone "-" stands for stdin; other names starting
	       with '-' are treated as file names */
	    if ((argv[0][0] == '-') && (argv[0][1] == EOS)) {
		dump(stdin, coderank, persist, outcode);
	    }
	    else {
		/* binary mode: the dump must show the bytes untranslated */
		if ((fp = fopen(*argv, "rb")) == NULL) {
		    fprintf(stderr, "%s: can't open %s\n", prog, *argv);
		    exit(1);
		}
		dump(fp, coderank, persist, outcode);
		fclose(fp);
	    }
	    shift;
	}
    }
    else {
	dump(stdin, coderank, persist, outcode);
    }
    return 0;
}

/* Local Variables: */
/* compile-command:"cc -Wall -o xd xd.c codeconv.c ucs2jis.c" */
/* End: */
