From 5d9a101507334f66242fc306359833df67adb3f8 Mon Sep 17 00:00:00 2001 From: nckcard Date: Wed, 2 Jul 2025 16:39:10 -0700 Subject: [PATCH] gitignore fixes --- .gitignore | 169 +----------------- data/.gitignore | 4 + data/README.md | 1 + .../pretrained_language_models/.gitignore | 4 + .../core/patch/openfst/src/lib/flags.cc | 166 +++++++++++++++++ 5 files changed, 179 insertions(+), 165 deletions(-) create mode 100644 data/.gitignore create mode 100644 data/README.md create mode 100644 language_model/pretrained_language_models/.gitignore create mode 100644 language_model/runtime/core/patch/openfst/src/lib/flags.cc diff --git a/.gitignore b/.gitignore index dfb8c8e..8b8d6b3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,170 +1,9 @@ -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. 
-*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -cover/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -.pybuilder/ -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -# For a library or package, you might want to ignore these files since the code is -# intended to run in multiple environments; otherwise, check them in: -# .python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# poetry -# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. -# This is especially recommended for binary packages to ensure reproducibility, and is more -# commonly ignored for libraries. -# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control -#poetry.lock - -# pdm -# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. -#pdm.lock -# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it -# in version control. -# https://pdm.fming.dev/latest/usage/project/#working-with-version-control -.pdm.toml -.pdm-python -.pdm-build/ - -# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# pytype static type analyzer -.pytype/ - -# Cython debug symbols -cython_debug/ - -# PyCharm -# JetBrains specific template is maintained in a separate JetBrains.gitignore that can -# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore -# and can be added to the global gitignore or merged into this file. For a more nuclear -# option (not recommended) you can uncomment the following to ignore the entire idea folder. -#.idea/ - -# ignore data folder -data/* +# ignore python bytecode files +*.pyc +*.egg-info # ignore rdb files *.rdb +# ignore ds_store files .DS_Store \ No newline at end of file diff --git a/data/.gitignore b/data/.gitignore new file mode 100644 index 0000000..e1191d2 --- /dev/null +++ b/data/.gitignore @@ -0,0 +1,4 @@ +# ignore everything in this folder except my README.md and myself +* +!README.md +!/.gitignore \ No newline at end of file diff --git a/data/README.md b/data/README.md new file mode 100644 index 0000000..b634fd5 --- /dev/null +++ b/data/README.md @@ -0,0 +1 @@ +Data can be downloaded from Dryad, [here](https://datadryad.org/stash/dataset/doi:10.5061/dryad.dncjsxm85). Please download this data and place it in the `data` directory before running the code. Be sure to unzip `t15_copyTask_neuralData.zip` and `t15_pretrained_rnn_baseline.zip`. 
\ No newline at end of file
diff --git a/language_model/pretrained_language_models/.gitignore b/language_model/pretrained_language_models/.gitignore
new file mode 100644
index 0000000..7919e7a
--- /dev/null
+++ b/language_model/pretrained_language_models/.gitignore
@@ -0,0 +1,4 @@
+# ignore everything in this folder except a few things
+*
+!openwebtext_1gram_lm_sil
+!/.gitignore
\ No newline at end of file
diff --git a/language_model/runtime/core/patch/openfst/src/lib/flags.cc b/language_model/runtime/core/patch/openfst/src/lib/flags.cc
new file mode 100644
index 0000000..95f7e2e
--- /dev/null
+++ b/language_model/runtime/core/patch/openfst/src/lib/flags.cc
@@ -0,0 +1,166 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Google-style flag handling definitions.
+
+// NOTE(review): the header names on the #include lines below were missing
+// from this patch text (everything between '<' and '>' had been stripped).
+// They are reconstructed from what the file uses (strlen; _setmode, _fileno
+// and O_BINARY; DEFINE_string, FlagRegister and LOG) -- confirm against the
+// upstream OpenFst sources before applying.
+#include <cstring>
+
+#if _MSC_VER
+#include <io.h>
+#include <fcntl.h>
+#endif
+
+#include <fst/compat.h>
+#include <fst/flags.h>
+
+// Value of the TMPDIR environment variable, read once when this file's
+// statics are initialized; null when the variable is not set.
+static const char *private_tmpdir = getenv("TMPDIR");
+
+// The v/help/helpshort flag definitions are commented out in this copy, as is
+// the matching FLAGS_help / FLAGS_helpshort handling at the end of SetFlags().
+// DEFINE_int32(v, 0, "verbosity level");
+// DEFINE_bool(help, false, "show usage information");
+// DEFINE_bool(helpshort, false, "show brief usage information");
+#ifndef _MSC_VER
+DEFINE_string(tmpdir, private_tmpdir ? private_tmpdir : "/tmp",
+              "temporary directory");
+#else
+// NOTE(review): getenv("TEMP") returns null when TEMP is unset, in which case
+// the default value below is a null pointer -- confirm DEFINE_string copes.
+DEFINE_string(tmpdir, private_tmpdir ? private_tmpdir : getenv("TEMP"),
+              "temporary directory");
+#endif  // !_MSC_VER
+
+using namespace std;
+
+// Usage text passed to SetFlags(), and the source file name stored by
+// SetProgSrc(); both are read back by ShowUsage().
+static string flag_usage;
+static string prog_src;
+
+// Sets prog_src to src.
+// On MSVC builds this also switches stdin and stdout to binary mode. A
+// trailing "-main.cc" in the stored name is shortened to ".cc".
+static void SetProgSrc(const char *src) {
+  prog_src = src;
+#if _MSC_VER
+  // This common code is invoked by all FST binaries, and only by them. Switch
+  // stdin and stdout into "binary" mode, so that 0x0A won't be translated into
+  // a 0x0D 0x0A byte pair in a pipe or a shell redirect. Other streams are
+  // already using ios::binary where binary files are read or written.
+  // Kudos to @daanzu for the suggested fix.
+  // https://github.com/kkm000/openfst/issues/20
+  // https://github.com/kkm000/openfst/pull/23
+  // https://github.com/kkm000/openfst/pull/32
+  _setmode(_fileno(stdin), O_BINARY);
+  _setmode(_fileno(stdout), O_BINARY);
+#endif
+  // Remove "-main" in src filename. Flags are defined in fstx.cc but SetFlags()
+  // is called in fstx-main.cc, which results in a filename mismatch in
+  // ShowUsageRestrict() below.
+  static constexpr char kMainSuffix[] = "-main.cc";
+  // Signed on purpose: the difference is negative when the name is shorter
+  // than the suffix, and the check below then skips the erase.
+  const int prefix_length = prog_src.size() - strlen(kMainSuffix);
+  if (prefix_length > 0 && prog_src.substr(prefix_length) == kMainSuffix) {
+    prog_src.erase(prefix_length, strlen("-main"));
+  }
+}
+
+// Parses the leading command-line flags in (*argv)[1..*argc).
+//
+//   usage         usage text, stored in flag_usage for ShowUsage().
+//   argc, argv    pointers to the argument count and vector.
+//   remove_flags  when true, the consumed flags are removed from *argv and
+//                 *argc is reduced accordingly.
+//   src           source file name, passed to SetProgSrc().
+//
+// Parsing stops at the first argument that does not start with '-' or that is
+// exactly "-". Every leading '-' is stripped, the remainder is split at the
+// first '=' into name and value, and the name is offered to the bool, string,
+// int32, int64 and double registers in that order. A name that no register
+// accepts is reported through LOG(FATAL).
+//
+// NOTE(review): the template arguments of FlagRegister were missing from this
+// patch text (angle-bracket contents had been stripped). They are restored
+// from the variable names -- confirm the spellings against fst/flags.h.
+void SetFlags(const char *usage, int *argc, char ***argv,
+              bool remove_flags, const char *src) {
+  flag_usage = usage;
+  SetProgSrc(src);
+
+  int index = 1;
+  for (; index < *argc; ++index) {
+    string argval = (*argv)[index];
+    if (argval[0] != '-' || argval == "-") break;
+    while (argval[0] == '-') argval = argval.substr(1);  // Removes initial '-'.
+    string arg = argval;
+    string val = "";
+    // Splits argval (arg=val) into arg and val.
+    auto pos = argval.find("=");
+    if (pos != string::npos) {
+      arg = argval.substr(0, pos);
+      val = argval.substr(pos + 1);
+    }
+    auto bool_register = FlagRegister<bool>::GetRegister();
+    if (bool_register->SetFlag(arg, val))
+      continue;
+    auto string_register = FlagRegister<string>::GetRegister();
+    if (string_register->SetFlag(arg, val))
+      continue;
+    auto int32_register = FlagRegister<int32_t>::GetRegister();
+    if (int32_register->SetFlag(arg, val))
+      continue;
+    auto int64_register = FlagRegister<int64_t>::GetRegister();
+    if (int64_register->SetFlag(arg, val))
+      continue;
+    auto double_register = FlagRegister<double>::GetRegister();
+    if (double_register->SetFlag(arg, val))
+      continue;
+    LOG(FATAL) << "SetFlags: Bad option: " << (*argv)[index];
+  }
+  if (remove_flags) {
+    // Shift the remaining arguments down so they start at (*argv)[1].
+    for (auto i = 0; i < *argc - index; ++i) {
+      (*argv)[i + 1] = (*argv)[i + index];
+    }
+    *argc -= index - 1;
+  }
+  // if (FLAGS_help) {
+  //   ShowUsage(true);
+  //   exit(1);
+  // }
+  // if (FLAGS_helpshort) {
+  //   ShowUsage(false);
+  //   exit(1);
+  // }
+}
+
+// If flag is defined in file 'src' and 'in_src' true or is not
+// defined in file 'src' and 'in_src' is false, then print usage.
+// 'usage_set' holds (file, usage) pairs. With 'show_file' set, a
+// "Flags from: <file>" header is printed each time the file changes, with a
+// blank line between file groups. A trailing blank line is printed if any
+// usage text was written.
+//
+// NOTE(review): the element type of 'usage_set' was missing from this patch
+// text (angle-bracket contents had been stripped). It is restored from the
+// .first/.second accesses below -- confirm against fst/flags.h.
+static void
+ShowUsageRestrict(const std::set<std::pair<string, string>> &usage_set,
+                  const string &src, bool in_src, bool show_file) {
+  string old_file;
+  bool file_out = false;
+  bool usage_out = false;
+  for (const auto &pair : usage_set) {
+    const auto &file = pair.first;
+    const auto &usage = pair.second;
+    bool match = file == src;
+    // Keep only entries whose "defined in src" status equals in_src.
+    if ((match && !in_src) || (!match && in_src)) continue;
+    if (file != old_file) {
+      if (show_file) {
+        if (file_out) cout << "\n";
+        cout << "Flags from: " << file << "\n";
+        file_out = true;
+      }
+      old_file = file;
+    }
+    cout << usage << "\n";
+    usage_out = true;
+  }
+  if (usage_out) cout << "\n";
+}
+
+// Prints the usage text recorded by SetFlags(), followed by the usage of the
+// registered flags. Flags defined in prog_src are listed under
+// "PROGRAM FLAGS" when prog_src is non-empty. With 'long_usage' set, the
+// remaining flags follow, grouped by defining file, under "LIBRARY FLAGS".
+//
+// NOTE(review): the FlagRegister template arguments were missing from this
+// patch text and are restored from the variable names -- confirm the
+// spellings against fst/flags.h.
+void ShowUsage(bool long_usage) {
+  std::set<std::pair<string, string>> usage_set;
+  cout << flag_usage << "\n";
+  auto bool_register = FlagRegister<bool>::GetRegister();
+  bool_register->GetUsage(&usage_set);
+  auto string_register = FlagRegister<string>::GetRegister();
+  string_register->GetUsage(&usage_set);
+  auto int32_register = FlagRegister<int32_t>::GetRegister();
+  int32_register->GetUsage(&usage_set);
+  auto int64_register = FlagRegister<int64_t>::GetRegister();
+  int64_register->GetUsage(&usage_set);
+  auto double_register = FlagRegister<double>::GetRegister();
+  double_register->GetUsage(&usage_set);
+  if (!prog_src.empty()) {
+    cout << "PROGRAM FLAGS:\n\n";
+    ShowUsageRestrict(usage_set, prog_src, true, false);
+  }
+  if (!long_usage) return;
+  if (!prog_src.empty()) cout << "LIBRARY FLAGS:\n\n";
+  ShowUsageRestrict(usage_set, prog_src, false, true);
+}